# Data Exploration & Metrics - NYC Mobility & Weather Analytics

## Setup

In [11]:
import duckdb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import numpy as np

plt.style.use('default')
PROJECT_ROOT = Path.cwd().parent
DB_PATH = PROJECT_ROOT / 'data' / 'nyc_mobility.duckdb'
conn = duckdb.connect(str(DB_PATH), read_only=True)
print('Connected to DuckDB')

Connected to DuckDB


## Hourly Patterns

In [12]:
# CORRECT: Using t.hour (not hour_of_day)
hourly = conn.execute("""
    SELECT t.hour, ft.trip_type, COUNT(*) as trips
    FROM core_core.fct_trips ft
    JOIN core_core.dim_time t ON ft.time_key = t.time_key
    GROUP BY t.hour, ft.trip_type
""").fetchdf()
display(hourly.head())

Unnamed: 0,hour,trip_type,trips
0,11,yellow_taxi,596211
1,15,yellow_taxi,465586
2,21,yellow_taxi,66040
3,0,fhv,136737
4,9,fhv,132777


## Daily Trends

In [13]:
# CORRECT: Using d.date (not date_actual)
daily = conn.execute("""
    SELECT d.date, ft.trip_type, COUNT(*) as trips
    FROM core_core.fct_trips ft
    JOIN core_core.dim_date d ON ft.date_key = d.date_key
    GROUP BY d.date, ft.trip_type
""").fetchdf()
display(daily.head())

Unnamed: 0,date,trip_type,trips
0,2025-10-18,yellow_taxi,152824
1,2025-10-24,yellow_taxi,164415
2,2025-10-27,yellow_taxi,120407
3,2025-11-16,yellow_taxi,112250
4,2025-11-11,yellow_taxi,129609


## Day of Week

In [14]:
# CORRECT: Using d.day_name (not day_of_week_name)
dow = conn.execute("""
    SELECT d.day_name, ft.trip_type, COUNT(*) as trips
    FROM core_core.fct_trips ft
    JOIN core_core.dim_date d ON ft.date_key = d.date_key
    GROUP BY d.day_name, ft.trip_type
""").fetchdf()
display(dow.head())

Unnamed: 0,day_name,trip_type,trips
0,Sunday,yellow_taxi,979004
1,Tuesday,yellow_taxi,1099225
2,Wednesday,fhv,441727
3,Thursday,fhv,437656
4,Tuesday,citibike,189445


In [None]:
conn.close()
print('Connection closed')