## Fetch hourly weather for trips
This notebook loads cleaned trip data, rounds start times to the nearest hour, queries Open-Meteo for temperature, rain, and weather code, and merges the results back to the trips.

In [None]:
import pandas as pd
import requests_cache
from retry_requests import retry
import openmeteo_requests

In [None]:
# HTTP session that stores responses in the local '.cache' store; entries
# are created with expire_after=3600.
cache_session = requests_cache.CachedSession(
    '.cache',
    expire_after=3600,
)
# Wrap the cached session so failed requests are retried (5 retries,
# backoff factor 0.2).
retry_session = retry(
    cache_session,
    retries=5,
    backoff_factor=0.2,
)
# Open-Meteo client used by the cells below; all calls go through the
# cached, retrying session.
openmeteo = openmeteo_requests.Client(session=retry_session)

In [None]:
# Load the cleaned trips; 'start_time' is parsed into datetimes (assumed to be
# naive Los Angeles wall-clock times, since they are tz-localized below).
df = pd.read_csv('jupyter/cleaned_trip_data.csv', parse_dates=['start_time'])
# Attach the Los Angeles time zone to the naive start times.
# NOTE(review): tz_localize raises by default for wall times that are ambiguous
# or nonexistent around DST changes - pass ambiguous=/nonexistent= if the data
# contains such rows.
local = df['start_time'].dt.tz_localize('America/Los_Angeles')
# Round to the nearest hour without re-localizing the rounded wall time.
# Rounding a tz-aware series works on wall-clock values and then localizes the
# result again, which raises when the rounded hour falls on a DST transition
# (e.g. 00:45 -> 01:00 on the fall-back day). Instead, measure how far each
# naive wall time moves when rounded and apply that shift to the UTC instant.
# Away from DST transitions this gives exactly the same values (ties included)
# as rounding the localized series.
round_shift = df['start_time'].dt.round('h') - df['start_time']
df['time_rounded'] = local.dt.tz_convert('UTC') + round_shift
# Display-only copy with the rounded time rendered as text; df itself keeps
# the tz-aware datetime column for the later cells.
df_display = df.copy()
df_display['time_rounded'] = df_display['time_rounded'].dt.strftime('%Y-%m-%d %H:%M:%S%z')
df_display.head()

In [None]:
# Restrict the run to the 50 earliest distinct rounded hours.
unique_hours = df['time_rounded'].drop_duplicates()
selected_hours = unique_hours.sort_values().head(50)

# Keep only the trips that start in one of those hours.
in_selected_hours = df['time_rounded'].isin(selected_hours)
filtered = df.loc[in_selected_hours]

# One row per distinct (latitude, longitude, hour) combination to request.
request_columns = ['start_lat', 'start_lon', 'time_rounded']
requests_df = filtered.loc[:, request_columns].drop_duplicates()
requests_df.head()

In [None]:
def fetch_hourly_weather(lat, lon, timestamp):
    """Fetch temperature, rain and weather code for one location and hour.

    Requests the whole UTC day that contains ``timestamp`` from the Open-Meteo
    forecast endpoint through the module-level ``openmeteo`` client, then picks
    the hourly entry whose time equals ``timestamp``.

    Args:
        lat: Latitude, sent as the ``latitude`` request parameter.
        lon: Longitude, sent as the ``longitude`` request parameter.
        timestamp: ``pandas.Timestamp`` of the hour to look up. Aware values
            are converted to UTC; naive values are interpreted as UTC.

    Returns:
        Tuple ``(temperature_2m, rain, weather_code)`` for the requested hour,
        taken from the hourly variables in the order they were requested.

    Raises:
        ValueError: If the response contains no entry for the requested time.
    """
    # NOTE(review): this is the forecast endpoint; confirm it serves the date
    # range of the trips (Open-Meteo has a separate archive endpoint).
    url = 'https://api.open-meteo.com/v1/forecast'
    # Normalize to UTC first so the requested day matches the 'UTC' timezone
    # of the request even when the caller passes a non-UTC aware timestamp.
    ts = timestamp.tz_convert('UTC') if timestamp.tzinfo else timestamp.tz_localize('UTC')
    day = ts.strftime('%Y-%m-%d')
    params = {
        'latitude': lat,
        'longitude': lon,
        'hourly': ['temperature_2m', 'rain', 'weather_code'],
        'start_date': day,
        'end_date': day,
        'timezone': 'UTC',
        'timeformat': 'unixtime'
    }
    responses = openmeteo.weather_api(url, params=params)
    hourly = responses[0].Hourly()
    # The SDK exposes the time axis as start / end / interval (all in
    # seconds), not as an array, so the individual timestamps are rebuilt
    # here; the end bound is exclusive.
    times = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit='s', utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit='s', utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive='left',
    )
    match = (times == ts).nonzero()[0]
    if len(match) == 0:
        raise ValueError(f'Requested time {ts} not available in API response')
    idx = int(match[0])
    # Variables are indexed in the same order as the 'hourly' request list.
    temperature = hourly.Variables(0).ValuesAsNumpy()[idx]
    rain = hourly.Variables(1).ValuesAsNumpy()[idx]
    code = hourly.Variables(2).ValuesAsNumpy()[idx]
    return temperature, rain, code

In [None]:
# Look up the weather once per distinct (lat, lon, hour) request and collect
# one record per request.
weather_records = []
request_rows = zip(
    requests_df['start_lat'],
    requests_df['start_lon'],
    requests_df['time_rounded'],
)
for req_lat, req_lon, req_hour in request_rows:
    temperature, rainfall, weather_code = fetch_hourly_weather(req_lat, req_lon, req_hour)
    record = {
        'start_lat': req_lat,
        'start_lon': req_lon,
        'time_rounded': req_hour,
        'temperature_2m': temperature,
        'rain': rainfall,
        'weather_code': weather_code,
    }
    weather_records.append(record)

weather_df = pd.DataFrame(weather_records)

# Left join keeps every filtered trip and attaches the weather fetched for its
# start location and rounded hour.
join_keys = ['start_lat', 'start_lon', 'time_rounded']
result = pd.merge(filtered, weather_df, how='left', on=join_keys)
result.head()