# Fetch Hourly Weather for Bike Trips
This notebook verifies code that collects hourly weather data from Open-Meteo and merges it with the cleaned bike trip dataset.

In [None]:
import pandas as pd
import numpy as np
import requests_cache
from retry_requests import retry
import openmeteo_requests
from concurrent.futures import ThreadPoolExecutor, as_completed

# Read cleaned trip data (adjust path if needed)
trip_df = pd.read_csv('cleaned_trip_data.csv', parse_dates=['start_time'])

# Treat start_time as local Los Angeles wall-clock time. DST transitions would
# otherwise raise: readings skipped by spring-forward are shifted to the next
# valid instant, and readings repeated by fall-back are taken as the first
# (DST) occurrence. The latter choice does not change the wall-clock strings
# produced below, it only avoids AmbiguousTimeError.
assume_dst = np.ones(len(trip_df), dtype=bool)
local_time = trip_df['start_time'].dt.tz_localize(
    'America/Los_Angeles', ambiguous=assume_dst, nonexistent='shift_forward'
)

# Round to the nearest hour FIRST and derive both merge keys from the rounded
# value, so that a 23:45 trip is keyed to 00:00 of the next day instead of
# 00:00 of its own day (which would pick up weather from ~24 hours earlier).
local_hour = local_time.dt.round(
    'h', ambiguous=assume_dst, nonexistent='shift_forward'
)
trip_df['date_only'] = local_hour.dt.strftime('%Y-%m-%d')
trip_df['time_only_rounded'] = local_hour.dt.strftime('%H:%M:%S')

# Unique start locations for weather queries. Rows with a missing coordinate
# are dropped here because they cannot be sent to the weather API.
stations = (
    trip_df[['start_lat', 'start_lon']]
    .dropna()
    .drop_duplicates()
    .reset_index(drop=True)
)
chunk_size = 50  # number of locations per API request
batches = [stations.iloc[i:i+chunk_size].to_dict('records')
           for i in range(0, len(stations), chunk_size)]

# HTTP session with an on-disk response cache (entries expire after 3600 s),
# wrapped so failed requests are retried up to 5 times with backoff.
cache = requests_cache.CachedSession('.cache', expire_after=3600)
client = openmeteo_requests.Client(session=retry(cache, retries=5, backoff_factor=0.2))


In [None]:
# Helper to request one batch of locations from Open-Meteo
def fetch_batch(batch, tz='America/Los_Angeles'):
    """Fetch hourly weather for one batch of start locations from Open-Meteo.

    Reads the module-level ``trip_df`` (its ``date_only`` column supplies the
    requested date range) and the module-level ``client``.

    Parameters
    ----------
    batch : list of dict
        One record per location, each with ``'start_lat'`` and ``'start_lon'``.
    tz : str, default 'America/Los_Angeles'
        Time zone used to express the returned ``date_only`` and
        ``time_only_rounded`` keys as local wall-clock strings.

    Returns
    -------
    list of pandas.DataFrame
        One frame per location with columns ``start_lat``, ``start_lon``,
        ``date_only``, ``time_only_rounded``, ``temperature_2m``, ``rain`` and
        ``weather_code``. An empty batch yields an empty list.

    Raises
    ------
    ValueError
        If the API returns a different number of responses than the number of
        locations requested (pairing them up would mislabel the data).
    """
    if not batch:
        # Nothing to request; avoid sending empty coordinate lists to the API.
        return []

    latitudes = [p['start_lat'] for p in batch]
    longitudes = [p['start_lon'] for p in batch]
    params = {
        'latitude': latitudes,
        'longitude': longitudes,
        'start_date': trip_df['date_only'].min(),
        'end_date': trip_df['date_only'].max(),
        'hourly': ['temperature_2m', 'rain', 'weather_code'],
        'timezone': 'auto',
        'timeformat': 'iso8601'
    }
    responses = client.weather_api(
        'https://historical-forecast-api.open-meteo.com/v1/forecast',
        params=params
    )

    # zip() would silently truncate on a length mismatch and attach weather to
    # the wrong coordinates, so fail loudly instead.
    if len(responses) != len(batch):
        raise ValueError(
            f'Expected {len(batch)} weather responses, got {len(responses)}'
        )

    records = []
    for loc, resp in zip(batch, responses):
        hr = resp.Hourly()
        start_epoch = hr.Time()
        interval = hr.Interval()
        # Variables are indexed in the order they were listed under 'hourly'.
        temp = np.atleast_1d(hr.Variables(0).ValuesAsNumpy())
        rain = np.atleast_1d(hr.Variables(1).ValuesAsNumpy())
        code = np.atleast_1d(hr.Variables(2).ValuesAsNumpy())
        n = len(temp)
        # The series start is a Unix epoch in UTC. Interpret it as UTC and
        # convert to local time; formatting the naive value directly would
        # yield UTC wall-clock strings that are hours off from local keys.
        times = pd.date_range(
            start=pd.to_datetime(start_epoch, unit='s', utc=True),
            periods=n,
            freq=pd.Timedelta(seconds=interval)
        ).tz_convert(tz)
        df_loc = pd.DataFrame({
            'start_lat': [loc['start_lat']] * n,
            'start_lon': [loc['start_lon']] * n,
            'date_only': times.strftime('%Y-%m-%d'),
            'time_only_rounded': times.strftime('%H:%M:%S'),
            'temperature_2m': temp,
            'rain': rain,
            'weather_code': code,
        })
        # A DST fall-back repeats one local wall-clock hour; keep the first
        # occurrence so each (date, time) key appears once per location.
        df_loc = df_loc.drop_duplicates(
            subset=['date_only', 'time_only_rounded'], keep='first'
        )
        records.append(df_loc)
    return records


In [None]:
# Fan the location batches out over a small thread pool (API access required)
all_records = []
with ThreadPoolExecutor(max_workers=5) as pool:
    futures = [pool.submit(fetch_batch, chunk) for chunk in batches]
    # Gather the per-location frames in whatever order the requests finish;
    # .result() re-raises any exception from the worker.
    for finished in as_completed(futures):
        all_records += finished.result()

# Stack every per-location frame into one long weather table
weather_df = pd.concat(all_records, ignore_index=True)

# Attach weather to each trip by location and rounded local hour; trips with
# no matching weather row are kept with missing weather values (left join).
join_keys = ['start_lat', 'start_lon', 'date_only', 'time_only_rounded']
result = pd.merge(trip_df, weather_df, how='left', on=join_keys)
result.head()
