## Fetch hourly weather for trips
This notebook loads the cleaned trip data, rounds each trip's start time to the nearest hour, queries the Open-Meteo historical forecast API for hourly temperature, rain, and weather code at every unique start location, and merges the results back onto the trips.

In [None]:
import pandas as pd
import numpy as np
import requests_cache
from concurrent.futures import ThreadPoolExecutor, as_completed

# 1) Load the trips and build the merge keys.
#    Both keys are derived from the SAME rounded timestamp: a trip starting at
#    23:45 rounds to 00:00 of the *next* day, so taking the date from the
#    unrounded start_time would pair it with the previous midnight's reading.
df = pd.read_csv('cleaned_trip_data.csv', parse_dates=['start_time'])
start_hour = df['start_time'].dt.round('h')
df['date_only']         = start_hour.dt.strftime('%Y-%m-%d')
df['time_only_rounded'] = start_hour.dt.strftime('%H:%M:%S')

# 2) Extract the unique (lat, lon) pairs.
#    Rows with a missing coordinate are dropped here: they would otherwise be
#    serialized as "nan" inside the request's coordinate list.
stations = (
    df[['start_lat','start_lon']]
      .dropna()
      .drop_duplicates()
      .reset_index(drop=True)
)

# 3) Split into batches (e.g. 50 stations per request)
chunk_size = 50
batches = [
    stations.iloc[i:i+chunk_size].to_dict('records')
    for i in range(0, len(stations), chunk_size)
]

# 4) HTTP session with a response cache (entries expire after 3600 s)
session = requests_cache.CachedSession('.cache', expire_after=3600)

# 5) Batch-Fetcher
def fetch_batch(batch, start_date=None, end_date=None, http_session=None, timeout=60):
    """Fetch hourly weather for one batch of locations from Open-Meteo.

    Parameters
    ----------
    batch : list[dict]
        Records with the keys ``'start_lat'`` and ``'start_lon'``.
    start_date, end_date : str, optional
        Date range sent to the API. Default to the minimum / maximum of the
        module-level ``df['date_only']``.
    http_session : object, optional
        Object exposing ``get(url, params=..., timeout=...)``. Defaults to the
        module-level ``session``.
    timeout : float, optional
        Request timeout in seconds, so a stalled connection cannot block a
        worker thread forever.

    Returns
    -------
    list[pandas.DataFrame]
        One frame per location with the columns ``start_lat``, ``start_lon``,
        ``date_only``, ``time_only_rounded``, ``temperature_2m``, ``rain`` and
        ``weather_code``. An empty batch yields an empty list.

    Raises
    ------
    ValueError
        If the number of forecasts in the response differs from the number of
        locations in the batch.
    Exception
        Whatever ``raise_for_status()`` of the response raises on HTTP errors.
    """
    if not batch:
        return []

    # Fall back to the notebook-level globals only when nothing was injected.
    if start_date is None:
        start_date = df['date_only'].min()
    if end_date is None:
        end_date = df['date_only'].max()
    if http_session is None:
        http_session = session

    params = {
        # Coordinates are sent as comma-separated strings.
        "latitude":   ",".join(str(p['start_lat']) for p in batch),
        "longitude":  ",".join(str(p['start_lon']) for p in batch),
        "start_date": start_date,
        "end_date":   end_date,
        "hourly":     "temperature_2m,rain,weathercode",
        "timezone":   "auto",   # timestamps come back as local ISO strings
    }

    url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
    response = http_session.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    payload = response.json()

    # A multi-location response is a list; a single location is a bare object.
    forecasts = payload if isinstance(payload, list) else [payload]
    if len(forecasts) != len(batch):
        raise ValueError(
            f"expected {len(batch)} forecasts, got {len(forecasts)}"
        )

    frames = []
    for loc, forecast in zip(batch, forecasts):
        hourly = forecast["hourly"]

        # Use this location's own time axis instead of assuming that the
        # first location's axis applies to every location in the batch.
        times = pd.to_datetime(hourly["time"])

        # Plain lists are handed to pandas so JSON nulls become NaN in a
        # numeric column (np.array would produce an object column instead).
        frames.append(pd.DataFrame({
            "start_lat":         loc['start_lat'],
            "start_lon":         loc['start_lon'],
            "date_only":         times.strftime('%Y-%m-%d'),
            "time_only_rounded": times.strftime('%H:%M:%S'),
            "temperature_2m":    hourly["temperature_2m"],
            "rain":              hourly["rain"],
            "weather_code":      hourly["weathercode"],
        }))

    return frames

# 6) Fetch all batches in parallel
MAX_WORKERS = 5
all_dfs = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as exe:
    futures = [exe.submit(fetch_batch, b) for b in batches]
    for fut in as_completed(futures):
        # result() re-raises any exception raised inside the worker
        all_dfs.extend(fut.result())

merge_keys = ['start_lat', 'start_lon', 'date_only', 'time_only_rounded']
weather_cols = ['temperature_2m', 'rain', 'weather_code']

if all_dfs:
    # 7) Combine into one DataFrame. Keys are de-duplicated so the left merge
    #    below stays many-to-one and cannot multiply trip rows.
    weather_full = (
        pd.concat(all_dfs, ignore_index=True)
          .drop_duplicates(subset=merge_keys)
          .reset_index(drop=True)
    )

    # 8) Merge back onto the original trip frame
    result = df.merge(weather_full, on=merge_keys, how='left')
else:
    # Nothing was fetched (no stations): pd.concat([]) would raise, so keep
    # the trips and add empty weather columns instead.
    weather_full = pd.DataFrame(columns=merge_keys + weather_cols)
    result = df.assign(**{col: np.nan for col in weather_cols})

assert len(result) == len(df), "weather merge changed the number of trip rows"

# 9) Sanity check (last expression -> rich notebook display)
result.head()


In [None]:
# Trips whose rounded start hour had measurable rain
result.loc[result['rain'].gt(0)]

Unnamed: 0,duration,start_time,end_time,start_station,start_lat,start_lon,end_station,end_lat,end_lon,bike_id,bike_type,date_only,time_only_rounded,temperature_2m,rain,weather_code
2066,12,2025-01-03 08:57:00,2025-01-03 09:09:00,4652,34.027050,-118.485184,4546,34.031399,-118.453629,29600,electric,2025-01-03,09:00:00,10.0,0.1,51
2075,31,2025-01-03 09:09:00,2025-01-03 09:40:00,4652,34.027050,-118.485184,4652,34.027050,-118.485184,16086,standard,2025-01-03,09:00:00,10.0,0.1,51
23937,25,2025-01-25 21:57:00,2025-01-25 22:22:00,4573,34.030472,-118.420982,4575,34.000309,-118.402527,25378,electric,2025-01-25,22:00:00,12.0,1.7,61
23944,10,2025-01-25 22:10:00,2025-01-25 22:20:00,4549,34.022449,-118.438332,4564,34.035351,-118.434143,6419,standard,2025-01-25,22:00:00,12.1,1.7,61
23952,13,2025-01-25 22:29:00,2025-01-25 22:42:00,4553,33.999580,-118.441360,4207,34.000881,-118.468910,25429,electric,2025-01-25,22:00:00,12.7,0.1,51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94945,4,2025-03-31 08:12:00,2025-03-31 08:16:00,4248,34.028351,-118.288673,4245,34.024040,-118.283409,12285,standard,2025-03-31,08:00:00,13.6,0.1,51
94946,18,2025-03-31 08:17:00,2025-03-31 08:35:00,3068,34.053200,-118.250954,4245,34.024040,-118.283409,13868,standard,2025-03-31,08:00:00,13.0,0.2,51
94947,2,2025-03-31 08:19:00,2025-03-31 08:21:00,4254,34.028679,-118.284111,4273,34.025860,-118.284103,12443,standard,2025-03-31,08:00:00,13.6,0.1,51
94949,6,2025-03-31 08:21:00,2025-03-31 08:27:00,4273,34.025860,-118.284103,4249,34.020302,-118.281181,20246,standard,2025-03-31,08:00:00,13.6,0.1,51
