## Fetch hourly weather for trips
This notebook loads cleaned trip data, rounds start times to the nearest hour, queries Open-Meteo for temperature, rain, and weather code, and merges the results back to the trips.

In [7]:
import pandas as pd
import numpy as np
import requests_cache
from concurrent.futures import ThreadPoolExecutor, as_completed

# 1) Load the trips and build the merge keys.
df = pd.read_csv('cleaned_trip_data.csv', parse_dates=['start_time'])
# Both keys must come from the SAME rounded timestamp: a trip starting at
# 23:45 rounds up to 00:00 of the *next* day, so taking the date from the
# unrounded start_time would pair it with the weather from ~24 hours earlier.
# `date_only` is therefore the date of the rounded hour, not of start_time.
start_hour = df['start_time'].dt.round('h')
df['date_only']         = start_hour.dt.strftime('%Y-%m-%d')
df['time_only_rounded'] = start_hour.dt.strftime('%H:%M:%S')

# 2) Extract the unique (lat, lon) pairs
stations = (
    df[['start_lat','start_lon']]
      .drop_duplicates()
      .reset_index(drop=True)
)

# 3) Split into batches (e.g. 50 stations per request)
chunk_size = 50
batches = [
    stations.iloc[i:i+chunk_size].to_dict('records')
    for i in range(0, len(stations), chunk_size)
]

# 4) HTTP session with a response cache (entries expire after 1 h)
session = requests_cache.CachedSession('.cache', expire_after=3600)

# 5) Batch fetcher
def fetch_batch(batch, start_date=None, end_date=None, http_session=None):
    """Fetch hourly weather for one batch of stations from Open-Meteo.

    Parameters
    ----------
    batch : list of dict
        Records with the keys ``'start_lat'`` and ``'start_lon'``.
    start_date, end_date : str, optional
        Inclusive date range as ``'YYYY-MM-DD'``. Default to the min/max of
        the notebook-level ``df['date_only']``.
    http_session : optional
        Object exposing a requests-style ``get(url, params=..., timeout=...)``.
        Defaults to the notebook-level ``session``.

    Returns
    -------
    list of pandas.DataFrame
        One frame per station, in batch order, with the columns start_lat,
        start_lon, date_only, time_only_rounded, temperature_2m, rain and
        weather_code. An empty batch returns ``[]`` without a request.

    Raises
    ------
    ValueError
        If the API returns a different number of locations than requested.
    Also propagates whatever ``raise_for_status()`` raises on an HTTP error.
    """
    if not batch:
        return []

    # Fall back to the notebook-level globals only when nothing was passed in.
    if start_date is None:
        start_date = df['date_only'].min()
    if end_date is None:
        end_date = df['date_only'].max()
    if http_session is None:
        http_session = session

    params = {
        # Coordinates as comma-separated strings
        "latitude":   ",".join(str(p['start_lat']) for p in batch),
        "longitude":  ",".join(str(p['start_lon']) for p in batch),
        "start_date": start_date,
        "end_date":   end_date,
        "hourly":     "temperature_2m,rain,weathercode",
        "timezone":   "auto",   # → returns local ISO time strings
    }

    # JSON request; the timeout keeps a stalled connection from blocking
    # the worker thread forever.
    url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
    r = http_session.get(url, params=params, timeout=30)
    r.raise_for_status()
    js = r.json()

    # If the API result is a list use it as is, otherwise wrap the single
    # object in a list.
    forecasts = js if isinstance(js, list) else [js]
    if len(forecasts) != len(batch):
        raise ValueError(
            f"Open-Meteo returned {len(forecasts)} locations "
            f"for a batch of {len(batch)} stations"
        )

    records = []
    for loc, forecast in zip(batch, forecasts):
        data_hourly = forecast["hourly"]

        # Use this location's own time axis instead of assuming the first
        # location's axis applies to every point in the batch.
        times = pd.to_datetime(data_hourly["time"])
        n = len(times)

        # Build a mini DataFrame for this station. The value lists are passed
        # straight to pandas so JSON nulls become NaN in a numeric column
        # rather than None in an object column.
        df_loc = pd.DataFrame({
            "start_lat":         [loc['start_lat']] * n,
            "start_lon":         [loc['start_lon']] * n,
            "date_only":         times.strftime('%Y-%m-%d'),
            "time_only_rounded": times.strftime('%H:%M:%S'),
            "temperature_2m":    data_hourly["temperature_2m"],
            "rain":              data_hourly["rain"],
            "weather_code":      data_hourly["weathercode"],
        })
        records.append(df_loc)

    return records

# 6) Process all batches in parallel
all_dfs = []
with ThreadPoolExecutor(max_workers=5) as exe:
    # Executor.map yields results in submission order, so weather_full gets a
    # reproducible row order; an exception in a worker is re-raised here.
    for batch_frames in exe.map(fetch_batch, batches):
        all_dfs.extend(batch_frames)

if not all_dfs:
    raise ValueError("No weather data was fetched: the station list is empty")

# 7) Combine everything into one DataFrame. Keep one weather row per merge
#    key so the left merge below cannot multiply trip rows.
merge_keys = ['start_lat','start_lon','date_only','time_only_rounded']
weather_full = (
    pd.concat(all_dfs, ignore_index=True)
      .drop_duplicates(subset=merge_keys)
      .reset_index(drop=True)
)

# 8) Merge back into the original trip frame; `validate` makes pandas check
#    that each trip matches at most one weather row.
result = df.merge(
    weather_full,
    on=merge_keys,
    how='left',
    validate='many_to_one',
)

# 9) Sanity check (last expression → rich notebook display)
result.head()


   duration          start_time             end_time  start_station  \
0         5 2025-01-01 00:12:00  2025-01-01 00:17:00           3030   
1         7 2025-01-01 00:12:00  2025-01-01 00:19:00           4558   
2        11 2025-01-01 00:13:00  2025-01-01 00:24:00           4212   
3        11 2025-01-01 00:13:00  2025-01-01 00:24:00           4212   
4        13 2025-01-01 00:27:00  2025-01-01 00:40:00           4472   

   start_lat   start_lon  end_station    end_lat     end_lon  bike_id  \
0  34.051941 -118.243530         4491  34.047440 -118.247940    13668   
1  34.025688 -118.395302         4569  34.026550 -118.408463    30021   
2  33.988129 -118.471741         4206  33.998341 -118.461014    14923   
3  33.988129 -118.471741         4206  33.998341 -118.461014    26704   
4  34.092602 -118.280930         4509  34.101639 -118.309174    14790   

  bike_type   date_only time_only_rounded  temperature_2m  rain  weather_code  
0  standard  2025-01-01          00:00:00             

In [8]:
# Trips whose start hour recorded measurable rain
result.loc[result['rain'].gt(0)]

Unnamed: 0,duration,start_time,end_time,start_station,start_lat,start_lon,end_station,end_lat,end_lon,bike_id,bike_type,date_only,time_only_rounded,temperature_2m,rain,weather_code
2066,12,2025-01-03 08:57:00,2025-01-03 09:09:00,4652,34.027050,-118.485184,4546,34.031399,-118.453629,29600,electric,2025-01-03,09:00:00,10.0,0.1,51
2075,31,2025-01-03 09:09:00,2025-01-03 09:40:00,4652,34.027050,-118.485184,4652,34.027050,-118.485184,16086,standard,2025-01-03,09:00:00,10.0,0.1,51
23937,25,2025-01-25 21:57:00,2025-01-25 22:22:00,4573,34.030472,-118.420982,4575,34.000309,-118.402527,25378,electric,2025-01-25,22:00:00,12.0,1.7,61
23944,10,2025-01-25 22:10:00,2025-01-25 22:20:00,4549,34.022449,-118.438332,4564,34.035351,-118.434143,6419,standard,2025-01-25,22:00:00,12.1,1.7,61
23952,13,2025-01-25 22:29:00,2025-01-25 22:42:00,4553,33.999580,-118.441360,4207,34.000881,-118.468910,25429,electric,2025-01-25,22:00:00,12.7,0.1,51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94945,4,2025-03-31 08:12:00,2025-03-31 08:16:00,4248,34.028351,-118.288673,4245,34.024040,-118.283409,12285,standard,2025-03-31,08:00:00,13.6,0.1,51
94946,18,2025-03-31 08:17:00,2025-03-31 08:35:00,3068,34.053200,-118.250954,4245,34.024040,-118.283409,13868,standard,2025-03-31,08:00:00,13.0,0.2,51
94947,2,2025-03-31 08:19:00,2025-03-31 08:21:00,4254,34.028679,-118.284111,4273,34.025860,-118.284103,12443,standard,2025-03-31,08:00:00,13.6,0.1,51
94949,6,2025-03-31 08:21:00,2025-03-31 08:27:00,4273,34.025860,-118.284103,4249,34.020302,-118.281181,20246,standard,2025-03-31,08:00:00,13.6,0.1,51


In [9]:

# 1) ISO calendar week (1–53)
result['calendar_week'] = result['start_time'].dt.isocalendar()['week']

# 2) Weekday as a name and as a number (Monday=1 … Sunday=7)
result['weekday_name'] = result['start_time'].dt.day_name()          # e.g. "Monday"
result['weekday_num']  = result['start_time'].dt.dayofweek.add(1)    # dayofweek is 0-based (Monday=0)

# 3) Meteorological season (Winter, Spring, Summer, Fall)
seasons = dict(enumerate(['Winter', 'Spring', 'Summer', 'Fall']))
# (month mod 12) floor-divided by 3 → 0 for Dec–Feb, 1 for Mar–May,
# 2 for Jun–Aug, 3 for Sep–Nov
result['season'] = (
    result['start_time'].dt.month
      .mod(12)
      .floordiv(3)
      .map(seasons)
)

# 4) Time of day category
def time_of_day(hour):
    """Map an hour of the day to a coarse label.

    Half-open ranges: [5, 12) → 'Morning', [12, 17) → 'Afternoon',
    [17, 21) → 'Evening'; any other value → 'Night'.
    """
    day_parts = (
        (5, 12, 'Morning'),
        (12, 17, 'Afternoon'),
        (17, 21, 'Evening'),
    )
    for lower, upper, label in day_parts:
        if lower <= hour < upper:
            return label
    # Nothing matched: late evening or the early hours.
    return 'Night'

# Label every trip with the time-of-day category of its start hour
result['time_of_day'] = result['start_time'].dt.hour.map(time_of_day)

# Inspect the new columns
result[['start_time', 'calendar_week', 'weekday_name', 'weekday_num', 'season', 'time_of_day']].head()



In [10]:
# Print a summary of the enriched frame: column names, non-null counts and dtypes.
result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95916 entries, 0 to 95915
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   duration           95916 non-null  int64         
 1   start_time         95916 non-null  datetime64[ns]
 2   end_time           95916 non-null  object        
 3   start_station      95916 non-null  int64         
 4   start_lat          95916 non-null  float64       
 5   start_lon          95916 non-null  float64       
 6   end_station        95916 non-null  int64         
 7   end_lat            95916 non-null  float64       
 8   end_lon            95916 non-null  float64       
 9   bike_id            95916 non-null  int64         
 10  bike_type          95916 non-null  object        
 11  date_only          95916 non-null  object        
 12  time_only_rounded  95916 non-null  object        
 13  temperature_2m     95916 non-null  float64       
 14  rain  

In [11]:
# Preview rows 10–19 (positional slice) of the enriched frame
result.iloc[10:20]

Unnamed: 0,duration,start_time,end_time,start_station,start_lat,start_lon,end_station,end_lat,end_lon,bike_id,...,date_only,time_only_rounded,temperature_2m,rain,weather_code,calendar_week,weekday_name,weekday_num,season,time_of_day
10,43,2025-01-01 00:49:00,2025-01-01 01:32:00,4547,33.9897,-118.448883,4215,34.014309,-118.491341,12014,...,2025-01-01,01:00:00,12.4,0.0,3,1,Wednesday,3,Winter,Night
11,12,2025-01-01 00:52:00,2025-01-01 01:04:00,4211,33.984928,-118.469963,4553,33.99958,-118.44136,26417,...,2025-01-01,01:00:00,12.2,0.0,3,1,Wednesday,3,Winter,Night
12,20,2025-01-01 00:53:00,2025-01-01 01:13:00,4439,34.090969,-118.286247,4534,34.10186,-118.32811,21986,...,2025-01-01,01:00:00,7.8,0.0,3,1,Wednesday,3,Winter,Night
13,17,2025-01-01 00:53:00,2025-01-01 01:10:00,4672,34.045021,-118.253189,4447,34.0616,-118.28199,23796,...,2025-01-01,01:00:00,9.0,0.0,45,1,Wednesday,3,Winter,Night
14,14,2025-01-01 00:56:00,2025-01-01 01:10:00,4563,34.017448,-118.409569,4552,33.990971,-118.421303,26554,...,2025-01-01,01:00:00,11.9,0.0,3,1,Wednesday,3,Winter,Night
15,25,2025-01-01 00:58:00,2025-01-01 01:23:00,3042,34.049301,-118.238808,3026,34.063179,-118.24588,6330,...,2025-01-01,01:00:00,8.7,0.0,45,1,Wednesday,3,Winter,Night
16,4,2025-01-01 00:59:00,2025-01-01 01:03:00,3047,34.039982,-118.266403,3056,34.03746,-118.265381,13979,...,2025-01-01,01:00:00,10.5,0.0,45,1,Wednesday,3,Winter,Night
17,22,2025-01-01 01:02:00,2025-01-01 01:24:00,4548,34.026829,-118.393517,4554,34.00351,-118.43438,13837,...,2025-01-01,01:00:00,11.7,0.0,3,1,Wednesday,3,Winter,Night
18,16,2025-01-01 01:03:00,2025-01-01 01:19:00,3056,34.03746,-118.265381,3040,34.05357,-118.266357,30425,...,2025-01-01,01:00:00,10.5,0.0,45,1,Wednesday,3,Winter,Night
19,22,2025-01-01 01:05:00,2025-01-01 01:27:00,4547,33.9897,-118.448883,4210,33.984341,-118.47155,14088,...,2025-01-01,01:00:00,12.4,0.0,3,1,Wednesday,3,Winter,Night
