##### Client
----
fetch weather data from API, capture ingestion + API metadata, handle errors, support concurrent requests for multiple locations


In [8]:
import asyncio
from datetime import datetime, timezone
from typing import List, Optional

import httpx

In [5]:
async def fetch_weather_single(
        latitude: float,
        longitude: float,
        hourly_variables: list[str] | None = None,
        daily_variables: list[str] | None = None,
        forecast_days: int = 7,
        timezone_str: str = "UTC",
        base_url: str = "https://api.open-meteo.com/v1/forecast",
        timeout: int = 30,
    ) -> dict:

    """ Fetch weather data from a single location
    
    Returns dict with:
    -  data: API response
    -  ingestion_metadata: client-side metadata
    -  api_metadata: API-side metadata from response
    """

    params = {
        "latitude": latitude,
        "longitude": longitude,
        "forecast_days": forecast_days,
        "timezone": timezone_str,
    }

    if hourly_variables:
        params["hourly"] = ",".join(hourly_variables)
    if daily_variables:
        params["daily"] = ",".join(daily_variables)

    # record timestamp before request
    ingestion_timestamp = datetime.now(timezone.utc)

    # make async request
    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.get(base_url, params=params)
    
    raw_data = response.json()

    # Error checks
    if raw_data.get("error"):
        raise ValueError(f"API Error: {raw_data['reason']}")
    
    # Build return dict with metadata
    return {
        "data": raw_data,
        "ingestion_metadata": {
            "timestamp_utc": ingestion_timestamp.isoformat(),
            "request_url": str(response.url),
            "elapsed_ms": response.elapsed.total_seconds() * 1000,
            "status_code": response.status_code,
        },

        "api_metadata": {
            "latitude": raw_data["latitude"],
            "longitude": raw_data["longitude"],
            "elevation": raw_data["elevation"],
            "generationtime_ms": raw_data["generationtime_ms"],
            "timezone": raw_data["timezone"],
            "utc_offset_seconds": raw_data["utc_offset_seconds"],
        },
    }

# define async function to fetch multiple locations
async def fetch_weather_multiple(
        locations: list[dict],
        hourly_variables: list[str] | None = None,
        daily_variables: list[str] | None = None,
        max_concurrent_requests: int = 5,
    ) -> list[dict] | Exception:

    """ Fetch weather data for multiple locations concurrently 

        Returns list of reslts or raises Exception (one per location).
    
    """

    semaphore = asyncio.Semaphore(max_concurrent_requests)

    async def fetch_with_limit(location: dict) -> dict | Exception:
        async with semaphore:
            try:
                result = await fetch_weather_single(
                    latitude=location["latitude"],
                    longitude=location["longitude"],
                    hourly_variables=hourly_variables,
                    daily_variables=daily_variables,
                )
                # Add mapping info so data can be traced back to location
                result["location"] = location 
                return result
            except Exception as e:
                # If one fails, return error and location with issue
                return {"error": str(e), "location": location, "status": "failed"}

    tasks = [fetch_with_limit(location) for location in locations]
    results = await asyncio.gather(*tasks, return_exceptions=True)
    return results

In [68]:
# single location 
result = await fetch_weather_single(51.5074, -0.1278, hourly_variables=["temperature_2m"])
print(result)

{'data': {'latitude': 51.5, 'longitude': -0.120000124, 'generationtime_ms': 0.051975250244140625, 'utc_offset_seconds': 0, 'timezone': 'GMT', 'timezone_abbreviation': 'GMT', 'elevation': 16.0, 'hourly_units': {'time': 'iso8601', 'temperature_2m': '°C'}, 'hourly': {'time': ['2025-12-23T00:00', '2025-12-23T01:00', '2025-12-23T02:00', '2025-12-23T03:00', '2025-12-23T04:00', '2025-12-23T05:00', '2025-12-23T06:00', '2025-12-23T07:00', '2025-12-23T08:00', '2025-12-23T09:00', '2025-12-23T10:00', '2025-12-23T11:00', '2025-12-23T12:00', '2025-12-23T13:00', '2025-12-23T14:00', '2025-12-23T15:00', '2025-12-23T16:00', '2025-12-23T17:00', '2025-12-23T18:00', '2025-12-23T19:00', '2025-12-23T20:00', '2025-12-23T21:00', '2025-12-23T22:00', '2025-12-23T23:00', '2025-12-24T00:00', '2025-12-24T01:00', '2025-12-24T02:00', '2025-12-24T03:00', '2025-12-24T04:00', '2025-12-24T05:00', '2025-12-24T06:00', '2025-12-24T07:00', '2025-12-24T08:00', '2025-12-24T09:00', '2025-12-24T10:00', '2025-12-24T11:00', '2025-

In [65]:
# multiple locations
locations = [
    {"name": "London", "latitude": 51.5074, "longitude": -0.1278},
    {"name": "New York", "latitude": 40.7128, "longitude": -74.0060},
    {"name": "Tokyo", "latitude": 35.6895, "longitude": 139.6917},
]
results = await fetch_weather_multiple(
    locations,
    hourly_variables=["temperature_2m", "precipitation"],
)
print(results)

[{'data': {'latitude': 51.5, 'longitude': -0.120000124, 'generationtime_ms': 0.15413761138916016, 'utc_offset_seconds': 0, 'timezone': 'GMT', 'timezone_abbreviation': 'GMT', 'elevation': 16.0, 'hourly_units': {'time': 'iso8601', 'temperature_2m': '°C', 'precipitation': 'mm'}, 'hourly': {'time': ['2025-12-23T00:00', '2025-12-23T01:00', '2025-12-23T02:00', '2025-12-23T03:00', '2025-12-23T04:00', '2025-12-23T05:00', '2025-12-23T06:00', '2025-12-23T07:00', '2025-12-23T08:00', '2025-12-23T09:00', '2025-12-23T10:00', '2025-12-23T11:00', '2025-12-23T12:00', '2025-12-23T13:00', '2025-12-23T14:00', '2025-12-23T15:00', '2025-12-23T16:00', '2025-12-23T17:00', '2025-12-23T18:00', '2025-12-23T19:00', '2025-12-23T20:00', '2025-12-23T21:00', '2025-12-23T22:00', '2025-12-23T23:00', '2025-12-24T00:00', '2025-12-24T01:00', '2025-12-24T02:00', '2025-12-24T03:00', '2025-12-24T04:00', '2025-12-24T05:00', '2025-12-24T06:00', '2025-12-24T07:00', '2025-12-24T08:00', '2025-12-24T09:00', '2025-12-24T10:00', '20

In [50]:
for result in results:
        print(result["ingestion_metadata"])
        print(result["api_metadata"])

{'timestamp_utc': '2025-12-23T20:28:49.054956+00:00', 'request_url': 'https://api.open-meteo.com/v1/forecast?latitude=51.5074&longitude=-0.1278&forecast_days=7&timezone=UTC&hourly=temperature_2m%2Cprecipitation', 'elapsed_ms': 502.07900000000006, 'status_code': 200}
{'latitude': 51.5, 'longitude': -0.120000124, 'elevation': 16.0, 'generationtime_ms': 0.1392364501953125, 'timezone': 'GMT', 'utc_offset_seconds': 0}
{'timestamp_utc': '2025-12-23T20:28:49.250654+00:00', 'request_url': 'https://api.open-meteo.com/v1/forecast?latitude=40.7128&longitude=-74.006&forecast_days=7&timezone=UTC&hourly=temperature_2m%2Cprecipitation', 'elapsed_ms': 315.605, 'status_code': 200}
{'latitude': 40.710335, 'longitude': -73.99309, 'elevation': 32.0, 'generationtime_ms': 0.0756978988647461, 'timezone': 'GMT', 'utc_offset_seconds': 0}
{'timestamp_utc': '2025-12-23T20:28:49.436213+00:00', 'request_url': 'https://api.open-meteo.com/v1/forecast?latitude=35.6895&longitude=139.6917&forecast_days=7&timezone=UTC&h

In [None]:
import polars as pl

df = pl.from_dicts(results)

df_full = (
    df
    .unnest("data")
    .unnest("hourly")
    .explode(["temperature_2m", "precipitation", "time"])
    
    .with_columns([
        pl.col("api_metadata").struct.rename_fields([f"api_{f}" for f in df.schema["api_metadata"].fields]),
        pl.col("ingestion_metadata").struct.rename_fields([f"ingest_{f}" for f in df.schema["ingestion_metadata"].fields]),
        pl.col("location").struct.rename_fields([f"loc_{f}" for f in df.schema["location"].fields]),
    ])
    .unnest("api_metadata", "ingestion_metadata", "location")
)

df_full.tail(3)

latitude,longitude,generationtime_ms,utc_offset_seconds,timezone,timezone_abbreviation,elevation,hourly_units,time,temperature_2m,precipitation,"ingest_Field('timestamp_utc', String)","ingest_Field('request_url', String)","ingest_Field('elapsed_ms', Float64)","ingest_Field('status_code', Int64)","api_Field('latitude', Float64)","api_Field('longitude', Float64)","api_Field('elevation', Float64)","api_Field('generationtime_ms', Float64)","api_Field('timezone', String)","api_Field('utc_offset_seconds', Int64)","loc_Field('name', String)","loc_Field('latitude', Float64)","loc_Field('longitude', Float64)"
f64,f64,f64,i64,str,str,f64,struct[3],str,f64,f64,str,str,f64,i64,f64,f64,f64,f64,str,i64,str,f64,f64
35.7,139.6875,0.110984,0,"""GMT""","""GMT""",40.0,"{""iso8601"",""°C"",""mm""}","""2025-12-29T21:00""",0.3,0.0,"""2025-12-23T20:28:49.436213+00:…","""https://api.open-meteo.com/v1/…",125.809,200,35.7,139.6875,40.0,0.110984,"""GMT""",0,"""Tokyo""",35.6895,139.6917
35.7,139.6875,0.110984,0,"""GMT""","""GMT""",40.0,"{""iso8601"",""°C"",""mm""}","""2025-12-29T22:00""",0.6,0.0,"""2025-12-23T20:28:49.436213+00:…","""https://api.open-meteo.com/v1/…",125.809,200,35.7,139.6875,40.0,0.110984,"""GMT""",0,"""Tokyo""",35.6895,139.6917
35.7,139.6875,0.110984,0,"""GMT""","""GMT""",40.0,"{""iso8601"",""°C"",""mm""}","""2025-12-29T23:00""",1.2,0.0,"""2025-12-23T20:28:49.436213+00:…","""https://api.open-meteo.com/v1/…",125.809,200,35.7,139.6875,40.0,0.110984,"""GMT""",0,"""Tokyo""",35.6895,139.6917


In [56]:
df_full.schema

Schema([('latitude', Float64),
        ('longitude', Float64),
        ('generationtime_ms', Float64),
        ('utc_offset_seconds', Int64),
        ('timezone', String),
        ('timezone_abbreviation', String),
        ('elevation', Float64),
        ('hourly_units',
         Struct({'time': String, 'temperature_2m': String, 'precipitation': String})),
        ('time', String),
        ('temperature_2m', Float64),
        ('precipitation', Float64),
        ("ingest_Field('timestamp_utc', String)", String),
        ("ingest_Field('request_url', String)", String),
        ("ingest_Field('elapsed_ms', Float64)", Float64),
        ("ingest_Field('status_code', Int64)", Int64),
        ("api_Field('latitude', Float64)", Float64),
        ("api_Field('longitude', Float64)", Float64),
        ("api_Field('elevation', Float64)", Float64),
        ("api_Field('generationtime_ms', Float64)", Float64),
        ("api_Field('timezone', String)", String),
        ("api_Field('utc_offset_seconds

In [61]:
tokyo_df = df_full.filter(pl.col("loc_Field('name', String)") == "Tokyo")
tokyo_df.head(1)

latitude,longitude,generationtime_ms,utc_offset_seconds,timezone,timezone_abbreviation,elevation,hourly_units,time,temperature_2m,precipitation,"ingest_Field('timestamp_utc', String)","ingest_Field('request_url', String)","ingest_Field('elapsed_ms', Float64)","ingest_Field('status_code', Int64)","api_Field('latitude', Float64)","api_Field('longitude', Float64)","api_Field('elevation', Float64)","api_Field('generationtime_ms', Float64)","api_Field('timezone', String)","api_Field('utc_offset_seconds', Int64)","loc_Field('name', String)","loc_Field('latitude', Float64)","loc_Field('longitude', Float64)"
f64,f64,f64,i64,str,str,f64,struct[3],str,f64,f64,str,str,f64,i64,f64,f64,f64,f64,str,i64,str,f64,f64
35.7,139.6875,0.110984,0,"""GMT""","""GMT""",40.0,"{""iso8601"",""°C"",""mm""}","""2025-12-23T00:00""",3.2,0.0,"""2025-12-23T20:28:49.436213+00:…","""https://api.open-meteo.com/v1/…",125.809,200,35.7,139.6875,40.0,0.110984,"""GMT""",0,"""Tokyo""",35.6895,139.6917


##### Transform
----

Convert nested API response into flat rows to be stored later in pipeline.


In [29]:
import polars as pl

In [77]:
def transform_hourly(result: dict) -> pl.DataFrame:
    """ Transform hourly API response to flat DataFrame """

    data = result["data"]
    hourly = data["hourly"]
    ingestion_metadata = result["ingestion_metadata"]
    api_metadata = result["api_metadata"]
    location = result.get("location", {})

    # Build Dataframe from hourly data - start with time
    data = {"time": hourly["time"]}

    # Add each variable (except time)
    for key, values in hourly.items():
        if key != "time":
            data[key] = values
    
    df = pl.DataFrame(data)

    # Parse time column to datetime
    df = df.with_columns(pl.col("time").str.to_datetime().alias("time"))

    # Add location cols 
    df = df.with_columns(
        pl.lit(location.get("name", "unknown")).alias("location_name"),
        pl.lit(location.get("latitude")).alias("latitude"),
        pl.lit(location.get("longitude")).alias("longitude"),
    )

    # Add API metadata cols
    df = df.with_columns(
        pl.lit(api_metadata["latitude"]).alias("api_latitude"),
        pl.lit(api_metadata["longitude"]).alias("api_longitude"),
        pl.lit(api_metadata["elevation"]).alias("api_elevation"),
        pl.lit(api_metadata["timezone"]).alias("api_timezone"),
        pl.lit(api_metadata["generationtime_ms"]).alias("api_generationtime_ms"),
    )

    # Add ingestion metadata cols
    df = df.with_columns(
        pl.lit(ingestion_metadata["timestamp_utc"]).alias("ingestion_timestamp_utc"),
        pl.lit(ingestion_metadata["elapsed_ms"]).alias("ingest_elapsed_ms"),
        pl.lit(ingestion_metadata["request_url"]).alias("request_url"),
    )

    return df


def transform_daily(result: dict) -> pl.DataFrame:
    """ Transform daily API response to flat DataFrame """
   
    data = result["data"]
    daily = data["daily"]
    ingestion = result["ingestion_metadata"]
    api_meta = result["api_metadata"]
    location = result.get("location", {})
    
    # Build Dataframe from daily data - start with time
    data = {"date": daily["time"]}
    
    # Add each variable (except time)
    for key, values in daily.items():
        if key != "time":
            data[key] = values
    
    df = pl.DataFrame(data)
    
    # Parse date column to datetime
    df = df.with_columns(pl.col("date").str.to_date().alias("date"))
    
    # Add location cols
    df = df.with_columns(
        pl.lit(location.get("name", "unknown")).alias("location_name"),
        pl.lit(location.get("latitude")).alias("requested_latitude"),
        pl.lit(location.get("longitude")).alias("requested_longitude"),
    )
    
    # Add API metadata cols
    df = df.with_columns(
        pl.lit(api_meta["latitude"]).alias("api_latitude"),
        pl.lit(api_meta["longitude"]).alias("api_longitude"),
        pl.lit(api_meta["elevation"]).alias("api_elevation"),
        pl.lit(api_meta["timezone"]).alias("api_timezone"),
        pl.lit(api_meta["generationtime_ms"]).alias("api_generationtime_ms"),
    )
    
    # Add ingestion metadata cols
    df = df.with_columns(
        pl.lit(ingestion["timestamp_utc"]).alias("ingestion_timestamp"),
        pl.lit(ingestion["elapsed_ms"]).alias("request_elapsed_ms"),
        pl.lit(ingestion["request_url"]).alias("request_url"),
    )
    
    return df


def get_partition_path(location_name: str, interval: str, ingestion_timestamp: str) -> str:
    """ Generate partition path for storage.
    
    Format: {interval}/location={name}/date={YYYY-MM-DD}/
    """
    dt = datetime.fromisoformat(ingestion_timestamp.replace("Z", "+00:00"))
    date_str = dt.strftime("%Y-%m-%d")
    return f"{interval}/location={location_name}/date={date_str}"
    

In [88]:
# single location 
result_single_hourly = await fetch_weather_single(51.5074, -0.1278, hourly_variables=["temperature_2m"])
result_single_daily = await fetch_weather_single(51.5074, -0.1278, daily_variables=["temperature_2m_max", "temperature_2m_min"])

# multiple locations - locations already defined above
results_multiple_hourly = await fetch_weather_multiple(
    locations,
    hourly_variables=["temperature_2m", "precipitation"],
)

results_multiple_daily = await fetch_weather_multiple(
    locations,
    daily_variables=["temperature_2m_max", "temperature_2m_min", "precipitation_sum"],
)

In [92]:
# Single locations dfs
df_single_hourly = transform_hourly(result_single_hourly)
df_single_daily = transform_daily(result_single_daily)

In [94]:
df_single_daily

date,temperature_2m_max,temperature_2m_min,location_name,requested_latitude,requested_longitude,api_latitude,api_longitude,api_elevation,api_timezone,api_generationtime_ms,ingestion_timestamp,request_elapsed_ms,request_url
date,f64,f64,str,null,null,f64,f64,f64,str,f64,str,f64,str
2025-12-23,9.0,6.1,"""unknown""",,,51.5,-0.12,16.0,"""GMT""",0.085354,"""2025-12-23T21:17:03.825919+00:…",101.569,"""https://api.open-meteo.com/v1/…"
2025-12-24,6.2,4.2,"""unknown""",,,51.5,-0.12,16.0,"""GMT""",0.085354,"""2025-12-23T21:17:03.825919+00:…",101.569,"""https://api.open-meteo.com/v1/…"
2025-12-25,4.9,1.5,"""unknown""",,,51.5,-0.12,16.0,"""GMT""",0.085354,"""2025-12-23T21:17:03.825919+00:…",101.569,"""https://api.open-meteo.com/v1/…"
2025-12-26,5.4,1.6,"""unknown""",,,51.5,-0.12,16.0,"""GMT""",0.085354,"""2025-12-23T21:17:03.825919+00:…",101.569,"""https://api.open-meteo.com/v1/…"
2025-12-27,8.3,2.0,"""unknown""",,,51.5,-0.12,16.0,"""GMT""",0.085354,"""2025-12-23T21:17:03.825919+00:…",101.569,"""https://api.open-meteo.com/v1/…"
2025-12-28,7.8,1.7,"""unknown""",,,51.5,-0.12,16.0,"""GMT""",0.085354,"""2025-12-23T21:17:03.825919+00:…",101.569,"""https://api.open-meteo.com/v1/…"
2025-12-29,3.0,0.1,"""unknown""",,,51.5,-0.12,16.0,"""GMT""",0.085354,"""2025-12-23T21:17:03.825919+00:…",101.569,"""https://api.open-meteo.com/v1/…"
