# Download weather data
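Details and documentation are available at https://open-meteo.com/en/docs/historical-weather-api#latitude=40.71&longitude=-74.01&start_date=2022-08-31&end_date=2024-01-31&hourly=temperature_2m,precipitation,rain,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,windspeed_10m,winddirection_10m&timezone=America%2FNew_York

Note: the link above is pre-filled with an example query; the date range and hourly variables actually requested in this notebook are the ones set in the `params` cell below.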

In [1]:
import openmeteo_requests

import requests_cache

from retry_requests import retry
import os
import pandas as pd

In [2]:
# Build the HTTP session used for every Open-Meteo call: a response cache
# wrapped in automatic retries, handed to the API client.
cache_session = requests_cache.CachedSession(
    '.cache',
    expire_after=-1,  # per the requests-cache docs, -1 means cached responses never expire
)
retry_session = retry(
    cache_session,
    retries=5,
    backoff_factor=0.2,
)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [3]:
# Request hourly historical weather from the Open-Meteo archive endpoint.
url = "https://archive-api.open-meteo.com/v1/archive"

# Hourly variables to fetch. The processing cell below reads the results back
# by position, so this order matters.
hourly_variables = [
    "temperature_2m",
    "relative_humidity_2m",
    "rain",
    "snowfall",
    "wind_speed_10m",
]

params = dict(
    latitude=40.71,
    longitude=-74.01,
    start_date="2022-01-01",
    end_date="2022-07-01",
    hourly=hourly_variables,
    timezone="America/New_York",
)

# weather_api returns an indexable collection of responses; only the first
# entry is used here (presumably one entry per requested location - a single
# coordinate pair is requested above).
responses = openmeteo.weather_api(url, params=params)
response = responses[0]

In [4]:
# Process hourly data.
# Variables are read back by position, and the order needs to be the same as
# requested. Pairing each position with its name straight from
# params["hourly"] keeps the column labels in sync with the request instead of
# duplicating the order as hard-coded indices.
hourly = response.Hourly()

# One timestamp per interval from Time() up to, but not including, TimeEnd().
# The epoch seconds are parsed with utc=True, so the "date" column is
# timezone-aware UTC even though the request asked for America/New_York.
hourly_dates = pd.date_range(
    start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
    end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
    freq=pd.Timedelta(seconds=hourly.Interval()),
    inclusive="left",
)

# "date" first, then one column per requested variable in request order.
hourly_data = {
    "date": hourly_dates,
    **{
        variable_name: hourly.Variables(position).ValuesAsNumpy()
        for position, variable_name in enumerate(params["hourly"])
    },
}

In [5]:
# Assemble the column dictionary into one table and preview its first rows.
hourly_dataframe = pd.DataFrame(hourly_data)
hourly_dataframe.head(n=5)

Unnamed: 0,date,temperature_2m,relative_humidity_2m,rain,snowfall,wind_speed_10m
0,2022-01-01 04:00:00+00:00,7.6785,100.0,0.0,0.0,4.198285
1,2022-01-01 05:00:00+00:00,7.8285,99.660194,0.0,0.0,5.091168
2,2022-01-01 06:00:00+00:00,7.9285,99.660461,0.0,0.0,4.198285
3,2022-01-01 07:00:00+00:00,7.6285,99.659645,0.0,0.0,6.36905
4,2022-01-01 08:00:00+00:00,7.5785,99.659515,0.1,0.0,4.73506


In [6]:
# Folder (relative to the notebook's working directory) that receives the
# downloaded weather data.
output_relative_dir = '../data/landing/weather'

# exist_ok=True creates any missing parent folders and is a no-op when the
# folder is already there, so the cell is safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_relative_dir, exist_ok=True)

In [7]:
# Write the hourly table to the landing folder; index=False leaves the row
# index out of the file.
hourly_weather_path = f"{output_relative_dir}/hourly_weather.csv"
hourly_dataframe.to_csv(hourly_weather_path, index=False)