In [1]:
### Weather API import ###
# One-time dependency installation (run in a terminal, or prefix with % to run in the notebook):
# pip install openmeteo-requests
# pip install requests-cache retry-requests numpy pandas

In [5]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import os

# Folder (relative to the current working directory) that receives every exported CSV
data_dir = "weather_data"
os.makedirs(data_dir, exist_ok=True)

# Open-Meteo client built on a cached HTTP session wrapped with automatic retries.
# expire_after=-1 is requests-cache's "never expire" setting, so repeated runs reuse
# the stored responses; failed requests are retried up to 5 times (backoff factor 0.2).
cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Request definition for the historical archive endpoint.
# The order of the names under "daily" and "hourly" matters: values are read back
# from each response by position, in exactly this order.
# NOTE(review): the same "timezone" value is sent for all three coordinates —
# confirm that this is intended for the non-New-York locations.
url = "https://archive-api.open-meteo.com/v1/archive"
params = dict(
    latitude=[40.707298764534166, 41.884490770621035, 34.05114289187591],
    longitude=[-74.0121054695436, -87.62446337710055, -118.25782581922437],
    start_date="2000-02-02",
    end_date="2025-02-02",
    daily=[
        "temperature_2m_mean",
        "temperature_2m_max",
        "temperature_2m_min",
        "wind_speed_10m_max",
        "wind_gusts_10m_max",
        "rain_sum",
        "snowfall_sum",
        "precipitation_sum",
        "daylight_duration",
        "precipitation_hours",
        "sunshine_duration",
    ],
    hourly=["temperature_2m", "precipitation"],
    timezone="America/New_York",
)

# One response object per latitude/longitude pair is iterated over further down
responses = openmeteo.weather_api(url, params=params)

def _as_name_list(value):
    """Return the requested variable names as a list (a lone string becomes a one-item list)."""
    return [value] if isinstance(value, str) else list(value)


def _section_to_frame(section, variable_names, location_name: str) -> pd.DataFrame:
    """Turn one time-series section of a response (hourly or daily) into a DataFrame.

    Parameters
    ----------
    section
        Object returned by ``response.Hourly()`` or ``response.Daily()``. Its
        ``Time()`` / ``TimeEnd()`` values are interpreted as Unix seconds and
        ``Interval()`` as the step in seconds.
    variable_names
        Column names, in the same order in which the variables were requested;
        the j-th name is paired with ``section.Variables(j)``.
    location_name
        Label stored in the ``location`` column of every row.

    Returns
    -------
    pd.DataFrame
        Columns ``date``, ``location`` and one column per entry of ``variable_names``.
    """
    columns = {
        # Half-open range [Time, TimeEnd): one timestamp per interval
        "date": pd.date_range(
            start=pd.to_datetime(section.Time(), unit="s", utc=True),
            end=pd.to_datetime(section.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=section.Interval()),
            inclusive="left",
        ),
        "location": location_name,
    }
    for position, variable_name in enumerate(variable_names):
        columns[variable_name] = section.Variables(position).ValuesAsNumpy()
    return pd.DataFrame(data=columns)


# Per-location frames collected for the combined export
all_hourly_dataframes = []
all_daily_dataframes = []

# City labels, in the same order as the coordinates sent in the request
city_names = ["New_York", "Chicago", "Los_Angeles"]

# The i-th response is labelled with the i-th city name, so the counts must agree;
# fail before anything is exported rather than mislabel or silently drop a city.
if len(responses) != len(city_names):
    raise ValueError(
        f"Expected {len(city_names)} responses (one per city) but received {len(responses)}"
    )

# Column names come straight from the request so that names and positions cannot drift apart
hourly_variable_names = _as_name_list(params["hourly"])
daily_variable_names = _as_name_list(params["daily"])

# Process all locations with a for-loop
for i, response in enumerate(responses):
    location_name = city_names[i]

    print(f"\nLocation: {location_name}")
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    # Process hourly data
    hourly_dataframe = _section_to_frame(response.Hourly(), hourly_variable_names, location_name)
    all_hourly_dataframes.append(hourly_dataframe)

    # Export individual city hourly data to CSV
    hourly_file_path = os.path.join(data_dir, f"{location_name}_hourly_data.csv")
    hourly_dataframe.to_csv(hourly_file_path, index=False)
    print(f"Exported hourly data for {location_name} to {hourly_file_path}")

    # Print sample of hourly data
    print("\nHourly data sample:")
    print(hourly_dataframe.head())

    # Process daily data
    daily_dataframe = _section_to_frame(response.Daily(), daily_variable_names, location_name)
    all_daily_dataframes.append(daily_dataframe)

    # Export individual city daily data to CSV
    daily_file_path = os.path.join(data_dir, f"{location_name}_daily_data.csv")
    daily_dataframe.to_csv(daily_file_path, index=False)
    print(f"Exported daily data for {location_name} to {daily_file_path}")

    # Print sample of daily data
    print("\nDaily data sample:")
    print(daily_dataframe.head())

# Stack the per-city frames into one table each.
# ignore_index=True renumbers the rows 0..n-1; without it every city's rows keep
# their own 0-based labels, leaving duplicated index values in the combined frames.
combined_hourly_data = pd.concat(all_hourly_dataframes, ignore_index=True)
combined_daily_data = pd.concat(all_daily_dataframes, ignore_index=True)

# Export combined datasets (the index is not written, so the CSV holds only the data columns)
combined_hourly_path = os.path.join(data_dir, "combined_hourly_weather_data.csv")
combined_daily_path = os.path.join(data_dir, "combined_daily_weather_data.csv")

combined_hourly_data.to_csv(combined_hourly_path, index=False)
combined_daily_data.to_csv(combined_daily_path, index=False)

# Report where the files went and how large the combined tables are
print(f"\nExported combined hourly data to {combined_hourly_path}")
print(f"Exported combined daily data to {combined_daily_path}")
print("\nCombined hourly data shape:", combined_hourly_data.shape)
print("Combined daily data shape:", combined_daily_data.shape)


Location: New_York
Coordinates 40.738136291503906°N -74.04254150390625°E
Elevation 102.0 m asl
Timezone b'America/New_York' b'GMT-4'
Timezone difference to GMT+0 -14400 s
Exported hourly data for New_York to weather_data\New_York_hourly_data.csv

Hourly data sample:
                       date  location  temperature_2m  precipitation
0 2000-02-02 04:00:00+00:00  New_York          -5.298            0.0
1 2000-02-02 05:00:00+00:00  New_York          -5.548            0.0
2 2000-02-02 06:00:00+00:00  New_York          -5.648            0.0
3 2000-02-02 07:00:00+00:00  New_York          -5.998            0.0
4 2000-02-02 08:00:00+00:00  New_York          -6.398            0.0
Exported daily data for New_York to weather_data\New_York_daily_data.csv

Daily data sample:
                       date  location  temperature_2m_mean  \
0 2000-02-02 04:00:00+00:00  New_York            -6.195917   
1 2000-02-03 04:00:00+00:00  New_York            -6.373001   
2 2000-02-04 04:00:00+00:00  New_York  