In [2]:
# https://www.ncei.noaa.gov

import requests
import json
import pandas as pd
import os
import time

# Ensure the output directory exists
SAVE_DIR = "../../data/raw data"
SUB_FOLDER = "2019"
os.makedirs(SAVE_DIR, exist_ok=True)

# NOAA API Configuration
API_TOKEN = "IBhbHCgHsIqASycwSyTBmSuPdsEDOOZI"  # Replace with your NOAA API Key
BASE_URL = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"

# **Chicago O'Hare Intl Airport Station ID**
ORD_STATION_ID = "GHCND:USW00094846"

# **List of complete weather features to collect**
WEATHER_FEATURES = [
    "TMAX", "TMIN", "TAVG",  # Temperature
    "PRCP", "SNOW", "SNWD",  # Precipitation & Snow
    "AWND", "WSFG", "WDFG",  # Wind
    "PSUN", "RHAVG", "ACMC", # Humidity & Cloud Cover
    "SLP"                    # Atmospheric Pressure
]

# Step 1: Fetch Weather Data from O'Hare (ORD)
all_weather_data = []

print(f"🔍 Fetching weather data for Chicago O'Hare Intl Airport ({ORD_STATION_ID})...")

PARAMS = {
    "datasetid": "GHCND",
    "stationid": ORD_STATION_ID,
    "startdate": "2018-11-01",
    "enddate": "2019-02-21",
    "datatypeid": WEATHER_FEATURES,  # Fetch all features
    "limit": 1000,
    "units": "metric"
}

offset = 1
while True:
    PARAMS["offset"] = offset
    response = requests.get(BASE_URL, headers={"token": API_TOKEN}, params=PARAMS)

    if response.status_code != 200:
        print(f"❌ Failed request for {ORD_STATION_ID}: {response.status_code}, {response.text}")
        break

    response_json = response.json()

    if "results" not in response_json:
        print(f"⚠️ No 'results' key found for station {ORD_STATION_ID}. Full response: {response_json}")
        break

    page_data = response_json["results"]
    if not page_data:
        break  # Stop if no more data

    all_weather_data.extend(page_data)
    print(f"📡 Fetched {len(page_data)} records from {ORD_STATION_ID}...")

    offset += 1000  # Move to next batch
    time.sleep(0.1)  # Avoid API rate limits

# Step 2: Convert Data to CSV
weather_data = []
for record in all_weather_data:
    weather_data.append([record["date"][:10], record["station"], record["datatype"], record["value"]])

df = pd.DataFrame(weather_data, columns=["Date", "Station", "Type", "Value"])
df_pivot = df.pivot(index=["Date", "Station"], columns="Type", values="Value")  # Reshape format
df_pivot.reset_index(inplace=True)  # Reset index

# Rename columns for clarity
df_pivot.rename(columns={
    "TMAX": "Max_Temp_C", "TMIN": "Min_Temp_C", "TAVG": "Avg_Temp_C",
    "PRCP": "Precipitation_mm", "SNOW": "Snowfall_mm", "SNWD": "Snow_Depth_mm",
    "AWND": "Avg_Wind_Speed_mps", "WSFG": "Max_Wind_Gust_mps", "WDFG": "Max_Wind_Gust_Direction",
    "PSUN": "Sunshine_Percentage", "RHAVG": "Avg_Relative_Humidity", "ACMC": "Avg_Cloud_Cover",
    "SLP": "Sea_Level_Pressure_hPa"
}, inplace=True)

# Save to CSV file
csv_file = os.path.join(SAVE_DIR, SUB_FOLDER, "chicago_weather.csv")
df_pivot.to_csv(csv_file, index=False)
print(f"✅ Weather data saved in CSV format: {csv_file}")

🔍 Fetching weather data for Chicago O'Hare Intl Airport (GHCND:USW00094846)...
📡 Fetched 791 records from GHCND:USW00094846...
⚠️ No 'results' key found for station GHCND:USW00094846. Full response: {}
✅ Weather data saved in CSV format: ../../data/raw data/2019/chicago_weather.csv
