In [3]:
# Here we call the API to create our dataset for the EDA notebook.
# EDA notebook for Madrid air pollution and weather (last 5 days)

# Imports
import requests
import pandas as pd
from datetime import datetime

# Coordinates for Madrid center
latitude = 40.4168
longitude = -3.7038

# Time range
start_date = "2025-05-10"
end_date = "2025-05-15"

# Fetch Air Quality Data
aq_url = "https://air-quality-api.open-meteo.com/v1/air-quality"
aq_params = {
    "latitude": latitude,
    "longitude": longitude,
    "hourly": "pm10,pm2_5,carbon_monoxide,nitrogen_dioxide,ozone",
    "start": f"{start_date}T00:00",
    "end": f"{end_date}T23:00",
    "timezone": "auto"
}
aq_response = requests.get(aq_url, params=aq_params)
aq_data = aq_response.json()

#print(aq_response.status_code)
#print(aq_response.url)         # See the actual request
#print(aq_response.json())      # See raw JSON (just a bit)
#print(aq_data.keys())
#for key in aq_data["hourly"]:
#    print(key, len(aq_data["hourly"][key]))



# Convert to DataFrame
df_aq = pd.DataFrame(aq_data['hourly'])
df_aq['time'] = pd.to_datetime(df_aq['time'])

# Fetch Weather Data
weather_url = "https://archive-api.open-meteo.com/v1/archive"
weather_params = {
    "latitude": latitude,
    "longitude": longitude,
    "start_date": start_date,
    "end_date": end_date,
    "hourly": "temperature_2m,wind_speed_10m",
    "timezone": "auto"
}
weather_response = requests.get(weather_url, params=weather_params)
weather_data = weather_response.json()

#print(weather_response.status_code)
#print(weather_response.url)
#print(weather_response.json())
#print(weather_data.keys())
#for key in weather_data["hourly"]:
#    print(key, len(weather_data["hourly"][key]))



# Convert to DataFrame
df_weather = pd.DataFrame(weather_data['hourly'])
df_weather['time'] = pd.to_datetime(df_weather['time'])

# Merge on time
df = pd.merge(df_aq, df_weather, on='time')

# Optional: set datetime index
#df.set_index('time', inplace=True)

# Quick look
df.head()


Unnamed: 0,time,pm10,pm2_5,carbon_monoxide,nitrogen_dioxide,ozone,temperature_2m,wind_speed_10m
0,2025-05-15 00:00:00,14.2,12.1,165.0,31.7,53.0,,
1,2025-05-15 01:00:00,12.0,10.5,151.0,24.0,51.0,,
2,2025-05-15 02:00:00,10.8,9.0,140.0,18.7,49.0,,
3,2025-05-15 03:00:00,9.5,8.2,130.0,14.5,47.0,,
4,2025-05-15 04:00:00,8.7,7.4,128.0,11.1,47.0,,


In [4]:
df.to_parquet("../data/merged_recent_pollution_weather.parquet", index=False)
