# Historical Air Quality Dataset (2020 - 2025)
Collect a historical air pollution dataset for Lagos from the [OpenWeather Air Pollution API](https://openweathermap.org/api/air-pollution).

In [1]:
# import libraries 
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional

import pandas as pd
import requests
from dotenv import load_dotenv


# Make the project's ``src`` directory importable. ``Path()`` resolves against
# the current working directory, so ``src`` is expected to sit next to the
# folder this notebook is run from -- TODO confirm the working directory.
sys.path.insert(0, str(Path().resolve().parent / "src"))

# Project-local path constants; this import only works after the sys.path
# tweak above. NOTE(review): PARENT_DIR is not used in the cells shown here.
from paths import  PARENT_DIR, RAW_DATA_DIR

# Load environment variables (python-dotenv reads a .env file by default).
load_dotenv()


# OpenWeather API key taken from the environment; os.getenv returns None when
# OPEN_WEATHER_API_KEY is not set.
API_KEY = os.getenv("OPEN_WEATHER_API_KEY")


# Lagos latitude and longitude
LATITUDE = 6.5244
LONGITUDE = 3.3792

/Users/macbook/Desktop/horlarDEV/Air_MLOPs/data


In [5]:
def fetch_hourly_weather_data_from_API(
    start_date: str, end_date: str, timeout: float = 60.0
) -> Optional[str]:
    """
    Fetch historical air-pollution data from the OpenWeather API and save it as JSON.

    The request is made for the coordinates in ``LATITUDE``/``LONGITUDE`` and the
    parsed response is written to
    ``RAW_DATA_DIR / "weather_<YYYYMMDD>_to_<YYYYMMDD>.json"``.

    Parameters:
        start_date (str): Start of the window, formatted ``YYYY-MM-DD``.
        end_date (str): End of the window, formatted ``YYYY-MM-DD``. The window
            ends at 00:00 of this day.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        Optional[str]: Path of the saved JSON file, or ``None`` when the request
        failed or the body was not valid JSON (the error is printed).

    Raises:
        ValueError: If either date does not match ``%Y-%m-%d``.
    """

    # Naive datetimes: .timestamp() interprets them in the machine's local
    # timezone, so the window boundaries are local midnights.
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")

    # HTTPS keeps the API key (sent as a query parameter) out of plaintext traffic.
    URL = "https://api.openweathermap.org/data/2.5/air_pollution/history"
    params = {
        "lat": LATITUDE,
        "lon": LONGITUDE,
        "start": int(start.timestamp()),
        "end": int(end.timestamp()),
        "appid": API_KEY,
    }

    try:
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.get(URL, params=params, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers invalid-JSON bodies on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"Error fetching data: {e}")
        return None

    # Create the output directory (and any missing parents) without the
    # check-then-create race of exists() followed by os.mkdir().
    raw_dir = Path(RAW_DATA_DIR)
    raw_dir.mkdir(parents=True, exist_ok=True)

    file_path = raw_dir / f"weather_{start.strftime('%Y%m%d')}_to_{end.strftime('%Y%m%d')}.json"

    # Save JSON response to a file
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)

    print(f"Data successfully fetched and saved to {file_path}")
    return str(file_path)

In [13]:
if __name__ == "__main__":
    # Collection window; both bounds are parsed with "%Y-%m-%d" by the fetcher.
    START_DATE, END_DATE = "2020-1-1", "2025-02-01"

    print("Collecting weather data from API ...")

    # Download the whole window in one request and store the raw JSON on disk.
    fetch_hourly_weather_data_from_API(
        start_date=START_DATE,
        end_date=END_DATE,
    )

Collecting weather data from API ...
Data successfully fetched and saved to /Users/macbook/Desktop/horlarDEV/Air_MLOPs/data/bronze/weather_20200101_to_20250201.json
