In [1]:
import sys
import os

# Put the directory one level above the current working directory at the
# front of sys.path (presumably so the project-level `config` package
# imported below can be resolved when the notebook runs from a subfolder).
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
sys.path.insert(0, parent_dir)
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
from urllib.request import urlretrieve
from scipy.stats import pearsonr

from config.config import DATA_PATH


Data source: Open-Meteo Historical Forecast API — documentation: https://open-meteo.com/en/docs/historical-forecast-api#start_date=2024-09-04

In [2]:
import requests
import json
import pandas as pd

In [3]:
# Load the airport table (airport code, id, display name, latitude, longitude)
# that drives the per-airport weather downloads below.
weather_geos = pd.read_csv(DATA_PATH + '/interim/weather_geos.csv')

In [4]:
# Display the airport table to check the coordinates before querying the API.
weather_geos

Unnamed: 0,AIRPORT,AIRPORT_ID,DISPLAY_AIRPORT_NAME,LATITUDE,LONGITUDE
0,ATL,10397,Hartsfield-Jackson Atlanta International,33.636667,-84.427778
1,DEN,11292,Denver International,39.861667,-104.673056
2,ORD,13930,Chicago O'Hare International,41.976944,-87.908056
3,DFW,11298,Dallas/Fort Worth International,32.897222,-97.037778
4,CLT,11057,Charlotte Douglas International,35.213611,-80.949167
5,LAX,12892,Los Angeles International,33.9425,-118.408056
6,LAS,12889,Harry Reid International,36.08,-115.152222
7,SEA,14747,Seattle/Tacoma International,47.45,-122.311667
8,PHX,14107,Phoenix Sky Harbor International,33.434167,-112.011667
9,LGA,12953,LaGuardia,40.777222,-73.8725


In [5]:
# Remove pandas' limit on the number of columns shown when displaying DataFrames.
pd.set_option('display.max_columns', None)

In [10]:
def get_hourly_weather_data(row, start_date='2023-01-01', end_date='2024-01-02', timeout=60):
    """Download hourly weather for one airport from Open-Meteo and save it to disk.

    Queries the Open-Meteo historical forecast API for the coordinates in
    ``row`` and, on an HTTP 200 response, writes two files under
    ``DATA_PATH + '/interim/hourly_weather_data'``:

    * ``{AIRPORT}_metadata.json`` - every top-level response field that
      precedes the ``'hourly'`` key.
    * ``{AIRPORT}_weather.csv`` - one row per hourly timestamp with the
      requested weather features plus ``airport`` and ``airport_id`` columns.

    Values are requested in Fahrenheit, mph and inches (see ``units`` below).

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``LATITUDE``, ``LONGITUDE``, ``AIRPORT``,
        ``DISPLAY_AIRPORT_NAME`` and ``AIRPORT_ID``.
    start_date, end_date : str
        Date range passed to the API as ``start_date`` / ``end_date``
        (``YYYY-MM-DD``).
    timeout : float
        Seconds to wait for the HTTP request before ``requests`` raises.

    Returns
    -------
    None
        Results are written to disk; a status line is printed either way.
    """
    # Single source of truth for the requested variables: used both in the
    # query string and as the CSV column list, so the two cannot drift apart.
    feature_names = [
        'temperature_2m',
        'precipitation',
        'snowfall',
        'snow_depth',
        'weather_code',
        'visibility',
        'wind_speed_10m',
        'wind_direction_10m',
        'wind_gusts_10m',
    ]
    features = ','.join(feature_names)
    units = 'temperature_unit=fahrenheit&wind_speed_unit=mph&precipitation_unit=inch'

    latitude = row['LATITUDE']
    longitude = row['LONGITUDE']
    airport = row['AIRPORT']
    airport_name = row['DISPLAY_AIRPORT_NAME']
    airport_id = row['AIRPORT_ID']

    # The longitude must be followed directly by '&': any extra character
    # there becomes part of the coordinate sent to the API.
    url = (
        'https://historical-forecast-api.open-meteo.com/v1/forecast'
        f'?latitude={latitude}&longitude={longitude}'
        f'&start_date={start_date}&end_date={end_date}'
        f'&hourly={features}&{units}'
    )

    # An explicit timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(url, timeout=timeout)

    # Anything other than HTTP 200 is reported and nothing is written.
    if response.status_code != 200:
        print(f'Failed to retrieve data for {airport_name}. Status code: {response.status_code}')
        return

    parse_json = response.json()

    # Extract metadata: every top-level key that appears before 'hourly'.
    metadata = {}
    for key, value in parse_json.items():
        if key == 'hourly':
            break
        metadata[key] = value

    # Make sure the target folder exists so a fresh checkout can run this cell.
    output_dir = DATA_PATH + '/interim/hourly_weather_data'
    os.makedirs(output_dir, exist_ok=True)

    # Save metadata as JSON
    with open(f'{output_dir}/{airport}_metadata.json', 'w') as json_file:
        json.dump(metadata, json_file, indent=4)

    # Build the hourly table: 'time' first, then the features in request order.
    hourly_data = parse_json['hourly']
    columns = {'time': hourly_data['time']}
    for name in feature_names:
        columns[name] = hourly_data[name]
    weather_df = pd.DataFrame(columns)

    # Convert the 'time' column to datetime format
    weather_df['time'] = pd.to_datetime(weather_df['time'])

    # Add airport identifiers so per-airport files can later be concatenated.
    weather_df['airport'] = airport
    weather_df['airport_id'] = airport_id

    # Save dataframe as csv file
    weather_df.to_csv(f'{output_dir}/{airport}_weather.csv', index=False)
    print(f'Hourly weather data was saved for {airport_name}')

In [11]:
# Download and save hourly weather for every airport in the table, one API
# request per row; each call prints its own success/failure line.
for index, row in weather_geos.iterrows():
    get_hourly_weather_data(row)

Hourly weather data was saved for Hartsfield-Jackson Atlanta International
Hourly weather data was saved for Denver International
Hourly weather data was saved for Chicago O'Hare International
Hourly weather data was saved for Dallas/Fort Worth International
Hourly weather data was saved for Charlotte Douglas International
Hourly weather data was saved for Los Angeles International
Hourly weather data was saved for Harry Reid International
Hourly weather data was saved for Seattle/Tacoma International
Hourly weather data was saved for Phoenix Sky Harbor International
Hourly weather data was saved for LaGuardia
Hourly weather data was saved for Orlando International
Hourly weather data was saved for George Bush Intercontinental/Houston
Hourly weather data was saved for Newark Liberty International
Hourly weather data was saved for Ronald Reagan Washington National
Hourly weather data was saved for Logan International
Hourly weather data was saved for John F. Kennedy International
Hourly

In [12]:
# Reload one of the saved files (ATL) as a spot check of the download output.
weather = pd.read_csv(DATA_PATH + '/interim/hourly_weather_data/ATL_weather.csv')

In [13]:
# Preview the first rows to confirm the expected columns were written.
weather.head()

Unnamed: 0,time,temperature_2m,precipitation,snowfall,snow_depth,weather_code,visibility,wind_speed_10m,wind_direction_10m,wind_gusts_10m,airport,airport_id
0,2023-01-01 00:00:00,59.1,0.0,0.0,0.0,3,41338.582,2.7,171,6.0,ATL,10397
1,2023-01-01 01:00:00,57.0,0.0,0.0,0.0,0,40026.246,1.7,157,5.6,ATL,10397
2,2023-01-01 02:00:00,55.3,0.0,0.0,0.0,0,40026.246,1.0,207,5.1,ATL,10397
3,2023-01-01 03:00:00,53.5,0.0,0.0,0.0,1,40026.246,2.7,215,6.5,ATL,10397
4,2023-01-01 04:00:00,55.2,0.0,0.0,0.0,0,40026.246,3.0,234,8.1,ATL,10397
