# Weather Data Retrieval

## 1. Load libraries

In [1]:
import json
from datetime import datetime
from pathlib import Path
from urllib.parse import quote_plus

import pandas as pd
import requests
# we will need the credentials we saved in the .env file
from dotenv import dotenv_values
from sqlalchemy import create_engine, types
from sqlalchemy.dialects.postgresql import JSON as postgres_json

## 2. Select Airports and Time Period
Since the blizzard primarily affected three states, we select the airports relevant to our analysis.

- NY - John F. Kennedy Airport (weather station code 74486)

- Washington D.C. - Dulles International Airport (weather station code 72403)

- Philadelphia - Philadelphia International Airport (weather station code 72408)

In [2]:
# Map each airport code to the ID of its weather station.
airport_staids = {
    "JFK": 74486,
    "IAD": 72403,
    "PHL": 72408,
}

In [3]:
# First and last day of the requested period, as YYYY-MM-DD strings.
period_start, period_end = "2014-10-01", "2016-03-31"

## 3. Retrieving keys from `.env` file
Note: for this step you need a `.env` file in the repository folder.

In [4]:
# Read every key/value pair from the .env file.
config = dotenv_values()

# Fail fast with a readable message when the key is absent or has no value,
# instead of sending unauthenticated requests later on.
# Align the key label with your .env file.
api_key = config.get('X-RapidAPI-Key')
if not api_key:
    raise KeyError(
        "'X-RapidAPI-Key' is missing or empty in the .env file; "
        "add it before calling the weather API"
    )

## 4. Get data from API

In [5]:
# Accumulate one entry per airport in each list; the keys become the columns
# of the raw weather table.
weather_dict = {
    'extracted_at': [],
    'airport_code': [],
    'station_id': [],
    'extracted_data': [],
}

# API CALL daily (station) - for the syntax: see the rapidapi interface
url = "https://meteostat.p.rapidapi.com/stations/daily"

headers = {
    "X-RapidAPI-Key": api_key,
    "X-RapidAPI-Host": "meteostat.p.rapidapi.com",
}

# One request per airport; only the station ID changes between calls.
for airport, station_id in airport_staids.items():
    querystring = {
        "station": station_id,
        "start": period_start,
        "end": period_end,
        "model": "true",
    }

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    # Stop on HTTP errors (e.g. bad key, rate limit) so that an error payload
    # is never stored as if it were weather data.
    response.raise_for_status()

    # Append the result of this call to the dictionary.
    weather_dict['extracted_at'].append(datetime.now())     # extraction timestamp
    weather_dict['airport_code'].append(airport)            # airport code
    weather_dict['station_id'].append(station_id)           # weather station ID
    weather_dict['extracted_data'].append(response.json())  # parsed JSON payload

### _Store in DataFrame_

In [6]:
# Build the raw table from the collected lists; each key of weather_dict
# becomes a column.
weather_daily_df = pd.DataFrame(data=weather_dict)
weather_daily_df

Unnamed: 0,extracted_at,airport_code,station_id,extracted_data
0,2024-07-25 12:33:09.186581,JFK,74486,"{'meta': {'generated': '2024-07-25 10:33:08'},..."
1,2024-07-25 12:33:09.886724,IAD,72403,"{'meta': {'generated': '2024-07-25 10:33:09'},..."
2,2024-07-25 12:33:10.536594,PHL,72408,"{'meta': {'generated': '2024-07-25 10:33:10'},..."


The data from the API call is turned into data frames: three data frames were created (one for each airport).

Then, these data frames were concatenated together into a merged data frame containing data from all airports.

In [8]:
# Select the JFK payload by airport code rather than by row position, so the
# 'Airport' label below always matches the data even if the airport order
# changes. The weather records sit under the payload's 'data' key;
# pd.json_normalize() turns them into one row per record.
jfk_payload = weather_daily_df.loc[
    weather_daily_df['airport_code'] == 'JFK', 'extracted_data'
].iloc[0]
df_JFK = pd.json_normalize(jfk_payload['data'])
df_JFK['Airport'] = 'JFK'
df_JFK

Unnamed: 0,date,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun,Airport
0,2014-10-01,17.7,16.7,19.4,0.0,0.0,22.0,18.4,,1016.8,,JFK
1,2014-10-02,17.4,16.1,20.0,0.0,0.0,,13.3,,1019.9,,JFK
2,2014-10-03,16.8,12.8,20.0,0.0,0.0,89.0,11.5,,1017.2,,JFK
3,2014-10-04,18.8,11.1,22.2,17.8,0.0,175.0,26.3,,1004.1,,JFK
4,2014-10-05,12.7,8.3,16.1,0.0,0.0,273.0,20.9,,1010.7,,JFK
...,...,...,...,...,...,...,...,...,...,...,...,...
543,2016-03-27,6.8,4.4,9.4,0.0,0.0,96.0,15.8,,1027.8,,JFK
544,2016-03-28,9.2,6.1,17.8,12.2,0.0,11.0,26.3,,1007.3,,JFK
545,2016-03-29,10.4,4.4,12.8,0.0,0.0,302.0,39.6,,1013.3,,JFK
546,2016-03-30,6.4,2.2,9.4,0.0,0.0,197.0,19.4,,1026.2,,JFK


In [9]:
# Select the IAD payload by airport code rather than by row position, so the
# 'Airport' label below always matches the data even if the airport order
# changes. The weather records sit under the payload's 'data' key;
# pd.json_normalize() turns them into one row per record.
iad_payload = weather_daily_df.loc[
    weather_daily_df['airport_code'] == 'IAD', 'extracted_data'
].iloc[0]
df_IAD = pd.json_normalize(iad_payload['data'])
df_IAD['Airport'] = 'IAD'
df_IAD

Unnamed: 0,date,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun,Airport
0,2014-10-01,18.2,14.4,22.2,1.0,0.0,,6.5,,1016.1,,IAD
1,2014-10-02,17.7,12.2,23.3,0.0,0.0,,4.7,,1019.3,,IAD
2,2014-10-03,17.6,15.6,20.6,2.3,0.0,,14.4,,1014.5,,IAD
3,2014-10-04,17.6,2.8,19.4,2.3,0.0,305.0,16.6,,1005.2,,IAD
4,2014-10-05,8.6,0.6,16.1,0.0,0.0,,7.6,,1012.5,,IAD
...,...,...,...,...,...,...,...,...,...,...,...,...
543,2016-03-27,10.9,6.1,13.3,0.8,0.0,,7.6,,1024.1,,IAD
544,2016-03-28,12.2,8.9,20.0,9.7,0.0,263.0,25.2,,1009.9,,IAD
545,2016-03-29,11.3,2.8,15.0,0.0,0.0,313.0,21.6,,1019.3,,IAD
546,2016-03-30,8.3,-2.7,18.3,0.0,0.0,,13.3,,1025.1,,IAD


In [10]:
# Select the PHL payload by airport code rather than by row position, so the
# 'Airport' label below always matches the data even if the airport order
# changes. The weather records sit under the payload's 'data' key;
# pd.json_normalize() turns them into one row per record.
phl_payload = weather_daily_df.loc[
    weather_daily_df['airport_code'] == 'PHL', 'extracted_data'
].iloc[0]
df_PHL = pd.json_normalize(phl_payload['data'])
df_PHL['Airport'] = 'PHL'
df_PHL

Unnamed: 0,date,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun,Airport
0,2014-10-01,19.4,16.7,22.8,0.0,0.0,54.0,12.6,,1016.4,,PHL
1,2014-10-02,19.1,16.7,22.8,0.0,0.0,74.0,11.5,,1019.7,,PHL
2,2014-10-03,18.5,15.0,22.2,1.5,0.0,,10.4,,1016.3,,PHL
3,2014-10-04,18.7,10.0,21.1,4.3,0.0,268.0,19.4,,1004.2,,PHL
4,2014-10-05,11.8,6.1,16.1,0.0,0.0,254.0,13.3,,1012.1,,PHL
...,...,...,...,...,...,...,...,...,...,...,...,...
543,2016-03-27,10.1,6.7,15.6,0.5,0.0,85.0,15.5,,1026.1,,PHL
544,2016-03-28,11.3,8.3,21.1,11.2,0.0,350.0,25.9,,1007.4,,PHL
545,2016-03-29,11.8,6.7,15.0,0.0,0.0,309.0,26.3,,1016.4,,PHL
546,2016-03-30,9.1,2.8,15.0,0.0,0.0,171.0,15.1,,1026.1,,PHL


### _Combining data frames_

In [11]:
# Stack the three per-airport frames on top of each other and renumber the
# rows so the merged frame gets a single continuous index.
airport_frames = [df_JFK, df_IAD, df_PHL]
weather_data_merged = pd.concat(airport_frames, ignore_index=True)
weather_data_merged

  weather_data_merged = pd.concat([df_JFK, df_IAD,df_PHL], ignore_index=True)


Unnamed: 0,date,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun,Airport
0,2014-10-01,17.7,16.7,19.4,0.0,0.0,22.0,18.4,,1016.8,,JFK
1,2014-10-02,17.4,16.1,20.0,0.0,0.0,,13.3,,1019.9,,JFK
2,2014-10-03,16.8,12.8,20.0,0.0,0.0,89.0,11.5,,1017.2,,JFK
3,2014-10-04,18.8,11.1,22.2,17.8,0.0,175.0,26.3,,1004.1,,JFK
4,2014-10-05,12.7,8.3,16.1,0.0,0.0,273.0,20.9,,1010.7,,JFK
...,...,...,...,...,...,...,...,...,...,...,...,...
1639,2016-03-27,10.1,6.7,15.6,0.5,0.0,85.0,15.5,,1026.1,,PHL
1640,2016-03-28,11.3,8.3,21.1,11.2,0.0,350.0,25.9,,1007.4,,PHL
1641,2016-03-29,11.8,6.7,15.0,0.0,0.0,309.0,26.3,,1016.4,,PHL
1642,2016-03-30,9.1,2.8,15.0,0.0,0.0,171.0,15.1,,1026.1,,PHL


### _Export as CSV_

In [13]:
# Create the target folder if needed so the export also works on a fresh
# clone where ./data does not exist yet; the row index is not written.
csv_path = Path('./data/weather_data_merged.csv')
csv_path.parent.mkdir(parents=True, exist_ok=True)
weather_data_merged.to_csv(csv_path, index=False)

## 5. Importing to Postgres Database

Useful link to understand variables: https://dev.meteostat.net/api/stations/daily.html#response

### _Retrieve keys from the .env file_

In [14]:
# Re-read the .env file and pull out the Postgres connection settings.
# Align the key labels with your .env file.
config = dotenv_values()

pg_user, pg_host, pg_port, pg_db, pg_schema, pg_pass = (
    config[label]
    for label in (
        'POSTGRES_USER',
        'POSTGRES_HOST',
        'POSTGRES_PORT',
        'POSTGRES_DB',
        'POSTGRES_SCHEMA',
        'POSTGRES_PASS',
    )
)

### _Load data_

In [15]:
# Build the connection URL. The password is percent-encoded because characters
# such as '@', ':' or '/' would otherwise be parsed as URL delimiters.
url = f'postgresql://{pg_user}:{quote_plus(pg_pass)}@{pg_host}:{pg_port}/{pg_db}'

# Create the engine; echo=False keeps the emitted SQL out of the cell output.
engine = create_engine(url, echo=False)

In [16]:
# Column name -> SQL type to use when the table is created.
dtype_dict = {
    "extracted_at": types.DateTime,
    "airport_code": types.String,
    "station_id": types.Integer,
    "extracted_data": postgres_json,  # payload column uses the Postgres JSON type
}

In [17]:
# Write the raw frame to the database. An existing table with the same name is
# replaced, the frame index is not written, and column types come from
# dtype_dict.
weather_daily_df.to_sql(
    name='weather_daily_raw',
    con=engine,
    schema=pg_schema,  # pandas lets us choose the schema the table is created in
    if_exists='replace',
    dtype=dtype_dict,
    index=False,
)

3