In [None]:
import json
import os
from datetime import datetime

import numpy as np
import pandas as pd
import requests

In [None]:
# Set token
# The NOAA CDO API token is a credential, so it is read from the NOAA_TOKEN
# environment variable instead of being hardcoded in the notebook, where it
# would be visible to everyone who can read the file.
# Request a token at https://www.ncdc.noaa.gov/cdo-web/token and export it as
# NOAA_TOKEN before starting the kernel.
# NOTE(review): the token that used to be committed here should be treated as
# compromised and regenerated.
noaa_token = os.environ.get('NOAA_TOKEN', '')
if not noaa_token:
    print("WARNING: NOAA_TOKEN is not set; NOAA API requests are expected to be rejected.")

In [None]:
def _fetch_observations(year, station_id, datatype):
    """Return one calendar year's observation records of `datatype` for a station.

    Issues a single GET against the NOAA CDO v2 `data` endpoint (GHCND dataset),
    authenticated with the module-level `noaa_token`.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    year = str(year)
    response = requests.get(
        'https://www.ncdc.noaa.gov/cdo-web/api/v2/data',
        # Let requests build and URL-encode the query string.
        params={
            'datasetid': 'GHCND',
            'datatypeid': datatype,
            # One request covers a single datatype for a single year, so at
            # most 366 daily records are expected -- well under this limit.
            'limit': 1000,
            'stationid': station_id,
            'startdate': year + '-01-01',
            'enddate': year + '-12-31',
        },
        headers={'token': noaa_token},
        # Without a timeout a stalled connection would block the notebook forever.
        timeout=60,
    )
    # Surface auth / rate-limit / server errors here instead of failing later
    # with an unrelated KeyError while reading the payload.
    response.raise_for_status()
    payload = response.json()

    # A response without a 'results' key (e.g. a year with no data for this
    # datatype) is treated as "no observations" rather than an error.
    return [item for item in payload.get('results', []) if item['datatype'] == datatype]


def get_weather_data(start_year, last_year, station_id, data_type):
    """Download daily observations for one station and combine them by date.

    Weather data retrieved using the method described here:
    https://towardsdatascience.com/getting-weather-data-in-3-easy-steps-8dc10cc5c859
    Example weather station URL:
    https://www.ncdc.noaa.gov/cdo-web/datasets/GHCND/stations/GHCND:USC00115097/detail

    Args:
        start_year: first year to download (inclusive).
        last_year: last year to download (inclusive).
        station_id: station identifier including the dataset prefix,
            e.g. 'GHCND:USC00115097'.
        data_type: list of datatype codes to download, e.g. ['TMIN', 'TMAX'].

    Returns:
        A DataFrame indexed by 'date' with one column per entry of `data_type`,
        holding the values exactly as the API returned them (no `units`
        parameter is sent, so no unit conversion is requested). The per-type
        tables are inner-joined on date, so only dates that have an observation
        for every requested datatype are kept. If `data_type` is empty, an
        empty DataFrame with an empty 'date' index is returned.

    Raises:
        requests.HTTPError: if any API request returns a 4xx/5xx status.
    """
    print("Extracting data from station:", station_id)
    print("https://www.ncdc.noaa.gov/cdo-web/datasets/GHCND/stations/"+station_id+"/detail")

    # Stays None until the first datatype has been processed.
    df_combined_weather = None

    for datatype in data_type:
        dates_observed = []
        observed_values = []

        print('\nExtracting:', datatype)

        # The API is queried one calendar year at a time.
        for year in range(start_year, last_year + 1):
            observations = _fetch_observations(year, station_id, datatype)
            dates_observed += [item['date'] for item in observations]
            observed_values += [item['value'] for item in observations]

            print('Extracting data for ' + str(year) + '.')

        # pd.to_datetime yields a datetime64 column even when nothing was
        # returned, which keeps the merge key dtype consistent across datatypes.
        df_observations = pd.DataFrame({
            'date': pd.to_datetime(dates_observed, format="%Y-%m-%dT%H:%M:%S"),
            datatype: observed_values,
        })

        if df_combined_weather is None:
            df_combined_weather = df_observations
        else:
            # Inner join: dates missing for any datatype are dropped.
            df_combined_weather = df_combined_weather.merge(df_observations, on='date', how='inner')

    print("\nData extraction completed.")

    if df_combined_weather is None:
        # No datatypes were requested; return an empty, date-indexed frame
        # instead of failing on a missing 'date' column.
        return pd.DataFrame(index=pd.DatetimeIndex([], name='date'))

    # set date as the index
    return df_combined_weather.set_index('date')

In [None]:
# Download 2007-2021 observations for station GHCND:USC00115097, one column
# per requested GHCND datatype code. This issues one API request per
# datatype per year, so the cell takes a while to run.
weather_df = get_weather_data(
    start_year=2007,
    last_year=2021,
    station_id='GHCND:USC00115097',
    data_type=['TOBS', 'TMIN', 'TMAX', 'PRCP', 'SNOW', 'SNWD'],
)

Extracting data from station: GHCND:USC00115097
https://www.ncdc.noaa.gov/cdo-web/datasets/GHCND/stations/GHCND:USC00115097/detail

Extracting: TOBS
Extracting data for 2007.
Extracting data for 2008.
Extracting data for 2009.
Extracting data for 2010.
Extracting data for 2011.
Extracting data for 2012.
Extracting data for 2013.
Extracting data for 2014.
Extracting data for 2015.
Extracting data for 2016.
Extracting data for 2017.
Extracting data for 2018.
Extracting data for 2019.
Extracting data for 2020.
Extracting data for 2021.

Extracting: TMIN
Extracting data for 2007.
Extracting data for 2008.
Extracting data for 2009.
Extracting data for 2010.
Extracting data for 2011.
Extracting data for 2012.
Extracting data for 2013.
Extracting data for 2014.
Extracting data for 2015.
Extracting data for 2016.
Extracting data for 2017.
Extracting data for 2018.
Extracting data for 2019.
Extracting data for 2020.
Extracting data for 2021.

Extracting: TMAX
Extracting data for 2007.
Extractin

In [None]:
# Display the combined table: rows are indexed by date, with one column per
# requested datatype.
weather_df

Unnamed: 0_level_0,TOBS,TMIN,TMAX,PRCP,SNOW,SNWD
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007-04-02,150,56,161,13,0,0
2007-04-03,206,83,211,3,0,0
2007-04-04,-17,-22,206,28,0,0
2007-04-05,-33,-33,0,0,0,0
2007-04-06,-44,-50,50,0,0,0
...,...,...,...,...,...,...
2021-04-15,44,28,117,0,0,0
2021-04-16,39,0,133,0,0,0
2021-04-17,-6,-11,156,0,0,0
2021-04-18,6,-6,139,0,0,0


In [None]:
# Save the combined table as CSV in the current working directory so later
# analysis does not need to query the API again.
weather_df.to_csv('chicago_weather_data.csv')