In [None]:
import requests 
from my_api_info import get_noaa_token
from time import sleep
import numpy as np
import pandas as pd

In [None]:
def get_month_snowfall(YYYY_MM: str) -> requests.models.Response:
    """
    Ask the NOAA CDO web API for the monthly snowfall ('SNOW' values
    of the 'GSOM' dataset, metric units) recorded at MI stations
    during a single month.

    YYYY_MM is the month as a 'YYYY-MM' string, e.g. '2024-01' for
    January of 2024.

    The Response is returned exactly as received: the status code is
    not inspected here, so the caller has to check it before reading
    the body.
    """
    # The first day of the month serves as both ends of the date range
    first_of_month = YYYY_MM + '-01'
    state_fips = '26'

    endpoint = 'https://www.ncei.noaa.gov/cdo-web/api/v2' + '/data'

    # NOTE(review): 'station' is passed through unchanged -- confirm the
    # API actually recognises this key.
    query = dict(
        datasetid='GSOM',
        startdate=first_of_month,
        enddate=first_of_month,
        units='metric',
        datatypeid='SNOW',
        station='GHCND:US',
        locationid='FIPS:' + state_fips,
        limit=1000,
        includemetadata='false',
    )

    # NOTE(review): no timeout is given, so this call waits for as long
    # as the server takes to answer.
    return requests.get(
        endpoint,
        headers={'token': get_noaa_token()},
        params=query,
    )

In [None]:
# Empty station-by-month table; the scraping loop fills it with the
# snowfall values (rows are stations, columns are 'YYYY-MM' dates)
master_df = (
    pd.DataFrame(data=[], index=pd.Index([]), columns=pd.Index([]))
    .rename_axis(index='station', columns='date')
)
master_df.index.name = 'station'

In [None]:
# This is the main scraping loop. Just modify CENTURY (the leading two
# digits of the year) and the range of two-digit years in the outer loop.

CENTURY = '19'
RETRY_WAIT_SECONDS = 5  # pause before the single retry of a failed request

failed_months = 0


def _snowfall_by_station(payload, YYYY_MM):
    """
    Convert the decoded JSON body of a successful request into a Series
    of snowfall values indexed by station ID.

    Only rows whose date falls in YYYY_MM are kept. A payload without a
    (non-empty) 'results' entry yields an empty Series. When a station
    occurs more than once, a warning is printed and only its first row
    is kept, so the result always has a unique index.
    """
    results = payload.get('results') or []
    if not results:
        return pd.Series(dtype=float, index=pd.Index([], name='station'))

    df = pd.DataFrame.from_dict(results)
    # '1980-01-01T00:00:00' -> '1980-01'
    df['date'] = df['date'].apply(lambda s: s.split('T')[0][:-3])
    df = df[df['date'] == YYYY_MM]

    if df['station'].duplicated().any():
        print(f'Found duplicate stations in {YYYY_MM}; '
              'keeping the first value for each!!')
        df = df.drop_duplicates(subset='station', keep='first')

    return df.set_index('station')['value']


for j in range(80, 100):
    # Zero-pad so that single-digit years (e.g. 2005) also form a valid YYYY
    YYYY = f'{CENTURY}{j:02d}'
    for i in range(1, 13):
        # NOAA API requests limited to 5 per second; I'm
        # just being cautious here
        sleep(1)

        YYYY_MM = f'{YYYY}-{i:02d}'
        r = get_month_snowfall(YYYY_MM)
        print(f'Year {j} Month {i}: {r.status_code}')

        if r.status_code != 200:
            # First wait a bit and then try again (once)
            print(f'Retrying {YYYY_MM}...')
            sleep(RETRY_WAIT_SECONDS)
            r = get_month_snowfall(YYYY_MM)
            print(f'Year {j} Month {i}: {r.status_code}')

        if r.status_code != 200:
            # Second time it fails, add NaNs. Assigning the whole column
            # creates it even while master_df has no stations yet.
            master_df[YYYY_MM] = np.nan
            print(f'Inserting NaN column to master_df for {YYYY_MM}')
            failed_months += 1
            continue

        snowfall = _snowfall_by_station(r.json(), YYYY_MM)
        if snowfall.empty:
            print(f'No snowfall records returned for {YYYY_MM}')

        # Append stations seen for the first time (keeping the order in
        # which they appear); their earlier months become NaN.
        new_stations = snowfall.index[~snowfall.index.isin(master_df.index)]
        if len(new_stations) > 0:
            master_df = master_df.reindex(
                index=master_df.index.append(new_stations)
            )

        # Index-aligned assignment: stations without a value this month
        # get NaN.
        master_df[YYYY_MM] = snowfall

    

In [None]:
# Report how many months still failed after the single retry
print(f'There were {failed_months} failed months.')

In [None]:
# Preview the first 10 rows of the final DataFrame
# (one row per station, one column per 'YYYY-MM' month)
master_df.head(10)

In [None]:
# uncomment to save as CSV file
#master_df.to_csv('../data/1980-1999_snowfall.csv')