<a href="https://colab.research.google.com/github/bieri2/ATMS597-Project6-GroupB/blob/master/project_6_ghcn_download.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Import necessary modules
import calendar
import datetime
import getpass
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from   matplotlib.pyplot import cm

In [0]:
# Define function to download data from NCEI
# Function adapted from Stefanie Molin: https://github.com/stefmolin/Hands-On-Data-Analysis-with-Pandas/

def make_request(endpoint, token, payload=None, timeout=30):
    """
    Make a GET request to a specific endpoint on the NCDC Web API,
    passing the access token as a header and an optional payload as
    query parameters.
    
    Parameters:
        - endpoint : The endpoint of the API you want to 
                    make a GET request to (string).
        - token : User-specific access token from NCEI (string)
        - payload : A dictionary of data to pass along 
                   with the request.
        - timeout : Seconds to wait for the server before giving up
                   (number, or any value accepted by requests' `timeout`
                   argument). Defaults to 30.
    
    Returns:
        Response object.
    """
    return requests.get(
        f'https://www.ncdc.noaa.gov/cdo-web/api/v2/{endpoint}',
        headers={
            'token': token
        },
        params=payload,
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would freeze the whole download loop.
        timeout=timeout
    )

In [0]:
def get_ghcn(stationid, token, start, end):
    """
    Download daily precipitation (PRCP) records for one station from the
    Global Historical Climatology Network - Daily (GHCND) dataset,
    issuing one request per calendar year.
    
    Parameters:
        - stationid (string) : Station ID.
        - token (string) : User-specific access token from NCEI.
        - start (integer) : First year to download (inclusive).
        - end (integer) : Year at which to stop (exclusive); the last
                  year requested is end - 1.
    
    Returns:
        List of the 'results' records from every successful response, in
        year order. Years whose request failed or returned no 'results'
        key contribute nothing; a message is printed for each.
    """ 
    
    # Accumulates the records from all years in one flat list
    results = []
    for year in range(start, end):
        print('fetching data for ' + str(year))
        response = make_request(
            'data', token,
            {
                'datasetid' : 'GHCND', # Global Historical Climatology Network - Daily (GHCND) dataset
                'stationid' : stationid, # Input station ID
                'startdate' : datetime.date(year, 1, 1), # Input start date as Datetime object
                'enddate'   : datetime.date(year, 12, 31), # Input end date as Datetime object
                'datatypeid': ['PRCP'], # Input requested variables
                'units'     : 'metric',
                'limit'     : 1000
            }
        )

        if not response.ok:
            # Report failed requests instead of dropping the year silently,
            # so gaps in the downloaded record can be traced to their cause.
            print('Request failed for year: ' + str(year)
                  + ' (HTTP status ' + str(response.status_code) + ')')
            continue

        # we extend the list instead of appending to avoid getting a nested list
        try:
            results.extend(response.json()['results'])
        except KeyError:
            print('No results for this year: ' + str(year))

    return results

In [0]:
def make_dataframe(data):
    '''
    Convert a list of downloaded records into a DataFrame indexed by date.

    Parameters:
        - data (list) : A list of dictionaries, each with a 'date' entry that
                 pandas can parse as a datetime (for example the list returned
                 by get_ghcn).

    Returns: 
        - df (DataFrame) : A pandas DataFrame whose index is the parsed 'date'
                 values and whose columns are the remaining record fields.
                 Completely empty input gives an empty DataFrame with an empty
                 DatetimeIndex named 'date'.
    '''
    ## Create data-frame with all data:
    df = pd.DataFrame(data)

    ## No records at all (e.g. every request failed): there is no 'date' column
    ## to index on, so return an empty frame instead of raising KeyError
    if df.shape == (0, 0):
        return pd.DataFrame(index=pd.DatetimeIndex([], name='date'))

    ## Set date column as index
    df = df.set_index(pd.to_datetime(df['date'])).drop(columns = 'date')

    return df

In [0]:
## Execute code below if this is the main script
if __name__ == '__main__':

    ## Read the user-specific NCEI token from the NCEI_TOKEN environment
    ## variable, or prompt for it, so the credential is never stored in the
    ## notebook. Any token previously committed here should be revoked.
    token     = os.environ.get('NCEI_TOKEN') or getpass.getpass('NCEI API token: ')

    ## Define start and end years (get_ghcn stops before the end year)
    starttime = 1980
    endtime   = 2020

    ## Define station ID
    ## NOTE(review): the original comment said "Station in Uruguay", but the
    ## 'AR' prefix looks like the GHCN country code for Argentina -- confirm
    ## the station location.
    stationid = 'GHCND:AR000877500'
    ## Read in data from GHCN
    data      = get_ghcn(stationid, token, starttime, endtime)
    ## Make properly formatted DataFrame
    df        = make_dataframe(data)

fetching data for 1980
fetching data for 1981
fetching data for 1982
fetching data for 1983
fetching data for 1984
fetching data for 1985
fetching data for 1986
fetching data for 1987
fetching data for 1988
fetching data for 1989
fetching data for 1990
fetching data for 1991
fetching data for 1992
fetching data for 1993
fetching data for 1994
fetching data for 1995
fetching data for 1996
fetching data for 1997
fetching data for 1998
fetching data for 1999
fetching data for 2000
fetching data for 2001
fetching data for 2002
fetching data for 2003
fetching data for 2004
fetching data for 2005
fetching data for 2006
fetching data for 2007
fetching data for 2008
fetching data for 2009
fetching data for 2010
fetching data for 2011
fetching data for 2012
fetching data for 2013
fetching data for 2014
fetching data for 2015
fetching data for 2016
fetching data for 2017
fetching data for 2018
fetching data for 2019


In [0]:
# Save the date-indexed DataFrame as a CSV file.
# NOTE(review): the path presumably requires Google Drive to be mounted at
# /content/drive; no mount step is visible in this notebook -- confirm.
df.to_csv(path_or_buf='/content/drive/My Drive/Project6_Data/ghcn_data.csv')