<a href="https://colab.research.google.com/github/bieri2/ATMS597-Project2-GroupA/blob/master/groupa_project2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import requests
import pandas as pd
import calendar
import datetime
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import matplotlib as mpl

In [0]:
# Define function to download data from NCEI
# Function adapted from Stefanie Molin: https://github.com/stefmolin/Hands-On-Data-Analysis-with-Pandas/

def make_request(endpoint, token, payload=None):
    """
    Send a GET request to an endpoint of the NCDC Web API (v2).

    Parameters:
        - endpoint: The API endpoint to query; it is appended to the
                    'cdo-web/api/v2/' base URL (string).
        - token: API access token, sent in the 'token' request header.
        - payload: Optional dictionary passed as the query parameters
                   of the request.

    Returns:
        Response object.
    """
    url = f'https://www.ncdc.noaa.gov/cdo-web/api/v2/{endpoint}'
    auth_headers = {'token': token}
    return requests.get(url, headers=auth_headers, params=payload)

In [0]:
def add_a_month(date):
    """
    Return the date one calendar month after the given one.

    The day of the month is kept when it exists in the following month;
    otherwise it is clamped to that month's last day (e.g. Jan 31 ->
    Feb 28/29).

    Parameters:
        - date : Input date; only its year, month and day are read.

    Returns:
        A datetime.date object one month later.
    """
    # date.month is 1..12, so the quotient is 1 only for December (roll over
    # into the next year) and the remainder is the next month, zero-based.
    year_carry, next_month_index = divmod(date.month, 12)
    next_year = date.year + year_carry
    next_month = next_month_index + 1

    # monthrange() returns (weekday of the 1st, number of days in the month).
    days_in_next_month = calendar.monthrange(next_year, next_month)[1]
    return datetime.date(next_year, next_month,
                         min(date.day, days_in_next_month))

In [0]:
def stripes_inputs(siteid, token, start, end, tunit):
    """
    A Python function that takes in a station ID, API token, start date,
    end date and time unit, and downloads the daily TMAX/TMIN records from
    the Global Historical Climatology Network, one request per time step.

    Each request asks for a single day (startdate == enddate == current
    step); the step is then advanced by the chosen time unit until it
    reaches `end` (exclusive).

    Parameters:
        - siteid : Site ID (string).
        - token : NCDC Web API access token (string).
        - start : Start date (datetime.date object).
        - end : End date (datetime.date object); not included.
        - tunit : time unit (string), may be either 'y' for year, 'm' for
          month, 'w' for week or 'd' for day. Any other value is treated
          as 'd'.

    Returns:
        List of the result records (dictionaries) returned by the API,
        which can then be read as a dataframe. Steps whose request failed
        or returned no records contribute nothing.
    """

    results = []
    while start < end:
        response = make_request(
            'data', token,
            {
                'datasetid' : 'GHCND', # Global Historical Climatology Network - Daily (GHCND) dataset
                'stationid' : f'{siteid}',
                'startdate' : start,
                'enddate'   : start,
                'datatypeid': ['TMAX', 'TMIN'],
                'units'     : 'metric'
            }
        )

        if response.ok:
            # we extend the list instead of appending to avoid getting a nested list;
            # .get() guards against a successful response that carries no
            # 'results' key (presumably a day without observations)
            results.extend(response.json().get('results', []))

        # update the current date to avoid an infinite loop at a given time interval
        if tunit == 'y':
            # NOTE: a fixed 365-day step drifts by one day per leap year crossed
            start += datetime.timedelta(days=365)
        elif tunit == 'm':
            # add_a_month() already returns the *next* date, so assign it
            # (adding two dates together raises TypeError)
            start = add_a_month(start)
        elif tunit == 'w':
            start += datetime.timedelta(weeks=1)
        else:
            start += datetime.timedelta(days=1)
    return results

In [0]:
## Method to plot Climate (Warming) Stripes and/or Time Series (if asked) :
def plot_stripes(Tmax, Tmin, t, station_name, plot_tseries):
    """
    A Python function that takes in max/min data for a given station and
    plots Climate Stripes and/or Time-Series, if user asks for one.

    The mean of Tmax and Tmin is averaged over every distinct value of
    `t`; each stripe is coloured by the anomaly of that average relative
    to the mean over all stripes.

    Parameters:
        - Tmax : Array/List of Maximum Temperature Values.
        - Tmin : Array/List of Minimum Temperature Values (same length
          as Tmax).
        - t : Array/List of years in the record, one per temperature value
          (might have multiple same values if there are monthly/weekly
          sub-data). Years need not be contiguous.
        - station_name : String name of the Station, used as plot title.
        - plot_tseries : Character variable with user choice for either
        plotting or not plotting Time-series line. Either 'y' or 'n'.

    Returns:
        None. The figure (Climate Stripes with/without a time series) is
        displayed with plt.show().
    """

    ## Convert inputs to arrays so that lists are added element-wise
    ## (list + list would concatenate) and can be indexed with masks
    Tmax = np.asarray(Tmax, dtype=float)
    Tmin = np.asarray(Tmin, dtype=float)
    t = np.asarray(t)

    ## call figure and define plot titles
    plt.figure(figsize=(8, 6))
    plt.title(station_name, fontsize=14)

    ## Calculate temporal intervals and temperature anomalies :
    ## group by the years actually present (sorted), so gaps in the record
    ## do not produce empty groups
    years = np.unique(t)
    num_t = len(years)
    AvT = (Tmax + Tmin) / 2
    Tav = np.array([np.average(AvT[t == year]) for year in years])

    ## calculate anomalies
    MeanT = np.nanmean(Tav)
    Tanoms = Tav - MeanT

    ## Store the anomalies as a 2D (square) matrix for the stripes:
    ## every row is a copy of the anomalies, so column i is one flat stripe
    heatmap = np.tile(Tanoms, (num_t, 1))

    ## calculate fraction of maximum T for the time-series, scaled to the
    ## image height so the line is drawn on top of the stripes :
    X = np.arange(num_t)
    points = Tav / np.nanmax(AvT) * num_t

    ## Plot stripes and time-series if necessary :
    plt.imshow(heatmap, origin='lower', cmap='seismic',
               vmin=np.nanmin(Tanoms), vmax=np.nanmax(Tanoms))
    if plot_tseries == 'y':
        plt.plot(X, points, marker='o', color='yellow')
    plt.axis('off') # Suppress the axes

    # NOTE: `start` and `end` are not defined in this function; they must be
    # supplied before the line below can be re-enabled.
    # plt.savefig(station_name+'_ClimateStripes_'+str(start)+'_'+str(end)+'.png', bbox_inches='tight',dpi=400)
    plt.show()

In [0]:
if __name__ == '__main__':
    import os

    ## Read Example Data
    #station_name='Austin'
    #t = np.loadtxt('ClimateStripesData.txt', skiprows=2, usecols=[0])
    #Tmax = np.loadtxt('ClimateStripesData.txt', skiprows=2, usecols=[2])
    #Tmin = np.loadtxt('ClimateStripesData.txt', skiprows=2, usecols=[3])

    starttime = datetime.date(2010, 1, 1)
    endtime   = datetime.date(2010, 1, 5)

    # NOTE(security): an API token should not live in source control. Prefer
    # setting the NCDC_TOKEN environment variable; the literal fallback is
    # kept only so the example keeps running and should be revoked/removed.
    token = os.environ.get('NCDC_TOKEN', 'fqMFYJqPpWnQIARJzXBQhWPbYEtJwpoH')
    data = stripes_inputs('GHCND:USC00281335', token,
                          starttime, endtime, 'd')
    if not data:
        raise RuntimeError('No TMAX/TMIN records were returned for the '
                           'requested station and period.')

    ## Create data-frame:
    ## one row per date with TMAX and TMIN columns. Pivoting on the record's
    ## 'datatype' pairs values by date instead of assuming the records
    ## strictly alternate TMAX, TMIN (a missing observation would otherwise
    ## shift every later value into the wrong column).
    raw = pd.DataFrame(data)
    raw['date'] = pd.to_datetime(raw['date'])
    df = raw.pivot_table(index='date', columns='datatype', values='value')
    df = df.reindex(columns=['TMAX', 'TMIN'])  # NaN column if a type is absent
    df.columns.name = None

    ## Monthly means (labelled by month start); df holds only numeric columns
    df_avg = df.resample('MS').mean()

    #plot_stripes(Tmax = df['TMAX'].values, Tmin = df['TMIN'].values, 
    #             t = df.index.year.values, station_name = 'New York', plot_tseries = 'y')