<a href="https://colab.research.google.com/github/cathyxinchangli/ATMS-597-SP-2020/blob/master/ATMS-597-SP-2020-Project-2/Project2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import numpy as np
import pandas as pd
import datetime

import matplotlib as mlp
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
from matplotlib.colors import ListedColormap
import matplotlib.dates as mdates

%matplotlib inline

In [5]:
def make_request(endpoint, payload=None):
    """
    Make a request to a specific endpoint on the weather API
    passing headers and optional payload.
    
    Parameters:
        - endpoint: The endpoint of the API you want to 
                    make a GET request to.
        - payload: A dictionary of data to pass along 
                   with the request.
    
    Returns:
        Response object.
    ---
    S. Moline, 'Hands on Data Analysis with Pandas'
    https://github.com/stefmolin/Hands-On-Data-Analysis-with-Pandas/blob/master/ch_04/0-weather_data_collection.ipynb
    """
    return requests.get(
        f'https://www.ncdc.noaa.gov/cdo-web/api/v2/{endpoint}',
        headers={
            'token': 'HfZphTTGbXckpmEuqevAXQZFRCTEJgkJ' # obtained by X. Li, 2/4/2020
        },
        params=payload
    )

def fetch_data(locationid, startdate, enddate, endpoint="data",
               datasetid="GHCND", stationid=None, units="metric", limit=1000):
    response = make_request(
        endpoint,
        {
            "datasetid": datasetid,
            "datatypeid": ["TMAX", "TMIN"],
            "locationid": locationid,
            "stationid": stationid,
            "startdate": startdate,
            "enddate": enddate,
            "unit": units,
            "limit": limit,
        })
    response = pd.DataFrame(response.json()["results"])
    return response

def loop_request(locationid, startdate, enddate, endpoint="data",
                 datasetid="GHCND", stationid=None, units="metric", limit=1000):
    if startdate.year == enddate.year:
        return fetch_data(locationid, startdate, enddate, endpoint, datasetid, 
                          stationid, units, limit)
    else:
        enddate_0 = datetime.date(startdate.year, 12, 31)
        df_tmp_0 = fetch_data(locationid, startdate, enddate_0, endpoint, datasetid, 
                            stationid, units, limit)
        for year in range(startdate.year + 1, enddate.year):
            startdate_tmp = datetime.date(year, 1, 1)
            enddate_tmp = datetime.date(year + 1, 1, 1)
            df_tmp = fetch_data(locationid, startdate_tmp, enddate_tmp, endpoint, datasetid, 
                                stationid, units, limit)
            df_tmp_0 = pd.concat([df_tmp_0, df_tmp])
        startdate_1 = datetime.date(enddate.year, 1, 1)
        df_tmp_1 = fetch_data(locationid, startdate_1, enddate, endpoint, datasetid, 
                                stationid, units, limit)
        df_tmp_0 = pd.concat([df_tmp_0, df_tmp_1])
        return df_tmp_0
        
def clean_up(datain, key):
    '''
    S. Nesbitt
    '''
    datain[key][(np.abs(datain[key])==9999)] = np.nan
    datain.dropna(inplace=True)
    datain[key] = datain[key].astype("float") / 10.
    datain["date"] = pd.to_datetime(df["date"], infer_datetime_format=True)
    datain.index = datain["date"]
    return datain
    

In [6]:
df = loop_request(locationid="CITY:US360019", startdate=datetime.date(2015, 12, 1), enddate=datetime.date(2018, 2, 1), stationid="GHCND:USC00280907")
df = clean_up(df, "value")
display(df.describe(), df.head(), df.tail())

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,value
count,1586.0
mean,10.880706
std,11.426793
min,-19.4
25%,2.2
50%,10.6
75%,18.9
max,35.6


Unnamed: 0_level_0,date,datatype,station,attributes,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-12-01,2015-12-01,TMAX,GHCND:USC00280907,",,7,0700",6.1
2015-12-01,2015-12-01,TMIN,GHCND:USC00280907,",,7,0700",-1.7
2015-12-02,2015-12-02,TMAX,GHCND:USC00280907,",,7,0700",8.3
2015-12-02,2015-12-02,TMIN,GHCND:USC00280907,",,7,0700",5.6
2015-12-03,2015-12-03,TMAX,GHCND:USC00280907,",,7,0700",11.1


Unnamed: 0_level_0,date,datatype,station,attributes,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-30,2018-01-30,TMIN,GHCND:USC00280907,",,7,0700",-1.7
2018-01-31,2018-01-31,TMAX,GHCND:USC00280907,",,7,0700",6.7
2018-01-31,2018-01-31,TMIN,GHCND:USC00280907,",,7,0700",-10.0
2018-02-01,2018-02-01,TMAX,GHCND:USC00280907,",,7,0700",5.6
2018-02-01,2018-02-01,TMIN,GHCND:USC00280907,",,7,0700",-8.9


In [None]:
df_mean = df.resample("7D").mean()
display(df_mean, df_mean.describe())
# freq="W" starts with Sunday
df["W_num"] = (df["date"].dt.dayofyear - 1) // 7
df.groupby("W_num").mean()
#df.drop(df["date"].dt.dayofyear > 364, inplace=True)
df.tail(20)
df.index.year

In [None]:
def is_leap_and_29Feb_and_last_day(s):
    return ((s.index.year % 4 == 0) & ((s.index.year % 100 != 0) | (s.index.year % 400 == 0)) & (s.index.month == 2) & (s.index.day == 29)) | ((s.index.month == 12) & (s.index.day == 31))

def cal_anomaly(df, freq="W"):
    mask = is_leap_and_29Feb_and_last_day(df)
    df_W = df[~mask]
    
    df_W = df_W.resample("W").mean()
        
        

In [None]:
def stripe_plot(df, freq="W"):
    df_mean = df.resample(freq).mean()
    if freq == "W":
        date_interval = 7
    
    temp_min = df_mean.loc[:, "value"].min()
    temp_max = df_mean.loc[:, "value"].max()
    buffer = 1.
    
    cmap = ListedColormap([
        '#08306b', '#08519c', '#2171b5', '#4292c6',
        '#6baed6', '#9ecae1', '#c6dbef', '#deebf7',
        '#fee0d2', '#fcbba1', '#fc9272', '#fb6a4a',
        '#ef3b2c', '#cb181d', '#a50f15', '#67000d',
    ])
    fig = plt.figure(figsize=(12, 5))
    ax = fig.add_axes([0.1, 0.12, 0.9, 0.88])
    date_str_list = ['{}'.format(d) for d in df['date'][0:730:2]]
    date_corrected = mdates.datestr2num(date_str_list)

    col = PatchCollection([
        Rectangle((y, min_temp-buffer), 7, temp_delta+2*buffer) # need to change 7 to other values when resampled at other frequencies
        for y in date_corrected.astype(int)[::7]
    ])

In [None]:
[0, 1, 2, 3, 4, 5][::3]