## Concatenate weather data obtained using NOAA API

- missing data in the EVAP dataset is filled by taking an average of the other years
- return an aggregated dataset with hierarchical indexes

In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

In [2]:
cwd = os.getcwd()
datadir = '/'.join(cwd.split('/')[0:-1]) + '/data/'

combined_state_names = ['Alaska', 'Connecticut', 'Delaware', 'Maryland', 'Massachusetts', 
                   'Nevada', 'New Hampshire', 'New Mexico', 'Oklahoma', 'Rhode Island']
states = pd.read_csv(datadir+'external/'+'US_states.csv',index_col=0)
all_states = list(set(states.index) - set(combined_state_names) - {'District of Columbia'})

In [3]:
all_labels = ['TAVG','PRCP','SNOW','DP10','EVAP','AWND']
all_dfs = {}
for i in all_labels:
    all_dfs[i] = pd.read_csv(datadir+'external/'+i+'201501-201809.csv',index_col=0).iloc[:,1:-9]

In [4]:
a = pd.Series()
for i in all_dfs:
    dframe_i = all_dfs[i].groupby(pd.PeriodIndex(all_dfs[i].columns, freq='Y'), axis=1).mean()
    if i == 'EVAP': #filling NaNs in EVAP with yearly average
        dframe_i = dframe_i.T.fillna(dframe_i.mean(axis=1)).T
    combined_states = dframe_i.loc[combined_state_names,:].mean()
    dframe_i_processed = dframe_i.loc[list(all_states),:].copy()
    dframe_i_processed.loc['Other States',:] = combined_states
    concated = pd.concat([a,dframe_i_processed.stack().rename(i)],axis=1)
    a = concated
concated = concated.iloc[:,1:]

In [5]:
concated.index.levels

FrozenList([['Oregon', 'California', 'Illinois', 'Montana', 'Michigan', 'Wisconsin', 'Florida', 'Hawaii', 'Missouri', 'Mississippi', 'Indiana', 'New Jersey', 'Arizona', 'Colorado', 'West Virginia', 'Minnesota', 'Louisiana', 'Vermont', 'Kansas', 'Kentucky', 'Arkansas', 'Iowa', 'Maine', 'Tennessee', 'North Carolina', 'New York', 'Wyoming', 'North Dakota', 'Pennsylvania', 'Alabama', 'Ohio', 'Texas', 'Nebraska', 'South Carolina', 'Virginia', 'South Dakota', 'Washington', 'Utah', 'Idaho', 'Georgia', 'Other States'], [2015, 2016, 2017]])

In [6]:
concated.to_csv(datadir+'interim/'+'weather_2015-2017.csv')

In [8]:
concated.describe()

Unnamed: 0,TAVG,PRCP,SNOW,DP10,EVAP,AWND
count,123.0,123.0,123.0,123.0,123.0,123.0
mean,54.249609,3.393369,2.367241,5.970446,6.752498,7.51123
std,8.614845,1.185794,2.173441,1.460268,6.97618,1.780203
min,41.650725,0.878576,0.0,2.050376,2.050376,4.865138
25%,46.822516,2.61106,0.531528,5.04714,4.723922,6.08548
50%,53.175829,3.509808,1.992233,6.245793,6.083248,7.383034
75%,60.137129,4.122886,3.329974,6.976418,6.966594,8.704885
max,74.697925,6.008707,9.810074,8.930555,72.81,13.659859


In [9]:
concated

Unnamed: 0,Unnamed: 1,TAVG,PRCP,SNOW,DP10,EVAP,AWND
Oregon,2015,50.685073,3.538181,0.979934,6.805906,7.977238,5.253983
Oregon,2016,49.058756,4.176896,1.611711,8.836773,8.018797,5.633025
Oregon,2017,48.252982,4.325047,2.630246,8.731434,8.731434,5.621605
California,2015,59.962052,1.268527,0.780166,2.383932,6.381611,6.058473
California,2016,59.178019,2.302194,1.094336,3.537553,2.630887,6.387527
California,2017,59.367843,2.618533,2.387031,3.593648,3.593648,6.443608
Illinois,2015,52.180614,3.843437,2.895723,6.362248,2.923000,8.699537
Illinois,2016,54.122068,3.262236,1.581601,5.893576,6.399013,8.466667
Illinois,2017,53.577671,3.254214,0.797189,5.560487,5.560487,8.820370
Montana,2015,44.432807,1.522242,3.796998,4.898928,6.453929,8.324756
