In [72]:
import pandas as pd
import numpy as np

In [73]:
# Export window (first and last day written to the output file).
# Data currently runs '1892-04-08' to '2015-06-30'.
EXPORT_WINDOW = ('2015-01-01', '2015-06-30')

# Parse both bounds into pandas timestamps.
start_date, end_date = (pd.to_datetime(bound) for bound in EXPORT_WINDOW)

In [75]:
# Load the historical weather observations.
# The DATE column is parsed as dates and becomes the index; the literal
# values -9999 and 9999 are read as missing (NaN).
df = pd.read_csv(
    'BZN-weather-data_4.1895-6.2015.csv',
    index_col='DATE',
    parse_dates=True,
    na_values=['-9999','9999'],
)

In [76]:
# Keep only the precipitation and temperature columns. The explicit .copy()
# makes df an independent frame, so the column assignments below cannot
# trigger pandas' SettingWithCopyWarning.
# NOTE: run this cell on the full history; once df has been cut down to the
# export window, re-running it would compute the records over that window only.
df = df[['PRCP','TMAX','TMIN']].copy()

# Calendar keys: the same (month, day) pair groups that date across all years.
df['day'] = df.index.day
df['month'] = df.index.month

# Record high and average high for each calendar day, merged back onto every
# row of df that shares that (month, day). Aggregations are given by name:
# passing the builtins max/min or np.mean to aggregate() is deprecated in
# recent pandas releases.
record_max = df.groupby(['month','day'])['TMAX'].aggregate(['max', 'mean'])
record_max.columns = ['RECHIGH', 'AVGHIGH']
df = pd.merge(df, record_max, left_on=['month','day'], right_index=True, how='left').sort_index()

# Record low and average low for each calendar day, merged back the same way.
record_min = df.groupby(['month','day'])['TMIN'].aggregate(['min', 'mean'])
record_min.columns = ['RECLOW', 'AVGLOW']
df = pd.merge(df, record_min, left_on=['month','day'], right_index=True, how='left').sort_index()

# Preview the first rows of the enriched frame.
df.head()

Unnamed: 0_level_0,PRCP,TMAX,TMIN,day,month,RECHIGH,AVGHIGH,RECLOW,AVGLOW
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1892-04-08,0,78,-150,8,4,239,109.105691,-172,-22.284553
1892-04-09,0,128,6,9,4,250,111.98374,-128,-17.00813
1892-04-10,0,100,6,10,4,244,107.577236,-150,-15.528455
1892-04-11,0,89,-6,11,4,239,113.447154,-117,-13.252033
1892-04-12,0,94,-28,12,4,244,121.804878,-156,-15.674797


In [77]:
# Restrict df to the export window. Label-based slicing on the date index
# keeps both start_date and end_date.
df = df.loc[slice(start_date, end_date)]

In [78]:
# Load the snowpack data.
#
# Snow water equivalent data since ~1994, snow depth data since 6-20-2002.
# Snow water equivalent is defined here:
# http://www.nrcs.usda.gov/wps/portal/nrcs/detail/or/snow/?cid=nrcs142p2_046155
#
# The first 7 lines of the file are skipped (presumably a metadata preamble
# ahead of the header row -- confirm against the file); the Date column is
# parsed as dates and used as the index.
dfSnow = (
    pd.read_csv('bracket-creek_11-22-15.csv', skiprows=7,
                index_col='Date', parse_dates=True)
    .rename(columns={'Snow Depth (in)':'SNWDEP'})
)

# Keep only the snow depth column and discard dates with no reading.
dfSnow = dfSnow[['SNWDEP']].dropna()

In [79]:
# Preview the first five rows of the date-filtered weather frame.
df.head(n=5)

Unnamed: 0_level_0,PRCP,TMAX,TMIN,day,month,RECHIGH,AVGHIGH,RECLOW,AVGLOW
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-01-01,0,-94,-206,1,1,139,-16.92562,-333,-120.793388
2015-01-02,0,-39,-172,2,1,117,-6.809917,-311,-120.355372
2015-01-03,51,-28,-150,3,1,117,-5.454545,-311,-114.727273
2015-01-04,10,-128,-172,4,1,111,-4.123967,-317,-113.933884
2015-01-05,8,44,-156,5,1,106,-0.429752,-339,-111.041322


In [80]:
# Add the SNWDEP column to df by matching on the date index. The left join
# keeps every weather row; dates without a snow-depth reading get NaN.
#
# Any SNWDEP column left over from an earlier run of this cell is dropped
# first. Without that, re-running the cell would merge onto a frame that
# already has SNWDEP and produce SNWDEP_x / SNWDEP_y columns instead.
df = df.drop('SNWDEP', axis=1, errors='ignore')
df = pd.merge(df, dfSnow, left_index=True, right_index=True, how='left').sort_index()

In [81]:
# Name the output file after the export window and write df into the
# data-processed/ directory (expected to exist already).
filename = "BZN-weather_" + str(start_date.date()) + "_" + str(end_date.date()) + ".csv"
df.to_csv('data-processed/' + filename)

# Function-call form prints the same text on Python 2 and also runs on
# Python 3, where the bare `print filename` statement is a SyntaxError.
print(filename)

BZN-weather_2015-01-01_2015-06-30.csv
