In [None]:
import MesoPy
import json
import pandas as pd
import numpy as np
import missingno as msno
import os
import matplotlib.pyplot as plt
from dateutil import parser

In [None]:
# Load IPython's autoreload extension. Mode 1 reloads only the modules that
# have been registered with %aimport, each time before code is executed.
%load_ext autoreload
%autoreload 1

In [None]:
# Import the project module station_pipeline and mark it for autoreload.
%aimport station_pipeline

In [None]:
# Read the raw Irwin export. File rows 0-5 and row 7 are skipped, which leaves
# row 6 as the header row; the listed spellings are all parsed as missing.
df = pd.read_csv(
    'raw_data/Irwin_WY20.csv',
    skiprows=[*range(6), 7],
    na_values=['NAN', 'NA', 'N/A', 'NaN'],
)

df.info()

In [None]:
# Swap the time-zone abbreviations in 'Date_Time' for explicit UTC offsets.
# Abbreviations are NOT unique (across countries) and therefore cannot be
# inferred by pandas.
for tz_abbrev, utc_offset in (('MDT', 'UTC-6'), ('MST', 'UTC-7')):
    df['Date_Time'] = df['Date_Time'].str.replace(tz_abbrev, utc_offset)

In [None]:
# Columns 0-9 form the sub-table whose timestamps include DST changes; all the
# other sub-tables are UTC-7.
# Best practice is to keep frames tz-aware, but some functions may cause
# problems with that, so the index ends up as tz-naive local time (trade-off).
dfir = df.iloc[:, 0:10]
# parse as tz-aware UTC so the per-row offsets are taken into account
stamps_utc = pd.to_datetime(dfir['Date_Time'], utc=True)
dfir.index = stamps_utc
# express the instants as Mountain local time (handles the DST switches)
dfir.index = dfir.index.tz_convert('America/Denver')
# strip the timezone -> naive local time (now matching the other frames),
# then keep only rows from the start of the water year onward
dfir = dfir.tz_localize(None).loc['2019-10-01':]
dfir.info()

In [None]:
# Precipitation sub-table: columns 11-13 of the raw frame.
dfp = df.iloc[:, 11:14]
# 'Unnamed: 11' and 'Unnamed: 12' are joined with a space and parsed as one
# timestamp (presumably a date column and a time column - confirm in the CSV).
precip_stamp_text = dfp['Unnamed: 11'] + ' ' + dfp['Unnamed: 12']
dfp.index = pd.to_datetime(precip_stamp_text)
dfp.info()

In [None]:
# Radiation sub-table: columns 15-19 of the raw frame, indexed by the
# timestamp parsed from 'Unnamed: 15'.
dfr = df.iloc[:, 15:20]
dfr.index = pd.to_datetime(dfr.loc[:, 'Unnamed: 15'])
dfr.info()

## Inspect each DataFrame: see what is missing and where values need to be interpolated

In [None]:
# Drop fully-empty rows, keep the first row for any repeated timestamp, then
# recast to a regular hourly series (missing hours become NaN rows).
# Note that snobal (IPW) expects serially complete data.
dfir = (
    dfir
    .dropna(how='all')
    .loc[lambda frame: ~frame.index.duplicated()]
    .asfreq('1H')
)
msno.matrix(dfir, freq='M')

In [None]:
# Drop fully-empty rows.
dfp = dfp.dropna(how='all')
# De-duplicate on the timestamp index (keeping the first row), the same way
# dfir is handled. drop_duplicates() compares row *values*, so two rows with
# the same timestamp but different readings would both survive and asfreq()
# would then raise on the duplicate index labels.
dfp = dfp[~dfp.index.duplicated(keep='first')]
# recast to a regular hourly series; missing hours become NaN rows
dfp = dfp.asfreq('1H')
msno.matrix(dfp, freq='M')

In [None]:
# Drop fully-empty rows.
dfr = dfr.dropna(how='all')
# De-duplicate on the timestamp index (keeping the first row), the same way
# dfir is handled. drop_duplicates() compares row *values*, so two rows with
# the same timestamp but different readings would both survive and asfreq()
# would then raise on the duplicate index labels.
dfr = dfr[~dfr.index.duplicated(keep='first')]
# keep only the incoming / outgoing solar columns
dfr = dfr[['Incoming) Solar_Wm2_1_Avg','Outgoing_Solar_Wm2_3_Avg']]
# coerce numeric, possibly strings from Excel file...
dfr = dfr.apply(pd.to_numeric)
# recast to a regular hourly series; missing hours become NaN rows
dfr = dfr.asfreq('1H')
msno.matrix(dfr, freq='M')

# Processing

In [None]:
# Column dtypes and non-null counts of the precipitation frame after cleaning.
dfp.info()

In [None]:
# Column dtypes and non-null counts of the dfir frame after cleaning.
dfir.info()

In [None]:
# Column dtypes and non-null counts of the radiation frame after cleaning.
dfr.info()

In [None]:
# Fill gaps by time-weighted interpolation down the rows.
# No `limit` is passed, so gaps of any length are filled, not only small ones.
# Note that non-float columns are excluded.
dfir = dfir.interpolate(method='time', axis=0)

In [None]:
# Missing-data matrix for dfir after interpolation (freq='M' for the time axis).
msno.matrix(dfir, freq='M')

In [None]:
# Net solar = incoming minus outgoing. The result is aligned to dfir on the
# datetime index, so timestamps that dfr lacks come through as NaN.
dfir['net_solar'] = dfr['Incoming) Solar_Wm2_1_Avg'] - dfr['Outgoing_Solar_Wm2_3_Avg']

In [None]:
# Plot incoming and outgoing solar radiation together for a visual check.
dfr[['Incoming) Solar_Wm2_1_Avg','Outgoing_Solar_Wm2_3_Avg']].plot(figsize=(20,15))

In [None]:
# Delegate to the project helper, telling it which column holds air temperature.
# NOTE(review): presumably this adds the 'fraction' and 'density' columns that
# are read from dfir later in the notebook - confirm in station_pipeline.
dfir = station_pipeline.snow_density_fraction(df=dfir, 
                                              air_t_col='air_temp_set_1')

In [None]:
# Truncate both frames at the end of the radiation record (through 2020-06-30).
dfir = dfir.loc[:'20200630']
dfp = dfp.loc[:'20200630']

In [None]:
# Missing-data matrix for dfir after truncating to the end date.
msno.matrix(dfir, freq='M')

In [None]:
# Air temperature shifted by 273.15 (deg C -> K, assuming 'air_temp_set_1'
# is in deg C - confirm). Vectorised addition replaces the per-element
# apply(lambda ...), and the NaN pre-assignment is dropped because the column
# was overwritten immediately anyway.
dfir['air_t_K'] = dfir['air_temp_set_1'] + 273.15

In [None]:
# Estimate incoming longwave row by row with the project helper, which is
# called with (relative humidity, net solar, air temperature in K).
lw_list = []
for rel_hum, sw_net, temp_k in zip(dfir['relative_humidity_set_1'],
                                   dfir['net_solar'],
                                   dfir['air_t_K']):
    lw_list.append(station_pipeline.longwave_est_2(rel_hum, sw_net, temp_k))

# Put the results on dfir's index and store column 0 as the estimate.
lwdf = pd.DataFrame(lw_list).set_index(dfir.index)
dfir['lw_in_est'] = lwdf[0].copy()

In [None]:
# Missing-data matrix for dfir now that 'air_t_K' and 'lw_in_est' exist.
msno.matrix(dfir, freq='M')

In [None]:
# Column dtypes and non-null counts of dfir including the derived columns.
dfir.info()

In [None]:
# Plot air temperature (K), estimated incoming longwave and relative humidity
# in one figure for a visual check.
dfir[['air_t_K','lw_in_est', 'relative_humidity_set_1']].plot(figsize=(20,10))

In [None]:
# Delegate to the project helper, passing the dew-point temperature column name.
# NOTE(review): presumably this adds the 'vp' column that is selected from
# dfir later in the notebook - confirm in station_pipeline.
dfir = station_pipeline.vapor_pressure(df=dfir, dt='dew_point_temperature_set_1d')

In [None]:
# Soil temperature is held constant at 0 for every timestep
# (units are not stated here - presumably deg C; confirm).
dfir['soil_temp'] = 0

## Precipitation

In [None]:
# Scale accumulated precipitation by 25.4 (inches -> mm, assuming
# 'Precpip_Accum' is in inches - confirm).
dfp['precip_accum_mm'] = dfp['Precpip_Accum'].mul(25.4)
#dfp['precip_accum_mm'] = dfp['precip_accum_mm'].mask(dfp['precip_accum_mm'] < 0, 0)
# Hourly amount = change in the accumulated total since the previous row.
dfp['precip_hourly_mm'] = dfp['precip_accum_mm'].diff()

In [None]:
# diff() has no previous row for the first timestep, so it is set to 0 here.
# The write goes through a single dfp.loc[row, column] call: the chained form
# dfp[column].loc[row] = ... assigns into an intermediate Series and is not
# guaranteed to update dfp (under pandas Copy-on-Write it never does).
dfp.loc['2019-10-01 00:00:00', 'precip_hourly_mm'] = 0

dfp['precip_hourly_mm']

In [None]:
# Plot accumulated precipitation (mm) for the whole record; the commented-out
# line is the same plot restricted to October 2019.
#dfp['precip_accum_mm'].loc['2019-10'].plot(figsize=(20,10))
dfp['precip_accum_mm'].plot(figsize=(20,10))

In [None]:
# Negative hourly amounts (drops in the accumulated total) are replaced by 0;
# NaN values are left untouched.
dfp['precip_hourly_mm'] = dfp['precip_hourly_mm'].mask(lambda hourly: hourly < 0, 0)
dfp['precip_hourly_mm'].plot(figsize=(20, 10))

In [None]:
# Copy the per-timestep columns from dfir into dfp (aligned on the index):
# (column name in dfp, source column in dfir)
for ppt_col, met_col in (('fraction', 'fraction'),
                         ('density', 'density'),
                         ('p_temp', 'air_temp_set_1')):
    dfp[ppt_col] = dfir[met_col]

In [None]:
# Sequential 0-based row counter for the precipitation frame.
dfp['iter'] = range(dfp.shape[0])

In [None]:
# Bound net solar to [0, 800]: 800 w/m^2 is the snobal upper bound (IPW),
# and negative values are replaced by 0. NaN values are left untouched.
dfir['net_solar'] = (
    dfir['net_solar']
    .mask(lambda sw: sw > 800, 800)
    .mask(lambda sw: sw < 0, 0)
)

In [None]:
# Apply a lower bound of 0.15 to wind speed to prevent a possible snobal error:
# every value below 0.15 is raised to 0.15 (NaN values are left untouched).
# The replacement value used to be 0, which contradicted the stated lower
# bound and introduced exactly the near-zero values it is meant to avoid.
dfir['wind_speed_set_1'] = dfir['wind_speed_set_1'].mask(dfir['wind_speed_set_1'] < 0.15, 0.15)

In [None]:
# Independent copy of the precipitation columns, in the order they are
# written to the precipitation input file.
dfp_in = dfp.loc[:, ['iter', 'precip_hourly_mm', 'fraction', 'density', 'p_temp']].copy()

In [None]:
# Fill remaining gaps in the precipitation inputs by time-weighted
# interpolation down the rows (no `limit`, so gaps of any length are filled).
dfp_in = dfp_in.interpolate(method='time', axis='index')

In [None]:
# Independent copy of the forcing columns, in the order they are written to
# the data input file.
dfdat = dfir.loc[:, ['net_solar', 'lw_in_est', 'air_temp_set_1',
                     'vp', 'wind_speed_set_1', 'soil_temp']].copy()

In [None]:
# Fill remaining gaps in the forcing data by time-weighted interpolation down
# the rows (no `limit`, so gaps of any length are filled).
dfdat = dfdat.interpolate(method='time', axis=0)

In [None]:
# One subplot per forcing column, for a visual check before writing the file.
dfdat.plot(subplots=True, figsize=(20,15))

In [None]:
# Missing-data matrix for the forcing frame after interpolation.
msno.matrix(dfdat, freq='M')

In [None]:
# Round every forcing value to 3 decimals before writing.
dfdat = dfdat.round(3)

# Create the output directory if it is missing, so the notebook also runs on a
# fresh checkout instead of failing in to_csv.
os.makedirs('ipw_inputs', exist_ok=True)

# Space-separated values only: no header row and no index column.
dfdat.to_csv('ipw_inputs/snobal.data.input', 
              index=False,
              header=False,
              sep=' ',)

In [None]:
# One subplot per precipitation input column, for a visual check.
dfp_in.plot(subplots=True, figsize=(20,15))

In [None]:
# Missing-data matrix for the precipitation inputs after interpolation.
msno.matrix(dfp_in, freq='M')

In [None]:
# Column dtypes and non-null counts of the precipitation inputs.
dfp_in.info()

In [None]:
# Column dtypes and non-null counts of the forcing data.
dfdat.info()

In [None]:
# Optionally round the floats first:
#dfp_in = dfp_in.round(3)

# Write the precipitation inputs as space-separated values, with no header row
# and no index column.
dfp_in.to_csv('ipw_inputs/snobal.ppt.input', sep=' ', header=False, index=False)

## Write other input files

In [None]:
# Write the one-line snow properties file: six space-separated zeros.
with open('ipw_inputs/snow.properties.input', 'w') as snow_props_file:
    snow_props_file.write('0 0 0 0 0 0')

In [None]:
# Write the one-line measurement-heights file (five space-separated fields).
with open('ipw_inputs/inheight.input', 'w') as inheight_file:
    inheight_file.write('0 3 3 0.001 0.15')

## IPW CLI

In [None]:
# Sequential 0-based row counter for the forcing frame.
dfdat['iter'] = range(dfdat.shape[0])

In [None]:
# Pass the forcing frame to the project's html_chart helper
# (what it renders is defined in station_pipeline).
station_pipeline.html_chart(dfdat)

In [None]:
# Pass the precipitation inputs to the project's html_chart helper
# (what it renders is defined in station_pipeline).
station_pipeline.html_chart(dfp_in)