# LVL2 Script

In [1]:
#import external libraries
import pandas as pd
import os
#pd.options.mode.chained_assignment = None  # default='warn'; chained index warning
import numpy as np
import imp
import datetime
import pytz

#plotting
%matplotlib notebook

#import self-written libraries
import LVL1

In [2]:
#Data Import Tasks: everything a user normally edits before running the notebook

#Site selection
Glacier = "Gulkana"   #glacier name, e.g. "Gulkana" or "Wolverine"
Station = "1725"      #station elevation label; must match the naming convention of the input files
timezone = 'America/Anchorage'  #choose from pytz.all_timezones

#Pieces of the level-1 input file name
file_label = '_15min'
yr = 'all'  #either "all" or the year you want

#Root folder holding all data for the selected glacier
glacier_data_root = r"Q:/Project Data/GlacierData/Benchmark_Program/Data/" + Glacier

#Full path to the level-1 CSV: <root>/AllYears/Wx/LVL1/emily/<glacier><station><label><yr>.csv
lvl1_file_name = Glacier.lower() + Station + file_label + yr + ".csv"
pth = glacier_data_root + r"/AllYears/Wx/LVL1/emily/" + lvl1_file_name

#Name of the column holding local timestamps, and the format those timestamps are written in
Local_time_column_name = 'Local_time'
date_format = '%Y/%m/%d %H:%M'

#Directory where level-2 output files are written
save_dir = glacier_data_root + r"/AllYears/Wx/LVL2/emily/"

In [3]:
#Load the level-1 CSV into a DataFrame, then echo the path so the
#notebook output records which file was read
dat = pd.read_csv(filepath_or_buffer=pth)
print(pth)

Q:/Project Data/GlacierData/Benchmark_Program/Data/Gulkana/AllYears/Wx/LVL1/emily/gulkana1725_15minall.csv


In [4]:
#Parse the local-time strings (using the configured format) into timestamps,
#store them in a new 'local_time' column, and make that column the row index
parsed_local_time = pd.to_datetime(dat[Local_time_column_name], format=date_format)
dat = dat.assign(local_time=parsed_local_time).set_index('local_time')

In [5]:
#Names of the input columns, grouped by how they are processed later:
#temperature, precipitation, data that can simply be averaged, and wind direction

#Temperature sensors
temp_columns = ['Tpassive1', 'Tpassive2', 'TAspirated1', 'TAspirated2']

#The primary sensor is the first aspirated one, except at the sites listed
#here, where the first passive sensor is used instead
passive_primary_sites = ('Gulkana1920', 'Wolverine1420')
primary_temp_column = 'Tpassive1' if (Glacier + Station) in passive_primary_sites else 'TAspirated1'
#unaspirated_temp_columns=['Tpassive1', 'Tpassive2']

#Cumulative precipitation gauges
precip_columns = ['StageCumulative', 'TPGCumulative']

#Columns aggregated with a plain mean
general_data_columns = [
    'RelHum',
    'WindSpeed',
    'WindGustSpeed',
    'RadiationIn',
    'RadiationOut',
    'SnowDepth',
]

#Wind columns
wind_dir_columns = ['WindDir', 'VecAvgWindDir']
wind_speed_column = ['WindSpeed']

In [6]:
#Derive incremental (per-timestep) precipitation from each cumulative gauge column.
#For every "<Name>Cumulative" column a "<Name>Incremental" column is added to dat,
#and its name is collected in incremental_precip_columns for later aggregation/output.
incremental_precip_columns=[] #names of the incremental precip columns created below
for precip_col in precip_columns:
    col_name=precip_col.split("Cumulative")[0]+"Incremental"
    incremental_precip_columns.append(col_name)

    #difference from the previous row; the first row has no predecessor, so it is NaN
    dat[col_name]=dat[precip_col].diff()

    #Set first value to 0, not NaN, IF the initial cumulative value was also not null.
    #Positional access uses .iloc: the index is datetime-based, so integer lookups via
    #[] are not positional, and DataFrame.ix no longer exists in current pandas.
    if len(dat)>0 and pd.notna(dat[precip_col].iloc[0]):
        dat.iloc[0, dat.columns.get_loc(col_name)]=0

# Fill Gaps - Temperature
#### 15 min data

In [7]:
#Temperature - gap filling for the primary sensor, step 1:
#  linearly interpolate across missing values, filling at most 3 consecutive NaNs
#  NOTE(review): with pandas' default settings the first 3 values of a longer gap
#  appear to be filled as well - confirm this is intended
primary_temp_short_gaps_filled = dat[primary_temp_column].interpolate(method='linear', limit=3)
dat.loc[:, primary_temp_column] = primary_temp_short_gaps_filled

In [8]:
#Step 2 (next cells): values still missing after the short-gap interpolation are
#filled from the other temperature sensors

#Boolean mask marking where the primary temperature is still NaN
primary_temp_null_indx = dat[primary_temp_column].isna()

In [9]:
#Build per-timestep statistics from the non-primary temperature sensors and use
#them to fill the gaps that remain in the primary temperature series.

#Alternate temp columns = every temperature column except the primary one.
#(A list comprehension keeps the original column order; a set difference does not.)
secondary_temp_columns=[c for c in temp_columns if c != primary_temp_column]
passive_temp_columns=[s for s in temp_columns if "passive" in s]

#Subset data to alternate temperature columns (copy, so dat itself is not modified here)
secondary_temp_dat=dat[secondary_temp_columns].copy()
#Row-wise mean of the secondary temp values (NaNs are skipped)
secondary_temp_dat['temp_mean']=secondary_temp_dat[secondary_temp_columns].mean(axis=1)
#Row-wise standard dev. of secondary temp values
secondary_temp_dat['temp_sd']=secondary_temp_dat[secondary_temp_columns].std(axis=1)
#Absolute difference between the primary sensor and the mean of the other sensors.
#Uses 'temp_mean' explicitly: averaging the whole frame here would also average in
#the 'temp_mean' and 'temp_sd' helper columns created just above.
secondary_temp_dat['temp_diff']=(dat[primary_temp_column]-secondary_temp_dat['temp_mean']).abs()
#Row-wise median of secondary temp values
secondary_temp_dat['temp_median']=secondary_temp_dat[secondary_temp_columns].median(axis=1)
#Mean of passive temperature sensors (if a sensor is missing, mean will be NaN)
secondary_temp_dat['passive_average']=dat[passive_temp_columns].mean(axis=1, skipna=False)
#Absolute difference of the first two passive sensors from one another
secondary_temp_dat['passive_difference_between_sensors']=abs(dat[passive_temp_columns[0]]-dat[passive_temp_columns[1]])

#Fill remaining gaps in primary timeseries with the mean of all other sensors
still_missing=dat[primary_temp_column].isnull()
dat.loc[still_missing, primary_temp_column]=secondary_temp_dat.loc[still_missing, 'temp_mean']

#Second round of interpolating small gaps (at most 3 consecutive NaNs filled)
dat[primary_temp_column]=dat[primary_temp_column].interpolate(method='linear', limit=3)

In [10]:
#Where the primary temperature is more than 2 degrees away from the passive-sensor
#mean, while the two passive sensors are within 2 degrees of each other, trust the
#passive sensors: overwrite the primary temperature with the passive mean.

#Primary sensor differs from the passive mean by more than 2 degrees
primary_minus_passive = dat[primary_temp_column] - secondary_temp_dat['passive_average']
asp_more_than_2deg_diff_from_passivemean_idx = primary_minus_passive.abs() > 2

#Passive sensors agree with each other (less than 2 degrees apart)
passive_sensors_agree_with_eachother_2deg_idx = secondary_temp_dat['passive_difference_between_sensors'] < 2

#Rows meeting both conditions get the passive mean
replace_with_passive_idx = asp_more_than_2deg_diff_from_passivemean_idx & passive_sensors_agree_with_eachother_2deg_idx
dat.loc[replace_with_passive_idx, primary_temp_column] = secondary_temp_dat.loc[replace_with_passive_idx, 'passive_average']

In [11]:
#Final outlier strip on the gap-filled primary temperature (this may not be necessary)
#NOTE(review): LVL1.hampel is presumably a Hampel outlier filter with k as its
#window parameter - confirm against the LVL1 module
despiked_primary_temp = LVL1.hampel(dat[primary_temp_column], k=7)
dat[primary_temp_column] = despiked_primary_temp

## Save final 15 minute data

In [12]:
#Columns written to the 15-minute file: primary temperature, incremental precip, general data
out_columns = [primary_temp_column]
out_columns = out_columns + incremental_precip_columns
out_columns = out_columns + general_data_columns

#Output file name (<glacier><station>_15min_LVL2.csv) and full path
save_name = "".join([Glacier.lower(), Station, "_15min_", "LVL2.csv"])
save_pth = os.path.join(save_dir, save_name)

#DataFrame holding only the output columns
save_dat = dat[out_columns]

In [13]:
#Write the 15-minute output columns to CSV: floats in '%g' form, timestamps in the input date format
dat.loc[:, out_columns].to_csv(
    save_pth,
    float_format='%g',
    date_format=date_format,
)

# Hourly Data

In [14]:
#Create Hourly Data - aggregate the 15-minute series in dat into hourly_dat
hourly_dat=pd.DataFrame()

#Temperature: for every sensor store hourly min, max, and two flavours of mean
for temp_col in temp_columns:
    hourly_temp=dat[temp_col].resample('H')
    #WMO mean temp = average of the hourly minimum and maximum
    hourly_dat[temp_col+"_min"]=hourly_temp.min()
    hourly_dat[temp_col+ "_max"]=hourly_temp.max()
    hourly_dat[temp_col+"_WMO"]=hourly_dat[[temp_col+"_min", temp_col+'_max']].mean(axis=1)
    #USGS mean temp = average of all values in the hour
    hourly_dat[temp_col+"_USGS"]=hourly_temp.mean()

#Precipitation: all precip received during this hour (bin labelled by its start).
#min_count=1 keeps an hour with no valid increments as NaN; without it, current
#pandas returns 0 for an all-NaN bin, which would report missing data as "no precip".
for incremental_precip_col in incremental_precip_columns:
    hourly_dat[incremental_precip_col]=dat[incremental_precip_col].resample('H', label='left').sum(min_count=1)

#Other Data Types (that can be aggregated with a simple mean)
for general_data_col in general_data_columns:
    hourly_dat[general_data_col]=dat[general_data_col].resample('H').mean()

#Wind Direction - this process is for data that is logged as vector-averaged
  # for data that is not, use LVL1.vector_average_wind_direction
#Directions cannot be averaged directly (e.g. 350 and 10 degrees), so each one is
#split into cosine/sine components, the components are averaged, and the angle is rebuilt.
#The helper columns 'wind_dir_cos'/'wind_dir_sin' are overwritten on every pass of the loop.
for wd_col in wind_dir_columns:
    #Degrees -> radians -> unit-vector components
    dat['wind_dir_cos']=np.cos(dat[wd_col]*(np.pi/180))
    dat['wind_dir_sin']=np.sin(dat[wd_col]*(np.pi/180))

    #Hourly mean of each component
    hourly_dat['wind_dir_cos']=dat.wind_dir_cos.resample('H').mean()
    hourly_dat['wind_dir_sin']=dat.wind_dir_sin.resample('H').mean()

    #Convert back to 0-360 coordinates (arctan2 returns -180..180 after conversion to degrees)
    hourly_dat[wd_col]=(np.arctan2(hourly_dat.wind_dir_sin, hourly_dat.wind_dir_cos) * 180/np.pi)
    hourly_dat.loc[hourly_dat[wd_col]<0, wd_col]+=360 #add 360 where the hourly direction is less than 0

#### Save Data

In [15]:
#Only the temperature columns derived from the primary sensor are saved
out_temp_columns = [name for name in hourly_dat.columns if primary_temp_column in name]

#Columns to include in the hourly output, in order
out_columns = out_temp_columns + incremental_precip_columns + general_data_columns + wind_dir_columns

#Output file name (<glacier><station>_hourly_LVL2.csv) and full path
save_name = "".join([Glacier.lower(), Station, "_hourly_", "LVL2.csv"])
save_pth = os.path.join(save_dir, save_name)

#Write selected columns; omit the last row (unlikely to be a complete hour at download time)
hourly_out = hourly_dat[out_columns].iloc[:-1]
hourly_out.to_csv(save_pth, float_format='%g')
print(save_pth)

Q:/Project Data/GlacierData/Benchmark_Program/Data/Gulkana/AllYears/Wx/LVL2/emily/gulkana1725_hourly_LVL2.csv


# Daily Data

In [16]:
#Create Daily Data - aggregate the 15-minute series in dat into daily_dat
daily_dat=pd.DataFrame() #create empty dataframe

#Temperature: for every sensor store daily min, max, and two flavours of mean
for temp_col in temp_columns:
    daily_temp=dat[temp_col].resample('D')
    #WMO mean temp = average of the daily minimum and maximum
    daily_dat[temp_col+"_min"]=daily_temp.min()
    daily_dat[temp_col+ "_max"]=daily_temp.max()
    daily_dat[temp_col+"_WMO"]=daily_dat[[temp_col+"_min", temp_col+'_max']].mean(axis=1)
    #USGS mean temp = average of all values in the day
    daily_dat[temp_col+"_USGS"]=daily_temp.mean()

#Precipitation: all precip received during this day (bin labelled by its start, i.e. label on left).
#min_count=1 keeps a day with no valid increments as NaN; without it, current
#pandas returns 0 for an all-NaN bin, which would report missing data as "no precip".
for incremental_precip_col in incremental_precip_columns:
    daily_dat[incremental_precip_col]=dat[incremental_precip_col].resample('D', label='left').sum(min_count=1)

#Other Data Types (that can be aggregated with a simple mean)
for general_data_col in general_data_columns:
    daily_dat[general_data_col]=dat[general_data_col].resample('D').mean()

#Wind Direction - this process is for data that is logged as vector-averaged
  # for data that is not, use vector_average_wind_direction
  # NOTE(review): the hourly cell cites this helper as LVL1.vector_average_wind_direction,
  # while this cell originally said LVL2 - confirm which module provides it
#Directions cannot be averaged directly (e.g. 350 and 10 degrees), so each one is
#split into cosine/sine components, the components are averaged, and the angle is rebuilt.
#The helper columns 'wind_dir_cos'/'wind_dir_sin' are overwritten on every pass of the loop.
for wd_col in wind_dir_columns:
    #Degrees -> radians -> unit-vector components
    dat['wind_dir_cos']=np.cos(dat[wd_col]*(np.pi/180))
    dat['wind_dir_sin']=np.sin(dat[wd_col]*(np.pi/180))

    #Daily mean of each component
    daily_dat['wind_dir_cos']=dat.wind_dir_cos.resample('D').mean()
    daily_dat['wind_dir_sin']=dat.wind_dir_sin.resample('D').mean()

    #Convert back to 0-360 coordinates (arctan2 returns -180..180 after conversion to degrees)
    daily_dat[wd_col]=(np.arctan2(daily_dat.wind_dir_sin, daily_dat.wind_dir_cos) * 180/np.pi)
    daily_dat.loc[daily_dat[wd_col]<0, wd_col]+=360 #add 360 where the daily direction is less than 0

#### Save Data

In [17]:
#Only the temperature columns derived from the primary sensor are saved
out_temp_columns = [name for name in daily_dat.columns if primary_temp_column in name]

#Columns to include in the daily output, in order
out_columns = out_temp_columns + incremental_precip_columns + general_data_columns + wind_dir_columns

#Output file name (<glacier><station>_daily_LVL2.csv) and the location to save it
save_name = "".join([Glacier.lower(), Station, "_daily_", "LVL2.csv"])
save_pth = os.path.join(save_dir, save_name)

In [18]:
#Set number of decimals to save for each type of data; values are stored as strings.
#Each column group has its own entry so the format can be changed per data type.
decimal_formats=[
    (out_temp_columns, "%.1f"),           #temperature: 1 decimal place
    (incremental_precip_columns, "%.1f"), #precip: 1 decimal place
    (general_data_columns, "%.1f"),       #general data cols: 1 decimal place
    (wind_dir_columns, "%.0f"),           #wind direction: no decimal
]

for cols, fmt in decimal_formats:
    for col in cols:
        #float(x) makes this cell safe to re-run: after the first run the values are
        #already strings, and applying a %f format to a str raises a TypeError
        daily_dat[col]=[fmt % float(x) for x in daily_dat[col]]

In [19]:
#Rename columns to standard
# #Generic Temperature Label
# final_names=[x.replace(primary_temp_column, 'Temp') for x in out_columns]
# #Change precip label from Incremental to Precip (preserve type label)
# final_names=[x.replace("Incremental", "_Precip") for x in final_names]
# save_dat.columns=final_names

In [20]:
#Select the output columns and drop the last row (the final day is not complete upon download)
save_dat = daily_dat.loc[:, out_columns].iloc[:-1]

#Write the daily file; the date index is written as YYYY/MM/DD
#NOTE(review): if the decimal-formatting cell has run, these columns hold strings,
#so float_format likely has no effect here - confirm
save_dat.to_csv(
    save_pth,
    float_format='%g',
    date_format='%Y/%m/%d',
)

In [21]:
#Display the path of the most recently written output file
save_pth

'Q:/Project Data/GlacierData/Benchmark_Program/Data/Gulkana/AllYears/Wx/LVL2/emily/gulkana1725_daily_LVL2.csv'

In [23]:
#Preview the last rows of the table that was just saved
save_dat.tail()

Unnamed: 0_level_0,TAspirated1_min,TAspirated1_max,TAspirated1_WMO,TAspirated1_USGS,StageIncremental,TPGIncremental,RelHum,WindSpeed,WindGustSpeed,RadiationIn,RadiationOut,SnowDepth,WindDir,VecAvgWindDir
local_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2017-08-17,-1.5,0.2,-0.7,-0.7,,4.0,,1.1,,86.3,20.3,2.3,,203
2017-08-18,-1.5,0.8,-0.3,-0.5,,10.2,,1.7,,111.0,18.5,2.3,,197
2017-08-19,-1.8,3.0,0.6,0.3,,3.5,,2.0,,146.4,50.8,2.3,,224
2017-08-20,-0.1,4.6,2.2,1.7,,9.1,,4.6,,95.7,24.2,1.9,,221
2017-08-21,3.7,6.0,4.8,4.8,,2.7,,3.9,,141.3,24.2,2.3,,14


KeyError: 1