Figure out how to get the data from all the snotel sites into one dataframe

Thinking averate? Or median?

In [1]:
import pandas as pd
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
from os import listdir
from os.path import isfile, join

# Get all the seasonal snotel reports
# could just use Stations Selected, but individual files might be easier

path = '../content/csv/snotel/west_slopes_south/' # TODO: your sub-area name here
def get_seasonal_snotel_files():
    files = [path + f for f in listdir(path) if isfile(join(path, f))]
    return files

# TODO: run this block and make sure you see all the files you just added below
get_seasonal_snotel_files()

['../content/csv/snotel/west_slopes_south/1068_SawmillRidge.csv',
 '../content/csv/snotel/west_slopes_south/702_PotatoHill.csv',
 '../content/csv/snotel/west_slopes_south/642_MorseLake.csv',
 '../content/csv/snotel/west_slopes_south/418_CoralPass.csv']

Load them all into dataframes

In [2]:
def get_seasonal_snotel_dataframes():
    dfs = [pd.read_csv(f, comment='#') for f in get_seasonal_snotel_files()]
    # Rename columns
    for df in dfs:
        air_temp_max_col = [col for col in df.columns if col.endswith('Air Temperature Maximum (degF)')][0]
        delta_snow_depth_col = [col for col in df.columns if col.endswith('Change In Snow Depth (in) Start of Day Values')][0]
        delta_swe_col = [col for col in df.columns if col.endswith('Change In Snow Water Equivalent (in) Start of Day Values')][0]
        delta_temp_min_col = [col for col in df.columns if col.endswith('Change In Air Temperature Minimum (degF)')][0]
        df.rename(columns={
            air_temp_max_col: 'Max Air Temp 24hr', 
            delta_snow_depth_col: 'Total Snowfall 24hr',
            delta_swe_col: 'Delta SWE 24hr',
            delta_temp_min_col: 'Min Temp Diff 48hr'}, inplace=True)
    return dfs


get_seasonal_snotel_dataframes()[0].head()

Unnamed: 0,Date,Max Air Temp 24hr,Total Snowfall 24hr,Delta SWE 24hr,Min Temp Diff 48hr
0,2022-11-01,35.1,0,0.1,-0.2
1,2022-11-02,35.1,0,0.0,0.0
2,2022-11-03,35.1,0,0.1,0.0
3,2022-11-04,35.4,6,1.2,0.2
4,2022-11-05,35.1,-6,-1.3,-0.2


Aggregate with average

In [3]:
def drop_non_numerical(snotel_df):
    """Drop columns that should not be part of the aggregation"""
    return snotel_df.drop(['Station Id', 'Station Name', 'Latitude', 'Longitude'], axis=1)

def set_up_dataframe_for_aggregation(dfs):
    # Make sure all the columns are the same
    assert all(len(dfs[0].columns.intersection(df.columns)) == dfs[0].shape[1] for df in dfs), \
        'All dataframes must have the same columns'   
    # Drop columns not wanted in aggregation
    # dfs = [drop_non_numerical(df) for df in dfs]
    # Set the index of each dataframe to be the date
    dfs = [df.set_index('Date') for df in dfs]
    # Concatenate all the dataframes
    cat = pd.concat(dfs)
    # Group them by their index
    by_index = cat.groupby(cat.index)

    return by_index

def get_average_seasonal_snotel_dataframe(dfs=get_seasonal_snotel_dataframes()):
    dfs = set_up_dataframe_for_aggregation(dfs)
    means = dfs.mean()
    return means

# dfs = get_seasonal_snotel_dataframes()
# dfs[0] = dfs[0].drop('Date', axis=1)
# should fail   
# get_seasonal_snotel_dataframes(dfs)

get_average_seasonal_snotel_dataframe().head()

Unnamed: 0_level_0,Max Air Temp 24hr,Total Snowfall 24hr,Delta SWE 24hr,Min Temp Diff 48hr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-11-01,33.875,0.5,-0.1,-3.775
2022-11-02,33.275,0.75,-0.025,-3.975
2022-11-03,33.125,1.5,0.1,0.05
2022-11-04,40.725,4.5,1.125,7.6
2022-11-05,34.0,-3.75,-0.125,-4.675


Aggregate with median

In [4]:
def get_median_seasonal_snotel_dataframe(dfs=get_seasonal_snotel_dataframes()):
    dfs = set_up_dataframe_for_aggregation(dfs)
    meds = dfs.median()
    return meds

get_median_seasonal_snotel_dataframe().head()

Unnamed: 0_level_0,Max Air Temp 24hr,Total Snowfall 24hr,Delta SWE 24hr,Min Temp Diff 48hr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-11-01,34.0,0.5,-0.05,-4.85
2022-11-02,34.25,0.5,0.0,-4.25
2022-11-03,32.8,1.0,0.1,0.1
2022-11-04,42.1,5.0,1.25,7.75
2022-11-05,34.2,-4.5,0.1,-5.1


Pop these dataframes into a file

Sorry Victor, probably could have just used report generator for this

In [5]:
means = get_average_seasonal_snotel_dataframe()
means.to_csv(path + 'seasonal_mean.csv', sep=',')

medians = get_median_seasonal_snotel_dataframe()
medians.to_csv(path + 'seasonal_median.csv', sep=',')