In [1]:
import pandas as pd

def get_snotel_df():
    df = pd.read_csv('../content/csv/snotel/new_datasets/1012_new_metrics.csv', comment='#')
    df = df.set_index('Date')
    return df

def get_danger_df(area='West Slopes South', drop_no_rating=True):
    df = pd.read_csv('../content/csv/danger_ratings.csv')
    if drop_no_rating:
        df = df[df['Danger Rating'] != 'NO RATING']
    df = df[(df == area).any(axis=1)]
    df['Danger Rating'] = df['Danger Rating'].map({ 'NO RATING': 0, 'LOW': 1, 'MODERATE': 2, 'CONSIDERABLE': 3, 'HIGH': 4, 'EXTREME': 5 })
    try:
        df = df['Date'] = pd.to_datetime(df['Date'], format='%b %D %Y')
    except Exception as err:
        print('Cant format date')

    df = df.set_index('Date')
    return df


def get_training_data():
    snotel_df = get_snotel_df()
    danger_df = get_danger_df()
    
    def init_training_df():
        cols = [ 
            'Max Air Temp 24hr', 
            'Max Air Temp 72hr', 
            'Total Snowfall 24hr', 
            'Total Snowfall 72hr', 
            'Max Windspeed 24hr',
            'Weighted Snowfall 96hr',
            'Min Temp Diff 48hr',
            'Was Heavy Snowfall 24hr',
            'Was High Winds 24hr',
            'Sum Max Temp 72hr',
            'Delta SWE 24hr',
            'Yesterday Danger',
            'Danger Rating'
        ]
        return pd.DataFrame([], columns=cols)
    
    # Set up the correct columns
    df = init_training_df()
    # For every danger rating value
    for idx, row in danger_df.iterrows():
        date = idx
        # Compute the metrics for that date
        
        # Add them to a row and add that row to the dataframe
        df.loc[date] = [0,0,0,0,0,0,0,0,0,0,0,0,0]

    return df

print('SWE: Depth of water that would theoretically result if the entire snowpack were melted instantaneously')
print('SD: Total snow depth')
print('PA: Water year accumulated precipitation')
print('ATO: Instantaneously observed air temperature')

print('SNOTEL datasets to use: 418_new_metrics.csv, 804_new_metrics.csv, 941_new_metrics.csv, 1012_new_metrics.csv')

get_danger_df().tail()
get_training_data().head()

SWE: Depth of water that would theoretically result if the entire snowpack were melted instantaneously
SD: Total snow depth
PA: Water year accumulated precipitation
ATO: Instantaneously observed air temperature
SNOTEL datasets to use: 418_new_metrics.csv, 804_new_metrics.csv, 941_new_metrics.csv, 1012_new_metrics.csv
Cant format date
Cant format date


Unnamed: 0,Max Air Temp 24hr,Max Air Temp 72hr,Total Snowfall 24hr,Total Snowfall 72hr,Max Windspeed 24hr,Weighted Snowfall 96hr,Min Temp Diff 48hr,Was Heavy Snowfall 24hr,Was High Winds 24hr,Sum Max Temp 72hr,Delta SWE 24hr,Yesterday Danger,Danger Rating
Apr 15 2023,0,0,0,0,0,0,0,0,0,0,0,0,0
Apr 14 2023,0,0,0,0,0,0,0,0,0,0,0,0,0
Apr 13 2023,0,0,0,0,0,0,0,0,0,0,0,0,0
Apr 12 2023,0,0,0,0,0,0,0,0,0,0,0,0,0
Apr 11 2023,0,0,0,0,0,0,0,0,0,0,0,0,0


This^ is what we are hoping to achieve

todo: start here

In [2]:
get_danger_df().head()

Cant format date


Unnamed: 0_level_0,Area,Danger Rating
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
Apr 15 2023,West Slopes South,2
Apr 14 2023,West Slopes South,1
Apr 13 2023,West Slopes South,2
Apr 12 2023,West Slopes South,2
Apr 11 2023,West Slopes South,2


Here is an example dataset with the values I think we need. 
1. Calculate additional fields for this dataset (see above)
1. Combine this dataset with the other 3
    - mean
    - median
    - max

In [3]:
get_snotel_df().head()

Unnamed: 0_level_0,Swift Creek (1012) Air Temperature Maximum (degF),Swift Creek (1012) Change In Snow Depth (in) Start of Day Values,Swift Creek (1012) Change In Snow Water Equivalent (in) Start of Day Values,Swift Creek (1012) Change In Air Temperature Minimum (degF)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-11-01,34.3,0,-0.1,-5.1
2022-11-02,34.2,2,0.4,-4.1
2022-11-03,32.0,3,0.4,2.0
2022-11-04,44.2,7,1.1,5.2
2022-11-05,36.7,-8,0.0,-3.8


Calculate additional fields for this dataset:

We currently have: 
- 'Max Air Temp 24hr' 
- 'Total Snowfall 24hr'
- 'Min Temp Diff 48hr'
- 'Delta SWE 24hr'


This dataset can't do wind or danger (yet). But we CAN calculate:
- 'Max Air Temp 72hr'
- 'Total Snowfall 72hr'
- 'Weighted Snowfall 96hr'
- 'Was Heavy Snowfall 24hr'
- 'Sum Max Temp 72hr'

In [65]:
def take_snotel_df_and_calculate_columns(df):
    """Using raw SNOTEL data, calculate the above columns and return them as a dataframe"""

    # Rename the columns to be the same as listed above
    df.columns.values[0] = 'Max Air Temp 24hr'
    df.columns.values[1] = 'Total Snowfall 24hr'
    df.columns.values[2] = 'Min Temp Diff 48hr'
    df.columns.values[3] = 'Delta SWE 24hr'

    # Add the column placeholders to the dataframe (or initialize a new one); initialize as null for now
    # String shorthand
    max_temp_72 = 'Max Air Temp 72hr'
    tot_snow_72 = 'Total Snowfall 72hr'
    wgt_snow_72 = 'Weighted Snowfall 96hr'
    was_hvys_24 = 'Was Heavy Snowfall 24hr'
    sum_mtmp_72 = 'Sum Max Temp 72hr'
    df[max_temp_72] = None
    df[tot_snow_72] = None
    df[wgt_snow_72] = None
    df[was_hvys_24] = None
    df[sum_mtmp_72] = None

    # Calculate the above metrics
    one_day_ago = None # This wouldn't be necessary if I wasn't indexing on date
    two_days_ago = None
    three_days_ago = None
    for idx, day in df.iterrows():
        df.at[idx, was_hvys_24] = day['Total Snowfall 24hr'] >= 12 # todo: update threshold
        if two_days_ago is not None:
            df.at[idx, max_temp_72] = max(
                day['Max Air Temp 24hr'], 
                one_day_ago['Max Air Temp 24hr'], 
                two_days_ago['Max Air Temp 24hr'])
            df.at[idx, tot_snow_72] = \
                day['Total Snowfall 24hr'] \
                + one_day_ago['Total Snowfall 24hr'] \
                + two_days_ago['Total Snowfall 24hr']
            df.at[idx, sum_mtmp_72] = \
                day['Max Air Temp 24hr'] \
                + one_day_ago['Max Air Temp 24hr'] \
                + two_days_ago['Max Air Temp 24hr'] 
        if three_days_ago is not None:
            df.at[idx, 'Weighted Snowfall 96hr'] = \
                day['Total Snowfall 24hr'] * 1.0 \
                + one_day_ago['Total Snowfall 24hr'] * 0.75 \
                + two_days_ago['Total Snowfall 24hr'] * 0.5 \
                + three_days_ago['Total Snowfall 24hr'] * 0.25

        # Update past day placeholders
        three_days_ago = two_days_ago
        two_days_ago = one_day_ago
        one_day_ago = day
    
    # Return the new/updated dataframe
    return df

take_snotel_df_and_calculate_columns(get_snotel_df()).head()


Unnamed: 0_level_0,Max Air Temp 24hr,Total Snowfall 24hr,Min Temp Diff 48hr,Delta SWE 24hr,Max Air Temp 72hr,Total Snowfall 72hr,Weighted Snowfall 96hr,Was Heavy Snowfall 24hr,Sum Max Temp 72hr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-11-01,34.3,0,-0.1,-5.1,,,,False,
2022-11-02,34.2,2,0.4,-4.1,,,,False,
2022-11-03,32.0,3,0.4,2.0,34.3,5.0,,False,100.5
2022-11-04,44.2,7,1.1,5.2,44.2,12.0,10.25,False,110.4
2022-11-05,36.7,-8,0.0,-3.8,44.2,2.0,-0.75,False,112.9


Combine our new datasets

In [66]:
def snotel_datasets_combined_mean():
    """Calculate columns for all SNOTEL datasets as above and aggregate them using their average"""
    # Load the 4 snotel datasets with extra columns created as above

    # Combine them and get the average of all numerical values

    # Return the newly created dataframe
    return None

def snotel_datasets_combined_median():
    """Calculate columns for all SNOTEL datasets as above and aggregate them using their median"""
    pass

def snotel_datasets_combined_max():
    """Calculate columns for all SNOTEL datasets as above and aggregate them using their maximum"""
    pass

Next, do the same thing with the NWAC data to get wind information:
- 'Max Windspeed 24hr'
- 'Was High Winds 24hr'

In [67]:
def take_nwac_df_and_calculate_columns():
    """Using raw NWAC data, calculate the above columns and return them as a dataframe"""
    pass

In [68]:
def nwac_datasets_combined_mean():
    """Calculate columns for all NWAC datasets as above and aggregate them using their average"""
    # Load the nwac datasets with extra columns created

    # Combine them and get the average of all numerical values

    # Return the newly created dataframe
    return None

def nwac_datasets_combined_median():
    """Calculate columns for all NWAC datasets as above and aggregate them using their median"""
    pass

def nwac_datasets_combined_max():
    """Calculate columns for all NWAC datasets as above and aggregate them using their max"""
    pass

In [69]:
def combine_snotel_and_nwac_into_result_dataframe():
    """Join the aggregated SNOTEL, aggregated NWAC, and danger ratings datasets on their Date column"""
    pass