# This is the case study of Bow in Banff for the time period 1980 - 2023.
1. Gap fill CANSWE dataset using FROSTBYTE workflow: SWEPreprocessing  (RDRS and CAPA data used for precipitation)
2. Snow drought classification according to Heldmyer et.al. 2022.
3. Standardized SWEI calculation Huning & AghaKouchak 2020.
4. New index using SWE/P ratio.

In [1]:
import datetime
from datetime import date
from datetime import timedelta
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
import os
import sys
import xarray as xr
import pandas as pd
import netCDF4 as nc
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from scipy.integrate import trapz
from scipy.stats import norm
from scipy.interpolate import interp1d
from shapely.geometry import Point
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
# Data paths
CANSWE_path = 'C:/Users/walimunige.rupasingh/OneDrive - University of Calgary/Documents/Reasearch/Snow_drought_indicator/snowdroughtindex/data/input_data/Case_study/CANSWE-CanEEN_1928-2023_v6_updated.nc'
Bow_at_Banff = 'C:/Users/walimunige.rupasingh/OneDrive - University of Calgary/Documents/Reasearch/Snow_drought_indicator/snowdroughtindex/data/input_data/Bow_at_Banff/basins_testcases.shp'
output_path = 'C:/Users/walimunige.rupasingh/OneDrive - University of Calgary/Documents/Reasearch/Snow_drought_indicator/snowdroughtindex/data/output_data/Case_study/'
P_path = 'C:/Users/walimunige.rupasingh/OneDrive - University of Calgary/Documents/Reasearch/Snow_drought_indicator/snowdroughtindex/data/input_data/Case_study/adjusted_merged_daily_precipitation.nc'
SWE_gapfilled_test = 'C:/Users/walimunige.rupasingh/OneDrive - University of Calgary/Documents/Reasearch/Snow_drought_indicator/snowdroughtindex/data/input_data/Case_study/Test/SWE_1979_2022_gapfilled_basin05BB001_Bow.nc'

In [4]:
# Load data
CANSWE = xr.open_dataset(CANSWE_path)
P = xr.open_dataset(P_path)

# Create a dataframe to store the data
CANSWE_df = CANSWE.to_dataframe()
P_df = P.to_dataframe()


print(CANSWE_df.head())
print(P_df.head())

                              lat         lon  elevation               source  \
station_id  time                                                                
ALE-05BA801 1928-01-01  51.416668 -116.183334     1580.0  Alberta Environment   
            1928-01-02  51.416668 -116.183334     1580.0  Alberta Environment   
            1928-01-03  51.416668 -116.183334     1580.0  Alberta Environment   
            1928-01-04  51.416668 -116.183334     1580.0  Alberta Environment   
            1928-01-05  51.416668 -116.183334     1580.0  Alberta Environment   

                       station_name station_name_sec station_name_ter  \
station_id  time                                                        
ALE-05BA801 1928-01-01    BOW RIVER        BOW RIVER        BOW RIVER   
            1928-01-02    BOW RIVER        BOW RIVER        BOW RIVER   
            1928-01-03    BOW RIVER        BOW RIVER        BOW RIVER   
            1928-01-04    BOW RIVER        BOW RIVER        BOW RIV

In [5]:
display(P_df)

Unnamed: 0_level_0,coordinate_id,lon,lat,daily_precipitation
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1980-01-01,1.0,-115.625092,50.972343,0.254825
1980-01-01,2.0,-115.673279,51.057087,0.236767
1980-01-01,3.0,-115.856125,51.111439,0.303147
1980-01-01,4.0,-115.721619,51.141815,0.208841
1980-01-01,5.0,-115.586929,51.172028,0.096979
...,...,...,...,...
2024-10-29,56.0,-116.076141,51.573727,0.148780
2024-10-29,58.0,-116.398376,51.596443,0.148697
2024-10-29,60.0,-116.262238,51.627586,0.159498
2024-10-29,62.0,-116.125946,51.658566,0.224539


In [6]:
# Load the shapefile
Bow_at_Banff_shapefile = gpd.read_file(Bow_at_Banff)
Bow_at_Banff_shapefile = Bow_at_Banff_shapefile.to_crs(epsg=4326) # convert to WGS84
Bow_shapefile = Bow_at_Banff_shapefile[Bow_at_Banff_shapefile["Station_Na"]=="BOW RIVER AT BANFF"]

In [7]:
display(Bow_shapefile)

Unnamed: 0,Station_ID,Station_Na,Area_km2,Source,geometry
0,05BB001,BOW RIVER AT BANFF,2210.56,WSC_HYDAT,"POLYGON ((-115.83512 51.07677, -115.83524 51.0..."


# FROSTBYTE gap filling of SWE data

In [8]:
# Set user-specified variables
flag_buffer_default, buffer_km_default = 0, 0 # buffer flag (0: no buffer around test basin, 1: buffer of value buffer_default around test basin) and buffer default value in km to be applied if flag = 1
month_start_water_year_default, day_start_water_year_default = 10, 1  # water year start
month_end_water_year_default, day_end_water_year_default = 9, 30  # water year end
min_obs_corr_default = 3 # the minimum number of overlapping observations required to calculate the correlation between 2 stations
min_obs_cdf_default = 10 # the minimum number of observations required to calculate a station's cdf
min_corr_default = 0.6 # the minimum correlation value required for donor stations to be selected
window_days_default = 7 # the number of days used on either side of the infilling date for gap filling calculations
min_obs_KGE_default = 3 # the minimum number of observations required to calculate the KGE''
max_gap_days_default = 15  # max. number of days for gaps allowed in the daily SWE data for the linear interpolation
artificial_gap_perc_default = 100 # the percentage of observations to remove during the artificial gap filling for each station & month's first day
iterations_default = 1 # the number of times we repeat the artificial gap filling
artificial_gap_filling_flag = 0 # indicates whether artificial gap filling is performed (1) or not (0)
artificial_gap_filling_basins = ['05BB001'] # a list of the basin(s) to run the gap filling for. To include all basins simply write 'all'

In [9]:
# Import required functions
sys.path.append('C:/Users/walimunige.rupasingh/OneDrive - University of Calgary/Documents/Reasearch/Snow_drought_indicator/snowdroughtindex/notebooks/')  # Replace with the actual path to your functions module
from functions import extract_stations_in_basin, stations_basin_map, data_availability_monthly_plots_1, data_availability_monthly_plots_2, qm_gap_filling, artificial_gap_filling, plots_artificial_gap_evaluation

----------
 WORKFLOW 
----------

In [10]:
# Re-organize the dataset as needed
SWE_stations_ds = CANSWE.assign_coords({'lon':CANSWE.lon, 'lat':CANSWE.lat, 'station_name':CANSWE.station_name, 'elevation':CANSWE.elevation}).snw
SWE_stations_ds = SWE_stations_ds.to_dataset()

display(SWE_stations_ds)

In [11]:
# Extract unique station coordinates
unique_stations = CANSWE_df.reset_index().drop_duplicates(subset='station_id')[['station_id', 'lon', 'lat']]

# Convert SWE stations DataArray to GeoDataFrame for further analysis
data = {'station_id': unique_stations['station_id'].values, 
        'lon': unique_stations['lon'].values, 
        'lat': unique_stations['lat'].values} 
df = pd.DataFrame(data)
geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
crs = "EPSG:4326"
SWE_stations_gdf = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)

display(SWE_stations_gdf)

Unnamed: 0,station_id,lon,lat,geometry
0,ALE-05BA801,-116.183334,51.416668,POINT (-116.18333 51.41667)
1,ALE-05BA802,-116.166664,51.433334,POINT (-116.16666 51.43333)
2,ALE-05BA806,-116.23333,51.416668,POINT (-116.23333 51.41667)
3,ALE-05BA808,-116.216667,51.416668,POINT (-116.21667 51.41667)
4,ALE-05BA810,-116.099998,51.466667,POINT (-116.10000 51.46667)
5,ALE-05BA811,-116.466667,51.700001,POINT (-116.46667 51.70000)
6,ALE-05BA812,-116.216667,51.316666,POINT (-116.21667 51.31667)
7,ALE-05BA813,-116.478752,51.709,POINT (-116.47875 51.70900)
8,ALE-05BA814,-116.383331,51.683334,POINT (-116.38333 51.68333)
9,ALE-05BB803,-115.783333,51.083332,POINT (-115.78333 51.08333)


In [12]:
# Convert test basin SWE data DataSet to Pandas DataFrame for further analysis
SWE_testbasin = SWE_stations_ds.to_dataframe().drop(columns=['lon','lat','station_name']).unstack()['snw'].T

# Remove time from dates
SWE_testbasin['date'] = SWE_testbasin.index.normalize()
SWE_testbasin = SWE_testbasin.set_index('date')

# Drop the dates with no data at all across all stations
SWE_testbasin = SWE_testbasin.dropna(axis=0, how='all')

# Choose data for the period of interest 1980 - 2023
SWE_testbasin = SWE_testbasin.loc['1980-01-01':'2023-07-31']

display(SWE_testbasin)

station_id,ALE-05BA801,ALE-05BA802,ALE-05BA806,ALE-05BA808,ALE-05BA810,ALE-05BA811,ALE-05BA812,ALE-05BA813,ALE-05BA814,ALE-05BB803,ALE-05BB803P,ALE-05CA805,ALE-05CA805P,SCD-AL003,SCD-AL055,SCD-AL091,SCD-AL142,SCD-AL148
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1980-01-01,,,,,,,,,,,,,,25.0,,,,
1980-01-02,,,,,,,,,,371.0,,,,,,,,
1980-01-08,,,,,,,,,,,,,,27.0,,,,
1980-01-15,,,,,,,,,,,,,,23.0,,,,
1980-01-23,,,,,,,,,,,,,,37.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-07-27,,,,,,,,,,,,,3.35013,,,,,
2023-07-28,,,,,,,,,,,,,3.28664,,,,,
2023-07-29,,,,,,,,,,,,,3.10140,,,,,
2023-07-30,,,,,,,,,,,,,3.15966,,,,,


In [13]:
# Re-organize the RDRS dataset as needed
P_stations_ds = P.assign_coords({'lon':P.lon, 'lat':P.lat, 'station_name':P.coordinate_id}).daily_precipitation
P_stations_ds = P_stations_ds.to_dataset()

display(P_stations_ds)

In [14]:

# Convert RDRS DataSet to GeoDataFrame for further analysis
data = {'lon': P['lon'].values.flatten(), 
        'lat': P['lat'].values.flatten()} 
P_df = pd.DataFrame(data)
geometry = [Point(xy) for xy in zip(P_df['lon'], P_df['lat'])]
crs = "EPSG:4326"
P_gdf = gpd.GeoDataFrame(P_df, crs=crs, geometry=geometry)

display(P_gdf)

# Plot the stations on the map
fig, ax = plt.subplots(figsize=(10, 10))
Bow_shapefile.plot(ax=ax, color='lightgrey')
SWE_stations_gdf.plot(ax=ax, color='red', markersize=5)
P_gdf.plot(ax=ax, color='orange', markersize=5)
plt.legend(['CANSWE stations', 'RDRS stations', 'CAPA stations'])
plt.savefig(output_path + 'SWE-p_stations.png')
plt.show()




Unnamed: 0,lon,lat,geometry
0,-115.625092,50.972343,POINT (-115.62509 50.97234)
1,-115.673279,51.057087,POINT (-115.67328 51.05709)
2,-115.856125,51.111439,POINT (-115.85612 51.11144)
3,-115.721619,51.141815,POINT (-115.72162 51.14182)
4,-115.586929,51.172028,POINT (-115.58693 51.17203)
...,...,...,...
343990,-116.076141,51.573727,POINT (-116.07614 51.57373)
343991,-116.398376,51.596443,POINT (-116.39838 51.59644)
343992,-116.262238,51.627586,POINT (-116.26224 51.62759)
343993,-116.125946,51.658566,POINT (-116.12595 51.65857)


  plt.show()


In [15]:
# Ensure 'coordinate_id', 'lat', 'lon', 'daily_precipitation' columns are present
if not all(col in P_df.columns for col in ['coordinate_id', 'lat', 'lon', 'daily_precipitation']):
	P_df = P[['coordinate_id', 'time', 'lat', 'lon', 'daily_precipitation']].to_dataframe().reset_index()

# rearrange data in P_df to get daily precipitation for each coordinate_id. Columns: 'coordinate_id', 'time', 1, 2, 3, ... so that each column represents a station
P_df = P_df[['coordinate_id', 'time', 'daily_precipitation']]
P_df.set_index(['coordinate_id', 'time'], inplace=True)
P_df = P_df.unstack(level=0)
P_df.columns = P_df.columns.droplevel()
P_df = P_df.reset_index()

# rearrange same as SWE_testbasin
P_df['date'] = P_df['time'].dt.normalize()
P_df = P_df.set_index('date')
P_df = P_df.drop(columns=['time'])



display(P_df)

coordinate_id,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,...,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0,64.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1980-01-01,0.254825,0.236767,0.303147,0.208841,0.096979,0.228869,0.183144,0.120171,0.185813,0.156444,...,,,,,,,,,,
1980-01-02,0.362730,0.405467,0.628346,0.461459,0.284733,0.693539,0.515130,0.402690,0.424519,0.364532,...,,,,,,,,,,
1980-01-03,1.083391,0.715090,1.133855,0.600795,0.259677,1.707479,1.113470,0.611129,2.780802,1.885300,...,,,,,,,,,,
1980-01-04,0.093789,0.127141,0.222424,0.192770,0.169092,0.296335,0.279333,0.196952,0.459932,0.349326,...,,,,,,,,,,
1980-01-05,0.339676,0.335644,0.263054,0.321718,0.357347,0.264432,0.291121,0.336791,0.284382,0.253953,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-25,,,,,,,,,,,...,,0.003145,,0.002656,,0.004027,,0.004573,,0.002644
2024-10-26,,,,,,,,,,,...,,0.000000,,0.000000,,0.000000,,0.000000,,0.000000
2024-10-27,,,,,,,,,,,...,,0.863121,,3.044768,,2.505789,,1.741460,,3.411551
2024-10-28,,,,,,,,,,,...,,1.047908,,0.457279,,0.381425,,0.285352,,0.914088


In [16]:
# Calculate the water year cumulative precipitation for all stations within the test basin
    
# Add water years as an extra column to the P observations Pandas DataFrame
year = []
for i in P_df.index:
    if (i.month == month_start_water_year_default and i.day >= day_start_water_year_default) or (i.month > month_start_water_year_default):
        year.append(i.year + 1)
    else:
        year.append(i.year)
P_df['water_year'] = year

# Calculate the cumulative precipitation
elem = -1
for y in list(set(P_df['water_year'])):
    elem += 1
    P_df_water_year = P_df[P_df['water_year'] == y]
    P_df_water_year_cumul = P_df_water_year.cumsum().drop(['water_year'], axis=1)
    if elem == 0:
        P_df_cumul_testbasin = P_df_water_year_cumul
    else:
        P_df_cumul_testbasin = pd.concat([P_df_cumul_testbasin, P_df_water_year_cumul])

display(P_df_cumul_testbasin)

coordinate_id,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,...,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0,64.0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1980-01-01,0.254825,0.236767,0.303147,0.208841,0.096979,0.228869,0.183144,0.120171,0.185813,0.156444,...,,,,,,,,,,
1980-01-02,0.617556,0.642234,0.931493,0.670301,0.381712,0.922408,0.698274,0.522861,0.610332,0.520976,...,,,,,,,,,,
1980-01-03,1.700946,1.357324,2.065349,1.271096,0.641389,2.629887,1.811744,1.133990,3.391135,2.406276,...,,,,,,,,,,
1980-01-04,1.794735,1.484465,2.287773,1.463866,0.810481,2.926222,2.091077,1.330942,3.851067,2.755602,...,,,,,,,,,,
1980-01-05,2.134411,1.820110,2.550827,1.785585,1.167828,3.190654,2.382198,1.667733,4.135449,3.009555,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-25,,,,,,,,,,,...,,17.140692,,20.270903,,18.628828,,14.464213,,18.428415
2024-10-26,,,,,,,,,,,...,,17.140692,,20.270903,,18.628828,,14.464213,,18.428415
2024-10-27,,,,,,,,,,,...,,18.003813,,23.315670,,21.134617,,16.205673,,21.839966
2024-10-28,,,,,,,,,,,...,,19.051722,,23.772949,,21.516043,,16.491026,,22.754053


In [17]:
# Print column names to check if '1' exists
print(P_df_cumul_testbasin.columns)

# Visually check that this is right for a time range
plt.figure(figsize=(10,5))
plt.plot(P_df_cumul_testbasin[1]['1981-09-01':'1983-10-02'], marker='o', color='b')
#ylim = plt.ylim(0)
plt.savefig(output_path + 'P_cum_year.png')
plt.show()

Index([ 1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,  9.0, 10.0, 11.0, 12.0,
       13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0,
       25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0,
       37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0,
       49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
       61.0, 62.0, 63.0, 64.0],
      dtype='object', name='coordinate_id')


  plt.show()


In [18]:
# rename index names to precip_1, precip_2, ...
P_df_cumul_testbasin.columns = ['precip_' + str(i) for i in range(1, len(P_df_cumul_testbasin.columns) + 1)]

display(P_df_cumul_testbasin)

Unnamed: 0_level_0,precip_1,precip_2,precip_3,precip_4,precip_5,precip_6,precip_7,precip_8,precip_9,precip_10,...,precip_55,precip_56,precip_57,precip_58,precip_59,precip_60,precip_61,precip_62,precip_63,precip_64
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1980-01-01,0.254825,0.236767,0.303147,0.208841,0.096979,0.228869,0.183144,0.120171,0.185813,0.156444,...,,,,,,,,,,
1980-01-02,0.617556,0.642234,0.931493,0.670301,0.381712,0.922408,0.698274,0.522861,0.610332,0.520976,...,,,,,,,,,,
1980-01-03,1.700946,1.357324,2.065349,1.271096,0.641389,2.629887,1.811744,1.133990,3.391135,2.406276,...,,,,,,,,,,
1980-01-04,1.794735,1.484465,2.287773,1.463866,0.810481,2.926222,2.091077,1.330942,3.851067,2.755602,...,,,,,,,,,,
1980-01-05,2.134411,1.820110,2.550827,1.785585,1.167828,3.190654,2.382198,1.667733,4.135449,3.009555,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-25,,,,,,,,,,,...,,17.140692,,20.270903,,18.628828,,14.464213,,18.428415
2024-10-26,,,,,,,,,,,...,,17.140692,,20.270903,,18.628828,,14.464213,,18.428415
2024-10-27,,,,,,,,,,,...,,18.003813,,23.315670,,21.134617,,16.205673,,21.839966
2024-10-28,,,,,,,,,,,...,,19.051722,,23.772949,,21.516043,,16.491026,,22.754053


In [19]:
# Combine SWE & P test basin observations in a single Pandas DataFrame
if P_gdf.empty or P_df.empty:
    SWE_P_testbasin = SWE_testbasin.copy()
else:
    SWE_P_testbasin = SWE_testbasin.merge(P_df_cumul_testbasin, left_index=True, right_index=True, how='outer')

# select dates 1980-01-01 to 2023-07-31
SWE_P_testbasin = SWE_P_testbasin.loc['1980-01-01':'2023-07-31']
    
display(SWE_P_testbasin)

Unnamed: 0_level_0,ALE-05BA801,ALE-05BA802,ALE-05BA806,ALE-05BA808,ALE-05BA810,ALE-05BA811,ALE-05BA812,ALE-05BA813,ALE-05BA814,ALE-05BB803,...,precip_55,precip_56,precip_57,precip_58,precip_59,precip_60,precip_61,precip_62,precip_63,precip_64
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1980-01-01,,,,,,,,,,,...,,,,,,,,,,
1980-01-02,,,,,,,,,,371.0,...,,,,,,,,,,
1980-01-03,,,,,,,,,,,...,,,,,,,,,,
1980-01-04,,,,,,,,,,,...,,,,,,,,,,
1980-01-05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-07-27,,,,,,,,,,,...,137.357147,,126.171707,,115.995697,,148.365356,,129.099686,
2023-07-28,,,,,,,,,,,...,137.547897,,126.432991,,116.335579,,148.590591,,129.405060,
2023-07-29,,,,,,,,,,,...,137.567062,,126.453629,,116.356216,,148.594559,,129.418488,
2023-07-30,,,,,,,,,,,...,137.625397,,126.453629,,116.356216,,148.594559,,129.418488,


In [20]:
# Linear interpolation to fill in small data gaps
SWE_obs_basin_interp_da = SWE_stations_ds.snw.interpolate_na(method='linear', dim='time', max_gap=datetime.timedelta(days=max_gap_days_default))
SWE_obs_basin_interp_df = SWE_obs_basin_interp_da.to_dataframe().drop(columns=['lon','lat','station_name']).unstack()['snw'].T
SWE_obs_basin_interp_df['date'] =  SWE_obs_basin_interp_df.index.normalize()
SWE_obs_basin_interp_df = SWE_obs_basin_interp_df.set_index('date')

In [21]:
# Save flags for linear interpolation to a new Pandas dataframe (observations = 0; estimates = 1)
flags_interp_basin_da = SWE_obs_basin_interp_da.copy().fillna(-999)
original_da = SWE_stations_ds.snw.copy().fillna(-999)
flags_interp_basin_da = xr.where(flags_interp_basin_da==original_da, 0, 1)
flags_interp_basin_df = flags_interp_basin_da.to_dataframe().drop(columns=['lon','lat','station_name']).unstack()['snw'].T
flags_interp_basin_df['date'] =  flags_interp_basin_df.index.normalize()
flags_interp_basin_df = flags_interp_basin_df.set_index('date')

In [22]:
# Perform gap filling for the original dataset (no artificial gaps, flag=0)
# Note: this may take quite a while to run, depending on the number of gaps to fill
SWE_obs_basin_gapfilled_df, flags_gapfill_basin_df, donor_stations_gapfill_basin_df = qm_gap_filling(SWE_P_testbasin.copy(), window_days=window_days_default, min_obs_corr=min_obs_corr_default, min_obs_cdf=min_obs_cdf_default, min_corr=min_corr_default)

In [23]:
# Combine gapfill and linear interpolation flags into a single Pandas dataframe
flags_basin_df = flags_interp_basin_df + flags_gapfill_basin_df

In [24]:
# Save gap filled dataset and metadata to single dataset
SWE_gapfill_basin_da = xr.DataArray(data=SWE_obs_basin_gapfilled_df.values, coords=dict(time=SWE_obs_basin_gapfilled_df.index.values, station_id=SWE_obs_basin_gapfilled_df.columns.values), dims=['time','station_id'], name='SWE', attrs={'long_name':'Surface snow water equivalent','units':'kg m**-2'})
flags_basin_da = xr.DataArray(data=flags_basin_df.values, coords=dict(time=flags_basin_df.index.values, station_id=flags_basin_df.columns.values), dims=['time','station_id'], name='flag', attrs={'description':'observations = 0; estimates = 1'})
donor_stations_gapfill_basin_da = xr.DataArray(data=donor_stations_gapfill_basin_df.values, coords=dict(time=donor_stations_gapfill_basin_df.index.values, station_id=donor_stations_gapfill_basin_df.columns.values), dims=['time','station_id'], name='donor_stations', attrs={'description':'station_id of donor stations used for gap filling'})
SWE_obs_basin_gapfilled_ds = xr.merge([SWE_gapfill_basin_da, flags_basin_da, donor_stations_gapfill_basin_da])
lats = SWE_stations_ds.lat.sel(station_id=SWE_obs_basin_gapfilled_df.columns.values).isel(time=0).values
lons = SWE_stations_ds.lon.sel(station_id=SWE_obs_basin_gapfilled_df.columns.values).isel(time=0).values
names = SWE_stations_ds.station_name.sel(station_id=SWE_obs_basin_gapfilled_df.columns.values).isel(time=0).values
SWE_obs_basin_gapfilled_ds = SWE_obs_basin_gapfilled_ds.assign_coords({'lat':('station_id',lats),'lon':('station_id',lons),'station_name':('station_id',names)})
SWE_obs_basin_gapfilled_ds.attrs['info'] = 'Gap filled SWE data for basin. The gap filling was done using a combination of linear interpolation and quantile mapping, using donor data from neighbouring SWE and P stations in the same basin.'


In [25]:
# Plot bar chart of the number of times each donor station was used for gap filling
count = []
for s in SWE_P_testbasin.columns.values:
    count_s = SWE_obs_basin_gapfilled_ds.donor_stations.where(SWE_obs_basin_gapfilled_ds.donor_stations==s).count().data
    count.append(count_s)
fig = plt.figure()
plt.bar(SWE_P_testbasin.columns.values, count, color='b')
plt.xticks(rotation=90)
# xlabes are too many. so add them as a legend and x label as 1,2,3... according to the number of stations
plt.legend(SWE_P_testbasin.columns.values)
plt.xlabel('Donor stations')
plt.ylabel('# times used for gap filling')
plt.savefig(output_path+'donor_stations_gapfilling_in_basin.png', dpi=300, bbox_inches='tight')
plt.close(fig)

# Plot timeseries of the % of SWE stations with data in the test basin on the first day of each month, for the original data & after gap filling (flag=1)
fig = data_availability_monthly_plots_1(SWE_stations_ds, SWE_stations_ds.snw, SWE_obs_basin_gapfilled_ds.SWE, flag=1)
plt.xlim(1978,2024)
plt.show
plt.savefig(output_path+'SWEobs_monthly_availability_1_gapfilling_in_basin.png', dpi=300)
plt.close(fig)

In [26]:
# Plot the first SWE station in the dataset to visually check quantile mapping results
plt.figure(figsize=(20,5))
SWE_obs_basin_gapfilled_ds.SWE.isel(station_id=0).plot(color='r', marker='o', ms=5, label='after gap filling', lw=0)
SWE_P_testbasin.iloc[:,0].plot(color='k', marker='o', ms=5, label='original data', lw=0)
plt.title(SWE_P_testbasin.iloc[:,0].name)
plt.xlabel('')
plt.ylabel('SWE [mm]')
#legend in single line outside the plot bottom right
plt.legend(loc='lower right', bbox_to_anchor=(1, 0), ncol=1)
plt.savefig(output_path+'SWE_station_1_gapfilling_in_basin.png', dpi=300)
plt.show()

  plt.show()


In [29]:
# Perform artificial gap filling evaluation and plot the results (flag=1)
# Note: this may take quite a while to run, depending on the number of gaps to fill
pd.set_option("mode.chained_assignment", None) # suppresses the "SettingWithCopyWarning"

evaluation_artificial_gapfill_testbasin_dict, fig = artificial_gap_filling(SWE_P_testbasin.copy(), iterations=iterations_default, artificial_gap_perc=artificial_gap_perc_default, window_days=window_days_default, min_obs_corr=min_obs_corr_default, min_obs_cdf=min_obs_cdf_default, min_corr=min_corr_default, min_obs_KGE=min_obs_KGE_default, flag=1)

In [30]:
plt.savefig(output_path+'SWE_artificial_gapfilling_in_basin.png', dpi=300)
plt.close(fig)

In [31]:
# Plot artificial gap filling evaluation results
fig = plots_artificial_gap_evaluation(evaluation_artificial_gapfill_testbasin_dict)
plt.savefig(output_path+'SWE_artificial_gapfilling_eval_in_basin.png', dpi=300)
plt.close(fig)

In [27]:
# Artificial gap filling & eveluation  #
# -------------------------------------#

if artificial_gap_filling_flag == 1:

     if (artificial_gap_filling_basins == 'all') :

        print('Performing an artificial gap filling in basin ')

        # Perform artificial gap filling evaluation
        # Note: if flag=1 we also output a figure of the artificial gap filling values against the observed values for visual comparison
        evaluation_artificial_gapfill_basin, fig = artificial_gap_filling(SWE_P_testbasin.copy(), iterations=iterations_default, artificial_gap_perc=artificial_gap_perc_default, window_days=window_days_default, min_obs_corr=min_obs_corr_default, min_obs_cdf=min_obs_cdf_default, min_corr=min_corr_default, min_obs_KGE=min_obs_KGE_default, flag=1)
        plt.savefig(output_path+'SWE_artificial_gapfilling_in_basin.png', dpi=300)
        plt.close(fig)

        # Plot artificial gap filling evaluation results
        fig = plots_artificial_gap_evaluation(evaluation_artificial_gapfill_basin)
        plt.savefig(output_path+'SWE_artificial_gapfilling_eval_in_basin.png', dpi=300)
        plt.close(fig)

In [56]:
# Save basin gap filled SWE data to netcdf
SWE_obs_basin_gapfilled_ds.to_netcdf(output_path+'SWE_gapfilled_for_basin_new.nc', format="NETCDF4")

------------------------
# Calculation of SSWEI #

In [224]:
gap_filled_SWE = 'C:/Users/walimunige.rupasingh/OneDrive - University of Calgary/Documents/Reasearch/Snow_drought_indicator/snowdroughtindex/data/output_data/Case_study/SWE_gapfilled_for_basin_new.nc'

In [238]:
# Load the gap filled SWE data and creata dataframes
SWE_gap_filled = xr.open_dataset(gap_filled_SWE)
SWE_gap_filled_df = SWE_gap_filled.to_dataframe()

# select dates 1980-01-01 to 2023-07-31
SWE_gap_filled_df = SWE_gap_filled_df.loc['1980-01-01':'2023-07-31']

display(SWE_gap_filled_df)


Unnamed: 0_level_0,Unnamed: 1_level_0,SWE,flag,donor_stations,lat,lon,station_name
time,station_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1980-01-01,ALE-05BA801,,0.0,,51.416668,-116.183334,BOW RIVER
1980-01-01,ALE-05BA802,,0.0,,51.433334,-116.166664,PIPESTONE UPPER
1980-01-01,ALE-05BA806,,0.0,,51.416668,-116.233330,MIRROR LAKE
1980-01-01,ALE-05BA808,,0.0,,51.416668,-116.216667,CHATEAU LAWN
1980-01-01,ALE-05BA810,,0.0,,51.466667,-116.099998,PTARMIGAN HUT
...,...,...,...,...,...,...,...
2023-07-31,SCD-AL003,,0.0,,51.183334,-115.566666,BANFF
2023-07-31,SCD-AL055,,0.0,,51.200001,-115.599998,FORTY MILE CREEK
2023-07-31,SCD-AL091,,0.0,,51.416668,-116.233330,LAKE LOUISE WEST
2023-07-31,SCD-AL142,,0.0,,51.266666,-115.916664,MOUNT EISENHOWER


In [239]:
# add elevation column to the dataframe according to the station_id from the original CANSWE dataset
elevation = CANSWE_df.reset_index().drop_duplicates(subset='station_id')[['station_id', 'elevation']]
elevation = elevation.set_index('station_id')
SWE_gap_filled_df = SWE_gap_filled_df.join(elevation, on='station_id', rsuffix='_original')

# replace NaN values with 0
SWE_gap_filled_df = SWE_gap_filled_df.fillna(0)
display(SWE_gap_filled_df)



Unnamed: 0_level_0,Unnamed: 1_level_0,SWE,flag,donor_stations,lat,lon,station_name,elevation
time,station_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1980-01-01,ALE-05BA801,0.0,0.0,,51.416668,-116.183334,BOW RIVER,1580.0
1980-01-01,ALE-05BA802,0.0,0.0,,51.433334,-116.166664,PIPESTONE UPPER,1615.0
1980-01-01,ALE-05BA806,0.0,0.0,,51.416668,-116.233330,MIRROR LAKE,2030.0
1980-01-01,ALE-05BA808,0.0,0.0,,51.416668,-116.216667,CHATEAU LAWN,1740.0
1980-01-01,ALE-05BA810,0.0,0.0,,51.466667,-116.099998,PTARMIGAN HUT,2190.0
...,...,...,...,...,...,...,...,...
2023-07-31,SCD-AL003,0.0,0.0,,51.183334,-115.566666,BANFF,1397.0
2023-07-31,SCD-AL055,0.0,0.0,,51.200001,-115.599998,FORTY MILE CREEK,1677.0
2023-07-31,SCD-AL091,0.0,0.0,,51.416668,-116.233330,LAKE LOUISE WEST,1768.0
2023-07-31,SCD-AL142,0.0,0.0,,51.266666,-115.916664,MOUNT EISENHOWER,1524.0


In [254]:
# mean SWE for each day 
SWE_gap_filled_daily = SWE_gap_filled_df.groupby('time').mean()

#keep only time and SWE columns
SWE_gap_filled_daily = SWE_gap_filled_daily.drop(columns=['elevation','flag','lat','lon'])


display(SWE_gap_filled_daily)

Unnamed: 0_level_0,SWE
time,Unnamed: 1_level_1
1980-01-01,19.393889
1980-01-02,53.308889
1980-01-03,38.850556
1980-01-04,39.091667
1980-01-05,33.162778
...,...
2023-07-27,0.380007
2023-07-28,0.388147
2023-07-29,0.367856
2023-07-30,0.382203


In [233]:
# save the daily SWE data to a csv file
SWE_gap_filled_daily.to_csv(output_path + 'SWE_gap_filled_daily.csv')

In [132]:
# divide the data into 2 categories based on elevation
bins = [1300, 2000, 2400]
labels = ['low', 'high']
SWE_gap_filled_df['elevation_category'] = pd.cut(SWE_gap_filled_df['elevation'], bins=bins, labels=labels, right=False)

display(SWE_gap_filled_df)

# show number of stations in each category
SWE_gap_filled_df['elevation_category'].value_counts()


Unnamed: 0_level_0,Unnamed: 1_level_0,SWE,flag,donor_stations,lat,lon,station_name,elevation,elevation_category
time,station_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1980-01-01,ALE-05BA801,,0.0,,51.416668,-116.183334,BOW RIVER,1580.0,low
1980-01-01,ALE-05BA802,,0.0,,51.433334,-116.166664,PIPESTONE UPPER,1615.0,low
1980-01-01,ALE-05BA806,,0.0,,51.416668,-116.233330,MIRROR LAKE,2030.0,high
1980-01-01,ALE-05BA808,,0.0,,51.416668,-116.216667,CHATEAU LAWN,1740.0,low
1980-01-01,ALE-05BA810,,0.0,,51.466667,-116.099998,PTARMIGAN HUT,2190.0,high
...,...,...,...,...,...,...,...,...,...
2023-07-31,SCD-AL003,,0.0,,51.183334,-115.566666,BANFF,1397.0,low
2023-07-31,SCD-AL055,,0.0,,51.200001,-115.599998,FORTY MILE CREEK,1677.0,low
2023-07-31,SCD-AL091,,0.0,,51.416668,-116.233330,LAKE LOUISE WEST,1768.0,low
2023-07-31,SCD-AL142,,0.0,,51.266666,-115.916664,MOUNT EISENHOWER,1524.0,low


high    159180
low     127344
Name: elevation_category, dtype: int64

In [133]:
# monthly average SWE for each category from 1991-2020
SWE_gap_filled_df = SWE_gap_filled_df.reset_index()
SWE_gap_filled_df['year'] = SWE_gap_filled_df['time'].dt.year
SWE_gap_filled_df['month'] = SWE_gap_filled_df['time'].dt.month
SWE_gap_filled_df['day'] = SWE_gap_filled_df['time'].dt.day
SWE_gap_filled_df['date'] = SWE_gap_filled_df['time'].dt.normalize()
SWE_gap_filled_df = SWE_gap_filled_df.set_index('date')

# select dates 1991-01-01 to 2020-12-31
SWE_gap_filled_clim = SWE_gap_filled_df.loc['1991-01-01':'2020-12-31']

# calculate monthly average SWE for each category
monthly_avg_SWE = SWE_gap_filled_clim.groupby(['elevation_category', 'year', 'month']).mean()

display(monthly_avg_SWE)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SWE,flag,lat,lon,elevation,day
elevation_category,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
low,1991,1,160.140196,0.193548,51.345834,-116.008332,1618.375000,16.0
low,1991,2,219.303571,0.299107,51.345834,-116.008332,1618.375000,14.5
low,1991,3,296.239365,0.241935,51.345834,-116.008332,1618.375000,16.0
low,1991,4,331.364783,0.095833,51.345834,-116.008332,1618.375000,15.5
low,1991,5,,0.000000,51.345834,-116.008332,1618.375000,16.0
...,...,...,...,...,...,...,...,...
high,2020,8,4.506036,0.083871,51.453289,-116.155180,2164.100098,16.0
high,2020,9,4.477765,0.050000,51.453289,-116.155180,2164.100098,15.5
high,2020,10,32.324252,0.000000,51.453289,-116.155180,2164.100098,16.0
high,2020,11,142.175546,0.066667,51.453289,-116.155180,2164.100098,15.5


In [134]:
# get 30 year average SWE for each category
monthly_avg_SWE = monthly_avg_SWE.reset_index()
monthly_avg_SWE = monthly_avg_SWE.set_index('month')
monthly_avg_SWE = monthly_avg_SWE.groupby(['elevation_category', 'month']).mean()

display(monthly_avg_SWE)

# get average SWE for each category
avg_SWE = SWE_gap_filled_clim.groupby(['elevation_category']).mean()

display(avg_SWE)



Unnamed: 0_level_0,Unnamed: 1_level_0,year,SWE,flag,lat,lon,elevation,day
elevation_category,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
low,1,2005.5,111.676523,0.200403,51.345834,-116.008332,1618.375,16.0
low,2,2005.5,158.25483,0.302355,51.345834,-116.008332,1618.375,14.633333
low,3,2005.5,221.831091,0.239919,51.345834,-116.008332,1618.375,16.0
low,4,2005.5,235.864937,0.089583,51.345834,-116.008332,1618.375,15.5
low,5,2005.5,,0.0,51.345834,-116.008332,1618.375,16.0
low,6,2005.5,,0.0,51.345834,-116.008332,1618.375,15.5
low,7,2005.5,,0.0,51.345834,-116.008332,1618.375,16.0
low,8,2005.5,,0.0,51.345834,-116.008332,1618.375,16.0
low,9,2005.5,,0.0,51.345834,-116.008332,1618.375,15.5
low,10,2005.5,,0.0,51.345834,-116.008332,1618.375,16.0


Unnamed: 0_level_0,SWE,flag,lat,lon,elevation,year,month,day
elevation_category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
low,172.452069,0.069401,51.345834,-116.008332,1618.375122,2005.500365,6.522723,15.730243
high,293.432875,0.202993,51.453289,-116.15518,2164.100098,2005.500365,6.522723,15.730243


In [44]:
# save the data to a csv file
SWE_gap_filled_df.to_csv(output_path + 'SWE_gap_filled_with_elevation.csv')

In [135]:
# make 2 dateframes for each category
SWE_low_elev = SWE_gap_filled_df[SWE_gap_filled_df['elevation_category'] == 'low']
SWE_high_elev = SWE_gap_filled_df[SWE_gap_filled_df['elevation_category'] == 'high']

display(SWE_low_elev)
display(SWE_high_elev)

Unnamed: 0_level_0,time,station_id,SWE,flag,donor_stations,lat,lon,station_name,elevation,elevation_category,year,month,day
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1980-01-01,1980-01-01,ALE-05BA801,,0.0,,51.416668,-116.183334,BOW RIVER,1580.0,low,1980,1,1
1980-01-01,1980-01-01,ALE-05BA802,,0.0,,51.433334,-116.166664,PIPESTONE UPPER,1615.0,low,1980,1,1
1980-01-01,1980-01-01,ALE-05BA808,,0.0,,51.416668,-116.216667,CHATEAU LAWN,1740.0,low,1980,1,1
1980-01-01,1980-01-01,SCD-AL003,25.0,0.0,,51.183334,-115.566666,BANFF,1397.0,low,1980,1,1
1980-01-01,1980-01-01,SCD-AL055,,0.0,,51.200001,-115.599998,FORTY MILE CREEK,1677.0,low,1980,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-07-31,2023-07-31,SCD-AL003,,0.0,,51.183334,-115.566666,BANFF,1397.0,low,2023,7,31
2023-07-31,2023-07-31,SCD-AL055,,0.0,,51.200001,-115.599998,FORTY MILE CREEK,1677.0,low,2023,7,31
2023-07-31,2023-07-31,SCD-AL091,,0.0,,51.416668,-116.233330,LAKE LOUISE WEST,1768.0,low,2023,7,31
2023-07-31,2023-07-31,SCD-AL142,,0.0,,51.266666,-115.916664,MOUNT EISENHOWER,1524.0,low,2023,7,31


Unnamed: 0_level_0,time,station_id,SWE,flag,donor_stations,lat,lon,station_name,elevation,elevation_category,year,month,day
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1980-01-01,1980-01-01,ALE-05BA806,,0.0,,51.416668,-116.233330,MIRROR LAKE,2030.0,high,1980,1,1
1980-01-01,1980-01-01,ALE-05BA810,,0.0,,51.466667,-116.099998,PTARMIGAN HUT,2190.0,high,1980,1,1
1980-01-01,1980-01-01,ALE-05BA811,,0.0,,51.700001,-116.466667,BOW SUMMIT (OLD),2080.0,high,1980,1,1
1980-01-01,1980-01-01,ALE-05BA812,,0.0,,51.316666,-116.216667,LARCH VALLEY,2230.0,high,1980,1,1
1980-01-01,1980-01-01,ALE-05BA813,,0.0,,51.709000,-116.478752,BOW SUMMIT (NEW),2031.0,high,1980,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-07-31,2023-07-31,ALE-05BA814,,0.0,,51.683334,-116.383331,KATHERINE LAKE,2380.0,high,2023,7,31
2023-07-31,2023-07-31,ALE-05BB803,,0.0,,51.083332,-115.783333,SUNSHINE VILLAGE,2230.0,high,2023,7,31
2023-07-31,2023-07-31,ALE-05BB803P,3.58967,0.0,,51.083332,-115.783333,SUNSHINE VILLAGE PILLOW,2230.0,high,2023,7,31
2023-07-31,2023-07-31,ALE-05CA805,,0.0,,51.533333,-116.050003,SKOKI MOUNTAIN,2120.0,high,2023,7,31


In [154]:
# convert to geopandas dataframe
geometry_low = [Point(xy) for xy in zip(SWE_low_elev['lon'], SWE_low_elev['lat'])]
crs = "EPSG:4326"
SWE_low_elev_gdf = gpd.GeoDataFrame(SWE_low_elev, crs=crs, geometry=geometry_low)

geometry_high = [Point(xy) for xy in zip(SWE_high_elev['lon'], SWE_high_elev['lat'])]
crs = "EPSG:4326"
SWE_high_elev_gdf = gpd.GeoDataFrame(SWE_high_elev, crs=crs, geometry=geometry_high)

display(SWE_low_elev_gdf)
display(SWE_high_elev_gdf)

Unnamed: 0_level_0,time,station_id,SWE,flag,donor_stations,lat,lon,station_name,elevation,elevation_category,year,month,day,geometry
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1980-01-01,1980-01-01,ALE-05BA801,,0.0,,51.416668,-116.183334,BOW RIVER,1580.0,low,1980,1,1,POINT (-116.18333 51.41667)
1980-01-01,1980-01-01,ALE-05BA802,,0.0,,51.433334,-116.166664,PIPESTONE UPPER,1615.0,low,1980,1,1,POINT (-116.16666 51.43333)
1980-01-01,1980-01-01,ALE-05BA808,,0.0,,51.416668,-116.216667,CHATEAU LAWN,1740.0,low,1980,1,1,POINT (-116.21667 51.41667)
1980-01-01,1980-01-01,SCD-AL003,25.0,0.0,,51.183334,-115.566666,BANFF,1397.0,low,1980,1,1,POINT (-115.56667 51.18333)
1980-01-01,1980-01-01,SCD-AL055,,0.0,,51.200001,-115.599998,FORTY MILE CREEK,1677.0,low,1980,1,1,POINT (-115.60000 51.20000)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-07-31,2023-07-31,SCD-AL003,,0.0,,51.183334,-115.566666,BANFF,1397.0,low,2023,7,31,POINT (-115.56667 51.18333)
2023-07-31,2023-07-31,SCD-AL055,,0.0,,51.200001,-115.599998,FORTY MILE CREEK,1677.0,low,2023,7,31,POINT (-115.60000 51.20000)
2023-07-31,2023-07-31,SCD-AL091,,0.0,,51.416668,-116.233330,LAKE LOUISE WEST,1768.0,low,2023,7,31,POINT (-116.23333 51.41667)
2023-07-31,2023-07-31,SCD-AL142,,0.0,,51.266666,-115.916664,MOUNT EISENHOWER,1524.0,low,2023,7,31,POINT (-115.91666 51.26667)


Unnamed: 0_level_0,time,station_id,SWE,flag,donor_stations,lat,lon,station_name,elevation,elevation_category,year,month,day,geometry
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1980-01-01,1980-01-01,ALE-05BA806,,0.0,,51.416668,-116.233330,MIRROR LAKE,2030.0,high,1980,1,1,POINT (-116.23333 51.41667)
1980-01-01,1980-01-01,ALE-05BA810,,0.0,,51.466667,-116.099998,PTARMIGAN HUT,2190.0,high,1980,1,1,POINT (-116.10000 51.46667)
1980-01-01,1980-01-01,ALE-05BA811,,0.0,,51.700001,-116.466667,BOW SUMMIT (OLD),2080.0,high,1980,1,1,POINT (-116.46667 51.70000)
1980-01-01,1980-01-01,ALE-05BA812,,0.0,,51.316666,-116.216667,LARCH VALLEY,2230.0,high,1980,1,1,POINT (-116.21667 51.31667)
1980-01-01,1980-01-01,ALE-05BA813,,0.0,,51.709000,-116.478752,BOW SUMMIT (NEW),2031.0,high,1980,1,1,POINT (-116.47875 51.70900)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-07-31,2023-07-31,ALE-05BA814,,0.0,,51.683334,-116.383331,KATHERINE LAKE,2380.0,high,2023,7,31,POINT (-116.38333 51.68333)
2023-07-31,2023-07-31,ALE-05BB803,,0.0,,51.083332,-115.783333,SUNSHINE VILLAGE,2230.0,high,2023,7,31,POINT (-115.78333 51.08333)
2023-07-31,2023-07-31,ALE-05BB803P,3.58967,0.0,,51.083332,-115.783333,SUNSHINE VILLAGE PILLOW,2230.0,high,2023,7,31,POINT (-115.78333 51.08333)
2023-07-31,2023-07-31,ALE-05CA805,,0.0,,51.533333,-116.050003,SKOKI MOUNTAIN,2120.0,high,2023,7,31,POINT (-116.05000 51.53333)


In [162]:
# Plot the stations on the map
fig, ax = plt.subplots(figsize=(10, 10))
Bow_shapefile.plot(ax=ax, color='lightgrey')
SWE_low_elev_gdf.plot(ax=ax, color='red', markersize=5)
SWE_high_elev_gdf.plot(ax=ax, color='blue', markersize=5)
plt.legend(['Low elevation stations', 'High elevation stations'])
plt.savefig(output_path + 'SWE_low_high_elev_stations.png')
plt.show()

  plt.show()


In [166]:
# Ensure the shapefile and GeoDataFrames have the same CRS
Bow_shapefile = Bow_shapefile.to_crs(crs)
SWE_low_elev_gdf = SWE_low_elev_gdf.to_crs(crs)
SWE_high_elev_gdf = SWE_high_elev_gdf.to_crs(crs)

# Perform spatial overlay to get the intersection of the shapefile with low elevation GeoDataFrame
low_elev_shapefile = gpd.overlay(Bow_shapefile, SWE_low_elev_gdf, how='union')

# Perform spatial overlay to get the intersection of the shapefile with high elevation GeoDataFrame
high_elev_shapefile = gpd.overlay(Bow_shapefile, SWE_high_elev_gdf, how='union')

# Display the results
display(low_elev_shapefile)
display(high_elev_shapefile)

  low_elev_shapefile = gpd.overlay(Bow_shapefile, SWE_low_elev_gdf, how='union')
  high_elev_shapefile = gpd.overlay(Bow_shapefile, SWE_high_elev_gdf, how='union')


Unnamed: 0,Station_ID,Station_Na,Area_km2,Source,time,station_id,SWE,flag,donor_stations,lat,lon,station_name,elevation,elevation_category,year,month,day,geometry
0,05BB001,BOW RIVER AT BANFF,2210.56,WSC_HYDAT,NaT,,,,,,,,,,,,,"POLYGON ((-115.83512 51.07677, -115.83499 51.0..."


Unnamed: 0,Station_ID,Station_Na,Area_km2,Source,time,station_id,SWE,flag,donor_stations,lat,lon,station_name,elevation,elevation_category,year,month,day,geometry
0,05BB001,BOW RIVER AT BANFF,2210.56,WSC_HYDAT,NaT,,,,,,,,,,,,,"POLYGON ((-116.31063 51.70239, -116.31037 51.7..."


In [168]:
# Plot the two shapefiles if they are not empty
fig, ax = plt.subplots(figsize=(10, 10))
Bow_shapefile.plot(ax=ax, color='lightgrey')

if not low_elev_shapefile.empty:
	low_elev_shapefile.plot(ax=ax, color='red')
else:
	print("Low elevation shapefile is empty.")

#if not high_elev_shapefile.empty:
	high_elev_shapefile.plot(ax=ax, color='blue')
#else:
	#print("High elevation shapefile is empty.")

plt.legend(['Low elevation stations', 'High elevation stations'])
plt.savefig(output_path + 'SWE_low_high_elev_shapefiles.png')
plt.show()

  plt.show()


In [136]:
#Take mean SWE for each day for each category
daily_mean_low = SWE_low_elev.groupby('time')['SWE'].mean().reset_index()
daily_mean_high = SWE_high_elev.groupby('time')['SWE'].mean().reset_index()

# Rename the columns for clarity
daily_mean_low.columns = ['date', 'mean_SWE']
daily_mean_high.columns = ['date', 'mean_SWE']

# Display the result
print(daily_mean_low.head())
print(daily_mean_high.head())

        date  mean_SWE
0 1980-01-01      25.0
1 1980-01-02       NaN
2 1980-01-03       NaN
3 1980-01-04       NaN
4 1980-01-05       NaN
        date    mean_SWE
0 1980-01-01  162.045000
1 1980-01-02  319.853333
2 1980-01-03  233.103333
3 1980-01-04  234.550000
4 1980-01-05  298.465000


In [137]:
# Plot the daily mean values
plt.figure(figsize=(10,6))
plt.plot(daily_mean_low['date'], daily_mean_low['mean_SWE'], label='1300 m - 1750 m', color='blue')
plt.plot(daily_mean_high['date'], daily_mean_high['mean_SWE'], label='2100 m - 2400 m', color='red')
plt.xlabel('Date')
plt.ylabel('Mean Value')
plt.title('Daily Mean Values')
plt.legend()
plt.savefig(output_path + 'daily_mean_values.png')
plt.close()


In [249]:
# Set season parameters for the low elevation category
start_month, start_day = 11, 1  # Start in November
end_month, end_day = 5, 1       # End in May

# Find the first date with 15 mm SWE each year to set as season start
daily_mean_low['season_year'] = daily_mean_low['date'].apply(lambda x: x.year if x.month >= start_month else x.year - 1)
season_starts_low = daily_mean_low[daily_mean_low['mean_SWE'] >= 9].groupby('season_year')['date'].min()

# Filter seasons based on season start and ensure they run through to April 1st of the next year
filtered_seasons_low = []

for year, start_date in season_starts_low.items():
    if start_date.month < start_month:
        continue  # Skip incomplete seasons at the beginning

    end_date = pd.Timestamp(year + 1, end_month, end_day)
    season_data_low = daily_mean_low[(daily_mean_low['date'] >= start_date) & (daily_mean_low['date'] <= end_date)]
    
    # Check if season has data from start_date to end_date (April 1st)
    if not season_data_low.empty and season_data_low['date'].max() >= end_date - pd.Timedelta(days=1):
        filtered_seasons_low.append(season_data_low)

display(filtered_seasons_low)

# Combine all complete seasons if there are any
if filtered_seasons_low:
    season_data_low = pd.concat(filtered_seasons_low, ignore_index=True)

    # Extract the year and month-day for filtering
    season_data_low['Year'] = season_data_low['date'].dt.year
    season_data_low['Month-Day'] = season_data_low['date'].dt.strftime('%m-%d')
    season_data_low['Year_Month'] = season_data_low['date'].dt.strftime('%Y-%m')

    # replace Nan values with 0
    #season_data_low = season_data_low.fillna(0)

    display(season_data_low)
else:
    print("No complete seasons found for low elevation category.")

[          date  mean_SWE  season_year
 714 1981-12-15      15.0         1981
 715 1981-12-16       NaN         1981
 716 1981-12-17       NaN         1981
 717 1981-12-18       NaN         1981
 718 1981-12-19       NaN         1981
 ..         ...       ...          ...
 847 1982-04-27       NaN         1981
 848 1982-04-28       NaN         1981
 849 1982-04-29       NaN         1981
 850 1982-04-30       NaN         1981
 851 1982-05-01       NaN         1981
 
 [138 rows x 3 columns],
            date  mean_SWE  season_year
 1057 1982-11-23      15.0         1982
 1058 1982-11-24       NaN         1982
 1059 1982-11-25       NaN         1982
 1060 1982-11-26       NaN         1982
 1061 1982-11-27       NaN         1982
 ...         ...       ...          ...
 1212 1983-04-27       NaN         1982
 1213 1983-04-28       NaN         1982
 1214 1983-04-29       NaN         1982
 1215 1983-04-30       NaN         1982
 1216 1983-05-01       NaN         1982
 
 [160 rows x 3 columns]

Unnamed: 0,date,mean_SWE,season_year,Year,Month-Day,Year_Month
0,1981-12-15,15.0,1981,1981,12-15,1981-12
1,1981-12-16,,1981,1981,12-16,1981-12
2,1981-12-17,,1981,1981,12-17,1981-12
3,1981-12-18,,1981,1981,12-18,1981-12
4,1981-12-19,,1981,1981,12-19,1981-12
...,...,...,...,...,...,...
4250,2023-04-27,,2022,2023,04-27,2023-04
4251,2023-04-28,,2022,2023,04-28,2023-04
4252,2023-04-29,,2022,2023,04-29,2023-04
4253,2023-04-30,,2022,2023,04-30,2023-04


In [250]:
# Set season parameters for the mid elevation category
start_month, start_day = 11, 1  # Start in November
end_month, end_day = 5, 1       # End in May

# Find the first date with 15 mm SWE each year to set as season start
daily_mean_high['season_year'] = daily_mean_high['date'].apply(lambda x: x.year if x.month >= start_month else x.year - 1)
season_starts_high = daily_mean_high[daily_mean_high['mean_SWE'] >= 15].groupby('season_year')['date'].min()

# Filter seasons based on season start and ensure they run through to April 1st of the next year
filtered_seasons_high = []

for year, start_date in season_starts_high.items():
    if start_date.month < start_month:
        continue  # Skip incomplete seasons at the beginning

    end_date = pd.Timestamp(year + 1, end_month, end_day)
    season_data_high = daily_mean_high[(daily_mean_high['date'] >= start_date) & (daily_mean_high['date'] <= end_date)]
    
    # Check if season has data from start_date to end_date (April 1st)
    if not season_data_high.empty and season_data_high['date'].max() >= end_date - pd.Timedelta(days=1):
        filtered_seasons_high.append(season_data_high)

# Combine all complete seasons if there are any
if filtered_seasons_high:
    season_data_high = pd.concat(filtered_seasons_high, ignore_index=True)

    # Extract the year and month-day for filtering
    season_data_high['Year'] = season_data_high['date'].dt.year
    season_data_high['Month-Day'] = season_data_high['date'].dt.strftime('%m-%d')
    season_data_high['Year_Month'] = season_data_high['date'].dt.strftime('%Y-%m')

    # replace Nan values with 0
    #season_data_high = season_data_high.fillna(0)

    display(season_data_high)
else:
    print("No complete seasons found for high elevation category.")

Unnamed: 0,date,mean_SWE,season_year,Year,Month-Day,Year_Month
0,1980-11-05,15.085000,1980,1980,11-05,1980-11
1,1980-11-06,17.460000,1980,1980,11-06,1980-11
2,1980-11-07,19.810000,1980,1980,11-07,1980-11
3,1980-11-08,20.610000,1980,1980,11-08,1980-11
4,1980-11-09,22.880000,1980,1980,11-09,1980-11
...,...,...,...,...,...,...
7824,2023-04-27,368.924150,2022,2023,04-27,2023-04
7825,2023-04-28,353.350912,2022,2023,04-28,2023-04
7826,2023-04-29,358.505962,2022,2023,04-29,2023-04
7827,2023-04-30,352.162246,2022,2023,04-30,2023-04


In [256]:
# Find the first date with 15 mm SWE each year to set as season start
SWE_gap_filled_daily['season_year'] = SWE_gap_filled_daily['date'].apply(lambda x: x.year if x.month >= start_month else x.year - 1)
season_starts = SWE_gap_filled_daily[SWE_gap_filled_daily['SWE'] >= 15].groupby('season_year')['date'].min()

# Filter seasons based on season start and ensure they run through to April 1st of the next year
filtered_seasons = []

for year, start_date in season_starts.items():
    if start_date.month < start_month:
        continue  # Skip incomplete seasons at the beginning

    end_date = pd.Timestamp(year + 1, end_month, end_day)
    season_data = SWE_gap_filled_daily[(SWE_gap_filled_daily['date'] >= start_date) & (SWE_gap_filled_daily['date'] <= end_date)]
    
    # Check if season has data from start_date to end_date (April 1st)
    if not season_data.empty and season_data['date'].max() >= end_date - pd.Timedelta(days=1):
        filtered_seasons.append(season_data)

# Combine all complete seasons if there are any
if filtered_seasons:
    season_data = pd.concat(filtered_seasons, ignore_index=True)

    # Extract the year and month-day for filtering
    season_data['Year'] = season_data['date'].dt.year
    season_data['Month-Day'] = season_data['date'].dt.strftime('%m-%d')
    season_data['Year_Month'] = season_data['date'].dt.strftime('%Y-%m')

    # replace Nan values with 0
    #season_data_high = season_data_high.fillna(0)

    display(season_data)
else:
    print("No complete seasons found for high elevation category.")

KeyError: 'date'

Step 1: Replace zeros with perturbations

In [149]:
def perturb_zeros(swe_column):
    """Perturbs zero values with small positive values."""
    swe_array = swe_column.to_numpy()  # Convert to NumPy array for efficient manipulation
    nonzero_min = swe_array[swe_array > 0].min()  # Find the smallest nonzero value
    
    # Generate perturbations for zero values
    perturbation = np.random.uniform(0, nonzero_min / 2, size=swe_column[swe_column == 0].shape)
    
    # Replace zeros with perturbation
    swe_column[swe_column == 0] = perturbation
    
    return swe_column

# Apply to mean_SWE column only
season_data_low['mean_SWE'] = perturb_zeros(season_data_low['mean_SWE'].copy())

display(season_data_low)
print("Number of zeros in mean_SWE:", (season_data_low['mean_SWE'] == 0).sum())

# Apply to mean_SWE column only
season_data_high['mean_SWE'] = perturb_zeros(season_data_high['mean_SWE'].copy())

display(season_data_high)
print("Number of zeros in mean_SWE:", (season_data_high['mean_SWE'] == 0).sum())

Unnamed: 0,date,mean_SWE,season_year,Year,Month-Day,Year_Month
0,1981-12-15,15.000000,1981,1981,12-15,1981-12
1,1981-12-16,1.099151,1981,1981,12-16,1981-12
2,1981-12-17,1.107074,1981,1981,12-17,1981-12
3,1981-12-18,0.841027,1981,1981,12-18,1981-12
4,1981-12-19,1.906329,1981,1981,12-19,1981-12
...,...,...,...,...,...,...
4250,2023-04-27,2.076589,2022,2023,04-27,2023-04
4251,2023-04-28,2.316217,2022,2023,04-28,2023-04
4252,2023-04-29,0.999925,2022,2023,04-29,2023-04
4253,2023-04-30,1.940278,2022,2023,04-30,2023-04


Number of zeros in mean_SWE: 0


Unnamed: 0,date,mean_SWE,season_year,Year,Month-Day,Year_Month
0,1980-11-05,15.085000,1980,1980,11-05,1980-11
1,1980-11-06,17.460000,1980,1980,11-06,1980-11
2,1980-11-07,19.810000,1980,1980,11-07,1980-11
3,1980-11-08,20.610000,1980,1980,11-08,1980-11
4,1980-11-09,22.880000,1980,1980,11-09,1980-11
...,...,...,...,...,...,...
9071,2023-05-26,61.854427,2022,2023,05-26,2023-05
9072,2023-05-27,60.022142,2022,2023,05-27,2023-05
9073,2023-05-28,64.656928,2022,2023,05-28,2023-05
9074,2023-05-29,65.965241,2022,2023,05-29,2023-05


Number of zeros in mean_SWE: 0


Step 2: Compute 6-month SWE integration 

In [150]:
# Step 1: Filter for November to May
#df_seasonal = season_data[season_data['Month-Day'].between('11-01', '04-30')]
def integrate_season(group):
    """Integrates SWE values from November 1st to May 1st."""
    # Ensure dates are sorted (though dataset is pre-sorted)
    group = group.sort_values(by='date')
    # Convert dates to numerical days since start of the season
    days_since_start = (group['date'] - group['date'].min()).dt.days
    # Integrate SWE over the period
    total_swe_integration = trapz(group['mean_SWE'], days_since_start)
    return pd.Series({'total_SWE_integration': total_swe_integration})


# Group by season_year and compute integration
Integrated_data_low = season_data_low.groupby('season_year').apply(integrate_season).reset_index()

# Inspect results
display(Integrated_data_low)

# Group by season_year and compute integration
Integrated_data_high = season_data_high.groupby('season_year').apply(integrate_season).reset_index()

# Inspect results
display(Integrated_data_high)

Unnamed: 0,season_year,total_SWE_integration
0,1981,11814.354159
1,1982,11288.805181
2,1983,9149.303965
3,1984,10612.296146
4,1985,13585.312148
5,1986,11141.149921
6,1989,13876.686149
7,1990,16328.359834
8,1991,10940.607585
9,1993,11744.499261


Unnamed: 0,season_year,total_SWE_integration
0,1980,65122.229621
1,1981,60361.685248
2,1982,49543.70019
3,1983,57558.083213
4,1984,51902.107493
5,1985,84641.187107
6,1986,55765.242309
7,1987,47550.791137
8,1988,58277.842304
9,1989,79520.304585


In [71]:
# Group by month and compute integration
Integrated_data_monthly_low = season_data_low.groupby('Year_Month').apply(integrate_season).reset_index()

# Ensure season_year corresponds to each month
Integrated_data_monthly_low['season_year'] = Integrated_data_monthly_low['Year_Month'].apply(lambda x: int(x.split('-')[0]) if int(x.split('-')[1]) >= start_month else int(x.split('-')[0]) - 1)

# Group by season_year and compute integration
Integrated_data_season_low = Integrated_data_monthly_low.groupby('season_year').sum().reset_index()

display(Integrated_data_season_low)
display(Integrated_data_monthly_low)

Unnamed: 0,season_year,total_SWE_integration
0,1981,11216.666947
1,1982,10670.829518
2,1983,8737.671887
3,1984,10046.969653
4,1985,12921.835359
5,1986,10547.853441
6,1987,7838.971782
7,1988,10220.390927
8,1989,13202.846932
9,1990,15559.55496


Unnamed: 0,Year_Month,total_SWE_integration,season_year
0,1981-12,41.755333,1981
1,1982-01,1450.220543,1981
2,1982-02,3198.720754,1981
3,1982-03,4555.379792,1981
4,1982-04,1954.995363,1981
...,...,...,...
238,2023-01,1274.033048,2022
239,2023-02,1800.288775,2022
240,2023-03,3031.931839,2022
241,2023-04,1313.864114,2022


In [70]:
# Group by month and compute integration
Integrated_data_monthly_high = season_data_high.groupby('Year_Month').apply(integrate_season).reset_index()

# Ensure season_year corresponds to each month
Integrated_data_monthly_high['season_year'] = Integrated_data_monthly_high['Year_Month'].apply(lambda x: int(x.split('-')[0]) if int(x.split('-')[1]) >= start_month else int(x.split('-')[0]) - 1)

# Group by season_year and compute integration
Integrated_data_season_high = Integrated_data_monthly_high.groupby('season_year').sum().reset_index()

display(Integrated_data_season_high)
display(Integrated_data_monthly_high)

Unnamed: 0,season_year,total_SWE_integration
0,1980,63131.330663
1,1981,58543.995248
2,1982,47972.160954
3,1983,55918.340187
4,1984,50322.902909
5,1985,82229.93951
6,1986,53969.947888
7,1987,46106.745819
8,1988,56551.102776
9,1989,77150.31503


Unnamed: 0,Year_Month,total_SWE_integration,season_year
0,1980-11,1347.225833,1980
1,1980-12,6911.161250,1980
2,1981-01,8462.407056,1980
3,1981-02,8495.549143,1980
4,1981-03,12799.318353,1980
...,...,...,...
296,2023-01,5815.782136,2022
297,2023-02,6049.542395,2022
298,2023-03,8629.947263,2022
299,2023-04,10219.369248,2022


Step 3: Rank data and compute Gringorten probabilities

In [72]:
def gringorten_probabilities(values):
    """Compute Gringorten plotting position probabilities."""
    sorted_values = np.sort(values)
    ranks = np.argsort(np.argsort(values)) + 1  # Rank from smallest to largest
    n = len(values)
    probabilities = (ranks - 0.44) / (n + 0.12)
    return probabilities

# Ensure you're applying the function only to 'total_SWE_integration'
if 'total_SWE_integration' in Integrated_data_season_low.columns:
    Integrated_data_season_low['Gringorten_probabilities'] = gringorten_probabilities(Integrated_data_season_low['total_SWE_integration'])
else:
    print("Error: 'total_SWE_integration' column is missing.")

# Inspect results
display(Integrated_data_season_low)

# Ensure you're applying the function only to 'total_SWE_integration'
if 'total_SWE_integration' in Integrated_data_season_high.columns:
    Integrated_data_season_high['Gringorten_probabilities'] = gringorten_probabilities(Integrated_data_season_high['total_SWE_integration'])
else:
    print("Error: 'total_SWE_integration' column is missing.")

# Inspect results
display(Integrated_data_season_high)

Unnamed: 0,season_year,total_SWE_integration,Gringorten_probabilities
0,1981,11216.666947,0.537388
1,1982,10670.829518,0.437687
2,1983,8737.671887,0.21336
3,1984,10046.969653,0.337986
4,1985,12921.835359,0.711864
5,1986,10547.853441,0.412762
6,1987,7838.971782,0.088734
7,1988,10220.390927,0.362911
8,1989,13202.846932,0.761715
9,1990,15559.55496,0.936191


Unnamed: 0,season_year,total_SWE_integration,Gringorten_probabilities
0,1980,63131.330663,0.569573
1,1981,58543.995248,0.5
2,1982,47972.160954,0.152134
3,1983,55918.340187,0.384045
4,1984,50322.902909,0.221707
5,1985,82229.93951,0.91744
6,1986,53969.947888,0.337662
7,1987,46106.745819,0.105751
8,1988,56551.102776,0.407236
9,1989,77150.31503,0.871058


Step 4: Compute SWEI

In [73]:
def compute_swei(probabilities):
    """Transform probabilities to SWEI using the inverse normal distribution."""
    return norm.ppf(probabilities)

# Ensure 'Gringorten_probabilities' exists before computing SWEI
if 'Gringorten_probabilities' in Integrated_data_season_low.columns:
    Integrated_data_season_low['SWEI'] = compute_swei(Integrated_data_season_low['Gringorten_probabilities'])

# Ensure 'Gringorten_probabilities' exists before computing SWEI
if 'Gringorten_probabilities' in Integrated_data_season_high.columns:
    Integrated_data_season_high['SWEI'] = compute_swei(Integrated_data_season_high['Gringorten_probabilities'])

# Step 5: Classify drought conditions
def classify_drought(swei):
    """Classify drought conditions based on SWEI values."""
    if swei <= -2.0:
        return "Exceptional Drought"
    elif -2.0 < swei <= -1.5:
        return "Extreme Drought"
    elif -1.5 < swei <= -1.0:
        return "Severe Drought"
    elif -1.0 < swei <= -0.5:
        return "Moderate Drought"
    elif -0.5 < swei <= 0.5:
        return "Near Normal"
    elif 0.5 < swei <= 1.0:
        return "Abnormally Wet"
    elif 1.0 < swei <= 1.5:
        return "Moderately Wet"
    elif 1.5 < swei <= 2.0:
        return "Very Wet"
    else:
        return "Extremely Wet"


# Apply drought classification to the SWEI column
if 'SWEI' in Integrated_data_season_low.columns:
    Integrated_data_season_low['Drought_Classification'] = Integrated_data_season_low['SWEI'].apply(classify_drought)

# Step 3: Select relevant columns for output
if 'season_year' in Integrated_data_season_low.columns:
    output_columns = ['season_year', 'Gringorten_probabilities', 'SWEI', 'Drought_Classification']
elif 'year' in Integrated_data_season_low.columns:
    output_columns = ['year', 'Gringorten_probabilities', 'SWEI', 'Drought_Classification']
else:
    output_columns = ['Gringorten_probabilities', 'SWEI', 'Drought_Classification']

# Output the selected columns
print("Results:\n", Integrated_data_season_low[output_columns])

# Apply drought classification to the SWEI column
if 'SWEI' in Integrated_data_season_high.columns:
    Integrated_data_season_high['Drought_Classification'] = Integrated_data_season_high['SWEI'].apply(classify_drought)

# Step 3: Select relevant columns for output
if 'season_year' in Integrated_data_season_high.columns:
    output_columns = ['season_year', 'Gringorten_probabilities', 'SWEI', 'Drought_Classification']
elif 'year' in Integrated_data_season_high.columns:
    output_columns = ['year', 'Gringorten_probabilities', 'SWEI', 'Drought_Classification']
else:
    output_columns = ['Gringorten_probabilities', 'SWEI', 'Drought_Classification']

# Output the selected columns
print("Results:\n", Integrated_data_season_high[output_columns])

Results:
     season_year  Gringorten_probabilities      SWEI Drought_Classification
0          1981                  0.537388  0.093855            Near Normal
1          1982                  0.437687 -0.156836            Near Normal
2          1983                  0.213360 -0.794817       Moderate Drought
3          1984                  0.337986 -0.417966            Near Normal
4          1985                  0.711864  0.558840         Abnormally Wet
5          1986                  0.412762 -0.220447            Near Normal
6          1987                  0.088734 -1.348593         Severe Drought
7          1988                  0.362911 -0.350688            Near Normal
8          1989                  0.761715  0.711830         Abnormally Wet
9          1990                  0.936191  1.523566               Very Wet
10         1991                  0.387836 -0.284962            Near Normal
11         1992                  0.013958 -2.198461    Exceptional Drought
12         1993

In [98]:
# Extract necessary columns
plot_data_low = Integrated_data_season_low[['season_year', 'SWEI', 'Drought_Classification']]

# Extract necessary columns
plot_data_high = Integrated_data_season_high[['season_year', 'SWEI', 'Drought_Classification']]

# Sort by season_year for better plotting
plot_data_low = plot_data_low.sort_values(by='season_year')
plt.figure(figsize=(15, 6))
plt.plot(plot_data_low['season_year'], plot_data_low['SWEI'], marker='o', label='SWEI for 1300-2000 m', color='black')

# Sort by season_year for better plotting
plot_data_high = plot_data_high.sort_values(by='season_year')
#plt.figure(figsize=(10, 6))
plt.plot(plot_data_high['season_year'], plot_data_high['SWEI'], marker='o', label='SWEI for 2000-2500 m', color='red')

# Add thresholds for drought classifications
plt.axhline(-2.0, color='r', linestyle='--', label='Exceptional Drought Threshold')
plt.axhline(-1.5, color='orange', linestyle='--', label='Extreme Drought Threshold')
plt.axhline(-1.0, color='yellow', linestyle='--', label='Severe Drought Threshold')
plt.axhline(-0.5, color='gray', linestyle='--', label='Near Normal Threshold')
plt.axhline(0.5, color='pink', linestyle='--', label='Abnormaly Wet Threshold')
plt.axhline(1.0, color='violet', linestyle='--', label='Moderately Wet Threshold')
plt.axhline(1.5, color='purple', linestyle='--', label='Very Wet Threshold')
plt.axhline(2.0, color='blue', linestyle='--', label='Extremely Wet Threshold')

# Customize the plot
plt.title('SWEI Trends by Season Year')
plt.xlabel('Season Year')
plt.ylabel('Standardized SWEI')
plt.xticks(rotation=45)
# put legend outside the plot right top 
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
# Add gridlines for each year
for year in plot_data_high['season_year'].unique():
    plt.axvline(year, color='gray', linestyle='--', linewidth=0.5)
plt.grid()
plt.tight_layout()
plt.savefig(output_path + 'SWEI_trends.png')
plt.close()

