# Global analysis of snowmelt runoff onset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import dask.dataframe as dd
import seaborn as sns
import xarray as xr
import coiled
import dask
from global_snowmelt_runoff_onset.config import Config

In [None]:
# Build the project configuration from the shared global config file. Later cells
# read `config.global_runoff_store` and `config.azure_blob_fs` from this object.
config = Config('../config/global_config.txt')

In [None]:
# Options for the Coiled cluster that backs the dask computations in this notebook.
cluster_options = {
    "idle_timeout": "10 minutes",
    "n_workers": 10,
    "worker_memory": "64 GB",
    "worker_cpu": 8,
    "scheduler_memory": "64 GB",
    "spot_policy": "spot",
    # Environment variable passed to the cluster.
    # NOTE(review): presumably stops GDAL from listing directories on open - confirm.
    "environ": {"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR"},
    "workspace": "uwtacolab",
}
cluster = coiled.Cluster(**cluster_options)

# Dask client connected to the cluster created above.
client = cluster.get_client()

In [4]:
# Open the global runoff zarr store named in the config, using consolidated
# metadata and decoding all coordinate-related attributes.
global_ds = xr.open_zarr(
    config.global_runoff_store,
    consolidated=True,
    decode_coords='all',
)

## Read in parquet files

In [None]:
# Read every per-tile parquet file under the tiles prefix into a single dask
# dataframe, going through the filesystem object supplied by the config.
results_ddf = dd.read_parquet(
    'snowmelt/analysis/tiles/',
    filesystem=config.azure_blob_fs,
    split_row_groups='adaptive',
)
results_ddf

In [None]:
# List the available columns; the cells below rely on `original_lat`, `dem`,
# `runoff_onset_median` and `chili`.
results_ddf.columns

In [7]:
# Optional alternative: keep only the columns used below, repartition, and persist.
#all_tiles_results_df = results_ddf[["original_lat","dem","runoff_onset_median","chili"]].repartition(partition_size="256 MiB").persist()
# Use the dataframe read from parquet above. The only `ddf` in this notebook lives in
# the commented-out graveyard cell, so referencing it here raised a NameError on a
# clean top-to-bottom run.
all_tiles_results_df = results_ddf

In [None]:
# Per-column memory footprint: compute the byte counts, then express them in GB (bytes / 1e9).
column_bytes = all_tiles_results_df.memory_usage().compute()
column_bytes / 1e9

## global analysis: linear regression and correlations

In [43]:
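# Goal: express the relationships as simple rates, e.g.
#   - for every X m increase in elevation, runoff onset is delayed by Y days
#   - for every X degree increase in latitude, runoff onset is delayed by Y days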

In [None]:
# Pairwise correlation matrix over the numeric columns only, evaluated eagerly with .compute().
all_tiles_results_df.corr(numeric_only=True).compute()

## create latitude and elevation bins

In [11]:
# Elevation bin edges: 0 to 8000 in steps of 100. The upper bound is included by
# extending the arange stop by one interval.
dem_bin_low, dem_bin_high, dem_bin_interval = 0, 8000, 100
dem_bins = np.arange(
    dem_bin_low,
    dem_bin_high + dem_bin_interval,
    dem_bin_interval,
)

# Latitude bin edges: -80 to 80 in steps of 1, upper bound included the same way.
lat_bin_low, lat_bin_high, lat_bin_interval = -80, 80, 1
lat_bins = np.arange(
    lat_bin_low,
    lat_bin_high + lat_bin_interval,
    lat_bin_interval,
)

In [None]:
# Bin latitude partition by partition with pd.cut; values outside the edges become NaN.
lat_intervals = all_tiles_results_df['original_lat'].map_partitions(pd.cut, lat_bins)
all_tiles_results_df['lat_bin'] = lat_intervals

# Same for elevation.
dem_intervals = all_tiles_results_df['dem'].map_partitions(pd.cut, dem_bins)
all_tiles_results_df['dem_bin'] = dem_intervals

# Discard rows that did not land in both a latitude bin and an elevation bin.
bin_columns = ['lat_bin', 'dem_bin']
all_tiles_results_df = all_tiles_results_df.dropna(subset=bin_columns)
all_tiles_results_df

In [None]:
def left_edge(interval):
    """Return the lower bound of a bin interval."""
    return interval.left


# Replace each interval label with its integer lower bound so the bins can be
# used as plain numeric group keys and axis labels.
for bin_column in ('lat_bin', 'dem_bin'):
    all_tiles_results_df[bin_column] = (
        all_tiles_results_df[bin_column].apply(left_edge).astype(int)
    )
all_tiles_results_df

## Median snowmelt runoff onset binned by elevation and latitude

In [None]:
# Median runoff onset for every (latitude bin, elevation bin) pair. Still lazy here;
# the next cell triggers the computation.
onset_with_bins = all_tiles_results_df[['lat_bin', 'dem_bin', 'runoff_onset_median']]
groupby_latitude_and_elevation_df = onset_with_bins.groupby(['lat_bin', 'dem_bin']).median()
groupby_latitude_and_elevation_df

In [None]:
# Materialise the grouped medians. The task-based shuffle method is selected only
# for the duration of this compute call.
shuffle_settings = {"dataframe.shuffle.method": "tasks"}
with dask.config.set(shuffle_settings):
    groupby_latitude_and_elevation_df = groupby_latitude_and_elevation_df.compute()
groupby_latitude_and_elevation_df

In [None]:
# Reshape to a latitude-by-elevation grid for the heatmap: one row per latitude bin
# (north at the top), one column per elevation bin.
# Bins are labelled by their left edge, so valid row labels are lat_bins[:-1].
# Reindexing on the full edge array would add an always-empty row for the final
# edge (80), which is not the left edge of any bin.
runoff_onset_vs_lat_and_elev_df = (
    groupby_latitude_and_elevation_df
    .reset_index()
    .pivot(index='lat_bin', columns='dem_bin', values='runoff_onset_median')
    .reindex(lat_bins[:-1])
    .sort_index(ascending=False)
)
runoff_onset_vs_lat_and_elev_df

In [None]:
# Heatmap of median runoff onset on the latitude-by-elevation grid, with the colour
# scale fixed to 0-365.
f, ax = plt.subplots(figsize=(8, 10), dpi=300)
onset_heatmap_options = {
    'square': True,
    'ax': ax,
    'cmap': 'viridis',
    'cbar_kws': {'label': 'snowmelt runoff onset [DOWY]'},
    'vmin': 0,
    'vmax': 365,
}
sns.heatmap(runoff_onset_vs_lat_and_elev_df, **onset_heatmap_options)

ax.set_xlabel('elevation (m)')
ax.set_ylabel('latitude [degrees]')
ax.set_title('2015-2024 median date of snowmelt runoff onset\nbinned by elevation and latitude')

## global analysis: chili / influence of shortwave radiation

In [None]:
# CHILI thresholds separating the cool / neutral / warm classes.
chili_cool_below = 0.448
chili_warm_above = 0.767


def chili_label(value):
    """Classify one CHILI value as 'warm', 'cool' or 'neutral'.

    Values that satisfy neither comparison (including NaN) are 'neutral'.
    """
    if value > chili_warm_above:
        return 'warm'
    if value < chili_cool_below:
        return 'cool'
    return 'neutral'


# Start with every row labelled 'neutral', keep that label where CHILI lies inside
# the neutral band, and take the per-value classification everywhere else.
all_tiles_results_df['chili_class'] = 'neutral'
chili_values = all_tiles_results_df['chili']
in_neutral_band = (chili_values >= chili_cool_below) & (chili_values <= chili_warm_above)
all_tiles_results_df['chili_class'] = all_tiles_results_df['chili_class'].where(
    in_neutral_band,
    other=chili_values.map(chili_label),
)

In [None]:
# Mean of runoff_onset_median for every (latitude bin, elevation bin, CHILI class)
# combination, using only rows where all four columns are present.
chili_columns = ['lat_bin', 'dem_bin', 'chili_class', 'runoff_onset_median']
complete_rows = all_tiles_results_df[chili_columns].dropna()
grouped = (
    complete_rows
    .groupby(['lat_bin', 'dem_bin', 'chili_class'])['runoff_onset_median']
    .mean()
    .compute()
)
grouped

In [None]:
# Move the innermost index level (chili_class) into columns, leaving one row per
# (lat_bin, dem_bin) pair.
pivot_df = grouped.unstack()
pivot_df

In [None]:
# Reshape to get warm and cool values
warm_cool_ratio_df = pivot_df['warm'] / pivot_df['cool']
warm_cool_ratio_df

In [None]:
# Reshape the ratio Series (indexed by lat_bin, dem_bin) into a latitude-by-elevation
# grid for the heatmap, north at the top.
# The Series is named and passed as `values=` so the pivot returns flat dem_bin
# columns. Without `values`, pivot builds hierarchical columns keyed by the unnamed
# column `0`, which the heatmap renders as tick labels such as "0-100".
# Bins are labelled by their left edge, so rows are reindexed on lat_bins[:-1]; the
# final edge (80) is not a bin label and would only add an empty row.
warm_cool_ratio_df = (
    warm_cool_ratio_df
    .rename('warm_cool_ratio')
    .reset_index()
    .pivot(index='lat_bin', columns='dem_bin', values='warm_cool_ratio')
    .reindex(lat_bins[:-1])
    .sort_index(ascending=False)
)
warm_cool_ratio_df

In [None]:
# Heatmap of the warm/cool ratio on the latitude-by-elevation grid; robust=True lets
# seaborn pick colour limits from quantiles rather than the extremes.
f, ax = plt.subplots(figsize=(8, 10), dpi=300)
ratio_heatmap_options = {
    'square': True,
    'ax': ax,
    'cmap': 'PuOr',
    'cbar_kws': {'label': 'warm / cool'},
    'robust': True,
}
sns.heatmap(warm_cool_ratio_df, **ratio_heatmap_options)

ax.set_xlabel('elevation (m)')
ax.set_ylabel('latitude [degrees]')
ax.set_title('Ratio of snowmelt runoff onset medians by CHILI warm/cool classification\nbinned by elevation and latitude')

## Code graveyard

In [None]:
# # import pyarrow.parquet as pq
# # import glob
# # import os

# #config.azure_blob_fs.download('snowmelt/analysis/tiles/','tiles/',recursive=True)

# # for filepath in glob.glob('tiles/*'):
# #     try:
# #         pq.ParquetFile(filepath)
# #     except Exception as e:
# #         print(f'Error reading {filepath}: {e}')
# #         os.remove(filepath)
# # ddf = dd.read_parquet('tiles/')



# #df = ddf[["original_lat","original_lon","runoff_onset_median","dem","aspect"]].persist()

# config.azure_blob_fs.ls('snowmelt/analysis/tiles/')

# ddf = dd.read_parquet('snowmelt/analysis/tiles/tile_008*', filesystem=config.azure_blob_fs)
# ddf

# for col in ddf.columns:
#     if col == 'hemisphere':
#         continue
#     print(f'for {col} with data type {ddf[col].dtype}')
#     print(f'the mean is {ddf[col].mean().compute()}')
#     print(f'the std is {ddf[col].std().compute()}')
#     print(f'the min is {ddf[col].min().compute()}')
#     print(f'the max is {ddf[col].max().compute()}')
#     print(f'the median is {ddf[col].median_approximate().compute()}')
#     print('---------------------------------')

    
# all_tiles_results_df = ddf[["original_lat","runoff_onset_median","dem"]].repartition(partition_size="256 MiB").persist()#.compute().repartition(partition_size="256 MiB")
# all_tiles_results_df