# Global analysis of snowmelt runoff onset

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import dask.dataframe as dd
import seaborn as sns
import xarray as xr
import coiled
import dask
from global_snowmelt_runoff_onset.config import Config, Tile

In [2]:
# Load the global processing configuration (Config prints the settings it loaded).
config_path = '../config/global_config.txt'
config = Config(config_path)

Configuration loaded:
resolution = 0.00072000072000072
spatial_chunk_dim = 2048
bbox_left = -179.999
bbox_right = 179.999
bbox_top = 81.099
bbox_bottom = -59.999
wy_start = 2015
wy_end = 2024
low_backscatter_threshold = 0.001
min_monthly_acquisitions = 2
max_allowed_days_gap_per_orbit = 30
min_years_for_median_std = 3
valid_tiles_geojson_path = ../processing/valid_tiles.geojson
tile_results_path = ../processing/tile_results.csv
global_runoff_zarr_store_azure_path = snowmelt/snowmelt_runoff_onset/global.zarr
seasonal_snow_mask_zarr_store_azure_path = snowmelt/snow_mask_v2/global_modis_snow_mask.zarr


In [None]:
# Options for the Coiled cluster that runs the dask computations below.
cluster_options = dict(
    idle_timeout="10 minutes",
    n_workers=10,
    worker_memory="32 GB",
    worker_cpu=4,
    scheduler_memory="32 GB",
    spot_policy="spot",
    # Environment variable forwarded to the cluster
    # (presumably to keep GDAL from listing directories on open; verify).
    environ={"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR"},
    workspace="azure",
)
cluster = coiled.Cluster(**cluster_options)

# Dask client bound to the cluster; later dask operations run through it.
client = cluster.get_client()

In [3]:
# Open the global runoff-onset Zarr store using its consolidated metadata,
# decoding all coordinate-like variables as coordinates.
global_ds = xr.open_zarr(
    config.global_runoff_store,
    consolidated=True,
    decode_coords='all',
)

## Read in parquet files

In [None]:
# Dask dataframe over the per-tile parquet files, read through the
# Azure blob filesystem held by the config object.
tiles_parquet_path = 'snowmelt/analysis/tiles/'
ddf = dd.read_parquet(tiles_parquet_path, filesystem=config.azure_blob_fs)

In [None]:
# Keep only the columns needed for the global analysis, rebalance the
# partitions to ~256 MiB each, and persist the result on the cluster.
analysis_columns = ["original_lat", "runoff_onset_median", "dem"]
all_tiles_results_df = (
    ddf[analysis_columns]
    .repartition(partition_size="256 MiB")
    .persist()
)
all_tiles_results_df

In [None]:
# Memory footprint of the persisted dataframe, converted from bytes to GB (1e9 bytes).
memory_usage_bytes = all_tiles_results_df.memory_usage().compute()
memory_usage_bytes / 1e9

## global analysis: linear regression and correlations

In [8]:
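# TODO: quantify these relationships with linear regressions:
#   for every X m increase in elevation, runoff onset is delayed by Y days
#   for every X degree increase in latitude, runoff onset is delayed by Y days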

In [None]:
# Pairwise correlation matrix between latitude, runoff onset and elevation.
correlation_matrix = all_tiles_results_df.corr()
correlation_matrix.compute()

## global analysis: bin by latitude and elevation

In [10]:
# Elevation bin edges [m]: 0, 100, ..., 8000 (the upper edge is included).
dem_bin_low, dem_bin_high, dem_bin_interval = 0, 8000, 100
dem_bins = np.arange(
    dem_bin_low,
    dem_bin_high + dem_bin_interval,  # extend the stop so the last edge equals dem_bin_high
    dem_bin_interval,
)

# Latitude bin edges [degrees]: -80, -79, ..., 80 (the upper edge is included).
lat_bin_low, lat_bin_high, lat_bin_interval = -80, 80, 1
lat_bins = np.arange(
    lat_bin_low,
    lat_bin_high + lat_bin_interval,  # extend the stop so the last edge equals lat_bin_high
    lat_bin_interval,
)

In [None]:
# Assign every row to a latitude bin and an elevation bin, partition by partition.
# pd.cut returns NaN for values outside the bin edges, so the dropna() below
# removes those rows along with any rows that already contained missing values.
# NOTE(review): the configured bounding box reaches 81.099 degrees north, while
# lat_bins stops at 80 -- confirm that dropping latitudes above 80 is intended.
all_tiles_results_df = all_tiles_results_df.assign(
    lat_bin=all_tiles_results_df['original_lat'].map_partitions(pd.cut, lat_bins),
    dem_bin=all_tiles_results_df['dem'].map_partitions(pd.cut, dem_bins),
).dropna()
all_tiles_results_df

In [None]:
# Replace each bin interval by its left edge, stored as an integer label.
# Not re-runnable: after this cell the bin columns no longer hold intervals.
all_tiles_results_df = all_tiles_results_df.assign(
    lat_bin=all_tiles_results_df['lat_bin'].apply(lambda interval: interval.left).astype(int),
    dem_bin=all_tiles_results_df['dem_bin'].apply(lambda interval: interval.left).astype(int),
)
all_tiles_results_df

In [None]:
# Median of every remaining column within each (latitude bin, elevation bin) group.
# Still a lazy dask object at this point; it is computed in the next cell.
groupby_latitude_and_elevation_df = (
    all_tiles_results_df
    .groupby(['lat_bin', 'dem_bin'])
    .median()
)
groupby_latitude_and_elevation_df

In [None]:
# Run the grouped median with dask's task-based shuffle method; the config
# override only applies inside the `with` block.
# Not re-runnable: the name is rebound from the lazy dask object to its computed result.
shuffle_override = {"dataframe.shuffle.method": "tasks"}
with dask.config.set(shuffle_override):
    groupby_latitude_and_elevation_df = groupby_latitude_and_elevation_df.compute()
groupby_latitude_and_elevation_df

In [None]:
# Reshape the binned medians into a 2D table: rows = latitude bins (north at the
# top), columns = elevation bins, values = median runoff onset.
#
# Bins are labelled by their left edge, so the valid labels are every bin edge
# except the last one. Reindexing on the raw edge arrays would add a row for the
# right-most edge, which can never occur as a label and is therefore always empty.
lat_bin_labels = lat_bins[:-1]
dem_bin_labels = dem_bins[:-1]

# Both axes are reindexed onto the complete label grid so that bins without data
# show up as NaN gaps instead of being silently dropped, which would compress
# the corresponding heatmap axis.
runoff_onset_vs_lat_and_elev_df = (
    groupby_latitude_and_elevation_df
    .reset_index()
    .pivot(index='lat_bin', columns='dem_bin', values='runoff_onset_median')
    .reindex(index=lat_bin_labels, columns=dem_bin_labels)
    .sort_index(ascending=False)
)
runoff_onset_vs_lat_and_elev_df

In [None]:
# Heatmap of median runoff onset: latitude bins on the rows, elevation bins on the columns.
f, ax = plt.subplots(figsize=(8, 10), dpi=300)

heatmap_options = dict(
    square=True,
    cmap='viridis',
    # Fixed colour range of 0-365 (presumably spanning one full water year; confirm).
    vmin=0,
    vmax=365,
    cbar_kws={'label': 'snowmelt runoff onset [DOWY]'},
)
sns.heatmap(runoff_onset_vs_lat_and_elev_df, ax=ax, **heatmap_options)

# Axis labels and title so the figure stands on its own.
ax.set_ylabel('latitude [degrees]')
ax.set_xlabel('elevation (m)')
ax.set_title('2015-2024 median date of snowmelt runoff onset\nbinned by elevation and latitude')

## global analysis: (TODO: section not yet written)

## Code graveyard