## Use case: statistical analysis
Compute spatial or temporal averages of hydrology-related data over a defined time period and/or region.


In [None]:
import xarray as xr
import os
import sys
import pandas as pd
import numpy as np
import yaml 
import geopandas

import matplotlib.pyplot as plt

In [None]:
import seaborn as sns  # noqa, pandas aware plotting library

In [None]:
# Quick look at the environment before the path-resolution cells below.
# With the notebook's default display settings only the last expression
# (sys.platform) is echoed; the membership test result is not shown.
'SP_SRC' in os.environ
sys.platform

In [None]:
# Locate the silverpieces source checkout: the SP_SRC environment variable wins,
# otherwise fall back to a per-platform default location.
root_src_dir = os.environ.get(
    'SP_SRC',
    r'C:\Users\sin17h\Documents\silverpieces'
    if sys.platform == 'win32'
    else '/home/sin17h/Documents/silverpieces',
)

# Make the checkout importable for the `silverpieces` imports that follow.
pkg_src_dir = root_src_dir
sys.path.append(pkg_src_dir)

In [None]:
from silverpieces import *
from silverpieces.functions import *

In [None]:
# Locate the AWRA data directory: the SP_DATA environment variable wins,
# otherwise fall back to a per-platform default location.
root_data_dir = os.environ.get(
    'SP_DATA',
    r'C:\Temp\awraData\base_files'
    if sys.platform == 'win32'
    else '/home/sin17h/temp/awraData',
)


In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# the default cmap_sequential for xarray is viridis. 'coolwarm' is divergent, but works better for wetness concepts
# https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html
xr.set_options(cmap_sequential='coolwarm')

In [None]:
# Glob pattern matching every 'e0_avg_*.nc' file directly under the data directory.
awra_daily_files_path = os.path.join(root_data_dir, 'e0_avg_*.nc')
# Open all matching files as one multi-file dataset.
awra_daily = xr.open_mfdataset(awra_daily_files_path)

In [None]:
#we have loaded the complete daily AWRA Potential evapotranspiration (PET) dataset, starting in 1911
# NOTE(review): the original comment gave the end year as 2105, presumably a typo for 2015 —
# confirm against the time coordinate echoed below.
awra_daily

In [None]:
#plot PET for a particular day
# The day is picked by position (time index 30000), not by date label.
awra_daily.e0_avg.isel(time=30000).plot()

In [None]:
%%time
#give me the monthly mean PET for the full period of the dataset. Takes around 3 secs to compute the stats


#Provide a YML file 
ymlStr = """Args: 
                            product: 'AWRA' #fill in the name of the test file created with dummy data
                            timespan:
                                startDate:  
                                endDate:  
                            variablename: 'e0_avg'"""
args_file = yaml.safe_load(ymlStr)
args_file['Args']['product'] = awra_daily_files_path
awra_monthly_mean_full = monthly_mean(args_file)
awra_monthly_mean_full

In [None]:
#plot monthly means for Dec 2015
# Positional index: (2015 - 1911) * 12 + 11 = 1259, assuming the monthly series
# starts at January 1911 (index 0) — TODO confirm against the time coordinate.
awra_monthly_mean_full.isel(time=1259).plot()

In [None]:
#plot monthly means for July 2015
# Positional index, assuming the monthly series starts at January 1911 (index 0),
# which is the same convention that makes index 1259 December 2015:
# (2015 - 1911) * 12 + 6 = 1254. The previous index, 1252, selected May 2015.
awra_monthly_mean_full.isel(time=1254).plot()

In [None]:
%%time
# Seasonal means computed from the same request that is currently held in args_file.
awra_seasonal_mean_full = seasonal_mean(args_file)
awra_seasonal_mean_full

In [None]:
#plot seasonal means for season ending in Feb,1912
# Selected by position (index 4); presumably index 0 is the season ending Feb 1911 — TODO confirm.
awra_seasonal_mean_full.isel(time=4).plot()

In [None]:
#plot seasonal means for season ending in Feb,2015
# Selected by position: 416 = 4 + (2015 - 1912) * 4, i.e. 103 years of four seasons after index 4.
awra_seasonal_mean_full.isel(time=416).plot()

In [None]:
#plot the difference in seasonal mean for season ending Feb, 2015 and Feb, 1912
# Computed as (index 416) minus (index 4), i.e. later season minus earlier season.
(awra_seasonal_mean_full.isel(time=416) - awra_seasonal_mean_full.isel(time=4)).plot()

In [None]:
# Collapse the seasonal series to one mean per season label by grouping on
# 'time.season' and averaging over the time dimension.
awra_seasonal_mean_full_condensed = awra_seasonal_mean_full.groupby('time.season').mean(dim='time')
awra_seasonal_mean_full_condensed

In [None]:
# Plotting the condensed seasonal mean, either as a single plot or as a facet grid, takes a very long time.
# I waited for 6 minutes before cancelling.

#awra_seasonal_mean_full_condensed.isel(season=1).plot()
#awra_seasonal_mean_full_condensed.plot.imshow(col='season', robust=True)

In [None]:
# Yearly means computed from the same request that is currently held in args_file.
awra_yearly_mean_full = yearly_mean(args_file)
awra_yearly_mean_full

In [None]:
#plot yearly mean for 2015
# Positional index: 2015 - 1911 = 104, assuming the yearly series starts at 1911 — TODO confirm.
awra_yearly_mean_full.isel(time=104).plot()

In [None]:
#plot yearly mean difference for year 2015 and 1990
# Positional indices: 104 -> 2015 and 79 -> 1990, assuming the yearly series starts at 1911 — TODO confirm.
(awra_yearly_mean_full.isel(time=104) - awra_yearly_mean_full.isel(time=79)).plot()

In [None]:
#now cookie-cut the Fitzroy catchment region
# The request is written as a YAML document; the blank entries parse as None and
# product / shape_file are filled in from Python below.
ymlStr = """Args: 
                            product: 'AWRA' #fill in the name of the test file created with dummy data
                            timespan:
                                startDate:  
                                endDate:  
                            shape_file:  
                            variablename: 'e0_avg'"""
args_file = yaml.safe_load(ymlStr)

# Shapefile used for the cut.
# NOTE(review): this is a fixed Windows path, independent of root_data_dir.
shape_file_path = r'C:\Temp\shapefile_forTest\Fitzroy_GEE_prospective_GW_aquifers_v02.shp'
args_file['Args'].update(
    product=awra_daily_files_path,
    shape_file=shape_file_path,
)

awra_fitzroy_monthly_mean_full = monthly_mean(args_file)
awra_fitzroy_monthly_mean_full

In [None]:
#plot monthly means Fitzroy for Dec 2015
# Same positional index (1259) as the full-extent December 2015 plot.
awra_fitzroy_monthly_mean_full.isel(time=1259).plot()

In [None]:
#you can also filter based on a date range
# The request is written as a YAML document: startDate/endDate bound the period
# to calendar year 2015, and shape_file is left blank (parsed as None).
ymlStr = """Args: 
                            product: 'AWRA' #fill in the name of the test file created with dummy data
                            timespan:
                                startDate: 2015-01-01 
                                endDate:  2015-12-31
                            shape_file:  
                            variablename: 'e0_avg'"""
args_file = yaml.safe_load(ymlStr)

# Replace the placeholder product with the real AWRA file pattern.
args_file['Args'].update(product=awra_daily_files_path)

awra_monthly_mean_daterange = monthly_mean(args_file)
awra_monthly_mean_daterange

In [None]:
#Facet grid of plots of monthly means of year 2015
# One panel per time step, wrapped three panels to a row.
awra_monthly_mean_daterange.plot.imshow(col='time', robust=True, col_wrap=3)

In [None]:
#cookie cut with geojson files
# Read the GeoJSON file with geopandas.
# NOTE(review): this is a fixed Windows path, independent of root_data_dir.
df = geopandas.read_file(r'C:\Temp\shapefile_forTest\Fitzroy_GEE_prospective_GW_aquifers_v02.geojson')

In [None]:
# Echo the coordinate reference system recorded on the loaded GeoJSON data.
df.crs

In [None]:
# Draw the loaded geometries as a quick visual check.
df.plot()

In [None]:
#if we want to use salem with GeoJSON, we then have to write the shp file to disk as an intermediate file,
#read it back and proceed with cookie-cutting. GeoPandas does not allow writing the file to a memory buffer,
#which could presumably have avoided the intermediate file (the original sentence was left unfinished).
# NOTE(review): the export below is commented out, so the read assumes geojson_to_shape.shp already exists on disk.
# NOTE(review): `salem` is not imported explicitly in the visible cells; presumably it is bound by the
# silverpieces star imports — confirm.

#df.to_file(r"C:\Temp\shapefile_forTest\geojson_to_shape.shp")
salem.read_shapefile(r"C:\Temp\shapefile_forTest\geojson_to_shape.shp")

In [None]:
# Apply the shapefile cookie-cut helper directly to the daily dataset.
# NOTE(review): the shapefile is given as a bare file name here, whereas the earlier cell uses the full
# C:\Temp\shapefile_forTest path — presumably this only resolves from a matching working directory; confirm.
# NOTE(review): `functions` is not imported explicitly in the visible cells; presumably it is bound by
# `from silverpieces import *` — confirm.
result_mask = functions.cookie_cut_shapefile(awra_daily, 'Fitzroy_GEE_prospective_GW_aquifers_v02.shp')