In [1]:
%load_ext autoreload
%autoreload 2

# Frequency analysis 

Text


In [2]:
import xarray as xr
import xhydro as xh
import numpy as np
import xdatasets as xd

ERROR 1: PROJ: proj_create_from_database: Open of /home/slanglois/mambaforge/envs/xhydro/share/proj failed


<a id='data_load'></a>
## Prepare the data

To conduct frequency analysis on historical time series from various sites, we begin by obtaining a dataset comprising hydrological information. 

Here, we use the [xdataset](https://hydrologie.github.io/xdatasets/notebooks/getting_started.html) library to acquire hydrological data from the [Ministère de l'Environnement, de la Lutte contre les changements climatiques, de la Faune et des Parcs](https://www.cehq.gouv.qc.ca/atlas-hydroclimatique/stations-hydrometriques/index.htm). Specifically, our query focuses on stations with IDs beginning with `02`, possessing a natural flow pattern and limited to streamflow data. 

Users may prefer to generate their own `xarray.DataArray` using their individual dataset. At a minimum, the `xarray.DataArray` used for frequency analysis needs to have an `id` and a `time` dimension.

In [6]:
data = xd.Query(
    **{
        "datasets":{
            "deh":{
                "id" :["02*"],
                "regulated":["Natural"],
                "variables":["streamflow"],
            }
        }, "time":{"start": "1970-01-01", 
                   "minimum_duration":(15*365, 'd')},

  }
).data.squeeze().load()
data

In [7]:
(data
 .streamflow
 .dropna('time', 'all')
 .hvplot(x='time',grid=True, widget_location='bottom', groupby='id')
)

## Customize the analysis settings

With a collection of hydrological data now at our disposal, we can provide the `xarray.Dataset` to the `Data` object. This step allows us to fine-tune certain configurations before proceeding with the frequency analysis.

In [8]:
from xhydro.frequency_analysis.local import Data

In [10]:
xfa = Data(data)

### a) Define the seasons
We can define seasons by supplying a season's name along with a range of Julian days.

In [11]:
fall_start = xh.get_julian_day(month=9, day = 1)
fall_end = xh.get_julian_day(month=12, day=1)

spring_start = xh.get_julian_day(month=2, day=11)
spring_end = xh.get_julian_day(month=6, day=19)

In [24]:
xfa.season = ['fall', fall_start, fall_end]
xfa.season = ['spring', spring_start, spring_end]
xfa.season = ['annual', 1, 365]



In [25]:
xfa.get_seasons()

['Fall', 'Spring', 'spring_custom', 'fall', 'spring', 'annual']

If a season is no longer required, it can readily be remove like this : 

In [27]:
xfa.rm_season('annual')
xfa.get_seasons()

['Fall', 'Spring', 'spring_custom', 'fall', 'spring']

In cases where distinct catchments necessitate individualized Julian Day ranges for each year, users can explicitly define these ranges

In [17]:
import random
# Generating random flood dates for all year and one catchment
m = np.empty((1, len(range(1910, 2023))), dtype=object)
for i in np.ndindex(m.shape):
    m[i] = [random.randint(70, 70), random.randint(139, 139)]

ds_dates = xr.Dataset()
ds_dates.coords['year'] = xr.DataArray(range(1910, 2023), dims=('year',))
ds_dates.coords['id'] = xr.DataArray(['020302'], dims=('id',))

ds_dates['value'] = xr.DataArray(m, dims=('id', 'year'))
ds_dates

In [18]:
xfa._season['spring_custom'] = ds_dates

In [21]:
xfa.get_seasons()

['Fall', 'Spring', 'spring_custom']

### b) Get block maxima
Upon selecting each desired season, we have the capability to extract block maxima series from every station. Here we select the peak flow for each season.

In [30]:
maxima1 = xfa._get_max(tolerence=0.15, seasons=['spring_custom'])
maxima2 = xfa._get_max(tolerence=0.15, seasons=['Spring'])

We then convert to a `pd.DataFrame` and only keep desired columns

In [52]:
coords_to_drop = set(maxima1.coords )- set(maxima1.dims)
maxima1.drop(coords_to_drop).to_dataframe(name='Maxima').reset_index().dropna(how='any')

Unnamed: 0,season,year,id,Maxima
83,spring_custom,1993,20302,135.600006
84,spring_custom,1994,20302,501.0


In [53]:
coords_to_drop = set(maxima2.coords )- set(maxima2.dims)
maxima2.drop(coords_to_drop).to_dataframe(name='Maxima').reset_index().dropna(how='any')

Unnamed: 0,season,year,id,Maxima
4,Spring,1970,020802,266.000000
6,Spring,1970,021502,204.000000
7,Spring,1970,021601,413.000000
8,Spring,1970,021702,54.700001
11,Spring,1970,022003,309.000000
...,...,...,...,...
1990,Spring,2023,023702,84.209999
1991,Spring,2023,024003,208.600006
1992,Spring,2023,024004,17.190001
1996,Spring,2023,024014,405.500000


On calcul les volume de crues à dates fixes

In [12]:
vol = xfa.calculate_volume(dates=[35, 36])
vol.volume.to_dataframe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,end_date,start_date,volume
year,id,variable,spatial_agg,timestep,time_agg,source,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1970,020302,streamflow,watershed,0,mean,0,1970-02-05,1970-02-04,
1970,020404,streamflow,watershed,0,mean,0,1970-02-05,1970-02-04,
1970,020502,streamflow,watershed,0,mean,0,1970-02-05,1970-02-04,
1970,020602,streamflow,watershed,0,mean,0,1970-02-05,1970-02-04,
1970,020802,streamflow,watershed,0,mean,0,1970-02-05,1970-02-04,6.851520
...,...,...,...,...,...,...,...,...,...
2023,024007,streamflow,watershed,0,mean,0,2023-02-05,2023-02-04,
2023,024010,streamflow,watershed,0,mean,0,2023-02-05,2023-02-04,
2023,024013,streamflow,watershed,0,mean,0,2023-02-05,2023-02-04,
2023,024014,streamflow,watershed,0,mean,0,2023-02-05,2023-02-04,2.327616


On calcul les volumes de crues avec un DataSet

In [13]:
sub_set_example = xfa.select_catchments(['020302'])
vol = sub_set_example.calculate_volume(dates=dates_ds)
vol.to_dataframe().dropna()


Unnamed: 0_level_0,Unnamed: 1_level_0,units,start_date,end_date,volume
year,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1993,20302,hm³,1993-03-11,1993-05-19,22411.681368
1994,20302,hm³,1994-03-11,1994-05-19,49109.812832


In [14]:
vol.to_dataframe().dropna().reset_index()

Unnamed: 0,year,id,units,start_date,end_date,volume
0,1993,20302,hm³,1993-03-11,1993-05-19,22411.681368
1,1994,20302,hm³,1994-03-11,1994-05-19,49109.812832


<a id='data_getMaximum'></a>
Use get_maximum to get the maximums per season for selected catcment, if no period selected, anual maxmaximum will be fectch

In [15]:
xfa.get_maximum(tolerence=.85)

Unnamed: 0,id,season,year,start_date,end_date,streamflow
3,020602,Whole year,1970,02-11,06-19,37.900002
4,020802,Whole year,1970,02-11,06-19,266.000000
6,021502,Whole year,1970,02-11,06-19,204.000000
7,021601,Whole year,1970,02-11,06-19,413.000000
8,021702,Whole year,1970,02-11,06-19,54.700001
...,...,...,...,...,...,...
1990,023702,Whole year,2023,02-11,06-19,84.209999
1991,024003,Whole year,2023,02-11,06-19,208.600006
1992,024004,Whole year,2023,02-11,06-19,17.190001
1996,024014,Whole year,2023,02-11,06-19,405.500000


In [16]:
xfa.get_maximum(tolerence=0.15, seasons=['Spring'])

Unnamed: 0,id,season,year,start_date,end_date,streamflow
4,020802,Spring,1970,02-11,06-19,266.000000
6,021502,Spring,1970,02-11,06-19,204.000000
7,021601,Spring,1970,02-11,06-19,413.000000
8,021702,Spring,1970,02-11,06-19,54.700001
11,022003,Spring,1970,02-11,06-19,309.000000
...,...,...,...,...,...,...
1990,023702,Spring,2023,02-11,06-19,84.209999
1991,024003,Spring,2023,02-11,06-19,208.600006
1992,024004,Spring,2023,02-11,06-19,17.190001
1996,024014,Spring,2023,02-11,06-19,405.500000


# CLass Local()

<a id='fa_init'></a>
Init local with a data Ds

In [57]:
from xhydro.frequency_analysis.local import Local

In [58]:
return_period = np.array([2, 5, 10, 20, 50, 100, 200, 1000, 2000, 10000])
dist_list = ['expon', 'gamma', 'genextreme', 'gennorm', 'gumbel_r', 'pearson3', 'weibull_min']

fa = Local(data_ds=xfa,
              return_period=return_period,
              dist_list=dist_list,
              tolerence=0.15,
              seasons=['Spring'],
              min_year=15,
              vars_of_interest=['max'])

In [59]:
fa.analyse_max

In [19]:
fa.view_values('max')

Unnamed: 0,id,season,time,start_date,end_date,streamflow
3,020802,Spring,1970,02-11,06-19,266.000000
5,021502,Spring,1970,02-11,06-19,204.000000
6,021601,Spring,1970,02-11,06-19,413.000000
7,021702,Spring,1970,02-11,06-19,54.700001
10,022003,Spring,1970,02-11,06-19,309.000000
...,...,...,...,...,...,...
1774,023432,Spring,2023,02-11,06-19,66.580002
1776,023702,Spring,2023,02-11,06-19,84.209999
1777,024003,Spring,2023,02-11,06-19,208.600006
1780,024014,Spring,2023,02-11,06-19,405.500000


In [20]:
fa.view_criterions('max')

Unnamed: 0,id,season,scipy_dist,value_criterions
0,020404,Spring,expon,"{'aic': 473.11780986942864, 'bic': 476.6402101..."
1,020404,Spring,gamma,"{'aic': 458.2985841195104, 'bic': 463.58218446..."
2,020404,Spring,genextreme,"{'aic': 457.5956462573534, 'bic': 462.87924660..."
3,020404,Spring,gennorm,"{'aic': 458.6848317031879, 'bic': 463.96843205..."
4,020404,Spring,gumbel_r,"{'aic': 455.63163990069773, 'bic': 459.1540401..."
...,...,...,...,...
226,024015,Spring,genextreme,"{'aic': 102.98921985069077, 'bic': 105.4888598..."
227,024015,Spring,gennorm,"{'aic': 107.10226730923392, 'bic': 109.6019073..."
228,024015,Spring,gumbel_r,"{'aic': 101.55648839938996, 'bic': 103.2229150..."
229,024015,Spring,pearson3,"{'aic': 95.20549363483296, 'bic': 97.705133667..."


In [21]:
fa.view_quantiles('max')

Unnamed: 0,id,season,scipy_dist,return_period,streamflow_quantiles
0,020404,Spring,expon,2.0,110.0
1,020502,Spring,expon,2.0,19.0
2,020602,Spring,expon,2.0,154.0
3,020802,Spring,expon,2.0,255.0
4,021407,Spring,expon,2.0,195.0
...,...,...,...,...,...
2305,024003,Spring,weibull_min,10000.0,528.0
2306,024007,Spring,weibull_min,10000.0,1180.0
2307,024013,Spring,weibull_min,10000.0,291.0
2308,024014,Spring,weibull_min,10000.0,1198.0
