<h1 style="font-size:2.5rem; color:green;"> This notebook will be used to calculate the monthly snow depth values at all stations: Ifrane, Midelt, Tichki, M'goun, Tizi Touzna and Oukaimeden.  </h1>  

<h1 style="font-size:2rem; color:green;"> Load required libraries </h1> 

In [1]:
# To reload external files automatically (ex: utils)
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import netCDF4 as nc
from netCDF4 import Dataset  
import datetime as dt
import calendar as cld
import matplotlib.pyplot as plt
import xarray as xr

<h1 style="font-size:2rem; color:green;"> Initialization of global variables </h1>

In [2]:
path_input =  r'C:\Data\Snow\SD\In-situ\modified_data\V1'
path_output = r'C:\Data\Snow\SD\In-situ\modified_data\monthly_values'
start_date = '2001-01-01'
end_date = '2021-12-31'
period = slice(start_date, end_date)
stations_labels = [ 'IFR', 'MID', 'TIC', 'MGN', 'TIZ', 'OUK']
stations_data = {key: 'data' for key in stations_labels}
lats = {'IFR':33.5,'MID':32.67,'TIC':31.53,'MGN':31.50,'TIZ':31.56,'OUK':31.20}
lons = {'IFR':-5.16,'MID':-4.74,'TIC':-6.30,'MGN':-6.45,'TIZ':-6.29,'OUK':-7.86}
datasets_data = {key: 'data' for key in stations_labels}

<h1 style="font-size:2rem; color:green;"> Importing and preparing data </h1>

In [3]:
for key in stations_labels:
    stations_data[key] = pd.read_csv(path_input + '/sd_'+key+'.csv')

In [4]:
stations_data

{'IFR':             date  sd_max
 0     01/01/2005     8.0
 1     02/01/2005     4.0
 2     03/01/2005     3.0
 3     04/01/2005     2.0
 4     05/01/2005     1.0
 ...          ...     ...
 6319  27/12/2021     0.0
 6320  28/12/2021     0.0
 6321  29/12/2021     0.0
 6322  30/12/2021     0.0
 6323  31/12/2021     0.0
 
 [6324 rows x 2 columns],
 'MID':             date  sd_max
 0     01/01/2005     0.0
 1     02/01/2005     0.0
 2     03/01/2005     0.0
 3     04/01/2005     0.0
 4     05/01/2005     0.0
 ...          ...     ...
 6319  27/12/2021     0.0
 6320  28/12/2021     0.0
 6321  29/12/2021     0.0
 6322  30/12/2021     0.0
 6323  31/12/2021     0.0
 
 [6324 rows x 2 columns],
 'TIC':             date   sd
 0     04/04/2001  0.0
 1     05/04/2001  0.0
 2     06/04/2001  0.0
 3     07/04/2001  0.0
 4     08/04/2001  0.0
 ...          ...  ...
 3533  31/12/2010  0.0
 3534  01/01/2011  0.0
 3535  02/01/2011  0.0
 3536  03/01/2011  0.0
 3537  04/01/2011  0.0
 
 [3538 rows x 2 colum

In [5]:
stations_data['IFR'] = stations_data['IFR'].rename(columns={'date':'date','sd_max':'sd'})
stations_data['MID'] = stations_data['MID'].rename(columns={'date':'date','sd_max':'sd'})

In [6]:
for key in stations_labels:
    stations_data[key]['date'] = pd.to_datetime(stations_data[key]['date'], yearfirst=True, dayfirst=True, errors='coerce')

In [7]:
stations_data

{'IFR':            date   sd
 0    2005-01-01  8.0
 1    2005-01-02  4.0
 2    2005-01-03  3.0
 3    2005-01-04  2.0
 4    2005-01-05  1.0
 ...         ...  ...
 6319 2021-12-27  0.0
 6320 2021-12-28  0.0
 6321 2021-12-29  0.0
 6322 2021-12-30  0.0
 6323 2021-12-31  0.0
 
 [6324 rows x 2 columns],
 'MID':            date   sd
 0    2005-01-01  0.0
 1    2005-01-02  0.0
 2    2005-01-03  0.0
 3    2005-01-04  0.0
 4    2005-01-05  0.0
 ...         ...  ...
 6319 2021-12-27  0.0
 6320 2021-12-28  0.0
 6321 2021-12-29  0.0
 6322 2021-12-30  0.0
 6323 2021-12-31  0.0
 
 [6324 rows x 2 columns],
 'TIC':            date   sd
 0    2001-04-04  0.0
 1    2001-04-05  0.0
 2    2001-04-06  0.0
 3    2001-04-07  0.0
 4    2001-04-08  0.0
 ...         ...  ...
 3533 2010-12-31  0.0
 3534 2011-01-01  0.0
 3535 2011-01-02  0.0
 3536 2011-01-03  0.0
 3537 2011-01-04  0.0
 
 [3538 rows x 2 columns],
 'MGN':            date    sd
 0    2001-10-08  0.20
 1    2001-10-09  0.21
 2    2001-10-10  0.22
 3  

In [8]:
for key in stations_labels:
    for l in range(len(stations_data[key])):  
        if str(stations_data[key]['date'][l])=='NaT':
            stations_data[key] = stations_data[key].drop(l, axis=0)

<div class="alert alert-block alert-success"; background-color:red> Important remark: The snow depth at the two stations 
        Ifrane and Midelt is a maximum daily depth and it is given in centimeters (cm). In the other stations, it is a
        daily average and it is given in meters (m). We will convert everything in centimeters.  </div> 

In [9]:
stations_data['TIC']['sd'] = stations_data['TIC']['sd']*100
stations_data['MGN']['sd'] = stations_data['TIC']['sd']*100
stations_data['TIZ']['sd'] = stations_data['TIC']['sd']*100
stations_data['OUK']['sd'] = stations_data['OUK']['sd']*100

In [10]:
for key in stations_labels:
    stations_data[key] = stations_data[key].set_index('date')

In [11]:
stations_data

{'IFR':              sd
 date           
 2005-01-01  8.0
 2005-01-02  4.0
 2005-01-03  3.0
 2005-01-04  2.0
 2005-01-05  1.0
 ...         ...
 2021-12-27  0.0
 2021-12-28  0.0
 2021-12-29  0.0
 2021-12-30  0.0
 2021-12-31  0.0
 
 [6209 rows x 1 columns],
 'MID':              sd
 date           
 2005-01-01  0.0
 2005-01-02  0.0
 2005-01-03  0.0
 2005-01-04  0.0
 2005-01-05  0.0
 ...         ...
 2021-12-27  0.0
 2021-12-28  0.0
 2021-12-29  0.0
 2021-12-30  0.0
 2021-12-31  0.0
 
 [6209 rows x 1 columns],
 'TIC':              sd
 date           
 2001-04-04  0.0
 2001-04-05  0.0
 2001-04-06  0.0
 2001-04-07  0.0
 2001-04-08  0.0
 ...         ...
 2010-12-31  0.0
 2011-01-01  0.0
 2011-01-02  0.0
 2011-01-03  0.0
 2011-01-04  0.0
 
 [3538 rows x 1 columns],
 'MGN':              sd
 date           
 2001-10-08  0.0
 2001-10-09  0.0
 2001-10-10  0.0
 2001-10-11  0.0
 2001-10-12  0.0
 ...         ...
 2008-03-21  0.0
 2008-03-22  0.0
 2008-03-23  0.0
 2008-03-24  0.0
 2008-03-25  0.0
 
 [

<h1 style="font-size:2rem; color:green;"> Add missing data and convert to xarray dataset </h1>

In [12]:
datetimes = pd.date_range(start=start_date, end=end_date)

In [13]:
datetimes

DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03', '2001-01-04',
               '2001-01-05', '2001-01-06', '2001-01-07', '2001-01-08',
               '2001-01-09', '2001-01-10',
               ...
               '2021-12-22', '2021-12-23', '2021-12-24', '2021-12-25',
               '2021-12-26', '2021-12-27', '2021-12-28', '2021-12-29',
               '2021-12-30', '2021-12-31'],
              dtype='datetime64[ns]', length=7670, freq='D')

In [14]:
for key in stations_labels:
    stations_data[key] = stations_data[key].reindex(datetimes, fill_value=np.nan)

In [15]:
stations_data

{'IFR':              sd
 2001-01-01  NaN
 2001-01-02  NaN
 2001-01-03  NaN
 2001-01-04  NaN
 2001-01-05  NaN
 ...         ...
 2021-12-27  0.0
 2021-12-28  0.0
 2021-12-29  0.0
 2021-12-30  0.0
 2021-12-31  0.0
 
 [7670 rows x 1 columns],
 'MID':              sd
 2001-01-01  NaN
 2001-01-02  NaN
 2001-01-03  NaN
 2001-01-04  NaN
 2001-01-05  NaN
 ...         ...
 2021-12-27  0.0
 2021-12-28  0.0
 2021-12-29  0.0
 2021-12-30  0.0
 2021-12-31  0.0
 
 [7670 rows x 1 columns],
 'TIC':             sd
 2001-01-01 NaN
 2001-01-02 NaN
 2001-01-03 NaN
 2001-01-04 NaN
 2001-01-05 NaN
 ...         ..
 2021-12-27 NaN
 2021-12-28 NaN
 2021-12-29 NaN
 2021-12-30 NaN
 2021-12-31 NaN
 
 [7670 rows x 1 columns],
 'MGN':             sd
 2001-01-01 NaN
 2001-01-02 NaN
 2001-01-03 NaN
 2001-01-04 NaN
 2001-01-05 NaN
 ...         ..
 2021-12-27 NaN
 2021-12-28 NaN
 2021-12-29 NaN
 2021-12-30 NaN
 2021-12-31 NaN
 
 [7670 rows x 1 columns],
 'TIZ':             sd
 2001-01-01 NaN
 2001-01-02 NaN
 2001-01-03 N

In [16]:
for key in stations_labels:
    aux = pd.DataFrame(columns=['lon','lat'])
    aux['lon'] = [lons[key]]*len(datetimes)
    aux['lat'] = [lats[key]]*len(datetimes)
    stations_data[key] = pd.concat([aux, stations_data[key].reset_index().rename(columns={'index	':'date'})], axis=1, join='inner')

In [17]:
for key in stations_labels:
    stations_data[key] = stations_data[key].rename(columns={'index':'date'})

In [18]:
for key in stations_labels:
    stations_data[key] = stations_data[key].set_index(['lon','lat','date'])

In [19]:
for key in stations_labels:
    datasets_data[key] = stations_data[key].to_xarray()

In [20]:
datasets_data

{'IFR': <xarray.Dataset>
 Dimensions:  (lon: 1, lat: 1, date: 7670)
 Coordinates:
   * lon      (lon) float64 -5.16
   * lat      (lat) float64 33.5
   * date     (date) datetime64[ns] 2001-01-01 2001-01-02 ... 2021-12-31
 Data variables:
     sd       (lon, lat, date) float64 nan nan nan nan nan ... 0.0 0.0 0.0 0.0,
 'MID': <xarray.Dataset>
 Dimensions:  (lon: 1, lat: 1, date: 7670)
 Coordinates:
   * lon      (lon) float64 -4.74
   * lat      (lat) float64 32.67
   * date     (date) datetime64[ns] 2001-01-01 2001-01-02 ... 2021-12-31
 Data variables:
     sd       (lon, lat, date) float64 nan nan nan nan nan ... 0.0 0.0 0.0 0.0,
 'TIC': <xarray.Dataset>
 Dimensions:  (lon: 1, lat: 1, date: 7670)
 Coordinates:
   * lon      (lon) float64 -6.3
   * lat      (lat) float64 31.53
   * date     (date) datetime64[ns] 2001-01-01 2001-01-02 ... 2021-12-31
 Data variables:
     sd       (lon, lat, date) float64 nan nan nan nan nan ... nan nan nan nan,
 'MGN': <xarray.Dataset>
 Dimensions:  (lo

In [21]:
variables_monthly_values = {key: 'ds' for key in stations_labels}

<h1 style="font-size:2rem; color:green;"> Computing monthly values </h1>

In [28]:
for key in stations_labels:
    aux =  datasets_data[key]
    year_month_idx = pd.MultiIndex.from_arrays([aux['sd']['date.year'].values, aux['sd']['date.month'].values])
    aux.coords['year_month'] = ('date', year_month_idx)
    
    days_with_data_monthly_values = aux['sd'].groupby('year_month').count(dim='date')
    days_with_data_monthly_values = days_with_data_monthly_values.rename('days_with_data')

    days_of_months_values = aux['sd'].fillna(9999).where(aux['sd'].fillna(9999) >-9999).groupby('year_month').count(dim='date')
    days_of_months_values = days_of_months_values.rename('days_of_month')

    days_without_snow_values = aux['sd'].where(aux['sd']==0).groupby('year_month').count(dim='date')
    days_without_snow_values = days_without_snow_values.rename('days_without_snow')

    number_of_days_with_snow_monthly_values = aux['sd'].fillna(-9999).where(aux['sd'].fillna(-9999)>0).groupby('year_month').count(dim='date')
    number_of_days_with_snow_monthly_values = number_of_days_with_snow_monthly_values.rename('days_with_snow')

    sd_variables_monthly_values = xr.merge([number_of_days_with_snow_monthly_values, days_without_snow_values, days_with_data_monthly_values, days_of_months_values])
    
    sd_variables_monthly_values['days_with_snow_15'] =  sd_variables_monthly_values['days_with_snow'].where(sd_variables_monthly_values['days_with_data']>=15) 
    sd_variables_monthly_values['days_without_snow_15'] =  sd_variables_monthly_values['days_without_snow'].where(sd_variables_monthly_values['days_with_data']>=15)
    sd_variables_monthly_values['ext_days_with_snow'] = sd_variables_monthly_values['days_with_snow'] + sd_variables_monthly_values['days_with_snow'] / sd_variables_monthly_values['days_with_data'] * (sd_variables_monthly_values['days_of_month'] - sd_variables_monthly_values['days_with_data'])
    sd_variables_monthly_values['ext_days_with_snow_15'] = sd_variables_monthly_values['days_with_snow_15'] + sd_variables_monthly_values['days_with_snow_15'] / sd_variables_monthly_values['days_with_data'] * (sd_variables_monthly_values['days_of_month'] - sd_variables_monthly_values['days_with_data'])

    time = []
    for item in sd_variables_monthly_values.year_month.values:
        if item[1]<10:
            time.append(str(item[0])+'-0'+str(item[1])+'-01')
        else:
            time.append(str(item[0])+'-'+str(item[1])+'-01')
            
    sd_variables_monthly_values = sd_variables_monthly_values.assign_coords(year_month=pd.DatetimeIndex(time))
    sd_variables_monthly_values = sd_variables_monthly_values.rename({'year_month':'time'})

    variables_monthly_values[key] = sd_variables_monthly_values


In [29]:
variables_monthly_values

{'IFR': <xarray.Dataset>
 Dimensions:                (lon: 1, lat: 1, time: 252)
 Coordinates:
   * lon                    (lon) float64 -5.16
   * lat                    (lat) float64 33.5
   * time                   (time) datetime64[ns] 2001-01-01 ... 2021-12-01
 Data variables:
     days_with_snow         (lon, lat, time) int32 0 0 0 0 0 0 0 ... 0 0 0 0 1 2
     days_without_snow      (lon, lat, time) int32 0 0 0 0 0 0 ... 31 30 31 29 29
     days_with_data         (lon, lat, time) int32 0 0 0 0 0 0 ... 31 30 31 30 31
     days_of_month          (lon, lat, time) int32 31 28 31 30 31 ... 30 31 30 31
     days_with_snow_15      (lon, lat, time) float64 nan nan nan ... 0.0 1.0 2.0
     days_without_snow_15   (lon, lat, time) float64 nan nan nan ... 29.0 29.0
     ext_days_with_snow     (lon, lat, time) float64 nan nan nan ... 0.0 1.0 2.0
     ext_days_with_snow_15  (lon, lat, time) float64 nan nan nan ... 0.0 1.0 2.0,
 'MID': <xarray.Dataset>
 Dimensions:                (lon: 1, lat: 

<h1 style="font-size:2rem; color:green;"> Storing data to netcdf files </h1>

In [30]:
for key in stations_labels:  
    variables_monthly_values[key].to_netcdf(path_output+'/'+key+'_in_situ.nc')