In [1]:
import numpy as np 
import pandas as pd 
import netCDF4 as nc 
from netCDF4 import Dataset 
import matplotlib.pyplot as plt 
import xarray as xr
import datetime  
from os import listdir
import os

In [2]:
# ## Set variables
# Latitude and longitude boundaries of the study area.
latlim = slice(10, 34.5)
lonlim = slice(-10, -2)

# Two-digit month labels. `months_labels_without_july` is the same list minus
# "07"; it is used to prepare a dataset that has a problem with the month of July.
months_labels = ["%02d" % month_number for month_number in range(1, 13)]
months_labels_without_july = [label for label in months_labels if label != "07"]

# Product to process (a key of `input_paths`) and name of the variable to read.
# The snow threshold itself is not set here: it is the loop variable of the
# processing cell further down.
prod = 'ERA5'
par = 'sd'

# Path of input data
path_ERA5_LAND = '/home/hchaabani/Data/Snow/SNW/Reanalysis/ERA-LAND/raw_data'
path_ERA5 = '/home/hchaabani/Data/Snow/SNW/Reanalysis/ERA5/raw_data'

# Raw-data directory of each supported product.
input_paths = dict(ERA5=path_ERA5, ERA_LAND=path_ERA5_LAND)

# Open every NetCDF file of the selected product as a single dataset and read
# it fully into memory.
input_file_pattern = input_paths[prod] + '/*.nc'
data = xr.open_mfdataset(input_file_pattern)
data = data.load()
# Optional restriction to a sub-period (disabled):
#data = data.sel(time=slice('1982-01-01', '1990-12-31'))

# Use short coordinate names; the 'sd' entry maps the variable onto itself.
coordinate_renames = {'longitude':'lon','latitude':'lat','sd':'sd'}
data = data.rename(coordinate_renames)

# Optional wrap of longitudes into [-180, 180) (disabled):
# data.coords['lon'] = ('lon', (data.lon.values + 180)%360 - 180)

In [3]:
# ## Monthly snow statistics for each snow threshold
#
# For every (year, month) and grid cell this cell computes:
#   * days_with_data     - time steps holding a valid value
#   * days_of_month      - all time steps present in the month
#   * days_without_snow  - time steps with value <= threshold
#   * days_with_snow     - time steps with value >  threshold
#   * <par>_monthly_value - mean of the values >= threshold
# plus "_15" variants masked where fewer than `min_days_with_data` valid time
# steps exist, and "ext_" variants that extrapolate the snow count to the
# whole month. One NetCDF file is written per threshold.
#
# NOTE(review): the counts are counts of time steps; they are "days" only if
# the input has one time step per day - TODO confirm for the raw files.

# Tag every time step with its (year, month) pair so it can be grouped by
# calendar month. This does not depend on the threshold, so it is done once
# instead of once per loop iteration.
year_month_idx = pd.MultiIndex.from_arrays([data['time.year'].values, data['time.month'].values])
data.coords['year_month'] = ('time', year_month_idx)

# Valid time steps per month: NaN and anything <= -9999 are excluded
# (a comparison with NaN is False, so no fillna round-trip is needed).
days_with_data_monthly_values = (
    data[par].where(data[par] > -9999)
    .groupby('year_month').count(dim='time')
    .rename('days_with_data')
)

# All time steps per month: NaNs are replaced by a dummy finite value so that
# they are counted too.
par_without_gaps = data[par].fillna(9999)
days_of_months_values = (
    par_without_gaps.where(par_without_gaps > -9999)
    .groupby('year_month').count(dim='time')
    .rename('days_of_month')
)

# Minimum number of valid time steps for a month to be kept in the "_15" variables.
min_days_with_data = 15

# Thresholds stay strings because they are embedded verbatim in the output
# directory and file names.
for threshold in ['0.002']: #'0.03','0.05','0.07','0.09','0.11']:
    threshold_value = float(threshold)

    # Directory path served to store data
    output_dir_era_land = '/home/hchaabani/Data/Snow/SNW/Reanalysis/ERA-LAND/modified_data/monthly_values/monthly_values_threshold_'+threshold
    output_dir_era5 = '/home/hchaabani/Data/Snow/SNW/Reanalysis/ERA5/modified_data/monthly_values/monthly_values_threshold_'+threshold
    output_paths = {'ERA5':output_dir_era5, 'ERA_LAND': output_dir_era_land}

    days_without_snow_values = (
        data[par].where(data[par] <= threshold_value)
        .groupby('year_month').count(dim='time')
        .rename('days_without_snow')
    )

    number_of_days_with_snow_monthly_values = (
        data[par].where(data[par] > threshold_value)
        .groupby('year_month').count(dim='time')
        .rename('days_with_snow')
    )

    # NOTE(review): the mean keeps values >= threshold while days_with_snow
    # counts values > threshold, so a value exactly equal to the threshold is
    # averaged here but counted as "without snow". Kept as in the original;
    # confirm which boundary is intended.
    monthly_values = (
        data[par].where(data[par] >= threshold_value)
        .groupby('year_month').mean(dim='time')
        .rename(par+'_monthly_value')
    )

    ERA5_variables_monthly_values = xr.merge([monthly_values, number_of_days_with_snow_monthly_values, days_without_snow_values, days_with_data_monthly_values, days_of_months_values])

    # Variants masked (NaN) for months with too few valid time steps.
    enough_data = ERA5_variables_monthly_values['days_with_data'] >= min_days_with_data
    for variable_name in (par+'_monthly_value', 'days_with_snow', 'days_without_snow'):
        ERA5_variables_monthly_values[variable_name+'_15'] = ERA5_variables_monthly_values[variable_name].where(enough_data)

    # Extrapolate the snow count to the time steps without data, assuming the
    # same snow frequency as in the time steps with data. Where days_with_data
    # is 0 the ratio is 0/0, so the result is NaN.
    days_missing = ERA5_variables_monthly_values['days_of_month'] - ERA5_variables_monthly_values['days_with_data']
    for variable_name in ('days_with_snow', 'days_with_snow_15'):
        snow_days = ERA5_variables_monthly_values[variable_name]
        ERA5_variables_monthly_values['ext_'+variable_name] = snow_days + snow_days / ERA5_variables_monthly_values['days_with_data'] * days_missing

    # Replace the (year, month) pairs by the first day of each month.
    time = [
        '{}-{:02d}-01'.format(int(year), int(month))
        for year, month in ERA5_variables_monthly_values.year_month.values
    ]

    ERA5_variables_monthly_values = ERA5_variables_monthly_values.assign_coords(year_month=pd.DatetimeIndex(time))
    ERA5_variables_monthly_values = ERA5_variables_monthly_values.rename({'year_month':'time'})

    # Make sure the target directory exists; to_netcdf does not create it.
    os.makedirs(output_paths[prod], exist_ok=True)
    ERA5_variables_monthly_values.to_netcdf(os.path.join(output_paths[prod], par+'_ERA5_variables_monthly_values_threshold_'+threshold+'.nc'))

    print('Done for threshold: ' + threshold)

Done for threshold: 0.002
