In [1]:
#!/usr/bin/python
#import wradlib as wrl
import pylab as pl
from glob import glob
import warnings
warnings.filterwarnings('ignore')
try:
    get_ipython().magic("matplotlib inline")
except:
    pl.ion()
import numpy as np

import csv
import os
import datetime
import pandas as pd

#shapefile: GIS vector data format (ESRI)
import shapefile as shp  # Requires the pyshp package
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib import ticker
import netCDF4 as nc4
from datetime import date,timedelta
import plotly.graph_objs as go
import matplotlib.lines as mlines
import seaborn as sns

import importlib
import geopandas as gpd
import xarray as xr
import cartopy.crs as ccrs  # Projections list

import _pickle as pickle

#import external functions (need pip install ipynb)
import ipynb.fs.full.HaileventsFunctions as HF

## Notebook for filtering OT data with Reanalysis proxies: 

**Structure of the filter:**

- Read OT data, hourly aggregate, spatial domain over SPHERA, parallax correction, write them in xarray and then to geodataframe
- **Temporal window**: starting from the OT timing, extract the last **4h** of SPHERA proxies and aggregate them into one geodataframe (taking the max/min values over the 4 time steps). In general, 4 hours can be enough for the dynamical ambient conditions, but not always for the thermodynamical conditions (particularly in the morning hours), especially in rapidly evolving situations such as the advection of unstable air masses or frontal systems (see Kunz et al. 2020 for details).
Kris and Michael's suggestion: use **3h**! 
- **Spatial window**: consider a neighbourhood of 7x7 (upscaled) SPHERA grid cells (circa 70km) around the OT detection
- **Threshold values**: check whether the SPHERA proxies, aggregated over the 4h temporal window and over the spatial window, are below or above certain thresholds (values defined statistically, similarly to Punge 2017, and compared with the literature). For the moment the 2nd-98th percentiles were considered as thresholds, <span style="color:red"> NOW LET'S TRY WITH THE 5th-95th PERCENTILES! </span>
- **Filtering OTs**: apply the procedure to every OT of the considered hourly dataset and filter out irrelevant OTs, then compare before and after

THEN

- Extend process to **every OT hour of the day** of an event: building a dictionary of geodataframes for OT data for every hour before and after filtering to compare

AND THEN

- Extend to the **whole set of events**

In [2]:
# Preliminary parameters

# Root folder containing all the input data:
fold = '/home/ciccuz/phd/KIT/hail_data/'

# Lat/lon bounds covering the whole SPHERA domain:
lonmin_S, lonmax_S = 6, 19
latmin_S, latmax_S = 35, 49
latplot_S = [latmin_S, latmax_S]
lonplot_S = [lonmin_S, lonmax_S]

# Country shapefiles, read with geopandas (all live under the same geodata folder):
_geodata = fold + "hail4_punge/geodata/"
sfg_ita = gpd.read_file(_geodata + "ITA_adm/ITA_adm0.shp", encoding='latin-1')
sfg_deu = gpd.read_file(_geodata + "DEU_adm/DEU_adm0.shp", encoding='latin-1')
sfg_aut = gpd.read_file(_geodata + "AUT_adm/AUT_adm0.shp", encoding='latin-1')
sfg_che = gpd.read_file(_geodata + "CHE_adm/CHE_adm0.shp", encoding='latin-1')
sfg_cro = gpd.read_file(_geodata + "Croatia/mj788hg8036.shp", encoding='latin-1')
sfg_slo = gpd.read_file(_geodata + "Slovenia/gk259nt7452.shp", encoding='latin-1')
sfg_hun = gpd.read_file(_geodata + "Hungary/qh513vs8921.shp", encoding='latin-1')

# Map limits for the whole SPHERA domain (same list objects as lonplot_S / latplot_S):
xlim = lonplot_S
ylim = latplot_S

# Map limits for the Graz event (sample case):
xlimG = [12, 17]
ylimG = [46, 49]

# City information inside the SPHERA domain, as returned by HF.HF_cities:
clonsel, clatsel, csizesel, cnamesel, ckindsel = HF.HF_cities(latmin_S, latmax_S, lonmin_S, lonmax_S)

# Geographic shapefiles drawn as background on the maps:
sf_geo = [sfg_ita, sfg_aut]

# Shapefile of the SPHERA 10km grid:
sf_sp = gpd.read_file(fold + "data/lampinet/mask/grid_10_49N.shp", encoding='latin-1')

# Plot settings when CAPE is the parameter shown (for example):
sp_par = "CAPE_MU"
col_num = 6
cbar_ext = 'max'
un_meas = '[J/kg]'

In [32]:
# Scratch check of date parsing; `dtime` is reassigned in the EVENT CASE cell further down.
dtime = datetime.datetime.strptime('2016-06-30','%Y-%m-%d')

In [33]:
# Display the parsed datetime.
dtime

datetime.datetime(2016, 6, 30, 0, 0)

In [34]:
# Check that subtracting one day yields the previous calendar date
# (the same expression is used later to read the SPHERA data of the day before).
dtime-timedelta(days=1)

datetime.datetime(2016, 6, 29, 0, 0)

In [7]:
#EVENT CASE:

# Catalogue of the hail events handled by this notebook, keyed by date (YYYY-MM-DD).
# The original cell carried a "DONE" banner above this list.
EVENTS = {
    '2016-04-07': '7 April 2016 (Condove - Torino)',
    '2016-04-08': '8 April 2016 (Condove - Torino)',
    '2016-06-18': '18 June 2016 (Asiago - Veneto)',
    '2016-06-24': '24 June 2016 (Dettingen - S Germany)',
    '2016-08-05': '5 August 2016 (Mondolfo (PU) - Marche)',
    '2016-08-16': '16 August 2016 (Albenga - Savona)',
    '2016-08-29': '29 August 2016 (Torino + Graz)',
    '2016-10-15': '15 October 2016 (Casalbordino (Chieti) - Abruzzo)',
    '2017-05-04': '4 May 2017 (Treviso - Veneto)',
    '2017-05-14': '14 May 2017 (Veneto - Nord-est Italy)',
    '2017-06-25': '25 June 2017 (Nord-est Italy)',
    '2017-06-27': '27 June 2017 (Asti)',
    '2017-06-28': '28 June 2017 (Nord Italy)',
    '2017-07-29': '29 July 2017 (Pontenure - Piacenza - Emilia Romagna)',
    '2017-08-09': '9-10 August 2017 (Trentino Sudtirol)',
    '2017-08-10': '9-10 August 2017 (Trentino Sudtirol)',
    '2017-08-18': '18 August 2017 (Bohringer - South Germany)',
    '2018-06-04': '4 June 2018 (Noceto - Parma - Emilia Romagna)',
    '2018-07-23': '23 July 2018 (Turi - Bari)',
    '2018-09-02': '2 September 2018 (Pescara)',
    '2018-09-07': '7 September 2018 (Carapelle/Orta Nova - Foggia)',
    '2018-10-07': '7 October 2018 (Melpignano - Lecce)',
    '2018-10-29': '29 October 2018 (Sardinia)',
}

# Date of the event to process: change ONLY this line to switch event.
EVENT_DATE = '2016-04-07'

if EVENT_DATE not in EVENTS:
    raise KeyError(f'{EVENT_DATE} is not in the EVENTS catalogue')

# Event date as a datetime (midnight of the event day).
dtime = datetime.datetime.strptime(EVENT_DATE, '%Y-%m-%d')

# Year followed by the zero-padded day of the year, as an integer (e.g. 2016098).
# Derived from dtime so the two can never disagree, instead of being typed by hand.
day = int(dtime.strftime('%Y%j'))

# Plot over the whole SPHERA domain (aliases of the domain-wide limit lists).
latplot, lonplot = latplot_S, lonplot_S

In [6]:
# SETUP OT parameters

# File-name prefix: MET11 for 2018 events, MET10 otherwise (2016, 2017).
otpref = ('NASALARC_MET11_SEVIRI_OTDETECTION_' if dtime.year == 2018
          else 'NASALARC_MET10_SEVIRI_OTDETECTION_')

# Remaining pieces of the OT file names: separator, start-time stamp and extension.
otsep, othstart, otposf = '_', '0000', '.nc'

# Folder holding the OT files of the selected day:
otdir = f'{fold}data/OT_SEVIRI_data/{day}/'

# Full path of the OT file carrying the `othstart` time stamp:
ncfile = f'{otdir}{otpref}{day}{otsep}{othstart}{otposf}'

In [7]:
#SETUP THRESHOLDS FOR SPHERA PROXIES (percentiles of param distributions in presence of ESWD report)

# All threshold sets tried so far, keyed by "<temporal window>_<percentile pair>".
# Keeping them as data (instead of commented-out assignments) makes switching a one-line change.
THRESHOLD_SETS = {
    # temporal window = 4h, 2-98 percentile thresholds
    '4h_2-98p': dict(t_CAPE=532.15, t_K=30.2, t_LI=-1.38, t_DLS=7.52, t_H0=4100),
    # temporal window = 3h, 2-98 percentile thresholds
    '3h_2-98p': dict(t_CAPE=505.32, t_K=30.2, t_LI=-1.27, t_DLS=7.4, t_H0=4104.5),
    # temporal window = 3h, 5-95 percentile thresholds
    '3h_5-95p': dict(t_CAPE=618.42, t_K=32.5, t_LI=-2.08, t_DLS=9.25, t_H0=4039),
}

# Threshold set in use: change ONLY this line to switch set.
THRESHOLD_SET = '3h_5-95p'

# Expose the selected thresholds under the names passed to HF.HF_OTfilter in the filtering cells.
_thr = THRESHOLD_SETS[THRESHOLD_SET]
t_CAPE = _thr['t_CAPE']
t_K = _thr['t_K']
t_LI = _thr['t_LI']
t_DLS = _thr['t_DLS']
t_H0 = _thr['t_H0']

## - Read OT data, h-aggregate, limit spat. domain, parallax corr, write geodataframe

In [7]:
#read original OT data:
# NOTE(review): hhmin and hhmax are only assigned further down, in the
# "Extend to the whole day of event" section, so on a fresh kernel this call
# raises the NameError recorded in this cell's output. The hour range intended
# for this single-case example is not stated anywhere in the notebook - confirm
# it and define hhmin/hhmax before this cell.
cpclat, cpclon, otpmax, dtmin, ot_timing = HF.HF_OTdata(day,hhmin,hhmax,otpref)

NameError: name 'hhmin' is not defined

In [12]:
# Convert the OT arrays returned by HF.HF_OTdata into `gdf_OT` and display it
# (presumably a GeoDataFrame of OT points, given its later use of .geometry and .plot - confirm).
# NOTE(review): the NameError recorded for this cell follows from the failed
# read in the previous cell (cpclat was never assigned).
gdf_OT = HF.HF_OTdata_2_gdf(cpclat,cpclon,otpmax,ot_timing)
gdf_OT

NameError: name 'cpclat' is not defined

## - Temporal window

from ot timing extract the last 4h of SPHERA proxies and aggregate them to 1 geodataframe (extracting max/min values over the 4 temporal steps)

#read daily sphera data
# The reader is called with a datetime, exactly as in the whole-day section of this notebook;
# subtracting a timedelta also handles the first day of a month/year correctly.
df_sp = HF.HF_reanProxies_read(dtime)                            #of the day of the OT
df_sp_Dbefore = HF.HF_reanProxies_read(dtime-timedelta(days=1))  #of the day before, for when the OT event is at 0,1,2 am

#build sphera geodataframe based on the temporal aggregation over the last hours before OTs time of occurrence in gdf_OT
# NOTE(review): the window length is decided inside HF.HF_rean_tAgg_OTh (4h in the original
# design, 3h according to the comments of the whole-day loop) - confirm which one applies.
gdf_sp_4OTh = HF.HF_rean_tAgg_OTh(gdf_OT, df_sp, df_sp_Dbefore, sf_sp)
gdf_sp_4OTh

## - Spatial window

consider a neighbourhood of 7x7 SPHERA (temporally-aggregated) grid cells around every OT detection

<span style="color:red"> This step is particularly computing-demanding, should try to optimize the code! </span>
Check here: https://gis.stackexchange.com/questions/357598/fast-way-of-querying-existence-of-a-point-between-two-dataframes

Tried with a plain dataframe (instead of a geodataframe); still to be checked.

"""
Function for extracting the spatial windows around every SPHERA cell containing at least 1 OT detection, write a
dictionary of geodataframes containing all the cells in the neighbourhood of OT detections
"""

def HF_rean_spatWindow_OLDVERSION(gdf_OT, gdf_sp_4OTh, n_neighbours=49):
    """Collect the spatial window of SPHERA cells around every cell hit by an OT.

    Parameters
    ----------
    gdf_OT : GeoDataFrame
        OT detections; only its ``geometry`` column is used.
    gdf_sp_4OTh : GeoDataFrame
        Temporally aggregated SPHERA grid; its ``geometry`` column holds the cells.
        The index labels must be convertible with ``int()``.
    n_neighbours : int, optional
        Number of closest cells (the central one included) forming the window.
        The default, 49, is the 7x7 neighbourhood used in this notebook.

    Returns
    -------
    dict
        Maps the integer index label of every SPHERA cell containing at least one
        OT detection to the sub-geodataframe of its ``n_neighbours`` closest cells.
    """
    # Cells containing at least one OT, keyed by index label and kept in first-seen
    # order. Using the label as key removes duplicates in constant time, replacing
    # the geometry-by-geometry equality scan over the cells collected so far.
    ot_cells = {}
    for point in gdf_OT.reset_index(drop=True).geometry:
        for cell_label, s_cell in zip(gdf_sp_4OTh.index, gdf_sp_4OTh.geometry):
            if point.within(s_cell):
                ot_cells.setdefault(cell_label, s_cell)

    # For every such cell select the closest n_neighbours cells (itself included,
    # at distance 0) and store the sub-geodataframe in a dictionary.
    dgdf_sp_4OTh_nn = dict()
    for cell_label, cel in ot_cells.items():
        nn_ind = gdf_sp_4OTh.geometry.distance(cel).sort_values().iloc[:n_neighbours].index
        # The label is already a scalar, so no array-to-int conversion is needed here.
        dgdf_sp_4OTh_nn[int(cell_label)] = gdf_sp_4OTh.loc[nn_ind]

    return dgdf_sp_4OTh_nn

#build dictionary of temporally-aggregated geodataframes of spatial windows of params around every SPHERA cell 
#containing at least 1 OT detection
# Note: this calls the version shipped in the HF module, not the OLDVERSION function defined above.
dgdf_sp_4OTh_nn = HF.HF_rean_spatWindowOT(gdf_OT, gdf_sp_4OTh)

# Map of the SPHERA neighbourhoods around the OT detections, colour-coded by `sp_par`.
fig, ax = plt.subplots(figsize = (20,12))

ax.set_ylim(ylim)
ax.set_xlim(xlim)

#grid shapefile
sf_sp.plot(ax=ax, alpha = 0.5, facecolor = 'none', lw = 0.1, linestyle='--', zorder=2)

#geographic shapefiles
sf_geo[0].plot(ax=ax, alpha = 0.5, facecolor = 'none', lw = 0.5, zorder=2)  #ita shapefile
if len(sf_geo) > 1:
    sf_geo[1].plot(ax=ax, alpha = 0.5, facecolor = 'none', lw = 0.5, zorder=2)  #other shapefile

#plot geodataframe of SPHERA data with color_ton as colorcoding
# The plotted column and the colour set come from the configuration (sp_par, col_num),
# so the data shown always match the colourbar label built from sp_par below.
for S_OTcell, gdf_nn in dgdf_sp_4OTh_nn.items():

    color_ton, bins, colors = HF.HF_calc_color(gdf_nn[sp_par], color=col_num)

    gdf_nn.plot(ax=ax, color=color_ton)

# OT detections on top (no figsize here: geopandas ignores it when an existing ax is passed)
gdf_OT.plot(ax=ax, markersize=10, column='otpmax', cmap='Greens')

#add colorbar
# NOTE(review): `bins` and `colors` are the ones returned for the LAST neighbourhood of the
# loop; this presumes HF.HF_calc_color returns the same bins/colors on every call - confirm.
cmap_sp_par = matplotlib.colors.ListedColormap(sns.color_palette(colors).as_hex())
norm = matplotlib.colors.BoundaryNorm(bins, cmap_sp_par.N, extend=cbar_ext)

# invisible image used only as the mappable the colorbar is built from
img = plt.imshow([bins], cmap=cmap_sp_par, norm=norm)
img.set_visible(False)

cb_sp_par=plt.colorbar(orientation='vertical', spacing='proportional', norm=norm,  pad=0.025, shrink=0.8);
cb_sp_par.ax.set_yticklabels(bins, fontsize=13)
cb_sp_par.ax.set_ylabel(f'{sp_par}  {un_meas}', fontsize=15);

#plt.savefig('/home/ciccuz/phd/KIT/scripts/plots/OT_filter_construction/spatMatch_sphera_OT_Sneighbourhood_all_ex2.png', 
#            dpi=300, bbox_inches='tight');

## - Filtering

For every subset of SPHERA data in dgdf_sp_4OTh_nn apply procedure based on proxies thresholds to discard or keep OT detections in gdf_OT 

Thresholds have been defined in DatasetExploration jupyter-notebook and checked on literature

The design of the filter states that at least 1 of the 49 cells forming the neighbourhood must fulfil each of the 5 parameter thresholds (not necessarily all in the same cell!). <span style="color:red"> This should be equivalent to extracting the max/min of the parameters in the neighbourhood and then verifying it against the thresholds (similarly to what was done for estimating the ESWD-based distributions of the SPHERA parameters), is this true?? </span> For a single parameter it is: some cell satisfies x ≥ t exactly when the neighbourhood maximum satisfies it, and some cell satisfies x ≤ t exactly when the neighbourhood minimum does (missing values aside).

# Apply the threshold-based filter to the OT detections, using the spatial windows of
# SPHERA proxies built above and the five thresholds set in the thresholds cell.
FILT_gdf_OT = HF.HF_OTfilter(gdf_OT, dgdf_sp_4OTh_nn, t_CAPE, t_K, t_LI, t_DLS, t_H0)

# Map comparing the OT detections before and after filtering.
fig, ax = plt.subplots(figsize = (20,12))

ax.set_ylim(ylim)
ax.set_xlim(xlim)

#grid shapefile
sf_sp.plot(ax=ax, alpha = 0.5, facecolor = 'none', lw = 0.1, linestyle='--', zorder=2)

#geographic shapefiles
sf_geo[0].plot(ax=ax, alpha = 0.5, facecolor = 'none', lw = 0.5, zorder=2)  #ita shapefile
if len(sf_geo) > 1:
    sf_geo[1].plot(ax=ax, alpha = 0.5, facecolor = 'none', lw = 0.5, zorder=2)  #other shapefile

#plot all the original OTs (green scale) and, drawn on top of them, the OTs in FILT_gdf_OT (red scale)
# (no figsize here: geopandas ignores it when an existing ax is passed)
gdf_OT.plot(ax=ax, markersize=10, column='otpmax', cmap='Greens')
FILT_gdf_OT.plot(ax=ax, markersize=10, column='otpmax', cmap='Reds')

# NOTE(review): the file name below is the same as in the previous plotting cell; if both
# savefig calls are re-enabled the second overwrites the first.
#plt.savefig('/home/ciccuz/phd/KIT/scripts/plots/OT_filter_construction/spatMatch_sphera_OT_Sneighbourhood_all_ex2.png', 
#            dpi=300, bbox_inches='tight');

## - Extend to the whole day of event:

Do the process for every hour of the day and store all filtered OT data

In [8]:
# First and last hour of the day to process (both included by the hourly loop).
hhmin, hhmax = 0, 23

# Daily SPHERA data of the event day and of the day before it.
df_sp = HF.HF_reanProxies_read(dtime)
df_sp_Dbefore = HF.HF_reanProxies_read(dtime - timedelta(days=1))

# Hourly geodataframes of the OTs before (ORIG) and after (FILT) filtering,
# filled by the hourly loop.
dORIG_gdf_OT_daily = {}
dFILT_gdf_OT_daily = {}

In [88]:
# Run the whole filter (read OTs -> geodataframe -> temporal aggregation -> spatial
# window -> thresholds) for every hour of the day, storing the OTs before and after filtering.
for hh in np.arange(hhmin,hhmax+1,1):
    print(f'hour = {hh}')
    #read original OT data between hh and hh+1:
    cpclat, cpclon, otpmax, dtmin, ot_timing = HF.HF_OTdata(day,hh,hh+1,otpref)
    print('step1 - read OTs')

    #if there aren't OTs detected in the current hour (i.e. len(ot_timing) == 0) skip to next hour:
    if len(ot_timing) == 0:
        continue

    #convert to geodataframes
    gdf_OT = HF.HF_OTdata_2_gdf(cpclat,cpclon,otpmax,ot_timing)

    #nothing left after the conversion: skip to next hour.
    #(The original repeated this test further down in a branch that could never run and
    # that would have raised IndexError on .iloc[0] of an empty frame; it has been removed.)
    if len(gdf_OT) == 0:
        continue

    #both dictionaries are keyed by the 'time' value of the first OT of the hour
    hour_key = gdf_OT['time'].iloc[0]

    dORIG_gdf_OT_daily[hour_key] = gdf_OT
    print('step2 - convert to gdf')

    #extract SPHERA data aggregated over the last 3 hours before including hhmin
    gdf_sp_3OTh = HF.HF_rean_tAgg_OTh(gdf_OT, df_sp, df_sp_Dbefore, sf_sp)
    print('step3 - temporal aggregation of SPHERA data')

    #extract the spatial windows around every SPHERA cell containing at least 1 OT detection
    dgdf_sp_3OTh_nn = HF.HF_rean_spatWindowOT(gdf_OT, gdf_sp_3OTh)
    print('step4 - spatial neighbourhood of SPHERA data')

    #filter OT data:
    FILT_gdf_OT = HF.HF_OTfilter(gdf_OT, dgdf_sp_3OTh_nn, t_CAPE, t_K, t_LI, t_DLS, t_H0)
    dFILT_gdf_OT_daily[hour_key] = FILT_gdf_OT  #assign time to dataset nested in dictionary

hour = 0
step1 - read OTs
step2 - convert to gdf
step3 - temporal aggregation of SPHERA data
step4 - spatial neighbourhood of SPHERA data
Filtered 1 OTs
Filtered 4 OTs
Filtered 6 OTs
Filtered 1 OTs
1 OTs kept!
2 OTs kept!
4 OTs kept!
1 OTs kept!
2 OTs kept!
1 OTs kept!
2 OTs kept!
2 OTs kept!
1 OTs kept!
6 OTs kept!
2 OTs kept!
5 OTs kept!
1 OTs kept!
4 OTs kept!
4 OTs kept!
2 OTs kept!
1 OTs kept!
6 OTs kept!
2 OTs kept!
2 OTs kept!
1 OTs kept!
2 OTs kept!
4 OTs kept!
2 OTs kept!
5 OTs kept!
1 OTs kept!
7 OTs kept!
2 OTs kept!
4 OTs kept!
1 OTs kept!
7 OTs kept!
2 OTs kept!
2 OTs kept!
3 OTs kept!
2 OTs kept!
9 OTs kept!
6 OTs kept!
9 OTs kept!
7 OTs kept!
3 OTs kept!
7 OTs kept!
1 OTs kept!
2 OTs kept!
1 OTs kept!
3 OTs kept!
4 OTs kept!
4 OTs kept!
4 OTs kept!
6 OTs kept!
3 OTs kept!
1 OTs kept!
hour = 1
step1 - read OTs
step2 - convert to gdf
step3 - temporal aggregation of SPHERA data
step4 - spatial neighbourhood of SPHERA data
Filtered 2 OTs
Filtered 3 OTs
Filtered 1 OTs
Filtere

5 OTs kept!
6 OTs kept!
7 OTs kept!
5 OTs kept!
5 OTs kept!
4 OTs kept!
3 OTs kept!
Filtered 4 OTs
1 OTs kept!
5 OTs kept!
Filtered 7 OTs
Filtered 2 OTs
Filtered 2 OTs
Filtered 1 OTs
1 OTs kept!
5 OTs kept!
Filtered 1 OTs
1 OTs kept!
1 OTs kept!
6 OTs kept!
1 OTs kept!
3 OTs kept!
2 OTs kept!
5 OTs kept!
2 OTs kept!
1 OTs kept!
1 OTs kept!
3 OTs kept!
1 OTs kept!
7 OTs kept!
2 OTs kept!
Filtered 1 OTs
Filtered 2 OTs
Filtered 3 OTs
Filtered 1 OTs
4 OTs kept!
5 OTs kept!
4 OTs kept!
4 OTs kept!
Filtered 7 OTs
Filtered 6 OTs
Filtered 2 OTs
Filtered 1 OTs
Filtered 6 OTs
Filtered 4 OTs
Filtered 4 OTs
9 OTs kept!
Filtered 5 OTs
Filtered 2 OTs
Filtered 2 OTs
1 OTs kept!
5 OTs kept!
4 OTs kept!
3 OTs kept!
Filtered 4 OTs
Filtered 2 OTs
2 OTs kept!
2 OTs kept!
Filtered 4 OTs
Filtered 5 OTs
Filtered 8 OTs
Filtered 5 OTs
3 OTs kept!
1 OTs kept!
2 OTs kept!
8 OTs kept!
3 OTs kept!
2 OTs kept!
3 OTs kept!
Filtered 2 OTs
Filtered 2 OTs
Filtered 2 OTs
Filtered 5 OTs
Filtered 4 OTs
5 OTs kept!
2 OTs k

6 OTs kept!
2 OTs kept!
2 OTs kept!
8 OTs kept!
1 OTs kept!
4 OTs kept!
4 OTs kept!
3 OTs kept!
2 OTs kept!
1 OTs kept!
2 OTs kept!
4 OTs kept!
2 OTs kept!
2 OTs kept!
4 OTs kept!
6 OTs kept!
5 OTs kept!
1 OTs kept!
2 OTs kept!
3 OTs kept!
1 OTs kept!
5 OTs kept!
5 OTs kept!
7 OTs kept!
1 OTs kept!
6 OTs kept!
1 OTs kept!
1 OTs kept!
2 OTs kept!
1 OTs kept!
1 OTs kept!
2 OTs kept!
3 OTs kept!
5 OTs kept!
3 OTs kept!
4 OTs kept!
3 OTs kept!
4 OTs kept!
3 OTs kept!
1 OTs kept!
4 OTs kept!
6 OTs kept!
7 OTs kept!
3 OTs kept!
1 OTs kept!
3 OTs kept!
1 OTs kept!
5 OTs kept!
5 OTs kept!
1 OTs kept!
4 OTs kept!
4 OTs kept!
1 OTs kept!
2 OTs kept!
2 OTs kept!
6 OTs kept!
4 OTs kept!
6 OTs kept!
1 OTs kept!
2 OTs kept!
4 OTs kept!
2 OTs kept!
6 OTs kept!
4 OTs kept!
2 OTs kept!
3 OTs kept!
3 OTs kept!
1 OTs kept!
4 OTs kept!
2 OTs kept!
2 OTs kept!
1 OTs kept!
2 OTs kept!
4 OTs kept!
1 OTs kept!
2 OTs kept!
1 OTs kept!
1 OTs kept!
1 OTs kept!
1 OTs kept!
1 OTs kept!
1 OTs kept!
2 OTs kept!
hour

3 OTs kept!
2 OTs kept!
2 OTs kept!
7 OTs kept!
1 OTs kept!
4 OTs kept!
3 OTs kept!
1 OTs kept!
2 OTs kept!
4 OTs kept!
1 OTs kept!
2 OTs kept!
1 OTs kept!
2 OTs kept!
2 OTs kept!
2 OTs kept!
1 OTs kept!
2 OTs kept!
1 OTs kept!
2 OTs kept!
7 OTs kept!
1 OTs kept!
4 OTs kept!
1 OTs kept!
4 OTs kept!
8 OTs kept!
6 OTs kept!
6 OTs kept!
2 OTs kept!
4 OTs kept!
4 OTs kept!
2 OTs kept!
hour = 14
step1 - read OTs
step2 - convert to gdf
step3 - temporal aggregation of SPHERA data
step4 - spatial neighbourhood of SPHERA data
Filtered 1 OTs
Filtered 2 OTs
Filtered 4 OTs
Filtered 3 OTs
4 OTs kept!
2 OTs kept!
1 OTs kept!
Filtered 1 OTs
Filtered 5 OTs
Filtered 3 OTs
Filtered 5 OTs
Filtered 4 OTs
2 OTs kept!
1 OTs kept!
2 OTs kept!
1 OTs kept!
3 OTs kept!
3 OTs kept!
2 OTs kept!
3 OTs kept!
1 OTs kept!
8 OTs kept!
3 OTs kept!
4 OTs kept!
2 OTs kept!
5 OTs kept!
4 OTs kept!
4 OTs kept!
4 OTs kept!
1 OTs kept!
1 OTs kept!
3 OTs kept!
1 OTs kept!
1 OTs kept!
3 OTs kept!
6 OTs kept!
1 OTs kept!
4 OTs 

4 OTs kept!
6 OTs kept!
8 OTs kept!
6 OTs kept!
2 OTs kept!
3 OTs kept!
4 OTs kept!
6 OTs kept!
1 OTs kept!
5 OTs kept!
1 OTs kept!
5 OTs kept!
3 OTs kept!
2 OTs kept!
1 OTs kept!
hour = 16
step1 - read OTs
step2 - convert to gdf
step3 - temporal aggregation of SPHERA data
step4 - spatial neighbourhood of SPHERA data
Filtered 4 OTs
4 OTs kept!
Filtered 1 OTs
Filtered 2 OTs
Filtered 2 OTs
Filtered 6 OTs
Filtered 8 OTs
Filtered 5 OTs
Filtered 3 OTs
Filtered 1 OTs
Filtered 1 OTs
Filtered 2 OTs
Filtered 6 OTs
Filtered 6 OTs
Filtered 4 OTs
Filtered 6 OTs
4 OTs kept!
Filtered 1 OTs
1 OTs kept!
Filtered 3 OTs
Filtered 5 OTs
Filtered 2 OTs
3 OTs kept!
1 OTs kept!
3 OTs kept!
Filtered 5 OTs
3 OTs kept!
1 OTs kept!
Filtered 2 OTs
Filtered 2 OTs
Filtered 1 OTs
Filtered 3 OTs
Filtered 3 OTs
Filtered 8 OTs
Filtered 1 OTs
Filtered 1 OTs
Filtered 6 OTs
Filtered 2 OTs
4 OTs kept!
5 OTs kept!
4 OTs kept!
1 OTs kept!
7 OTs kept!
8 OTs kept!
2 OTs kept!
6 OTs kept!
8 OTs kept!
7 OTs kept!
7 OTs kept!
3 O

1 OTs kept!
Filtered 5 OTs
Filtered 1 OTs
Filtered 5 OTs
Filtered 7 OTs
Filtered 9 OTs
Filtered 4 OTs
Filtered 1 OTs
Filtered 1 OTs
Filtered 1 OTs
Filtered 2 OTs
Filtered 6 OTs
Filtered 2 OTs
Filtered 1 OTs
Filtered 2 OTs
Filtered 1 OTs
Filtered 3 OTs
Filtered 2 OTs
Filtered 5 OTs
Filtered 1 OTs
Filtered 1 OTs
Filtered 4 OTs
Filtered 9 OTs
Filtered 5 OTs
Filtered 1 OTs
Filtered 4 OTs
Filtered 5 OTs
Filtered 1 OTs
Filtered 5 OTs
Filtered 7 OTs
Filtered 3 OTs
Filtered 1 OTs
2 OTs kept!
Filtered 1 OTs
2 OTs kept!
5 OTs kept!
1 OTs kept!
7 OTs kept!
2 OTs kept!
3 OTs kept!
4 OTs kept!
3 OTs kept!
3 OTs kept!
2 OTs kept!
8 OTs kept!
3 OTs kept!
1 OTs kept!
1 OTs kept!
2 OTs kept!
4 OTs kept!
6 OTs kept!
5 OTs kept!
5 OTs kept!
3 OTs kept!
5 OTs kept!
5 OTs kept!
7 OTs kept!
3 OTs kept!
4 OTs kept!
2 OTs kept!
6 OTs kept!
1 OTs kept!
7 OTs kept!
6 OTs kept!
1 OTs kept!
3 OTs kept!
1 OTs kept!
3 OTs kept!
3 OTs kept!
2 OTs kept!
5 OTs kept!
9 OTs kept!
2 OTs kept!
Filtered 2 OTs
2 OTs kept!
F

Filtered 1 OTs
Filtered 1 OTs
Filtered 3 OTs
Filtered 3 OTs
Filtered 4 OTs
Filtered 3 OTs
Filtered 1 OTs
Filtered 1 OTs
Filtered 1 OTs
Filtered 1 OTs
Filtered 2 OTs
Filtered 4 OTs
Filtered 8 OTs
Filtered 5 OTs
6 OTs kept!
2 OTs kept!
Filtered 2 OTs
Filtered 1 OTs
2 OTs kept!
Filtered 1 OTs
Filtered 2 OTs
Filtered 1 OTs
Filtered 4 OTs
Filtered 5 OTs
Filtered 1 OTs
Filtered 3 OTs
Filtered 1 OTs
Filtered 1 OTs
Filtered 3 OTs
Filtered 2 OTs
Filtered 2 OTs
Filtered 6 OTs
Filtered 2 OTs
Filtered 3 OTs
Filtered 2 OTs
Filtered 8 OTs
Filtered 2 OTs
Filtered 4 OTs
Filtered 1 OTs
Filtered 4 OTs
Filtered 7 OTs
Filtered 6 OTs
Filtered 1 OTs
Filtered 4 OTs
Filtered 2 OTs
Filtered 4 OTs
Filtered 2 OTs
Filtered 2 OTs
Filtered 2 OTs
Filtered 2 OTs
hour = 23
step1 - read OTs
step2 - convert to gdf
step3 - temporal aggregation of SPHERA data
step4 - spatial neighbourhood of SPHERA data
Filtered 1 OTs
Filtered 3 OTs
Filtered 2 OTs
Filtered 1 OTs
Filtered 8 OTs
Filtered 1 OTs
Filtered 4 OTs
Filtered 3 OTs


In [89]:
# Per-hour and daily summary of the number of OTs before and after filtering.
NorigOTs = []
NfiltOTs = []
for hour in dFILT_gdf_OT_daily.keys():
    n_filt = len(dFILT_gdf_OT_daily[hour])
    n_orig = len(dORIG_gdf_OT_daily[hour])
    print(f'{hour}')
    print(f'Numb. of OTs after filtering: {n_filt}')
    print(f'Numb. of Original OTs: {n_orig}')
    NorigOTs.append(n_orig)
    NfiltOTs.append(n_filt)
print(f'--------------------------------------------------------------------')

tot_orig = sum(NorigOTs)
tot_kept = sum(NfiltOTs)
tot_removed = tot_orig - tot_kept

# Percentages use fixed-point notation: the former ':.3' spec means 3 significant digits
# and prints 100.0 as '1e+02'. With no OTs at all there is nothing to divide by.
if tot_orig > 0:
    pct_kept = f'{tot_kept/tot_orig*100:.1f}'
    pct_removed = f'{tot_removed/tot_orig*100:.1f}'
else:
    pct_kept = pct_removed = 'n/a'

print(f'Tot numb of original OTs: {tot_orig}')
print(f'Numb of OTs after filtering: {tot_kept} ({pct_kept} %)')
print(f'Numb of filtered OTs: {tot_removed} ({pct_removed} %)')

2018-10-29T00:00:00Z
Numb. of OTs after filtering: 166
Numb. of Original OTs: 178
2018-10-29T01:00:00Z
Numb. of OTs after filtering: 184
Numb. of Original OTs: 192
2018-10-29T02:00:00Z
Numb. of OTs after filtering: 177
Numb. of Original OTs: 234
2018-10-29T03:00:00Z
Numb. of OTs after filtering: 228
Numb. of Original OTs: 318
2018-10-29T04:00:00Z
Numb. of OTs after filtering: 231
Numb. of Original OTs: 359
2018-10-29T05:00:00Z
Numb. of OTs after filtering: 363
Numb. of Original OTs: 421
2018-10-29T06:00:00Z
Numb. of OTs after filtering: 577
Numb. of Original OTs: 689
2018-10-29T07:00:00Z
Numb. of OTs after filtering: 303
Numb. of Original OTs: 425
2018-10-29T08:00:00Z
Numb. of OTs after filtering: 412
Numb. of Original OTs: 526
2018-10-29T09:00:00Z
Numb. of OTs after filtering: 512
Numb. of Original OTs: 529
2018-10-29T10:00:00Z
Numb. of OTs after filtering: 509
Numb. of Original OTs: 546
2018-10-29T11:00:00Z
Numb. of OTs after filtering: 472
Numb. of Original OTs: 539
2018-10-29T12:00

In [90]:
#write dictionaries of geodataframes containing for each hour of the day the original and filtered OTs datasets,
#plus a dataframe of metadata containing, for each stored hour, the number of OTs before and after filtering:
filt_nonfilt_OTs = dict()

filt_nonfilt_OTs['ORIGINAL_OTs'] = dORIG_gdf_OT_daily
filt_nonfilt_OTs['FILTERED_OTs'] = dFILT_gdf_OT_daily

# One row per stored hour, filled from the top; the table always has 24 rows, so the rows
# beyond the number of stored hours stay NaN. `ind` is the position of the hour among the
# stored ones, not the hour of the day.
metadata = pd.DataFrame(index=np.arange(0,24,1),columns=['hour','Filt_OTs','Orig_OTs'])

for ind,hour in zip(np.arange(0,24,1),dFILT_gdf_OT_daily.keys()):

    # single .loc[row, column] assignments: the chained form .loc[row][column] = value
    # writes into an intermediate object and is not guaranteed to reach the dataframe
    metadata.loc[ind, 'hour'] = hour
    metadata.loc[ind, 'Filt_OTs'] = len(dFILT_gdf_OT_daily[hour])
    metadata.loc[ind, 'Orig_OTs'] = len(dORIG_gdf_OT_daily[hour])

filt_nonfilt_OTs['metadata'] = metadata

In [91]:
#write dictionary to external file
#with open(fold + f'data/OT_filtered_data/tempAgg_3h_5-95pThresh/OTfilt_{dtime.year}{(dtime.month):02d}{(dtime.day):02d}.txt', 'wb') as file:
#     pickle.dump(filt_nonfilt_OTs,file)

In [92]:
#read dictionary from external file
# NOTE(review): pickle.load can execute arbitrary code while loading; only open files
# written by this notebook (see the commented-out pickle.dump cell).
# NOTE(review): despite its name, OTFILTERED_TURIN_GRAZ holds whatever event `dtime`
# points to (the recorded metadata output shows 2018-10-29).
with open(fold + f'data/OT_filtered_data/tempAgg_3h_5-95pThresh/OTfilt_{dtime.year}{(dtime.month):02d}{(dtime.day):02d}.txt', "rb") as file:  
     OTFILTERED_TURIN_GRAZ = pickle.load(file)

In [93]:
# Display the per-hour metadata table stored in the loaded dictionary.
OTFILTERED_TURIN_GRAZ['metadata']

Unnamed: 0,hour,Filt_OTs,Orig_OTs
0,2018-10-29T00:00:00Z,166,178
1,2018-10-29T01:00:00Z,184,192
2,2018-10-29T02:00:00Z,177,234
3,2018-10-29T03:00:00Z,228,318
4,2018-10-29T04:00:00Z,231,359
5,2018-10-29T05:00:00Z,363,421
6,2018-10-29T06:00:00Z,577,689
7,2018-10-29T07:00:00Z,303,425
8,2018-10-29T08:00:00Z,412,526
9,2018-10-29T09:00:00Z,512,529
