In [1]:
import os
import sys
# Move to the repository root (two levels up) and make it importable.
# os.chdir() returns None, so its result must not be appended to sys.path;
# append the resulting working directory instead.
# The path is relative: re-running this cell in the same kernel moves a
# further two levels up.
os.chdir('../../')
sys.path.append(os.getcwd())

In [2]:
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
import datetime

import matplotlib.pyplot as plt
import plotly.plotly as ply
import plotly.graph_objs as go
import src.utils.graph as graph
from plotly.offline import init_notebook_mode, iplot
# Notebook-wide setup: plotly notebook mode, tqdm integration for pandas
# (needed by the .progress_apply call used further down) and untruncated
# DataFrame display.
init_notebook_mode(connected=True)
tqdm.pandas()

pd.set_option('display.max_columns', None)
# NOTE(review): -1 as "no limit" is deprecated in newer pandas releases
# (None replaces it there) - confirm against the pandas version in use.
pd.set_option('display.max_colwidth', -1)

In [3]:
import src.data as data
import src.utility as utility

In [4]:
# Run configuration: `mode` and `t` are handed to the src.data loaders below.
mode, t = 'full', 'test'

# Size of the window around each event start, counted in steps; the
# time-window cell below multiplies these by 15 minutes.
steps_behind_event, steps_after_event = 4, 3

In [5]:
def add_possible_sensors(events_df):
    """Attach to every event the sensor positions known for its road.

    The sensor table is read through ``data.sensors()``; the distinct ``KM``
    values of each ``KEY`` are collected into an ascending list, which is then
    left-joined onto ``events_df`` as the ``ROAD_SENSORS`` column.

    Parameters
    ----------
    events_df : pandas.DataFrame
        Events table exposing ``KEY``.

    Returns
    -------
    pandas.DataFrame
        The columns of ``events_df`` plus ``ROAD_SENSORS``. Being a left join,
        events whose ``KEY`` has no sensor keep their row with a missing
        ``ROAD_SENSORS`` value.

    Notes
    -----
    NOTE(review): the merged frame appears to come back with a fresh
    positional index rather than the index labels of ``events_df`` - confirm
    before relying on index labels downstream.
    """
    sensor_positions = data.sensors()[['KEY', 'KM']].drop_duplicates()
    km_per_key = (
        sensor_positions
        .sort_values(['KEY', 'KM'])
        .groupby('KEY')
        .agg(list)
        .rename(columns={'KM': 'ROAD_SENSORS'})
    )
    return events_df.merge(km_per_key, on='KEY', how='left')

def merge_speed_events(speed_df, events_df):
    """Keep only the events that have at least one sensor inside their KM range.

    Every event is first given the sensor positions of its road (see
    ``add_possible_sensors``); the positions lying within
    ``[KM_START, KM_END]`` (bounds included) are stored as a list in the new
    ``sensors`` column, and events whose list is empty are discarded.

    Parameters
    ----------
    speed_df : pandas.DataFrame
        Not used by this function; kept so existing calls keep working.
    events_df : pandas.DataFrame
        Events with ``KEY``, ``KM_START`` and ``KM_END`` columns.

    Returns
    -------
    pandas.DataFrame
        The events with at least one in-range sensor, with the ``sensors``
        column added and the intermediate ``ROAD_SENSORS`` column removed.

    Notes
    -----
    Uses ``DataFrame.progress_apply``, which is only available after
    ``tqdm.pandas()`` has been called.
    """
    events_with_sensor_df = add_possible_sensors(events_df)

    def sensors_in_range(row):
        # Events on a road without any sensor come out of the left join with a
        # missing value instead of a list; iterating over it would raise
        # TypeError, so treat it as "no sensor available".
        road_sensors = row.ROAD_SENSORS
        if not isinstance(road_sensors, list):
            return []
        return [km for km in road_sensors if row.KM_START <= km <= row.KM_END]

    events_with_sensor_df['sensors'] = events_with_sensor_df.progress_apply(sensors_in_range, axis=1)
    has_sensor = events_with_sensor_df['sensors'].str.len() > 0
    return events_with_sensor_df[has_sensor].drop('ROAD_SENSORS', axis=1)

## Build a time-window for each event

In [6]:
# Load the events for the configured mode/split and the original speed
# measurements for the same split, then show the size of the events table.
e = data.events(mode, t)
speeds = data.speeds_original(t)
e.shape

caching resources/dataset/preprocessed/full/test/events.csv.gz
caching resources/dataset/originals/speeds_test.csv.gz


(443895, 12)

In [7]:
# find the starting time of each event
ev_agg = e.astype({'KEY':'int'}).groupby('index').agg({
    'step_duration':'first',
    'EVENT_DETAIL':'first',
    'EVENT_TYPE':'first',
    'KM_END':'first',
    'KM_START':'first',
    'KEY':'first',
    'KEY_2':'first',
    'KM_EVENT':'first',
    'START_DATETIME_UTC':'min',
}).rename(columns={'step_duration':'event_duration'})

In [8]:
# Preview the per-event aggregate (one row per event index).
ev_agg.head()

Unnamed: 0_level_0,event_duration,EVENT_DETAIL,EVENT_TYPE,KM_END,KM_START,KEY,KEY_2,KM_EVENT,START_DATETIME_UTC
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,3,15.0,Veicolo_in_avaria,1001,997,146,146_999,999.0,2019-01-01 07:45:00
1,4,2.0,Ostacolo_in_carreggiata,2145,2141,526,526_2143,2143.0,2019-01-01 15:30:00
2,21,24.0,Pavimentazione,2541,2537,518,518_2539,2539.0,2019-01-02 15:00:00
3,2,11.0,extended_accident,2368,2364,147,147_2366,2366.0,2019-01-03 06:00:00
4,21,23.0,Barriere,2441,2437,147,147_2439,2439.0,2019-01-04 09:15:00


In [9]:
# Debugging aid: look up a single event by its index label.
ev_agg.loc[[12251]]

Unnamed: 0_level_0,EVENT_DETAIL,EVENT_TYPE,KM_END,KM_START,KEY,KEY_2,KM_EVENT,START_DATETIME_UTC
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
12251,40.0,Segnaletica_verticale,241,150,533,533_239,,2019-01-05 09:30:00


In [9]:
# Window bounds around each event start: steps_behind_event steps before and
# steps_after_event steps after it, one step being 15 minutes.
ev_agg['timewind_start'] = ev_agg['START_DATETIME_UTC'] - pd.Timedelta(minutes=15 * steps_behind_event)
ev_agg['timewind_end'] = ev_agg['START_DATETIME_UTC'] + pd.Timedelta(minutes=15 * steps_after_event)
ev_agg.head()

Unnamed: 0_level_0,event_duration,EVENT_DETAIL,EVENT_TYPE,KM_END,KM_START,KEY,KEY_2,KM_EVENT,START_DATETIME_UTC,timewind_start,timewind_end
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,3,15.0,Veicolo_in_avaria,1001,997,146,146_999,999.0,2019-01-01 07:45:00,2019-01-01 06:45:00,2019-01-01 08:30:00
1,4,2.0,Ostacolo_in_carreggiata,2145,2141,526,526_2143,2143.0,2019-01-01 15:30:00,2019-01-01 14:30:00,2019-01-01 16:15:00
2,21,24.0,Pavimentazione,2541,2537,518,518_2539,2539.0,2019-01-02 15:00:00,2019-01-02 14:00:00,2019-01-02 15:45:00
3,2,11.0,extended_accident,2368,2364,147,147_2366,2366.0,2019-01-03 06:00:00,2019-01-03 05:00:00,2019-01-03 06:45:00
4,21,23.0,Barriere,2441,2437,147,147_2439,2439.0,2019-01-04 09:15:00,2019-01-04 08:15:00,2019-01-04 10:00:00


In [11]:
# Attach the in-range sensors to every event and drop the events without any.
# `speeds` is passed along, but merge_speed_events does not use it.
# Note: ev_agg is overwritten, so this cell is not meant to be re-run on its own.
ev_agg = merge_speed_events(speeds, ev_agg)
ev_agg.head(10)

caching resources/dataset/preprocessed/sensors.csv.gz


HBox(children=(IntProgress(value=0, max=13592), HTML(value='')))




Unnamed: 0,event_duration,EVENT_DETAIL,EVENT_TYPE,KM_END,KM_START,KEY,KEY_2,KM_EVENT,START_DATETIME_UTC,timewind_start,timewind_end,sensors
0,3,15.0,Veicolo_in_avaria,1001,997,146,146_999,999.0,2019-01-01 07:45:00,2019-01-01 06:45:00,2019-01-01 08:30:00,[997]
7,3,2.0,Ostacolo_in_carreggiata,967,963,523,523_965,965.0,2019-01-04 12:15:00,2019-01-04 11:15:00,2019-01-04 13:00:00,[965]
8,5,22.0,Veicolo_in_avaria,997,992,146,146_995,,2019-01-04 12:15:00,2019-01-04 11:15:00,2019-01-04 13:00:00,[997]
11,5,2.0,Ostacolo_in_carreggiata,279,270,525,525_277,,2019-01-05 07:45:00,2019-01-05 06:45:00,2019-01-05 08:30:00,[278]
25,33,8.0,Segnaletica_orizzontale,320,316,525,525_318,318.0,2019-01-07 08:30:00,2019-01-07 07:30:00,2019-01-07 09:15:00,[319]
30,55,32.0,Gestione_viabilita,741,696,517,517_739,,2019-01-07 21:45:00,2019-01-07 20:45:00,2019-01-07 22:30:00,[728]
58,4,31.0,Pavimentazione,1089,917,146,146_1087,,2019-01-13 11:45:00,2019-01-13 10:45:00,2019-01-13 12:30:00,"[919, 925, 939, 990, 997, 1050, 1074]"
64,2,15.0,Veicolo_in_avaria,929,925,523,523_927,927.0,2019-01-14 15:00:00,2019-01-14 14:00:00,2019-01-14 15:45:00,[927]
68,8,21.0,Ostacolo_in_carreggiata,456,452,35,35_454,454.0,2019-01-15 14:00:00,2019-01-15 13:00:00,2019-01-15 14:45:00,[453]
75,5,9.0,Pavimentazione,307,303,525,525_305,305.0,2019-01-19 07:30:00,2019-01-19 06:30:00,2019-01-19 08:15:00,[305]


In [14]:
# Debugging aid: same label lookup as before, now on the frame returned by
# merge_speed_events.
ev_agg.loc[[12251]]

Unnamed: 0,step_duration,EVENT_DETAIL,EVENT_TYPE,KM_END,KM_START,KEY,KEY_2,KM_EVENT,START_DATETIME_UTC,timewind_start,timewind_end,sensors
12251,14,0.0,Meteo,604,587,421,421_602,,2019-01-03 20:45:00,2019-01-03 19:45:00,2019-01-03 21:30:00,[604]


In [12]:
# One row per (event, sensor): every event row is repeated once per entry of
# its `sensors` list, and the flattened sensor positions become the KM column.
base_df = pd.DataFrame({
    col: np.repeat(ev_agg[col], ev_agg['sensors'].str.len())
    for col in ev_agg.columns.drop('sensors')
})
base_df = base_df.assign(KM=np.concatenate(ev_agg['sensors'].values))

In [13]:
# Preview the per-(event, sensor) frame.
base_df.head()

Unnamed: 0,event_duration,EVENT_DETAIL,EVENT_TYPE,KM_END,KM_START,KEY,KEY_2,KM_EVENT,START_DATETIME_UTC,timewind_start,timewind_end,KM
0,3,15.0,Veicolo_in_avaria,1001,997,146,146_999,999.0,2019-01-01 07:45:00,2019-01-01 06:45:00,2019-01-01 08:30:00,997
7,3,2.0,Ostacolo_in_carreggiata,967,963,523,523_965,965.0,2019-01-04 12:15:00,2019-01-04 11:15:00,2019-01-04 13:00:00,965
8,5,22.0,Veicolo_in_avaria,997,992,146,146_995,,2019-01-04 12:15:00,2019-01-04 11:15:00,2019-01-04 13:00:00,997
11,5,2.0,Ostacolo_in_carreggiata,279,270,525,525_277,,2019-01-05 07:45:00,2019-01-05 06:45:00,2019-01-05 08:30:00,278
25,33,8.0,Segnaletica_orizzontale,320,316,525,525_318,318.0,2019-01-07 08:30:00,2019-01-07 07:30:00,2019-01-07 09:15:00,319


In [15]:
# Debugging aid: all sensor rows carrying index label 12251.
base_df.loc[[12251]]

Unnamed: 0,EVENT_DETAIL,EVENT_TYPE,KM_END,KM_START,KEY,KEY_2,KM_EVENT,START_DATETIME_UTC,timewind_start,timewind_end,KM
12251,40.0,Segnaletica_verticale,241,150,533,533_239,,2019-01-05 09:30:00,2019-01-05 08:30:00,2019-01-05 10:15:00,174
12251,40.0,Segnaletica_verticale,241,150,533,533_239,,2019-01-05 09:30:00,2019-01-05 08:30:00,2019-01-05 10:15:00,190
12251,40.0,Segnaletica_verticale,241,150,533,533_239,,2019-01-05 09:30:00,2019-01-05 08:30:00,2019-01-05 10:15:00,194
12251,40.0,Segnaletica_verticale,241,150,533,533_239,,2019-01-05 09:30:00,2019-01-05 08:30:00,2019-01-05 10:15:00,197
12251,40.0,Segnaletica_verticale,241,150,533,533_239,,2019-01-05 09:30:00,2019-01-05 08:30:00,2019-01-05 10:15:00,205
12251,40.0,Segnaletica_verticale,241,150,533,533_239,,2019-01-05 09:30:00,2019-01-05 08:30:00,2019-01-05 10:15:00,217
12251,40.0,Segnaletica_verticale,241,150,533,533_239,,2019-01-05 09:30:00,2019-01-05 08:30:00,2019-01-05 10:15:00,231


In [14]:
# Expand each row over its [timewind_start, timewind_end] window with the
# project helper, drop the helper columns, expose the 'index' column as
# event_index and order the rows by event.
# NOTE(review): 'step_duration' is dropped here although ev_agg renamed that
# column to 'event_duration'; presumably expand_timestamps adds its own
# step_duration column - confirm.
base_df = (
    utility.expand_timestamps(base_df, col_ts_start='timewind_start', col_ts_end='timewind_end')
    .drop(['timewind_start', 'timewind_end', 'step_duration'], axis=1)
    .rename(columns={'index': 'event_index'})
    .sort_values('event_index')
)
# DATETIME_UTC is interpreted as a number of seconds and converted to timestamps.
base_df['DATETIME_UTC'] = pd.to_datetime(base_df['DATETIME_UTC'], unit='s')

HBox(children=(IntProgress(value=0, max=5798), HTML(value='')))




In [15]:
# Preview the frame after the time-window expansion.
base_df.head()

Unnamed: 0,event_index,event_duration,EVENT_DETAIL,EVENT_TYPE,KM_END,KM_START,KEY,KEY_2,KM_EVENT,START_DATETIME_UTC,KM,DATETIME_UTC
348,0,3,15.0,Veicolo_in_avaria,1001,997,146,146_999,999.0,2019-01-01 07:45:00,997,2019-01-01 07:45:00
351,0,3,15.0,Veicolo_in_avaria,1001,997,146,146_999,999.0,2019-01-01 07:45:00,997,2019-01-01 08:30:00
350,0,3,15.0,Veicolo_in_avaria,1001,997,146,146_999,999.0,2019-01-01 07:45:00,997,2019-01-01 08:15:00
349,0,3,15.0,Veicolo_in_avaria,1001,997,146,146_999,999.0,2019-01-01 07:45:00,997,2019-01-01 08:00:00
347,0,3,15.0,Veicolo_in_avaria,1001,997,146,146_999,999.0,2019-01-01 07:45:00,997,2019-01-01 07:30:00


In [18]:
# Debugging aid: rows of one (event, sensor) pair after the expansion.
base_df[(base_df.event_index == 12251) & (base_df.KM == 231)]

Unnamed: 0,event_index,EVENT_DETAIL,EVENT_TYPE,KM_END,KM_START,KEY,KEY_2,KM_EVENT,START_DATETIME_UTC,KM,DATETIME_UTC


In [20]:
# Left-join the speed measurements onto every (KEY, KM, DATETIME_UTC) row, so
# rows without a measurement are kept with missing speed values. The event's
# own KEY_2 is dropped before the join.
joined_df = base_df.drop('KEY_2', axis=1).merge(
    speeds.astype({'KEY': 'int'}),
    on=['KEY', 'KM', 'DATETIME_UTC'],
    how='left',
)
joined_df.shape

(46384, 17)

In [21]:
# Preview the events joined with the speed measurements.
joined_df.head(3)

Unnamed: 0,event_index,event_duration,EVENT_DETAIL,EVENT_TYPE,KM_END,KM_START,KEY,KM_EVENT,START_DATETIME_UTC,KM,DATETIME_UTC,SPEED_AVG,SPEED_SD,SPEED_MIN,SPEED_MAX,N_VEHICLES,KEY_2
0,0,3,15.0,Veicolo_in_avaria,1001,997,146,999.0,2019-01-01 07:45:00,997,2019-01-01 07:45:00,182.63175,23.690489,134.294,239.464,16.0,146_997
1,0,3,15.0,Veicolo_in_avaria,1001,997,146,999.0,2019-01-01 07:45:00,997,2019-01-01 08:30:00,185.821077,24.696339,134.294,207.104,13.0,146_997
2,0,3,15.0,Veicolo_in_avaria,1001,997,146,999.0,2019-01-01 07:45:00,997,2019-01-01 08:15:00,183.5621,26.831366,142.384,231.374,20.0,146_997


In [41]:
# Debugging aid: rows of one (event, sensor) pair after the speeds join.
joined_df[(joined_df.event_index == 12251) & (joined_df.KM == 231)]

Unnamed: 0,event_index,event_duration,EVENT_DETAIL,EVENT_TYPE,KM_END,KM_START,KEY,KM_EVENT,START_DATETIME_UTC,KM,DATETIME_UTC,SPEED_AVG,SPEED_SD,SPEED_MIN,SPEED_MAX,N_VEHICLES,KEY_2


In [22]:
# Enrich with weather and sensor metadata. No `on=` is given, so pandas joins
# on every column the two frames have in common.
joined_df = joined_df.merge(data.weather(), how='left')
joined_df = joined_df.merge(data.sensors(), how='left')

# Collapse to one row per (event, sensor). Rows are sorted by time first, so
# the time-varying measures become chronologically ordered lists; attributes
# that are fixed per event or per sensor are kept once.
# EVENT_DETAIL / EVENT_TYPE are constant within an event, so 'first' replaces
# the former positional lookup `x.values[steps_behind_event]`, which raised
# IndexError whenever a group held fewer than steps_behind_event + 1 rows.
joined_df = joined_df.sort_values(['KEY','KM','DATETIME_UTC']).groupby(['event_index','KEY','KM'], as_index=False).agg({
    'KM_START':'first',
    'KM_END':'first',
    'DATETIME_UTC':list,
    'event_duration':'first',
    'SPEED_AVG':list,
    'SPEED_SD':list,
    'SPEED_MAX':list,
    'SPEED_MIN':list,
    'N_VEHICLES':list,
    'EMERGENCY_LANE':'first',
    'LANES':'first',
    'ROAD_TYPE':'first',
    'EVENT_DETAIL':'first',
    'EVENT_TYPE':'first',
    'WEATHER': list,
    'DISTANCE': list,
    'TEMPERATURE': list,
    'MIN_TEMPERATURE': list,
    'MAX_TEMPERATURE': list,
})

# Signed offsets of the sensor position from the two ends of the event's KM range.
joined_df['distance_start'] = joined_df['KM'] - joined_df['KM_START']
joined_df['distance_end'] = joined_df['KM'] - joined_df['KM_END']

In [25]:
# Preview the per-(event, sensor) rows with their list-valued measures.
joined_df.head(3)

Unnamed: 0,event_index,KEY,KM,KM_END,KM_START,DATETIME_UTC,event_duration,SPEED_AVG,SPEED_SD,SPEED_MAX,SPEED_MIN,N_VEHICLES,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,WEATHER,DISTANCE,TEMPERATURE,MIN_TEMPERATURE,MAX_TEMPERATURE,distance_start,distance_end
0,0,146,997,1001,997,"[2019-01-01 06:45:00, 2019-01-01 07:00:00, 2019-01-01 07:15:00, 2019-01-01 07:30:00, 2019-01-01 07:45:00, 2019-01-01 08:00:00, 2019-01-01 08:15:00, 2019-01-01 08:30:00]",3,"[176.80327272727277, 188.395875, 196.9915, 197.6117333333333, 182.63175, 189.4304615384616, 183.5621, 185.8210769230769]","[22.479033133526503, 25.934938436724053, 32.09191956863908, 24.74963334639429, 23.690489094430557, 25.58676178747968, 26.831366173311327, 24.69633911622509]","[210.34, 252.408, 276.678, 244.318, 239.46400000000003, 242.7, 231.37400000000002, 207.104]","[144.002, 144.002, 156.946, 160.18200000000004, 134.29399999999998, 158.56400000000005, 142.384, 134.29399999999998]","[11.0, 16.0, 16.0, 15.0, 16.0, 13.0, 20.0, 13.0]",0,1,0,15.0,Veicolo_in_avaria,"[Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno]","[10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]","[9.2, 9.2, 9.2, 9.2, 9.2, 9.2, 9.9, 9.9]","[9.2, 9.2, 9.2, 9.2, 9.2, 9.2, 9.2, 9.2]","[11.4, 11.4, 11.4, 11.4, 11.4, 11.4, 11.4, 11.4]",0,-4
1,7,523,965,967,963,"[2019-01-04 11:15:00, 2019-01-04 11:30:00, 2019-01-04 11:45:00, 2019-01-04 12:00:00, 2019-01-04 12:15:00, 2019-01-04 12:30:00, 2019-01-04 12:45:00, 2019-01-04 13:00:00]",3,"[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]",0,2,0,2.0,Ostacolo_in_carreggiata,"[Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno]","[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]","[17.4, 17.4, 17.4, 17.4, 17.9, 17.9, 17.9, 17.9]","[9.1, 9.1, 9.1, 9.1, 9.1, 9.1, 9.1, 9.1]","[17.4, 17.4, 17.4, 17.4, 17.9, 17.9, 17.9, 17.9]",2,-2
2,8,146,997,997,992,"[2019-01-04 11:15:00, 2019-01-04 11:30:00, 2019-01-04 11:45:00, 2019-01-04 12:00:00, 2019-01-04 12:15:00, 2019-01-04 12:30:00, 2019-01-04 12:45:00, 2019-01-04 13:00:00]",5,"[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]",0,1,0,22.0,Veicolo_in_avaria,"[Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno]","[10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]","[17.4, 17.4, 17.4, 17.4, 17.9, 17.9, 17.9, 17.9]","[9.1, 9.1, 9.1, 9.1, 9.1, 9.1, 9.1, 9.1]","[17.4, 17.4, 17.4, 17.4, 17.9, 17.9, 17.9, 17.9]",5,0


In [24]:
# Debugging aid: inspect a single event after the aggregation.
joined_df[joined_df.event_index == 5265]

Unnamed: 0,event_index,KEY,KM,DATETIME_UTC,SPEED_AVG,SPEED_SD,SPEED_MAX,SPEED_MIN,N_VEHICLES,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,WEATHER,DISTANCE,TEMPERATURE,MIN_TEMPERATURE,MAX_TEMPERATURE,KM_START
986,5265,537,305,"[2019-01-12 15:00:00, 2019-01-12 15:15:00, 2019-01-12 15:30:00, 2019-01-12 15:45:00, 2019-01-12 16:00:00, 2019-01-12 16:15:00, 2019-01-12 16:30:00, 2019-01-12 16:45:00]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]",0,2,0,6.0,extended_accident,"[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan]",8


In [26]:
# Cut the per-row measure lists at the step where the event begins.
def split_prediction_fields(row, event_beginning_step):
    """Split the list-valued fields of ``row`` at ``event_beginning_step``.

    ``DATETIME_UTC`` and ``SPEED_AVG`` are returned as two pieces each (the
    elements before the cut, then the elements from the cut onwards); every
    other measure contributes only the elements before the cut.

    Parameters
    ----------
    row : object
        Exposes the sliceable attributes ``DATETIME_UTC``, ``SPEED_AVG``,
        ``SPEED_SD``, ``SPEED_MAX``, ``SPEED_MIN``, ``N_VEHICLES``,
        ``WEATHER``, ``DISTANCE``, ``TEMPERATURE``, ``MIN_TEMPERATURE`` and
        ``MAX_TEMPERATURE``.
    event_beginning_step : int
        Position at which the lists are cut.

    Returns
    -------
    pandas.Series
        13 entries: before/after pieces of ``DATETIME_UTC``, before/after
        pieces of ``SPEED_AVG``, then the before piece of the remaining nine
        fields in the order listed above.
    """
    cut = event_beginning_step
    pieces = []

    # Fields needed both as history and as prediction target.
    for name in ('DATETIME_UTC', 'SPEED_AVG'):
        values = getattr(row, name)
        pieces.append(values[:cut])
        pieces.append(values[cut:])

    # Fields used as history only.
    for name in ('SPEED_SD', 'SPEED_MAX', 'SPEED_MIN', 'N_VEHICLES', 'WEATHER',
                 'DISTANCE', 'TEMPERATURE', 'MIN_TEMPERATURE', 'MAX_TEMPERATURE'):
        pieces.append(getattr(row, name)[:cut])

    return pd.Series(tuple(pieces))

In [27]:
# Target columns, in the same order as the pieces returned by
# split_prediction_fields: the '_y' / '_Y' entries receive the part of each
# list from the event start onwards, the others the part before it.
columns_to_split = [
    'DATETIME_UTC', 'DATETIME_UTC_y',
    'SPEED_AVG', 'SPEED_AVG_Y',
    'SPEED_SD', 'SPEED_MAX', 'SPEED_MIN', 'N_VEHICLES',
    'WEATHER', 'DISTANCE',
    'TEMPERATURE', 'MIN_TEMPERATURE', 'MAX_TEMPERATURE',
]
joined_df[columns_to_split] = joined_df.apply(
    lambda row: split_prediction_fields(row, steps_behind_event),
    axis=1,
)

In [46]:
# Debugging aid: inspect a single event after the history/target split.
joined_df[joined_df.event_index == 5265]

Unnamed: 0,event_index,KEY,KM,DATETIME_UTC,event_duration,SPEED_AVG,SPEED_SD,SPEED_MAX,SPEED_MIN,N_VEHICLES,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,WEATHER,DISTANCE,TEMPERATURE,MIN_TEMPERATURE,MAX_TEMPERATURE,KM_START,DATETIME_UTC_y,SPEED_AVG_Y


In [28]:
# Unpack every list column into one scalar column per step: target columns
# ('_y' / '_Y' suffix) are numbered 0..steps_after_event, history columns
# -steps_behind_event..-1.
for col_name in columns_to_split:
    is_target = col_name.upper().endswith('_Y')
    steps = range(0, steps_after_event + 1) if is_target else range(-steps_behind_event, 0)
    new_cols = ['{}_{}'.format(col_name, step) for step in steps]

    unpacked = pd.DataFrame(joined_df[col_name].values.tolist(), index=joined_df.index)
    joined_df[new_cols] = unpacked

In [29]:
# Preview the frame with both the list columns and their per-step columns.
joined_df.head(2)

Unnamed: 0,event_index,KEY,KM,KM_END,KM_START,DATETIME_UTC,event_duration,SPEED_AVG,SPEED_SD,SPEED_MAX,SPEED_MIN,N_VEHICLES,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,WEATHER,DISTANCE,TEMPERATURE,MIN_TEMPERATURE,MAX_TEMPERATURE,distance_start,distance_end,DATETIME_UTC_y,SPEED_AVG_Y,DATETIME_UTC_-4,DATETIME_UTC_-3,DATETIME_UTC_-2,DATETIME_UTC_-1,DATETIME_UTC_y_0,DATETIME_UTC_y_1,DATETIME_UTC_y_2,DATETIME_UTC_y_3,SPEED_AVG_-4,SPEED_AVG_-3,SPEED_AVG_-2,SPEED_AVG_-1,SPEED_AVG_Y_0,SPEED_AVG_Y_1,SPEED_AVG_Y_2,SPEED_AVG_Y_3,SPEED_SD_-4,SPEED_SD_-3,SPEED_SD_-2,SPEED_SD_-1,SPEED_MAX_-4,SPEED_MAX_-3,SPEED_MAX_-2,SPEED_MAX_-1,SPEED_MIN_-4,SPEED_MIN_-3,SPEED_MIN_-2,SPEED_MIN_-1,N_VEHICLES_-4,N_VEHICLES_-3,N_VEHICLES_-2,N_VEHICLES_-1,WEATHER_-4,WEATHER_-3,WEATHER_-2,WEATHER_-1,DISTANCE_-4,DISTANCE_-3,DISTANCE_-2,DISTANCE_-1,TEMPERATURE_-4,TEMPERATURE_-3,TEMPERATURE_-2,TEMPERATURE_-1,MIN_TEMPERATURE_-4,MIN_TEMPERATURE_-3,MIN_TEMPERATURE_-2,MIN_TEMPERATURE_-1,MAX_TEMPERATURE_-4,MAX_TEMPERATURE_-3,MAX_TEMPERATURE_-2,MAX_TEMPERATURE_-1
0,0,146,997,1001,997,"[2019-01-01 06:45:00, 2019-01-01 07:00:00, 2019-01-01 07:15:00, 2019-01-01 07:30:00]",3,"[176.80327272727277, 188.395875, 196.9915, 197.6117333333333]","[22.479033133526503, 25.934938436724053, 32.09191956863908, 24.74963334639429]","[210.34, 252.408, 276.678, 244.318]","[144.002, 144.002, 156.946, 160.18200000000004]","[11.0, 16.0, 16.0, 15.0]",0,1,0,15.0,Veicolo_in_avaria,"[Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno]","[10.0, 10.0, 10.0, 10.0]","[9.2, 9.2, 9.2, 9.2]","[9.2, 9.2, 9.2, 9.2]","[11.4, 11.4, 11.4, 11.4]",0,-4,"[2019-01-01 07:45:00, 2019-01-01 08:00:00, 2019-01-01 08:15:00, 2019-01-01 08:30:00]","[182.63175, 189.4304615384616, 183.5621, 185.8210769230769]",2019-01-01 06:45:00,2019-01-01 07:00:00,2019-01-01 07:15:00,2019-01-01 07:30:00,2019-01-01 07:45:00,2019-01-01 08:00:00,2019-01-01 08:15:00,2019-01-01 08:30:00,176.803273,188.395875,196.9915,197.611733,182.63175,189.430462,183.5621,185.821077,22.479033,25.934938,32.09192,24.749633,210.34,252.408,276.678,244.318,144.002,144.002,156.946,160.182,11.0,16.0,16.0,15.0,Quasi Sereno,Quasi Sereno,Quasi Sereno,Quasi Sereno,10.0,10.0,10.0,10.0,9.2,9.2,9.2,9.2,9.2,9.2,9.2,9.2,11.4,11.4,11.4,11.4
1,7,523,965,967,963,"[2019-01-04 11:15:00, 2019-01-04 11:30:00, 2019-01-04 11:45:00, 2019-01-04 12:00:00]",3,"[nan, nan, nan, nan]","[nan, nan, nan, nan]","[nan, nan, nan, nan]","[nan, nan, nan, nan]","[nan, nan, nan, nan]",0,2,0,2.0,Ostacolo_in_carreggiata,"[Quasi Sereno, Quasi Sereno, Quasi Sereno, Quasi Sereno]","[2.0, 2.0, 2.0, 2.0]","[17.4, 17.4, 17.4, 17.4]","[9.1, 9.1, 9.1, 9.1]","[17.4, 17.4, 17.4, 17.4]",2,-2,"[2019-01-04 12:15:00, 2019-01-04 12:30:00, 2019-01-04 12:45:00, 2019-01-04 13:00:00]","[nan, nan, nan, nan]",2019-01-04 11:15:00,2019-01-04 11:30:00,2019-01-04 11:45:00,2019-01-04 12:00:00,2019-01-04 12:15:00,2019-01-04 12:30:00,2019-01-04 12:45:00,2019-01-04 13:00:00,,,,,,,,,,,,,,,,,,,,,,,,,Quasi Sereno,Quasi Sereno,Quasi Sereno,Quasi Sereno,2.0,2.0,2.0,2.0,17.4,17.4,17.4,17.4,9.1,9.1,9.1,9.1,17.4,17.4,17.4,17.4


In [48]:
# Debugging aid: inspect a single event after the per-step expansion.
joined_df[joined_df.event_index == 5265]

Unnamed: 0,event_index,KEY,KM,DATETIME_UTC,event_duration,SPEED_AVG,SPEED_SD,SPEED_MAX,SPEED_MIN,N_VEHICLES,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,WEATHER,DISTANCE,TEMPERATURE,MIN_TEMPERATURE,MAX_TEMPERATURE,KM_START,DATETIME_UTC_y,SPEED_AVG_Y,DATETIME_UTC_-4,DATETIME_UTC_-3,DATETIME_UTC_-2,DATETIME_UTC_-1,DATETIME_UTC_y_0,DATETIME_UTC_y_1,DATETIME_UTC_y_2,DATETIME_UTC_y_3,SPEED_AVG_-4,SPEED_AVG_-3,SPEED_AVG_-2,SPEED_AVG_-1,SPEED_AVG_Y_0,SPEED_AVG_Y_1,SPEED_AVG_Y_2,SPEED_AVG_Y_3,SPEED_SD_-4,SPEED_SD_-3,SPEED_SD_-2,SPEED_SD_-1,SPEED_MAX_-4,SPEED_MAX_-3,SPEED_MAX_-2,SPEED_MAX_-1,SPEED_MIN_-4,SPEED_MIN_-3,SPEED_MIN_-2,SPEED_MIN_-1,N_VEHICLES_-4,N_VEHICLES_-3,N_VEHICLES_-2,N_VEHICLES_-1,WEATHER_-4,WEATHER_-3,WEATHER_-2,WEATHER_-1,DISTANCE_-4,DISTANCE_-3,DISTANCE_-2,DISTANCE_-1,TEMPERATURE_-4,TEMPERATURE_-3,TEMPERATURE_-2,TEMPERATURE_-1,MIN_TEMPERATURE_-4,MIN_TEMPERATURE_-3,MIN_TEMPERATURE_-2,MIN_TEMPERATURE_-1,MAX_TEMPERATURE_-4,MAX_TEMPERATURE_-3,MAX_TEMPERATURE_-2,MAX_TEMPERATURE_-1


In [51]:
# The list-valued columns have been unpacked into per-step columns; drop them.
joined_df = joined_df.drop(columns_to_split, axis=1)

In [31]:
# Debugging aid: inspect a single event after dropping the list columns.
joined_df[joined_df.event_index == 5265]

Unnamed: 0,event_index,KEY,KM,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,KM_START,DATETIME_UTC_-4,DATETIME_UTC_-3,DATETIME_UTC_-2,DATETIME_UTC_-1,DATETIME_UTC_y_0,DATETIME_UTC_y_1,DATETIME_UTC_y_2,DATETIME_UTC_y_3,SPEED_AVG_-4,SPEED_AVG_-3,SPEED_AVG_-2,SPEED_AVG_-1,SPEED_AVG_Y_0,SPEED_AVG_Y_1,SPEED_AVG_Y_2,SPEED_AVG_Y_3,SPEED_SD_-4,SPEED_SD_-3,SPEED_SD_-2,SPEED_SD_-1,SPEED_MAX_-4,SPEED_MAX_-3,SPEED_MAX_-2,SPEED_MAX_-1,SPEED_MIN_-4,SPEED_MIN_-3,SPEED_MIN_-2,SPEED_MIN_-1,N_VEHICLES_-4,N_VEHICLES_-3,N_VEHICLES_-2,N_VEHICLES_-1,WEATHER_-4,WEATHER_-3,WEATHER_-2,WEATHER_-1,DISTANCE_-4,DISTANCE_-3,DISTANCE_-2,DISTANCE_-1,TEMPERATURE_-4,TEMPERATURE_-3,TEMPERATURE_-2,TEMPERATURE_-1,MIN_TEMPERATURE_-4,MIN_TEMPERATURE_-3,MIN_TEMPERATURE_-2,MIN_TEMPERATURE_-1,MAX_TEMPERATURE_-4,MAX_TEMPERATURE_-3,MAX_TEMPERATURE_-2,MAX_TEMPERATURE_-1
986,5265,537,305,0,2,0,6.0,extended_accident,8,2019-01-12 15:00:00,2019-01-12 15:15:00,2019-01-12 15:30:00,2019-01-12 15:45:00,2019-01-12 16:00:00,2019-01-12 16:15:00,2019-01-12 16:30:00,2019-01-12 16:45:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [52]:
# Cast the identifier and code columns to integers.
# NOTE(review): an integer cast fails on missing values, so these columns are
# assumed to be fully populated at this point - confirm.
joined_df = joined_df.astype(dict.fromkeys(
    ['EMERGENCY_LANE', 'LANES', 'ROAD_TYPE', 'EVENT_DETAIL', 'KEY', 'KM', 'event_index'],
    'int',
))

In [53]:
# Preview the frame after the integer casts.
joined_df.head(2)

Unnamed: 0,event_index,KEY,KM,event_duration,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,KM_START,DATETIME_UTC_-4,DATETIME_UTC_-3,DATETIME_UTC_-2,DATETIME_UTC_-1,DATETIME_UTC_y_0,DATETIME_UTC_y_1,DATETIME_UTC_y_2,DATETIME_UTC_y_3,SPEED_AVG_-4,SPEED_AVG_-3,SPEED_AVG_-2,SPEED_AVG_-1,SPEED_AVG_Y_0,SPEED_AVG_Y_1,SPEED_AVG_Y_2,SPEED_AVG_Y_3,SPEED_SD_-4,SPEED_SD_-3,SPEED_SD_-2,SPEED_SD_-1,SPEED_MAX_-4,SPEED_MAX_-3,SPEED_MAX_-2,SPEED_MAX_-1,SPEED_MIN_-4,SPEED_MIN_-3,SPEED_MIN_-2,SPEED_MIN_-1,N_VEHICLES_-4,N_VEHICLES_-3,N_VEHICLES_-2,N_VEHICLES_-1,WEATHER_-4,WEATHER_-3,WEATHER_-2,WEATHER_-1,DISTANCE_-4,DISTANCE_-3,DISTANCE_-2,DISTANCE_-1,TEMPERATURE_-4,TEMPERATURE_-3,TEMPERATURE_-2,TEMPERATURE_-1,MIN_TEMPERATURE_-4,MIN_TEMPERATURE_-3,MIN_TEMPERATURE_-2,MIN_TEMPERATURE_-1,MAX_TEMPERATURE_-4,MAX_TEMPERATURE_-3,MAX_TEMPERATURE_-2,MAX_TEMPERATURE_-1
0,0,146,997,3,0,1,0,15,Veicolo_in_avaria,8,2019-01-01 06:45:00,2019-01-01 07:00:00,2019-01-01 07:15:00,2019-01-01 07:30:00,2019-01-01 07:45:00,2019-01-01 08:00:00,2019-01-01 08:15:00,2019-01-01 08:30:00,176.803273,188.395875,196.9915,197.611733,182.63175,189.430462,183.5621,185.821077,22.479033,25.934938,32.09192,24.749633,210.34,252.408,276.678,244.318,144.002,144.002,156.946,160.182,11.0,16.0,16.0,15.0,,,,,,,,,,,,,,,,,,,,
1,7,523,965,3,0,2,0,2,Ostacolo_in_carreggiata,8,2019-01-04 11:15:00,2019-01-04 11:30:00,2019-01-04 11:45:00,2019-01-04 12:00:00,2019-01-04 12:15:00,2019-01-04 12:30:00,2019-01-04 12:45:00,2019-01-04 13:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [33]:
# Debugging aid: inspect a single event after the integer casts.
joined_df[joined_df.event_index == 5265]

Unnamed: 0,event_index,KEY,KM,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,KM_START,DATETIME_UTC_-4,DATETIME_UTC_-3,DATETIME_UTC_-2,DATETIME_UTC_-1,DATETIME_UTC_y_0,DATETIME_UTC_y_1,DATETIME_UTC_y_2,DATETIME_UTC_y_3,SPEED_AVG_-4,SPEED_AVG_-3,SPEED_AVG_-2,SPEED_AVG_-1,SPEED_AVG_Y_0,SPEED_AVG_Y_1,SPEED_AVG_Y_2,SPEED_AVG_Y_3,SPEED_SD_-4,SPEED_SD_-3,SPEED_SD_-2,SPEED_SD_-1,SPEED_MAX_-4,SPEED_MAX_-3,SPEED_MAX_-2,SPEED_MAX_-1,SPEED_MIN_-4,SPEED_MIN_-3,SPEED_MIN_-2,SPEED_MIN_-1,N_VEHICLES_-4,N_VEHICLES_-3,N_VEHICLES_-2,N_VEHICLES_-1,WEATHER_-4,WEATHER_-3,WEATHER_-2,WEATHER_-1,DISTANCE_-4,DISTANCE_-3,DISTANCE_-2,DISTANCE_-1,TEMPERATURE_-4,TEMPERATURE_-3,TEMPERATURE_-2,TEMPERATURE_-1,MIN_TEMPERATURE_-4,MIN_TEMPERATURE_-3,MIN_TEMPERATURE_-2,MIN_TEMPERATURE_-1,MAX_TEMPERATURE_-4,MAX_TEMPERATURE_-3,MAX_TEMPERATURE_-2,MAX_TEMPERATURE_-1
986,5265,537,305,0,2,0,6,extended_accident,8,2019-01-12 15:00:00,2019-01-12 15:15:00,2019-01-12 15:30:00,2019-01-12 15:45:00,2019-01-12 16:00:00,2019-01-12 16:15:00,2019-01-12 16:30:00,2019-01-12 16:45:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [34]:
# Resolve the location of the base dataset for the configured mode/split.
# This cell only computes the path; the statements that would write the file
# are in a later cell and are currently commented out.
filepath = data.get_path_preprocessed(mode, t, 'base_dataset.csv.gz')

caching resources/dataset/preprocessed/full/test/base_dataset.csv.gz


In [59]:
# Preview the final wide frame (one row per event/sensor pair).
joined_df.head()

Unnamed: 0,event_index,KEY,KM,event_duration,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,KM_START,DATETIME_UTC_-4,DATETIME_UTC_-3,DATETIME_UTC_-2,DATETIME_UTC_-1,DATETIME_UTC_y_0,DATETIME_UTC_y_1,DATETIME_UTC_y_2,DATETIME_UTC_y_3,SPEED_AVG_-4,SPEED_AVG_-3,SPEED_AVG_-2,SPEED_AVG_-1,SPEED_AVG_Y_0,SPEED_AVG_Y_1,SPEED_AVG_Y_2,SPEED_AVG_Y_3,SPEED_SD_-4,SPEED_SD_-3,SPEED_SD_-2,SPEED_SD_-1,SPEED_MAX_-4,SPEED_MAX_-3,SPEED_MAX_-2,SPEED_MAX_-1,SPEED_MIN_-4,SPEED_MIN_-3,SPEED_MIN_-2,SPEED_MIN_-1,N_VEHICLES_-4,N_VEHICLES_-3,N_VEHICLES_-2,N_VEHICLES_-1,WEATHER_-4,WEATHER_-3,WEATHER_-2,WEATHER_-1,DISTANCE_-4,DISTANCE_-3,DISTANCE_-2,DISTANCE_-1,TEMPERATURE_-4,TEMPERATURE_-3,TEMPERATURE_-2,TEMPERATURE_-1,MIN_TEMPERATURE_-4,MIN_TEMPERATURE_-3,MIN_TEMPERATURE_-2,MIN_TEMPERATURE_-1,MAX_TEMPERATURE_-4,MAX_TEMPERATURE_-3,MAX_TEMPERATURE_-2,MAX_TEMPERATURE_-1
0,0,146,997,3,0,1,0,15,Veicolo_in_avaria,8,2019-01-01 06:45:00,2019-01-01 07:00:00,2019-01-01 07:15:00,2019-01-01 07:30:00,2019-01-01 07:45:00,2019-01-01 08:00:00,2019-01-01 08:15:00,2019-01-01 08:30:00,176.803273,188.395875,196.9915,197.611733,182.63175,189.430462,183.5621,185.821077,22.479033,25.934938,32.09192,24.749633,210.34,252.408,276.678,244.318,144.002,144.002,156.946,160.182,11.0,16.0,16.0,15.0,,,,,,,,,,,,,,,,,,,,
1,7,523,965,3,0,2,0,2,Ostacolo_in_carreggiata,8,2019-01-04 11:15:00,2019-01-04 11:30:00,2019-01-04 11:45:00,2019-01-04 12:00:00,2019-01-04 12:15:00,2019-01-04 12:30:00,2019-01-04 12:45:00,2019-01-04 13:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,8,146,997,5,0,1,0,22,Veicolo_in_avaria,8,2019-01-04 11:15:00,2019-01-04 11:30:00,2019-01-04 11:45:00,2019-01-04 12:00:00,2019-01-04 12:15:00,2019-01-04 12:30:00,2019-01-04 12:45:00,2019-01-04 13:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,11,525,278,5,0,2,0,2,Ostacolo_in_carreggiata,8,2019-01-05 06:45:00,2019-01-05 07:00:00,2019-01-05 07:15:00,2019-01-05 07:30:00,2019-01-05 07:45:00,2019-01-05 08:00:00,2019-01-05 08:15:00,2019-01-05 08:30:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,25,525,319,33,0,2,0,8,Segnaletica_orizzontale,8,2019-01-07 07:30:00,2019-01-07 07:45:00,2019-01-07 08:00:00,2019-01-07 08:15:00,2019-01-07 08:30:00,2019-01-07 08:45:00,2019-01-07 09:00:00,2019-01-07 09:15:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [56]:
# Work on a copy so that the masking experiment below leaves joined_df untouched.
aaa = joined_df.copy()

In [63]:
# Blank the target speeds that fall after the end of short events: an event
# lasting d steps keeps SPEED_AVG_Y_0 .. SPEED_AVG_Y_{d-1} only.
# The affected columns are derived from steps_after_event instead of three
# copy-pasted statements, so the masking follows the configured window size.
for duration in range(1, steps_after_event + 1):
    late_targets = ['SPEED_AVG_Y_{}'.format(step) for step in range(duration, steps_after_event + 1)]
    aaa.loc[aaa['event_duration'] == duration, late_targets] = np.nan

In [64]:
# Preview the masked copy.
aaa.head()

Unnamed: 0,event_index,KEY,KM,event_duration,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,KM_START,DATETIME_UTC_-4,DATETIME_UTC_-3,DATETIME_UTC_-2,DATETIME_UTC_-1,DATETIME_UTC_y_0,DATETIME_UTC_y_1,DATETIME_UTC_y_2,DATETIME_UTC_y_3,SPEED_AVG_-4,SPEED_AVG_-3,SPEED_AVG_-2,SPEED_AVG_-1,SPEED_AVG_Y_0,SPEED_AVG_Y_1,SPEED_AVG_Y_2,SPEED_AVG_Y_3,SPEED_SD_-4,SPEED_SD_-3,SPEED_SD_-2,SPEED_SD_-1,SPEED_MAX_-4,SPEED_MAX_-3,SPEED_MAX_-2,SPEED_MAX_-1,SPEED_MIN_-4,SPEED_MIN_-3,SPEED_MIN_-2,SPEED_MIN_-1,N_VEHICLES_-4,N_VEHICLES_-3,N_VEHICLES_-2,N_VEHICLES_-1,WEATHER_-4,WEATHER_-3,WEATHER_-2,WEATHER_-1,DISTANCE_-4,DISTANCE_-3,DISTANCE_-2,DISTANCE_-1,TEMPERATURE_-4,TEMPERATURE_-3,TEMPERATURE_-2,TEMPERATURE_-1,MIN_TEMPERATURE_-4,MIN_TEMPERATURE_-3,MIN_TEMPERATURE_-2,MIN_TEMPERATURE_-1,MAX_TEMPERATURE_-4,MAX_TEMPERATURE_-3,MAX_TEMPERATURE_-2,MAX_TEMPERATURE_-1
0,0,146,997,3,0,1,0,15,Veicolo_in_avaria,8,2019-01-01 06:45:00,2019-01-01 07:00:00,2019-01-01 07:15:00,2019-01-01 07:30:00,2019-01-01 07:45:00,2019-01-01 08:00:00,2019-01-01 08:15:00,2019-01-01 08:30:00,176.803273,188.395875,196.9915,197.611733,182.63175,189.430462,183.5621,,22.479033,25.934938,32.09192,24.749633,210.34,252.408,276.678,244.318,144.002,144.002,156.946,160.182,11.0,16.0,16.0,15.0,,,,,,,,,,,,,,,,,,,,
1,7,523,965,3,0,2,0,2,Ostacolo_in_carreggiata,8,2019-01-04 11:15:00,2019-01-04 11:30:00,2019-01-04 11:45:00,2019-01-04 12:00:00,2019-01-04 12:15:00,2019-01-04 12:30:00,2019-01-04 12:45:00,2019-01-04 13:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,8,146,997,5,0,1,0,22,Veicolo_in_avaria,8,2019-01-04 11:15:00,2019-01-04 11:30:00,2019-01-04 11:45:00,2019-01-04 12:00:00,2019-01-04 12:15:00,2019-01-04 12:30:00,2019-01-04 12:45:00,2019-01-04 13:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,11,525,278,5,0,2,0,2,Ostacolo_in_carreggiata,8,2019-01-05 06:45:00,2019-01-05 07:00:00,2019-01-05 07:15:00,2019-01-05 07:30:00,2019-01-05 07:45:00,2019-01-05 08:00:00,2019-01-05 08:15:00,2019-01-05 08:30:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,25,525,319,33,0,2,0,8,Segnaletica_orizzontale,8,2019-01-07 07:30:00,2019-01-07 07:45:00,2019-01-07 08:00:00,2019-01-07 08:15:00,2019-01-07 08:30:00,2019-01-07 08:45:00,2019-01-07 09:00:00,2019-01-07 09:15:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [35]:
# NOTE: persisting is currently disabled - the three statements below are
# commented out, so running this cell writes nothing.
#print('Saving base dataframe to {}'.format(filepath))
#joined_df.to_csv(filepath, index=False, compression='gzip')
#print('Done')

Saving base dataframe to resources/dataset/preprocessed/full/test/base_dataset.csv.gz
Done


In [52]:
# Reload the stored base dataset for the configured mode/split. The arguments
# were hard-coded to 'full'/'test', duplicating `mode` and `t`; using the
# configuration keeps this check aligned with the dataset built above.
dataset = data.base_dataset(mode, t)
dataset.head(3)

caching resources/dataset/preprocessed/full/test/base_dataset.csv.gz


Unnamed: 0,event_index,KEY,KM,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,DATETIME_UTC_-4,DATETIME_UTC_-3,DATETIME_UTC_-2,DATETIME_UTC_-1,DATETIME_UTC_y_0,DATETIME_UTC_y_1,DATETIME_UTC_y_2,DATETIME_UTC_y_3,SPEED_AVG_-4,SPEED_AVG_-3,SPEED_AVG_-2,SPEED_AVG_-1,SPEED_AVG_Y_0,SPEED_AVG_Y_1,SPEED_AVG_Y_2,SPEED_AVG_Y_3,SPEED_SD_-4,SPEED_SD_-3,SPEED_SD_-2,SPEED_SD_-1,SPEED_MAX_-4,SPEED_MAX_-3,SPEED_MAX_-2,SPEED_MAX_-1,SPEED_MIN_-4,SPEED_MIN_-3,SPEED_MIN_-2,SPEED_MIN_-1,N_VEHICLES_-4,N_VEHICLES_-3,N_VEHICLES_-2,N_VEHICLES_-1,WEATHER_-4,WEATHER_-3,WEATHER_-2,WEATHER_-1,DISTANCE_-4,DISTANCE_-3,DISTANCE_-2,DISTANCE_-1,TEMPERATURE_-4,TEMPERATURE_-3,TEMPERATURE_-2,TEMPERATURE_-1,MIN_TEMPERATURE_-4,MIN_TEMPERATURE_-3,MIN_TEMPERATURE_-2,MIN_TEMPERATURE_-1,MAX_TEMPERATURE_-4,MAX_TEMPERATURE_-3,MAX_TEMPERATURE_-2,MAX_TEMPERATURE_-1
0,0,146,997,0,1,0,15,Veicolo_in_avaria,2019-01-01 08:30:00,2019-01-01 08:15:00,2019-01-01 08:00:00,2019-01-01 07:45:00,2019-01-01 07:30:00,2019-01-01 07:15:00,2019-01-01 07:00:00,2019-01-01 06:45:00,118.481162,120.078239,,,146.585878,151.662442,153.059376,154.153645,13.991577,17.871658,,,142.384,163.418,,,67.956,69.574,,,260.0,243.0,,,,,,,,,,,,,,,,,,,,,,
1,7,523,965,0,2,0,2,Ostacolo_in_carreggiata,2019-01-04 12:45:00,2019-01-04 12:30:00,2019-01-04 12:15:00,2019-01-04 12:00:00,2019-01-04 11:45:00,2019-01-04 11:30:00,2019-01-04 11:15:00,2019-01-04 13:00:00,238.691959,,,232.734779,230.800631,228.083461,230.829078,236.901522,31.252007,,,39.212064,354.342,,,326.836,171.508,,,66.338,197.0,,,195.0,,,,,,,,,,,,,,,,,,,,
2,8,146,997,0,1,0,22,Veicolo_in_avaria,2019-01-04 12:45:00,2019-01-04 12:30:00,2019-01-04 12:15:00,2019-01-04 12:00:00,2019-01-04 11:45:00,2019-01-04 11:30:00,2019-01-04 11:15:00,2019-01-04 13:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [53]:
# Debugging aid: inspect a single event in the reloaded dataset.
dataset[dataset.event_index == 10296]

Unnamed: 0,event_index,KEY,KM,EMERGENCY_LANE,LANES,ROAD_TYPE,EVENT_DETAIL,EVENT_TYPE,DATETIME_UTC_-4,DATETIME_UTC_-3,DATETIME_UTC_-2,DATETIME_UTC_-1,DATETIME_UTC_y_0,DATETIME_UTC_y_1,DATETIME_UTC_y_2,DATETIME_UTC_y_3,SPEED_AVG_-4,SPEED_AVG_-3,SPEED_AVG_-2,SPEED_AVG_-1,SPEED_AVG_Y_0,SPEED_AVG_Y_1,SPEED_AVG_Y_2,SPEED_AVG_Y_3,SPEED_SD_-4,SPEED_SD_-3,SPEED_SD_-2,SPEED_SD_-1,SPEED_MAX_-4,SPEED_MAX_-3,SPEED_MAX_-2,SPEED_MAX_-1,SPEED_MIN_-4,SPEED_MIN_-3,SPEED_MIN_-2,SPEED_MIN_-1,N_VEHICLES_-4,N_VEHICLES_-3,N_VEHICLES_-2,N_VEHICLES_-1,WEATHER_-4,WEATHER_-3,WEATHER_-2,WEATHER_-1,DISTANCE_-4,DISTANCE_-3,DISTANCE_-2,DISTANCE_-1,TEMPERATURE_-4,TEMPERATURE_-3,TEMPERATURE_-2,TEMPERATURE_-1,MIN_TEMPERATURE_-4,MIN_TEMPERATURE_-3,MIN_TEMPERATURE_-2,MIN_TEMPERATURE_-1,MAX_TEMPERATURE_-4,MAX_TEMPERATURE_-3,MAX_TEMPERATURE_-2,MAX_TEMPERATURE_-1
2913,10296,102,837,0,1,0,19,Manutenzione_opere_in_verde,2019-01-05 08:00:00,2019-01-05 08:45:00,2019-01-05 09:00:00,2019-01-05 07:15:00,2019-01-05 07:30:00,2019-01-05 07:45:00,2019-01-05 08:30:00,2019-01-05 08:15:00,188.828897,,,191.275739,196.68331,189.601976,,,21.971013,,,20.643948,239.464,,,255.644,108.406,,,158.564,78.0,,,69.0,,,,,,,,,,,,,,,,,,,,
