In [None]:
# Imports
import pandas as pd
import matplotlib.pyplot as plt
import os
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
# Global seaborn style and font-size settings for this notebook.
sns.set(style="white", color_codes=True)
sns.set_context(rc={'font.family': 'sans', 'font.size': 24, 'axes.titlesize':24, 'axes.labelsize':24})
# Move the working directory two levels up.
# NOTE(review): presumably this points at the project root so that the `src`
# imports below resolve - confirm. The path is relative, so running this cell
# a second time in the same kernel climbs another two levels.
os.chdir('../..')

from src.utils import *
from src.utility import merge_speed_events
import src.data as data
import src.utility as utils
from src.preprocessing.other_features import avg_speed_for_roadtype_event

%matplotlib inline

In [None]:
# Load the inputs: the overall time window, the speed readings and the sensors.
print('Extracting min and max timestamps...')
# Window start = earliest timestamp of the 'train' readings,
# window end = latest timestamp of the 'test' readings.
min_datetime = data.speeds_original(mode='train')['DATETIME_UTC'].min()
max_datetime = data.speeds_original(mode='test')['DATETIME_UTC'].max()
# Speed readings loaded with the loader's default mode.
speeds = data.speeds_original()
# Keep a single row per (KEY, KM) pair in the sensor table.
sensors = data.sensors().drop_duplicates(subset=[KEY, KM])
print('Done')

In [None]:
# Time grid: one row per 15-minute step between min_datetime and max_datetime.
# The frame is built directly from the date range, so DATETIME is already a
# datetime column and the frame is a fresh object (not a column slice of another
# frame), which keeps the column assignment below free of copy warnings.
datetimes_df = pd.DataFrame(
    {DATETIME: pd.date_range(min_datetime, max_datetime, freq='15min')}
)
# Lookup key for the previous-day feature: the timestamp floored to the hour
# (vectorised via .dt instead of a per-row lambda) and shifted back by one day.
datetimes_df['DATETIME_HOUR'] = datetimes_df[DATETIME].dt.floor('1H') - pd.DateOffset(1)
print(datetimes_df.shape)
datetimes_df.head(4)

In [None]:
# Add the same constant 'MERGE' column to both frames; a later merge on it
# pairs every sensor row with every timestamp row.
for frame in (datetimes_df, sensors):
    frame['MERGE'] = 0
print(sensors.shape)
sensors.head(2)

In [None]:
# Preview the first four rows of the time grid.
datetimes_df.head(4)

In [None]:
# Skeleton = every (KEY, KM) sensor combined with every timestamp of the grid.
# Both frames hold the same constant in 'MERGE', so merging on it yields the
# full cross product. The result is indexed by DATETIME.
skeleton = (
    sensors[[KEY, KM, 'MERGE']]
    .merge(datetimes_df, on='MERGE')
    .assign(**{DATETIME: lambda merged: pd.to_datetime(merged[DATETIME])})
    .set_index(DATETIME)
)
print(skeleton.shape)
skeleton.head(3)

In [None]:
# Preview the first two rows of the speed readings before resampling them.
speeds.head(2)

In [None]:
# Hourly mean of the speed statistics for each (KEY, KM) sensor.
# The statistic columns are selected on the resampler *before* aggregating, so
# the mean is computed only for the columns that are kept. Any other column in
# `speeds` is never averaged, which avoids wasted work and keeps non-numeric
# columns from breaking the aggregation.
# NOTE(review): resampling requires DATETIME to be a datetime column - confirm
# that the loader already parses it.
speed_stat_cols = [SPEED_AVG, SPEED_MAX, SPEED_MIN, SPEED_SD, N_CARS]
resampled_speeds = (
    speeds
    .groupby([KEY, KM])
    .apply(lambda group: group.set_index(DATETIME).resample('H')[speed_stat_cols].mean())
    .reset_index()
)
resampled_speeds.head(5)

In [None]:
# Attach the hourly averages to the skeleton: each skeleton row is matched on
# its sensor (KEY, KM) and on DATETIME_HOUR (its own hour, one day earlier)
# against the hour bucket of the resampled speeds.
# NOTE(review): the merge uses the default join type, so skeleton rows without
# a matching hourly bucket are dropped - confirm this is intended.
skeleton_merge = skeleton.reset_index()
skeleton_keys = [KEY, KM, 'DATETIME_HOUR']
speed_keys = [KEY, KM, DATETIME]
df = skeleton_merge.merge(resampled_speeds, left_on=skeleton_keys, right_on=speed_keys)
print(df.shape)
df.head(2)

In [None]:
# The helper columns are currently left in the frame; the drop stays disabled:
# df = df.drop(['DATETIME_HOUR', 'DATETIME_UTC_y', 'MERGE'], axis=1)

# Restore the skeleton's timestamp column name and tag the merged statistics
# with a 'D-1' suffix.
previous_day_names = {
    'DATETIME_UTC_x': 'DATETIME_UTC',
    SPEED_AVG: 'SPEED_AVG_D-1',
    SPEED_MAX: 'SPEED_MAX_D-1',
    SPEED_MIN: 'SPEED_MIN_D-1',
    SPEED_SD: 'SPEED_SD_D-1',
    N_CARS: 'N_VEHICLES_D-1',
}
df = df.rename(columns=previous_day_names)
df.head(2)