# Library Imports

### giotto-time

In [14]:
# feature creation
from giottotime.feature_creation import CalendarFeature
from giottotime.feature_creation import DetrendedFeature
from giottotime.feature_creation import PeriodicSeasonalFeature
from giottotime.feature_creation import ShiftFeature, MovingAverageFeature, ExogenousFeature
from giottotime.feature_creation import FeatureCreation


# causality testing
from giottotime.causality_tests.shifted_pearson_correlation import ShiftedPearsonCorrelation
from giottotime.causality_tests.shifted_linear_coefficient import ShiftedLinearCoefficient

#Models
from giottotime.models.time_series_models import GAR
from giottotime.model_selection import FeatureSplitter

### Other imports

In [70]:
# Data handling
import pandas as pd

# Scikit-learn
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Import Data

In [58]:
data = pd.read_csv('data/raw/df_accidents.csv')
data.set_index('date', inplace=True)

In [31]:
time_series = data[['number of accidents', 'date']].copy()
time_series['date'] = [pd.to_datetime(d) for d in time_series['date'].values]
time_series.set_index('date', inplace=True) 
time_series.head()

Unnamed: 0_level_0,number of accidents
date,Unnamed: 1_level_1
2014-01-01,608
2014-01-02,1702
2014-01-03,1371
2014-01-04,903
2014-01-05,775


# Detrending

# Causality Testing

In [10]:
cause = ShiftedLinearCoefficient(target_col="number of accidents")
cause.fit(time_series[['number of accidents', 'Temperature', 'windspeed']])
cause.max_corrs_

y,Temperature,number of accidents,windspeed
x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Temperature,0.903896,3.997269,0.0226
number of accidents,-0.002424,0.316543,0.00071
windspeed,-0.217181,5.418223,0.413698


In [12]:
cause.best_shifts_

y,Temperature,number of accidents,windspeed
x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Temperature,1,9,4
number of accidents,9,7,1
windspeed,5,1,1


# Feature Creation

In [65]:
# List of all features
temperature_feature = ExogenousFeature(data['Temperature'], output_name='temperature')
windspeed_feature = ExogenousFeature(data['windspeed'], output_name='windspeed')
weekly_period = PeriodicSeasonalFeature(start_date=time_series.index.min(), output_name='weekly')

features_creation = FeatureCreation(
    horizon=20,
    time_series_features = [
        temperature_feature, 
        windspeed_feature, 
        weekly_period
    ]
)

X, y = features_creation.fit_transform(time_series)

Float64Index([                  0.0, 0.0027397260273972603,
               0.005479452054794521,   0.00821917808219178,
               0.010958904109589041,    0.0136986301369863,
                0.01643835616438356,  0.019178082191780823,
               0.021917808219178082,  0.024657534246575342,
              ...
                 2.9753424657534246,     2.978082191780822,
                 2.9808219178082194,    2.9835616438356163,
                 2.9863013698630136,     2.989041095890411,
                 2.9917808219178084,    2.9945205479452053,
                 2.9972602739726026,                   3.0],
             dtype='float64', name='date', length=1096)


# Prediction

In [69]:
train_test_splitter = FeatureSplitter()
time_series_model = GAR(base_model=RandomForestRegressor())

X_train, y_train, X_test, y_test = train_test_splitter.transform(X, y)

time_series_model.fit(X_train, y_train)
predictions = time_series_model.predict(X_test)
predictions

Unnamed: 0_level_0,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,y_10,y_11,y_12,y_13,y_14,y_15,y_16,y_17,y_18,y_19,y_20
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2016-12-12,880.55,863.1,984.57,1014.46,924.57,883.09,839.99,852.06,997.39,881.86,851.24,726.76,639.09,696.65,758.67,774.48,735.2,886.83,789.64,742.64
2016-12-13,911.35,898.19,947.04,1016.11,927.21,785.82,811.4,850.81,927.05,774.98,610.08,748.46,657.64,700.39,770.56,806.71,746.45,921.88,745.45,877.95
2016-12-14,908.87,921.65,960.8,943.19,820.11,801.38,838.13,851.63,916.21,613.18,658.54,707.33,699.69,736.9,745.24,788.54,738.34,908.73,971.38,777.15
2016-12-15,1122.6,1054.88,983.02,970.72,912.15,855.88,920.46,816.23,785.95,701.24,712.36,707.33,693.69,712.86,757.67,709.08,660.65,855.88,783.67,985.34
2016-12-16,889.97,935.62,1250.29,943.88,844.77,898.61,843.37,789.24,780.12,727.72,714.62,787.55,707.54,733.8,734.44,755.75,937.19,1042.45,877.91,914.21
2016-12-17,1053.15,988.96,963.39,965.2,898.82,942.16,644.97,717.5,814.9,786.7,690.39,714.42,749.09,782.87,798.36,837.09,871.5,980.42,973.96,1038.3
2016-12-18,898.17,841.62,895.78,809.02,771.39,667.33,689.47,808.37,791.88,738.26,694.71,686.87,669.1,792.12,956.2,770.37,829.25,828.87,806.54,926.69
2016-12-19,950.35,837.18,1037.11,901.22,537.08,788.06,669.42,758.52,760.54,757.42,773.82,712.99,754.77,917.8,906.8,764.09,798.18,856.48,958.86,981.35
2016-12-20,787.7,875.98,1272.83,709.48,652.11,1036.98,748.29,762.69,1035.35,826.98,699.14,984.29,989.77,880.1,895.92,983.62,980.34,1318.64,1082.75,971.82
2016-12-21,860.87,881.1,675.29,685.13,661.23,679.57,661.16,721.51,804.45,781.85,666.43,864.74,969.97,930.52,878.14,969.46,1093.04,1014.43,1167.92,1016.03


In [72]:
# Score (how to do this?)
# mean_absolute_error(predictions, y_test)

# Comparison