# Using Classical ML Models for Time Series Predictions

## Importing useful libraries

In [42]:
import pandas as pd
import pandas_profiling
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import holidays
fr_holidays = holidays.France()
import math as mt

import tensorflow as tf
import keras
from keras.layers import LSTM, Dense, Flatten, Dropout, Activation, SimpleRNN
from keras.models import Sequential

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.base import BaseEstimator
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import RidgeCV
import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn import ensemble
import xgboost as xgb
from xgboost.sklearn import XGBRegressor
from sklearn.preprocessing import StandardScaler

import datetime

## Loading Data

In [2]:
# Load the training features, the training targets and the test features
# from the CSV files shipped in the provided_data_and_metric directory.
X_train = pd.read_csv('provided_data_and_metric/X_train_6GWGSxz.csv')
Y_train = pd.read_csv('provided_data_and_metric/y_train_2G60rOL.csv')
X_test = pd.read_csv('provided_data_and_metric/X_test_c2uBt2s.csv')

In [3]:
# Preview the first rows of the raw training features.
X_train.head()

Unnamed: 0,time_step,consumption,visibility,temperature,humidity,humidex,windchill,wind,pressure,Unnamed: 9
0,2013-03-17T00:01:00.0,550.4,,,,,,,,
1,2013-03-17T00:02:00.0,548.6,,,,,,,,
2,2013-03-17T00:03:00.0,549.3,,,,,,,,
3,2013-03-17T00:04:00.0,549.3667,,,,,,,,
4,2013-03-17T00:05:00.0,548.8909,,,,,,,,


In [7]:
# Preview the first rows of the raw training targets (one column per appliance).
Y_train.head()

Unnamed: 0,time_step,washing_machine,fridge_freezer,TV,kettle
0,2013-03-17T00:01:00.0,0.0,79.2,7.0,0.0
1,2013-03-17T00:02:00.0,0.0,78.0,7.0,0.0
2,2013-03-17T00:03:00.0,0.0,76.9,7.0,0.0
3,2013-03-17T00:04:00.0,0.0,76.1111,7.0,0.0
4,2013-03-17T00:05:00.0,0.0,75.2727,7.0,0.0


## Data Preprocessing and Feature Engineering

In [4]:
class DataImputer(BaseEstimator, TransformerMixin):
    """Drop the ``'Unnamed: 9'`` column and fill missing feature values.

    The transformer is stateless: ``fit`` learns nothing and ``transform``
    depends only on the frame it receives.
    """

    def __init__(self):
        # Unused placeholder, kept so the attribute still exists for callers.
        self.X = None

    def fit(self, X, y=None):
        """No-op fit that returns the transformer itself.

        scikit-learn chains ``fit(...).transform(...)`` in ``fit_transform``
        and ``Pipeline.fit``, so ``fit`` must return ``self`` rather than
        the input frame.
        """
        return self

    def transform(self, X, y=None):
        """Return a new frame with gaps filled; the input is not modified.

        Args:
            X: pandas DataFrame of features.
            y: ignored, present for scikit-learn API compatibility.

        Returns:
            A DataFrame without the ``'Unnamed: 9'`` column (if it was
            present) whose missing values were linearly interpolated and
            then back-filled.
        """
        # Non-inplace drop keeps the caller's DataFrame untouched;
        # errors='ignore' covers frames where the column is already absent.
        X = X.drop(columns='Unnamed: 9', errors='ignore')
        # Interpolation works forward, so leading NaNs survive it; the
        # back-fill takes care of those.
        X = X.interpolate(method='linear').bfill()
        return X
class YImputer(BaseEstimator, TransformerMixin):
    """Fill missing target values by interpolation followed by back-fill.

    The transformer is stateless: ``fit`` learns nothing.
    """

    def __init__(self):
        # Unused placeholder, kept so the attribute still exists for callers.
        self.X = None

    def fit(self, X, y=None):
        """No-op fit that returns the transformer itself.

        scikit-learn chains ``fit(...).transform(...)`` in ``fit_transform``
        and ``Pipeline.fit``, so ``fit`` must return ``self`` rather than
        the input frame.
        """
        return self

    def transform(self, X, y=None):
        """Return a new frame whose missing values have been filled.

        Args:
            X: pandas DataFrame of targets.
            y: ignored, present for scikit-learn API compatibility.

        Returns:
            A DataFrame with gaps linearly interpolated; leading NaNs that
            interpolation cannot reach are back-filled.
        """
        # ``.bfill()`` replaces the deprecated ``fillna(method='bfill')``.
        X = X.interpolate(method='linear').bfill()
        return X
class DataAugmenter(BaseEstimator, TransformerMixin):
    """Derive calendar features from ``time_step`` and drop unused columns.

    The transformer is stateless: ``fit`` learns nothing.
    """

    def __init__(self):
        # Unused placeholder, kept so the attribute still exists for callers.
        self.X = None

    def fit(self, X, y=None):
        """No-op fit that returns the transformer itself.

        scikit-learn chains ``fit(...).transform(...)`` in ``fit_transform``
        and ``Pipeline.fit``, so ``fit`` must return ``self`` rather than
        the input frame.
        """
        return self

    def transform(self, X, y=None):
        """Return a new frame with calendar features; the input is not modified.

        Adds ``weekday``, ``week``, ``month``, ``hour``, ``minute``,
        ``is_weekend`` and ``is_holidays`` (the last two as 0.0/1.0 floats),
        then drops ``time_step`` and the weather columns that are not used
        as features.

        Args:
            X: pandas DataFrame containing a ``time_step`` column and the
                weather columns listed in the final ``drop``.
            y: ignored, present for scikit-learn API compatibility.

        Raises:
            KeyError: if one of the columns to drop is missing.
        """
        # Work on a copy so the caller's frame keeps its original columns.
        X = X.copy()
        X["time_step"] = pd.to_datetime(X["time_step"])
        stamps = X["time_step"].dt

        X["weekday"] = stamps.dayofweek
        # ``Series.dt.week`` was removed in pandas 2.0; isocalendar().week is
        # its replacement. Cast back to a plain integer column.
        # NOTE(review): assumes no NaT in time_step, otherwise the cast fails.
        X["week"] = stamps.isocalendar().week.astype("int64")
        X["month"] = stamps.month
        X["hour"] = stamps.hour
        X["minute"] = stamps.minute
        X["is_weekend"] = (X["weekday"] > 4).astype(float)

        # The holidays calendar fills itself lazily, year by year, when a
        # date is looked up. ``isin(fr_holidays)`` only iterates the keys
        # already generated, so on a fresh calendar it matches nothing.
        # Test each distinct date with ``in`` to trigger the lookup.
        days = stamps.date
        holiday_days = {
            day for day in days.unique()
            if pd.notna(day) and day in fr_holidays
        }
        X["is_holidays"] = days.isin(holiday_days).astype(float)

        X = X.drop(
            columns=["time_step", "visibility", "humidity", "humidex",
                     "windchill", "wind", "pressure"],
        )
        return X

In [5]:
# Feature pipeline: impute missing values, then derive calendar features.
p1 = Pipeline(steps=[('1', DataImputer()), ('2', DataAugmenter())])
# Target pipeline: imputation only.
p2 = Pipeline(steps=[('1', YImputer())])

In [6]:
# Run the feature pipeline on both splits. The pipeline is never fitted;
# its steps learn nothing in ``fit``, so ``transform`` is called directly.
X = p1.transform(X_train)
X_t = p1.transform(X_test)
# Preview the engineered training features.
X.head()

Unnamed: 0,consumption,temperature,weekday,week,month,hour,minute,is_weekend,is_holidays
0,550.4,8.9,6,11,3,0,1,1.0,0.0
1,548.6,8.9,6,11,3,0,2,1.0,0.0
2,549.3,8.9,6,11,3,0,3,1.0,0.0
3,549.3667,8.9,6,11,3,0,4,1.0,0.0
4,548.8909,8.9,6,11,3,0,5,1.0,0.0


In [8]:
# Impute missing target values, then preview the result.
Y = p2.transform(Y_train)
Y.head()

Unnamed: 0,time_step,washing_machine,fridge_freezer,TV,kettle
0,2013-03-17T00:01:00.0,0.0,79.2,7.0,0.0
1,2013-03-17T00:02:00.0,0.0,78.0,7.0,0.0
2,2013-03-17T00:03:00.0,0.0,76.9,7.0,0.0
3,2013-03-17T00:04:00.0,0.0,76.1111,7.0,0.0
4,2013-03-17T00:05:00.0,0.0,75.2727,7.0,0.0


In [63]:
# Keep the timestamp column so it can be attached to the predictions later.
time_step = Y['time_step']
type(time_step)

pandas.core.series.Series

In [8]:
#X["TV"] = Y["TV"]
#X["washing_machine"] = Y["washing_machine"]
#X["fridge_freezer"] = Y["fridge_freezer"]
#X["kettle"] = Y["kettle"]

## Build a regressor for each of the targets separately

In [14]:
# One target series per appliance, in the order the regressors are numbered.
y1, y2, y3, y4 = (
    Y[appliance]
    for appliance in ("TV", "kettle", "washing_machine", "fridge_freezer")
)

In [47]:
class Regressor():
    """Standard-scale the features, then fit a single-target regressor."""

    def __init__(self, reg=None):
        """Set up the scaler and the underlying model.

        Args:
            reg: optional estimator exposing ``fit(X, y)`` and
                ``predict(X)``. Defaults to ``RandomForestRegressor()``.
        """
        self.scaler = StandardScaler()
        # Models tried earlier (formerly kept here as commented-out code):
        #   GradientBoostingRegressor(learning_rate=0.1, max_depth=10,
        #       max_features=0.4, min_samples_leaf=9, min_samples_split=10)
        #   XGBRegressor(learning_rate=0.1, max_depth=7, min_child_weight=5,
        #       gamma=0.6, subsample=0.6, n_estimators=1000)
        #   DecisionTreeRegressor(max_depth=15)
        #   KernelRidge(alpha=1.0, kernel='linear')
        #   LinearRegression()
        # Pass any of them through ``reg`` instead of editing this class.
        self.reg = RandomForestRegressor() if reg is None else reg

    def fit(self, X, y):
        """Fit the scaler and the model on ``X``/``y``.

        Returns:
            ``self``, so calls can be chained as with scikit-learn estimators.
        """
        X_sc = self.scaler.fit_transform(X)
        self.reg.fit(X_sc, y)
        return self

    def predict(self, X):
        """Scale ``X`` with the already fitted scaler and return predictions."""
        X_sc = self.scaler.transform(X)
        return self.reg.predict(X_sc)

In [48]:
# Four independent regressors, one per appliance target.
regressor_1, regressor_2, regressor_3, regressor_4 = (
    Regressor() for _ in range(4)
)

In [49]:
# Fit the TV model.
regressor_1.fit(X, y1)



In [50]:
# Fit the kettle model.
regressor_2.fit(X,y2)



In [51]:
# Fit the washing-machine model.
regressor_3.fit(X,y3)



In [52]:
# Fit the fridge-freezer model.
regressor_4.fit(X,y4)



In [65]:
# Test-set predictions (disabled):
#pred_1 = regressor_1.predict(X_t)
#pred_2 = regressor_2.predict(X_t)
#pred_3 = regressor_3.predict(X_t)
#pred_4 = regressor_4.predict(X_t)

# NOTE: these predictions are made on X, the same training features the
# models were fitted on, so any score computed from them is in-sample.
pred_1 = regressor_1.predict(X)
pred_2 = regressor_2.predict(X)
pred_3 = regressor_3.predict(X)
pred_4 = regressor_4.predict(X)

In [69]:
# Gather the per-appliance predictions into one frame alongside time_step.
pred = pd.DataFrame(
    {
        'time_step': time_step,
        'TV': pred_1,
        'kettle': pred_2,
        'washing_machine': pred_3,
        'fridge_freezer': pred_4,
    }
)

In [70]:
# Preview the first predicted rows.
pred.head()

Unnamed: 0,time_step,TV,kettle,washing_machine,fridge_freezer
0,2013-03-17T00:01:00.0,7.01818,0.0,0.0,64.15455
1,2013-03-17T00:02:00.0,7.0,0.0,0.0,70.09
2,2013-03-17T00:03:00.0,7.0,0.0,0.0,77.04111
3,2013-03-17T00:04:00.0,7.0,0.0,0.0,76.18504
4,2013-03-17T00:05:00.0,7.0,0.0,0.0,66.63473


In [73]:
# Show the true targets for a visual comparison with the predictions.
Y.head()

Unnamed: 0,time_step,washing_machine,fridge_freezer,TV,kettle
0,2013-03-17T00:01:00.0,0.0,79.2,7.0,0.0
1,2013-03-17T00:02:00.0,0.0,78.0,7.0,0.0
2,2013-03-17T00:03:00.0,0.0,76.9,7.0,0.0
3,2013-03-17T00:04:00.0,0.0,76.1111,7.0,0.0
4,2013-03-17T00:05:00.0,0.0,75.2727,7.0,0.0


In [68]:
# Sanity check: predictions and targets should have the same number of rows.
print(len(pred))
print(len(Y))

417599
417599


In [71]:
def metric_nilm(dataframe_y_true, dataframe_y_pred):
    """Return the weighted average of the per-appliance RMSEs.

    For each appliance column the root-mean-square error between prediction
    and truth is computed positionally (on ``.values``, ignoring the index),
    multiplied by the appliance weight, summed, and divided by 74.86 (the
    sum of the four weights).

    Args:
        dataframe_y_true: DataFrame with columns ``washing_machine``,
            ``fridge_freezer``, ``TV`` and ``kettle`` holding true values.
        dataframe_y_pred: DataFrame with the same columns holding predictions.

    Returns:
        The score as a Python float; lower is better.

    Raises:
        KeyError: if a required column is missing.
        ZeroDivisionError: if the frames are empty.
    """
    appliance_weights = (
        ('washing_machine', 5.55),
        ('fridge_freezer', 49.79),
        ('TV', 14.57),
        ('kettle', 4.95),
    )
    score = 0.0
    for column, weight in appliance_weights:
        test = dataframe_y_true[column]
        pred = dataframe_y_pred[column]
        squared_error = (pred.values - test.values) ** 2
        # np.sum reduces in native code; the builtin sum walks the array
        # element by element in Python. float() keeps plain-float division
        # so an empty frame still raises ZeroDivisionError.
        score += mt.sqrt(float(np.sum(squared_error)) / len(test)) * weight
    score /= 74.86
    return score

In [72]:
# Score the predictions against the imputed targets. ``pred`` was computed
# from the training features, so this is an in-sample score.
metric_nilm(Y, pred)

8.869957226787033