In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#import pmdarima as pm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf,pacf
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm
import matplotlib.dates as dates
from sklearn.model_selection import RandomizedSearchCV

In [2]:
from sklearn import svm
from sklearn.model_selection import cross_validate
from sklearn import preprocessing as pre

In [3]:
daily_data_7 = pd.read_csv("daily_data_seasonality_7.csv",header=0, index_col=0, parse_dates=True, squeeze=True)
daily_data_14 = pd.read_csv("daily_data_seasonality_14.csv",header=0, index_col=0, parse_dates=True, squeeze=True)
daily_data_21 = pd.read_csv("daily_data_seasonality_21.csv",header=0, index_col=0, parse_dates=True, squeeze=True)
daily_data_28 = pd.read_csv("daily_data_seasonality_28.csv",header=0, index_col=0, parse_dates=True, squeeze=True)


In [4]:
# Define training and testing periods
train_start = '2016-01-01'
train_end = '2018-12-31'
test_start = '2019-01-01'
test_end = '2019-12-31'

In [5]:
# Split up into training and testing sets

# _7 days window
X_train_df_7 = daily_data_7[train_start:train_end]
del X_train_df_7['Consumption']


y_train_df_7 = daily_data_7['Consumption'][train_start:train_end]

X_test_df_7 = daily_data_7[test_start:test_end]
del X_test_df_7['Consumption']

y_test_df_7 = daily_data_7['Consumption'][test_start:test_end]

# _14 days window
X_train_df_14 = daily_data_14[train_start:train_end]
del X_train_df_14['Consumption']


y_train_df_14 = daily_data_14['Consumption'][train_start:train_end]

X_test_df_14 = daily_data_14[test_start:test_end]
del X_test_df_14['Consumption']

y_test_df_14 = daily_data_14['Consumption'][test_start:test_end]

# _21 days window
X_train_df_21 = daily_data_21[train_start:train_end]
del X_train_df_21['Consumption']


y_train_df_21 = daily_data_21['Consumption'][train_start:train_end]

X_test_df_21 = daily_data_21[test_start:test_end]
del X_test_df_21['Consumption']

y_test_df_21 = daily_data_21['Consumption'][test_start:test_end]

# _28 days window
X_train_df_28 = daily_data_28[train_start:train_end]
del X_train_df_28['Consumption']


y_train_df_28 = daily_data_28['Consumption'][train_start:train_end]

X_test_df_28 = daily_data_28[test_start:test_end]
del X_test_df_28['Consumption']

y_test_df_28 = daily_data_28['Consumption'][test_start:test_end]

To use sklearn, they need to be converted to NumPy arrays.

In [6]:
# Numpy arrays for sklearn

# _7 days
X_train_7 = np.array(X_train_df_7)
X_test_7 = np.array(X_test_df_7)
y_train_7 = np.array(y_train_df_7)
y_test_7 = np.array(y_test_df_7)

# _14 days
X_train_14 = np.array(X_train_df_14)
X_test_14 = np.array(X_test_df_14)
y_train_14 = np.array(y_train_df_14)
y_test_14 = np.array(y_test_df_14)

# _21 days
X_train_21 = np.array(X_train_df_21)
X_test_21 = np.array(X_test_df_21)
y_train_21 = np.array(y_train_df_21)
y_test_21 = np.array(y_test_df_21)

# _28 days
X_train_28 = np.array(X_train_df_28)
X_test_28 = np.array(X_test_df_28)
y_train_28 = np.array(y_train_df_28)
y_test_28 = np.array(y_test_df_28)

In [7]:
from sklearn import preprocessing as pre

# _7 days
scaler_7 = pre.StandardScaler().fit(X_train_7)
X_train_scaled_7 = scaler_7.transform(X_train_7)
X_test_scaled_7 = scaler_7.transform(X_test_7)

# _14 days
scaler_14 = pre.StandardScaler().fit(X_train_14)
X_train_scaled_14 = scaler_14.transform(X_train_14)
X_test_scaled_14 = scaler_14.transform(X_test_14)

# _21 days
scaler_21 = pre.StandardScaler().fit(X_train_21)
X_train_scaled_21 = scaler_21.transform(X_train_21)
X_test_scaled_21 = scaler_21.transform(X_test_21)

# _28 days
scaler_28 = pre.StandardScaler().fit(X_train_28)
X_train_scaled_28 = scaler_28.transform(X_train_28)
X_test_scaled_28 = scaler_28.transform(X_test_28)

# 7-days window

## Creating Models

In [8]:
grid_model_7_rbf= svm.SVR(kernel='rbf',C=20000, gamma=0.01, cache_size=10000).fit(X_train_scaled_7,y_train_7)
grid_model_7_lin = svm.LinearSVR(C=510000,max_iter=1000000).fit(X_train_scaled_7,y_train_7)
random_model_7_rbf = svm.SVR(kernel='rbf',C=886663.4812532668, gamma=0.01, cache_size=10000).fit(X_train_scaled_7,y_train_7)
random_model_7_lin = svm.LinearSVR(C=850162.5654928379,max_iter=1000000).fit(X_train_scaled_7,y_train_7)



In [9]:
grid_predict_y_array_rbf_7 = grid_model_7_rbf.predict(X_test_scaled_7)
grid_predict_y_array_lin_7 = grid_model_7_lin.predict(X_test_scaled_7)
random_predict_y_array_rbf_7 = grid_model_7_rbf.predict(X_test_scaled_7)
random_predict_y_array_lin_7 = random_model_7_lin.predict(X_test_scaled_7)



## Selecting the best

In [10]:
mape_7 = {}
mape_7['grid_rbf'] = (abs((y_test_df_7 - grid_predict_y_array_rbf_7)/y_test_df_7)*100).mean()
mape_7['grid_lin'] = (abs((y_test_df_7 - grid_predict_y_array_lin_7)/y_test_df_7)*100).mean()
mape_7['random_rbf'] = (abs((y_test_df_7 - random_predict_y_array_rbf_7)/y_test_df_7)*100).mean()
mape_7['random_lin'] = (abs((y_test_df_7 - random_predict_y_array_lin_7)/y_test_df_7)*100).mean()

min(mape_7, key=mape_7.get) 

'random_lin'

# 14-days window

## Creating Models

In [11]:
grid_model_14_rbf= svm.SVR(kernel='rbf',C=20000, gamma=0.01, cache_size=10000).fit(X_train_scaled_14,y_train_14)
grid_model_14_lin = svm.LinearSVR(C=940000,max_iter=1000000).fit(X_train_scaled_14,y_train_14)
random_model_14_rbf = svm.SVR(kernel='rbf',C=886968.1405918691, gamma=0.01, cache_size=10000).fit(X_train_scaled_14,y_train_14)
random_model_14_lin = svm.LinearSVR(C=771683.9396441513,max_iter=1000000).fit(X_train_scaled_14,y_train_14)


In [12]:
grid_predict_y_array_rbf_14 = grid_model_14_rbf.predict(X_test_scaled_14)
grid_predict_y_array_lin_14 = grid_model_14_lin.predict(X_test_scaled_14)
random_predict_y_array_rbf_14 = grid_model_14_rbf.predict(X_test_scaled_14)
random_predict_y_array_lin_14 = random_model_14_lin.predict(X_test_scaled_14)


## Selecting the best

In [13]:

mape_14 = {}
mape_14['grid_rbf'] = (abs((y_test_df_14 - grid_predict_y_array_rbf_14)/y_test_df_14)*100).mean()
mape_14['grid_lin'] = (abs((y_test_df_14 - grid_predict_y_array_lin_14)/y_test_df_14)*100).mean()
mape_14['random_rbf'] = (abs((y_test_df_14 - random_predict_y_array_rbf_14)/y_test_df_14)*100).mean()
mape_14['random_lin'] = (abs((y_test_df_14 - random_predict_y_array_lin_14)/y_test_df_14)*100).mean()

min(mape_14, key=mape_14.get) 

'grid_lin'

In [14]:
mape_14

{'grid_rbf': 3.027705898179774,
 'grid_lin': 2.4040273945009862,
 'random_rbf': 3.027705898179774,
 'random_lin': 2.4047837664796217}

# 21-days window

## Creating Models

In [15]:
grid_model_21_rbf= svm.SVR(kernel='rbf',C=20000, gamma=0.01, cache_size=10000).fit(X_train_scaled_21,y_train_21)
grid_model_21_lin = svm.LinearSVR(C=900000,max_iter=1000000).fit(X_train_scaled_21,y_train_21)
random_model_21_rbf = svm.SVR(kernel='rbf',C=887135.4162493473, gamma=0.01, cache_size=10000).fit(X_train_scaled_21,y_train_21)
random_model_21_lin = svm.LinearSVR(C=834611.6566123376,max_iter=1000000).fit(X_train_scaled_21,y_train_21)


In [16]:

grid_predict_y_array_rbf_21 = grid_model_21_rbf.predict(X_test_scaled_21)
grid_predict_y_array_lin_21 = grid_model_21_lin.predict(X_test_scaled_21)
random_predict_y_array_rbf_21 = grid_model_21_rbf.predict(X_test_scaled_21)
random_predict_y_array_lin_21 = random_model_21_lin.predict(X_test_scaled_21)


## Selecting the best

In [17]:
mape_21 = {}
mape_21['grid_rbf'] = (abs((y_test_df_21 - grid_predict_y_array_rbf_21)/y_test_df_21)*100).mean()
mape_21['grid_lin'] = (abs((y_test_df_21 - grid_predict_y_array_lin_21)/y_test_df_21)*100).mean()
mape_21['random_rbf'] = (abs((y_test_df_21 - random_predict_y_array_rbf_21)/y_test_df_21)*100).mean()
mape_21['random_lin'] = (abs((y_test_df_21 - random_predict_y_array_lin_21)/y_test_df_21)*100).mean()

min(mape_21, key=mape_21.get) 

'random_lin'

# 28-days window

## Creating Models

In [18]:
grid_model_28_rbf= svm.SVR(kernel='rbf',C=10000, gamma=0.01, cache_size=10000).fit(X_train_scaled_28,y_train_28)
grid_model_28_lin = svm.LinearSVR(C=840000,max_iter=1000000).fit(X_train_scaled_28,y_train_28)
random_model_28_rbf = svm.SVR(kernel='rbf',C=887225.2618243571, gamma=0.01, cache_size=10000).fit(X_train_scaled_28,y_train_28)
random_model_28_lin = svm.LinearSVR(C=771610.0024368193,max_iter=1000000).fit(X_train_scaled_28,y_train_28)


In [19]:
grid_predict_y_array_rbf_28 = grid_model_28_rbf.predict(X_test_scaled_28)
grid_predict_y_array_lin_28 = grid_model_28_lin.predict(X_test_scaled_28)
random_predict_y_array_rbf_28 = grid_model_28_rbf.predict(X_test_scaled_28)
random_predict_y_array_lin_28 = random_model_28_lin.predict(X_test_scaled_28)


## Selecting the best

In [20]:
mape_28 = {}
mape_28['grid_rbf'] = (abs((y_test_df_28 - grid_predict_y_array_rbf_28)/y_test_df_28)*100).mean()
mape_28['grid_lin'] = (abs((y_test_df_28 - grid_predict_y_array_lin_28)/y_test_df_28)*100).mean()
mape_28['random_rbf'] = (abs((y_test_df_28 - random_predict_y_array_rbf_28)/y_test_df_28)*100).mean()
mape_28['random_lin'] = (abs((y_test_df_28 - random_predict_y_array_lin_28)/y_test_df_28)*100).mean()

min(mape_28, key=mape_28.get) 

'random_lin'

In [21]:
mapes = {}
mapes["SVM-7-days"] = mape_7[min(mape_7, key=mape_7.get)]
mapes["SVM-14-days"] = mape_14[min(mape_14, key=mape_14.get)]
mapes["SVM-21-days"] = mape_21[min(mape_21, key=mape_21.get)]
mapes["SVM-28-days"] = mape_28[min(mape_28, key=mape_28.get)]

In [22]:
min(mapes, key=mapes.get) 

'SVM-28-days'

In [23]:
min(mape_28, key=mape_28.get) 

'random_lin'

### The best RNN model is 28-days feature with Linear kernel which is tuned by RandomSearchCV. 
### Save predicts to future comparison.

In [24]:
index_column = pd.date_range(start ='2019-1-1', end = '2019-12-31', freq ='D')
best_pred = pd.DataFrame(data=random_predict_y_array_lin_28,
              index=index_column)
best_pred.rename(columns={0 : "Consumption"}, inplace=True)
best_pred.to_csv("the_best_svr_pred.csv",index = True)


In [25]:
mapes_df = pd.DataFrame.from_dict(mapes, orient='index')
mapes_df.to_csv("mapes_svm.csv",index=True)