In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Location of the input CSV, relative to the notebook's working directory.
file_csv = "data/classify_reduced.csv"

# Read the file into a DataFrame, decoding it as ISO-8859-1.
svm_df = pd.read_csv(filepath_or_buffer=file_csv, encoding="ISO-8859-1")

# Preview the first rows.
svm_df.head()

Unnamed: 0,IsHoliday,Weekly_Sales,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5
0,0,24924.5,,,,,
1,0,41595.55,,,,,
2,0,19403.54,,,,,
3,0,21827.9,,,,,
4,0,21043.39,,,,,


In [4]:
# Replace missing values with 0 in every numeric column used below.
zero_fill_columns = [
    'MarkDown1',
    'MarkDown2',
    'MarkDown3',
    'MarkDown4',
    'MarkDown5',
    'Weekly_Sales',
]
for column in zero_fill_columns:
    svm_df[column] = svm_df[column].fillna(0)

# Preview the frame after filling.
svm_df.head()

Unnamed: 0,IsHoliday,Weekly_Sales,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5
0,0,24924.5,0.0,0.0,0.0,0.0,0.0
1,0,41595.55,0.0,0.0,0.0,0.0,0.0
2,0,19403.54,0.0,0.0,0.0,0.0,0.0
3,0,21827.9,0.0,0.0,0.0,0.0,0.0
4,0,21043.39,0.0,0.0,0.0,0.0,0.0


In [5]:
# Feature matrix: every column of svm_df except the 'IsHoliday' label.
is_feature_column = svm_df.columns != 'IsHoliday'
X = svm_df.loc[:, is_feature_column]
X.head()

Unnamed: 0,Weekly_Sales,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5
0,24924.5,0.0,0.0,0.0,0.0,0.0
1,41595.55,0.0,0.0,0.0,0.0,0.0
2,19403.54,0.0,0.0,0.0,0.0,0.0
3,21827.9,0.0,0.0,0.0,0.0,0.0
4,21043.39,0.0,0.0,0.0,0.0,0.0


In [6]:
# Target: the 'IsHoliday' column, kept as a single-column DataFrame.
y = svm_df[['IsHoliday']]
y.head()

Unnamed: 0,IsHoliday
0,0
1,0
2,0
3,0
4,0


In [7]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# split (and therefore every metric computed below) reproducible across
# Restart & Run All; without it each run shuffles the rows differently.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

In [8]:
# Support vector classifier with default hyperparameters apart from cache_size.
# NOTE(review): scikit-learn documents cache_size as the kernel cache in MB, so
# this requests roughly 7 GB — confirm the host has that much memory.
model = SVC(cache_size=7000)

In [None]:
# Fit the classifier on the training split.
# y_train is a single-column DataFrame; scikit-learn expects a 1-D target and
# otherwise emits a DataConversionWarning (hidden here by the global warnings
# filter), so flatten it explicitly.
model.fit(X_train, np.ravel(y_train))

In [None]:
# Predict labels for the held-out test features with the fitted model.
predictions = model.predict(X_test)

In [None]:
# Confusion matrix of true vs. predicted labels on the test split.
print(confusion_matrix(y_true=y_test, y_pred=predictions))

In [None]:
# Text classification report for the test split.
print(classification_report(y_true=y_test, y_pred=predictions))

In [None]:
# Mean absolute error between the true and predicted test labels.
# (A stray trailing "3" after the call previously made this cell a SyntaxError.)
# NOTE(review): if IsHoliday is coded 0/1, this equals the misclassification
# rate — confirm the label encoding.
MAE = mean_absolute_error(y_test, predictions)
print(MAE)

In [None]:
# Hyperparameter search space handed to GridSearchCV below:
# 5 values of C x 5 values of gamma, RBF kernel only.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}

In [None]:
# Exhaustive search over param_grid with a fresh SVC as the base estimator.
# refit=True asks for a final refit with the best parameters; verbose=3
# requests progress logging during the search.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    refit=True,
    verbose=3,
)

In [None]:
# Run the grid search on the training split.
# y_train is a single-column DataFrame; scikit-learn expects a 1-D target and
# otherwise emits a DataConversionWarning (hidden here by the global warnings
# filter), so flatten it explicitly.
grid.fit(X_train, np.ravel(y_train))

In [None]:
# Display the parameter combination selected by the grid search.
grid.best_params_

In [None]:
# Display the estimator refit with the best parameters (refit=True above).
grid.best_estimator_

In [None]:
# Predict labels for the held-out test features using the fitted grid search.
grid_predictions = grid.predict(X_test)

In [None]:
# Confusion matrix of true vs. grid-search-predicted labels on the test split.
print(confusion_matrix(y_true=y_test, y_pred=grid_predictions))

In [None]:
# Text classification report for the grid-search predictions on the test split.
print(classification_report(y_true=y_test, y_pred=grid_predictions))