In [1]:
import pandas as pd
import sqlite3
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

In [3]:
df = pd.read_csv('../data/dayofweek-not-scaled.csv')
y = df['dayofweek'].values
x = df.drop(columns='dayofweek')

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=21, stratify=y)

## SVC
## take the best parameters from the previous exercise and train the model
## your code from the cell should calculate accuracy, precision, recall, ROC AUC
## precision and recall should be calculated for each class (use average=’weighted’)


In [5]:
svc = SVC(kernel='rbf', gamma='auto', C=10, class_weight=None, random_state=21, probability=True)
svc.fit(x_train, y_train)
predict = svc.predict(x_test)
print(f'accuracy is {accuracy_score(y_test, predict):.5f}')
print(f'precision is {precision_score(y_test, predict, average="weighted"):.5f}')
print(f'recall is {recall_score(y_test, predict, average="weighted"):.5f}')
print(f'roc_auc is {roc_auc_score(y_test, svc.predict_proba(x_test), multi_class="ovr"):.5f}')


accuracy is 0.88757
precision is 0.89267
recall is 0.88757
roc_auc is 0.97751


## Decision tree
## take the best parameters from the previous exercise and train the model
## your code from the cell should calculate accuracy, precision, recall, ROC AUC
## precision and recall should be calculated for each class (use average=’weighted’)


In [6]:
dt = DecisionTreeClassifier(random_state=21, class_weight='balanced', criterion='gini', max_depth=21)
dt.fit(x_train, y_train)
predict = dt.predict(x_test)
print(f'accuracy is {accuracy_score(y_test, predict):.5f}')
print(f'precision is {precision_score(y_test, predict, average="weighted"):.5f}')
print(f'recall is {recall_score(y_test, predict, average="weighted"):.5f}')
print(f'roc_auc is {roc_auc_score(y_test, dt.predict_proba(x_test), multi_class="ovr"):.5f}')


accuracy is 0.88462
precision is 0.88765
recall is 0.88462
roc_auc is 0.93459


## Random forest
## take the best parameters from the previous exercise and train the model
## calculate accuracy, precision, recall, ROC AUC
## precision and recall should be calculated for each class (use average=’weighted’)


In [7]:
rf = RandomForestClassifier(n_estimators=100, criterion='entropy', max_depth=24, class_weight='balanced', random_state=21)
rf.fit(x_train, y_train)
predict = dt.predict(x_test)
print(f'accuracy is {accuracy_score(y_test, predict):.5f}')
print(f'precision is {precision_score(y_test, predict, average="weighted"):.5f}')
print(f'recall is {recall_score(y_test, predict, average="weighted"):.5f}')
print(f'roc_auc is {roc_auc_score(y_test, rf.predict_proba(x_test), multi_class="ovr"):.5f}')

accuracy is 0.88462
precision is 0.88765
recall is 0.88462
roc_auc is 0.98834


## predictions

In [10]:
rf = RandomForestClassifier(n_estimators=100, criterion='entropy', max_depth=24, class_weight='balanced', random_state=21)
rf.fit(x_train, y_train)
predict = dt.predict(x_test)
print(f'Final accuracy: {rf.score(x_test, y_test):.5f}')

Final accuracy: 0.92604
