# **IMPORT LIBRARIES**

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, StratifiedKFold, RepeatedStratifiedKFold, RandomizedSearchCV
from sklearn.metrics import f1_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.inspection import permutation_importance

from sklearn.svm import LinearSVC, SVC

from scipy.stats import uniform, loguniform

import warnings
warnings.filterwarnings("ignore")

# **IMPORT DATA, TEST/TRAIN SPLIT, SCALE**

In [2]:
# Load the pre-computed geometric features; the `class` column holds the label.
features = pd.read_csv("../data/paris_lille/geometric_features.csv")

# Feature matrix = every column except `class`; labels as a flat 1-D array.
X = features.drop(columns=['class'])
y = features['class'].to_numpy()

# Seed the split so the train/test partition -- and therefore every score
# reported further down -- is the same after a kernel restart.
# NOTE(review): no feature scaling is applied before the SVMs are fitted and
# SVMs are scale-sensitive -- confirm whether a scaler was intended here.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# **BASELINE SVC**

In [3]:
# Baseline: a linear SVM with default hyperparameters. The estimator is seeded
# so that repeated runs give the same fitted model.
# NOTE(review): warnings are silenced globally at the top of the notebook, so a
# ConvergenceWarning from this fit would not be visible -- verify convergence.
svc_model = LinearSVC(random_state=0)
svc_model.fit(X_train, y_train)
y_pred = svc_model.predict(X_test)

# Per-class precision/recall/F1, followed by the overall micro-averaged F1.
print(classification_report(y_test, y_pred, digits=3))
print(f1_score(y_test, y_pred, average='micro'))

                  precision    recall  f1-score   support

         Barrier      0.000     0.000     0.000         6
    Bicycle Rack      1.000     0.333     0.500         3
Bicycle Terminal      0.000     0.000     0.000        14
         Bollard      0.538     0.955     0.689        66
        Building      0.000     0.000     0.000        11
           Chair      0.000     0.000     0.000         2
Distribution Box      0.111     0.333     0.167         3
      Floor Lamp      0.880     0.880     0.880        25
          Island      0.000     0.000     0.000         8
Lighting Console      0.821     0.523     0.639        44
        Low Wall      0.000     0.000     0.000        17
         Mailbox      0.000     0.000     0.000         1
           Meter      0.000     0.000     0.000         0
      Mobile Car      0.000     0.000     0.000        11
  Mobile Scooter      0.000     0.000     0.000         1
      Mobile Van      0.000     0.000     0.000         1
         Natu

# **HYPERPARAMETER TUNING**

In [3]:
# Estimator whose hyperparameters are explored by the randomized search.
model = SVC()

# Settings held constant for every kernel family.
shared_settings = {'C': [1], 'class_weight': ['balanced']}

# One sub-space per kernel. scipy's uniform(loc, scale) covers
# [loc, loc + scale], so gamma below is drawn from [50, 150].
# An earlier, wider version of this search space also varied
# decision_function_shape ('ovo' / 'ovr'), allowed degree 2 for the poly
# kernel and drew gamma from uniform(10, 100).
params_grid = [
    {**shared_settings, 'kernel': ['linear']},
    {**shared_settings, 'gamma': uniform(50, 100), 'kernel': ['rbf']},
    {**shared_settings, 'gamma': uniform(50, 100), 'kernel': ['poly'], 'degree': [3, 4, 5]},
    {**shared_settings, 'gamma': uniform(50, 100), 'kernel': ['sigmoid']},
]

In [None]:
# Alternative CV scheme kept for reference:
#   cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=0)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# random_state seeds the sampling of candidates from params_grid, so the same
# hyperparameter settings are tried (and the same winner found) on every run.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=params_grid,
    scoring='f1_micro',
    cv=cv,
    n_jobs=-1,
    random_state=0,
)

# fit() returns the search object itself; later cells read it as search_results.
search_results = random_search.fit(X_train, y_train)

In [None]:
# Display the estimator selected by the randomized search.
search_results.best_estimator_

In [None]:
# The search was run with its default refit=True, so best_estimator_ has
# already been fitted on all of X_train; fitting it again here would only
# repeat that work, so it is used directly.
best_model = search_results.best_estimator_
y_pred = best_model.predict(X_test)

# Per-class precision/recall/F1, followed by the overall micro-averaged F1.
print(classification_report(y_test, y_pred, digits=3))
print(f1_score(y_test, y_pred, average='micro'))

In [None]:
# Fit an SVC with hard-coded hyperparameters instead of re-running the search.
# NOTE(review): these values presumably come from an earlier search run -- confirm.
fixed_params = {
    'C': 1,
    'class_weight': 'balanced',
    'degree': 4,
    'gamma': 91.13708634647797,
    'kernel': 'poly',
}
model = SVC(**fixed_params)
model.fit(X_train, y_train)

# y_pred is overwritten here, so later cells that read y_pred see the
# predictions of this model.
y_pred = model.predict(X_test)

report = classification_report(y_test, y_pred, digits=3)
micro_f1 = f1_score(y_test, y_pred, average='micro')
print(report)
print(micro_f1)

# **CONFUSION MATRIX**

In [None]:
# Confusion matrix on the test set, with rows/columns ordered by the classes
# known to best_model.
# NOTE(review): y_pred is whatever the most recently executed prediction cell
# produced, which is not necessarily best_model's output -- confirm which
# model this figure is meant to describe.
class_labels = best_model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
cmd = ConfusionMatrixDisplay(cm, display_labels=class_labels)

# Large canvas so every class label stays legible; x labels are drawn vertically.
fig, ax = plt.subplots(figsize=(20, 20))
cmd.plot(ax=ax, xticks_rotation='vertical');

fig.savefig('confusionmatrix_svm.png', dpi=600)

# **FEATURE IMPORTANCE**

In [None]:
# Permutation importance of each feature for best_model on the held-out set,
# measured as the drop in micro-F1. The permutations are random, so the call is
# seeded to make the reported scores repeatable.
results = permutation_importance(
    best_model, X_test, y_test, scoring='f1_micro', random_state=0
)
importance = results.importances_mean

# One line per feature: positional index and mean importance over the repeats.
for i, v in enumerate(importance):
    print('Feature: %0d, Score %.5f' % (i, v))

In [None]:
# Bar chart of the mean permutation importances, one bar per feature column,
# labelled with the column names of X.
bar_positions = range(len(results.importances_mean))

fig, ax = plt.subplots(figsize=(25, 15))
ax.bar(bar_positions, results.importances_mean)
ax.set_xticks(bar_positions)
ax.set_xticklabels(X.columns)

# Save before show() so the file is written from the fully drawn figure.
fig.savefig('featureimportance_svm.png', dpi=600)
plt.show()