# Non linear SVM

Prima di provare tutte le combinazioni di attributi e parametri, controlliamo come i parametri influenzano il modello

In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from sklearn.svm import LinearSVC
from sklearn.svm import SVC
#from sklearn.model_selection import train_test_split, cross_val_score 

from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.metrics import roc_curve, auc, roc_auc_score

In [2]:
class_name = 'Occupancy'
df_training = pd.read_csv('training.csv', skipinitialspace=True, na_values='?', keep_default_na=True)
df_test = pd.read_csv("test.csv", skipinitialspace=True, na_values='?', keep_default_na=True)


columns2remove = ['date', 'Unnamed: 0', 'cumulative_hour', 'cumulative_minute', 'day', 'hour', 'minute', 'day_minute', 'weekend']
df_training.drop(columns2remove, inplace=True, axis=1)
df_training.head()

Unnamed: 0,Temperature,Humidity,Light,CO2,HumidityRatio,Occupancy
0,23.7,26.272,585.2,749.2,0.004764,1
1,23.718,26.29,578.4,760.4,0.004773,1
2,23.73,26.23,572.666667,769.666667,0.004765,1
3,23.7225,26.125,493.75,774.75,0.004744,1
4,23.754,26.2,488.6,779.0,0.004767,1


In [3]:
scaler = StandardScaler()
attributes = [col for col in df_training.columns if col != class_name]

X_train = scaler.fit_transform(df_training[attributes].values)
y_train = df_training[class_name]

X_test = scaler.fit_transform(df_test[attributes].values)
y_test = df_test[class_name]

In [4]:
pca = PCA(n_components=2)
pca.fit(X_train)
X_pca = pca.transform(X_train)

In [5]:
clf = SVC(gamma='auto')
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.9868677042801557
F1-score [0.99195071 0.96436428]
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      5071
           1       0.93      1.00      0.96      1097

    accuracy                           0.99      6168
   macro avg       0.97      0.99      0.98      6168
weighted avg       0.99      0.99      0.99      6168



In [6]:
clf = SVC(gamma='auto', C=0.1, kernel='rbf', random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print('Accuracy %s' % accuracy_score(y_test, y_pred))
print('F1-score %s' % f1_score(y_test, y_pred, average=None))
print(classification_report(y_test, y_pred))

Accuracy 0.9917315175097277
F1-score [0.99494699 0.97726259]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99      5071
           1       0.96      1.00      0.98      1097

    accuracy                           0.99      6168
   macro avg       0.98      0.99      0.99      6168
weighted avg       0.99      0.99      0.99      6168



## Effect of different kernels

In [7]:
%%time
scaler = StandardScaler()
y_train = df_training[class_name]
y_test = df_test[class_name]

for kernel in ['poly', 'rbf', 'sigmoid']:
        print("Kernel: {}".format(kernel))
        X_train = scaler.fit_transform(df_training[['Temperature', 'Light', 'CO2']].values)
        X_test = scaler.fit_transform(df_test[['Temperature', 'Light', 'CO2']].values)
        
        clf = SVC(gamma='auto', C=0.06, kernel=kernel, random_state=42)
        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)

        print()
        print('Accuracy %s' % accuracy_score(y_test, y_pred))
        print('F1-score %s' % f1_score(y_test, y_pred, average=None))
        print(classification_report(y_test, y_pred))
        report = classification_report(y_test, y_pred)
        
        print("---------------------------------------------------------\n\n\n")

Kernel: poly

Accuracy 0.9925421530479897
F1-score [0.99544464 0.97944593]
              precision    recall  f1-score   support

           0       1.00      0.99      1.00      5071
           1       0.96      1.00      0.98      1097

    accuracy                           0.99      6168
   macro avg       0.98      1.00      0.99      6168
weighted avg       0.99      0.99      0.99      6168

---------------------------------------------------------



Kernel: rbf

Accuracy 0.9917315175097277
F1-score [0.99494699 0.97726259]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99      5071
           1       0.96      1.00      0.98      1097

    accuracy                           0.99      6168
   macro avg       0.98      0.99      0.99      6168
weighted avg       0.99      0.99      0.99      6168

---------------------------------------------------------



Kernel: sigmoid

Accuracy 0.8425745784695201
F1-score [0.89506106 0.6850470

In [None]:
X = pd.concat([df_training[attributes], df_test[attributes]], axis = 0)
y = pd.concat([df_training[class_name], df_test[class_name]], axis = 0)

pca.fit(X)
X_pca = pca.transform(X)
pca.fit(X_train)
# per il modello uso X_train e X_test come al solito
X_train_pca = pca.transform(X_train)
pca.fit(X_test)
X_test_pca = pca.transform(X_test)

# fit the model
for kernel in ('linear', 'rbf', 'poly'):
    clf = SVC(kernel=kernel, gamma=10)
    clf.fit(X_train_pca, y_train)
    print("Fit the model with {} kernel".format(kernel))

    plt.figure()
    plt.clf()
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, zorder=10, cmap=plt.cm.Paired,
                edgecolor='k', s=20)

    # Circle out the test data
    plt.scatter(X_test_pca[:, 0], X_test_pca[:, 1], s=80, facecolors='none',
                zorder=10, edgecolor='k')

    plt.axis('tight')
    x_min = X_pca[:, 0].min()
    x_max = X_pca[:, 0].max()
    y_min = X_pca[:, 1].min()
    y_max = X_pca[:, 1].max()

    XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
    Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(XX.shape)
    plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
    plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],
                linestyles=['--', '-', '--'], levels=[-.5, 0, .5])
    plt.title(kernel)
plt.show()

Fit the model with linear kernel
Fit the model with rbf kernel


  plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],


In [None]:
for i in range(len(summary2)):
    print(i, summary2[i]['f1-score [1]'])

print()
for i in range(len(summary2)):
    print(i, summary2[i]['precision [1]'])

In [None]:
for c in range(1, 11):
        # 'attr' è la combinazione di attributi da testare per ogni iterazione
        print("C =", c/100)
        X_train = scaler.fit_transform(df_training[['Temperature', 'Light', 'CO2']].values)
        X_test = scaler.fit_transform(df_test[['Temperature', 'Light', 'CO2']].values)
        
        #clf = SVC(gamma='auto', C=0.1, kernel='rbf', random_state=42)
        #clf.fit(X_train, y_train)

        #y_pred = clf.predict(X_test)
        
        clf = SVC(C=c/100, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma='auto', kernel='poly',
                     max_iter=-1, probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False)

        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        print()
        print('Accuracy %s' % accuracy_score(y_test, y_pred))
        print('F1-score %s' % f1_score(y_test, y_pred, average=None))
        print(classification_report(y_test, y_pred))
        report = classification_report(y_test, y_pred, output_dict=True)
        print('precision', report['1']['precision'])
        print('precision', report['1']['recall'])
        print("---------------------------------------------------------\n\n\n")
        
        i+=1

In [None]:
attributes

In [None]:
scaler = StandardScaler()
attributes = [col for col in df_training.columns if col != class_name]

df = pd.concat([df_training, df_test], axis = 0)
y = df['Occupancy'].values

X = scaler.fit_transform(df[['Temperature', 'Light', 'CO2']].values)
X_train = scaler.fit_transform(df_training[['Temperature', 'Light', 'CO2']].values)
X_test = scaler.fit_transform(df_test[['Temperature', 'Light', 'CO2']].values)

pca = PCA(n_components=2) 
pca.fit(X)

X_pca = pca.transform(X)
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

In [None]:
clf = SVC(C=0.06, break_ties=False, cache_size=2048, class_weight=None, coef0=0.0,
             decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
             max_iter=10000, probability=False, random_state=None, shrinking=True,
             tol=0.001, verbose=True).fit(X_train_pca, y_train)

In [None]:
plt.axis('tight')
x_min = X_pca[:, 0].min()
x_max = X_pca[:, 0].max()
y_min = X_pca[:, 1].min()
y_max = X_pca[:, 1].max()

XX, YY = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100))
Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()])
Z = Z.reshape(XX.shape)
#contours = plt.contour(XX, YY, Z, cmap=plt.cm.coolwarm, alpha=0.8)

plt.contourf(XX, YY, Z, cmap=plt.cm.coolwarm, alpha=0.8)
plt.scatter(X_pca[:, 0], X_pca[:, 1], s=30, c=y, cmap=plt.cm.Paired,
            edgecolors='k')
plt.show()

In [None]:
plt.figure(figsize = (8, 6))
Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()])
Z = Z.reshape(XX.shape)
plt.contourf(XX, YY, Z, cmap=plt.cm.coolwarm, alpha=1)

sns.scatterplot(x=X_test_pca[:, 0], y=X_test_pca[:, 1], hue = y_test, palette=['blue', 'red'], linewidth=0)
plt.title("Nonlinear SVM", fontsize = 15)
plt.tick_params(labelsize = 15)
plt.legend(fontsize = 15)
plt.show()