In [1]:
# Importing libraries 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC # "Support vector classifier"
from sklearn.metrics import accuracy_score, confusion_matrix,mean_absolute_error, classification_report,roc_auc_score, precision_score

# use seaborn plotting defaults
import seaborn as sns; sns.set()
import functions as fn

In [2]:
# Read the raw data set from the CSV file into a pandas DataFrame
dataset = pd.read_csv(filepath_or_buffer='dataset.csv')

In [3]:
# Keep ONLY the input (feature) columns: every column except the last one.
# NOTE(review): presumably the last column is the 'WillWait' target — confirm.
input_data = dataset.iloc[:, :-1]

In [4]:
# Represent the categorical input columns with one-hot encoding (dummy variables).
# Each dummy column is prefixed with the name of its source column so that
# features sharing the same category labels do not produce duplicate,
# indistinguishable column names in the concatenated frame.
dummies = [pd.get_dummies(input_data[c], prefix=c) for c in input_data.columns]

features = pd.concat(dummies, axis=1)

# Feature matrix: use every row of the encoded frame. A hard-coded row slice
# would stop matching y (which is built from the full data set below) as soon
# as the CSV holds more rows than the slice covers.
X = features.values
le = LabelEncoder()

# Target vector: the 'WillWait' column encoded as integer class labels
y = le.fit_transform(dataset['WillWait'].values)

In [5]:
# Hold out 30% of the samples for testing and train on the remaining 70%;
# random_state is fixed so the same split is produced on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

### Using a linear kernel in SVM

In [6]:
# Build a support vector classifier with a linear kernel and fit it on the training split.
# NOTE(review): per the scikit-learn docs, gamma applies only to the 'rbf', 'poly' and
# 'sigmoid' kernels, so it should have no effect with kernel='linear' — confirm and drop.
model = SVC(
    C=1E6,
    gamma=2,
    kernel='linear',
)
model.fit(X_train, y_train)

SVC(C=1000000.0, gamma=2, kernel='linear')

In [7]:
# Predict class labels for the held-out test samples with the linear-kernel model
y_pred = model.predict(X=X_test)

In [8]:
# Accuracy: the fraction of test samples whose predicted label matches the true label
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy: {:.2%}'.format(accuracy))

Accuracy: 62.50%


In [9]:
# Per-class precision, recall, F1-score and support for the test predictions
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.60      0.75      0.67         4
           1       0.67      0.50      0.57         4

    accuracy                           0.62         8
   macro avg       0.63      0.62      0.62         8
weighted avg       0.63      0.62      0.62         8



In [10]:
# Precision: of the samples the model labelled as positive, what share is truly positive?
# (The share of actual positives that get labelled as such is recall, not precision.)
precision = precision_score(y_test, y_pred)
print("Precision: {:.2%}".format(precision))

Precision: 66.67%


### Using an RBF (radial basis function) kernel in SVM

In [11]:
# Build a support vector classifier with the RBF (radial basis function) kernel
rbf_svc = SVC(
    C=1E6,
    gamma=2,
    kernel='rbf',
)

# Fit the classifier on the training split
rbf_svc.fit(X_train, y_train)

SVC(C=1000000.0, gamma=2)

In [12]:
# Predict class labels for the held-out test samples with the RBF-kernel model.
# NOTE: this rebinds y_pred, so the linear-kernel predictions are no longer available.
y_pred = rbf_svc.predict(X=X_test)

In [15]:
# Accuracy: the fraction of test samples whose predicted label matches the true label
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy: {:.2%}'.format(accuracy))

Accuracy: 50.00%


In [14]:
# Summarize the fit of the model: per-class precision, recall, F1-score and support.
# zero_division=0 reports 0.0 for a metric whose denominator is zero (for example a
# class that is never predicted) instead of emitting an UndefinedMetricWarning.
print(classification_report(y_test, y_pred, zero_division=0))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.50      1.00      0.67         4

    accuracy                           0.50         8
   macro avg       0.25      0.50      0.33         8
weighted avg       0.25      0.50      0.33         8



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
