## SVM

### RBF kernel

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the dataset
df = pd.read_csv('Dataset/NoInfValues.csv')
df.head()

Unnamed: 0,mean,std_dev,energy,entropy,num_peaks,lbp_0,lbp_1,ClassLabel
0,0.094581,0.058153,0.049309,1.09733,0.0,2,3,1
1,0.09397,0.051851,0.046076,1.17812,1.0,0,3,1
2,0.039831,0.020652,0.008052,1.217346,1.0,0,3,1
3,0.07755,0.050505,0.034259,1.115788,1.0,0,3,1
4,0.093627,0.051805,0.045799,1.178472,1.0,0,3,1


In [3]:
df.describe()

Unnamed: 0,mean,std_dev,energy,entropy,num_peaks,lbp_0,lbp_1,ClassLabel
count,578.0,578.0,578.0,578.0,578.0,578.0,578.0,578.0
mean,0.547162,0.563072,932.778758,1.17706,0.619377,0.859862,2.572664,3.093426
std,9.832225,11.675131,22421.517225,0.117442,0.485961,1.177231,0.823662,1.267481
min,0.006453,0.002335,0.000188,0.587278,0.0,0.0,0.0,1.0
25%,0.053044,0.0308,0.015532,1.109699,0.0,0.0,3.0,2.0
50%,0.113274,0.061033,0.06824,1.192146,1.0,0.0,3.0,3.0
75%,0.185966,0.10341,0.189697,1.268512,1.0,2.0,3.0,4.0
max,236.505877,280.762302,539050.000276,1.352549,1.0,3.0,3.0,5.0


In [4]:
# Separate features and target
x = df.iloc[:, :-1]  # Features
y = df.iloc[:, -1]   # Target

In [5]:
# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)

In [6]:
# Train the Model
classifier_rbf = SVC(kernel = "rbf", random_state = 42)
classifier_rbf.fit(x_train,y_train)

In [7]:
# Predict
y_pred = classifier_rbf.predict(x_test)

In [8]:
# Evauavte the model
classifier_rbf.score(x, y)

0.23529411764705882

In [9]:
cm_rbf = confusion_matrix(y_test, y_pred)
cm_rbf

array([[ 0,  0, 17,  0,  0],
       [ 0,  0, 55,  0,  0],
       [ 0,  0, 35,  0,  0],
       [ 0,  0, 40,  0,  0],
       [ 0,  0, 27,  0,  0]])

In [10]:
accuracy = accuracy_score(y_test, y_pred)
accuracy

0.20114942528735633

In [11]:
cr_rbf = classification_report(y_test, y_pred)
print(cr_rbf)

              precision    recall  f1-score   support

           1       0.00      0.00      0.00        17
           2       0.00      0.00      0.00        55
           3       0.20      1.00      0.33        35
           4       0.00      0.00      0.00        40
           5       0.00      0.00      0.00        27

    accuracy                           0.20       174
   macro avg       0.04      0.20      0.07       174
weighted avg       0.04      0.20      0.07       174



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Tune Hyper Parameters

In [12]:
# Standardize the features
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.3, random_state=42)

In [13]:
# Define the parameter grid for hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf']
}

# Initialize the GridSearchCV object
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
grid.fit(x_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=0.01

In [14]:
grid.best_estimator_

In [15]:
# Use the best estimator to make predictions
y_pred = grid.best_estimator_.predict(x_test)

In [16]:
classifier_rbf.score(x, y)

0.23529411764705882

In [17]:
# Evaluate the model
accuracy_score(y_test, y_pred)

0.40804597701149425

In [18]:
confusion_mat = confusion_matrix(y_test, y_pred)
confusion_mat

array([[ 1,  5,  8,  2,  1],
       [ 0, 43,  0,  5,  7],
       [ 0,  9, 10, 10,  6],
       [ 0, 13,  6, 16,  5],
       [ 1, 10,  8,  7,  1]])

In [19]:
classification_rep = classification_report(y_test, y_pred)
print(classification_rep)

              precision    recall  f1-score   support

           1       0.50      0.06      0.11        17
           2       0.54      0.78      0.64        55
           3       0.31      0.29      0.30        35
           4       0.40      0.40      0.40        40
           5       0.05      0.04      0.04        27

    accuracy                           0.41       174
   macro avg       0.36      0.31      0.30       174
weighted avg       0.38      0.41      0.37       174



In [20]:
# Define the parameter grid for hyperparameter tuning
param_grid = {
    'C': [100, 150, 200],
    'gamma': [1, 1.1, 0.9],
    'kernel': ['rbf']
}

# Initialize the GridSearchCV object
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
grid.fit(x_train, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .......................C=100, gamma=1.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=100, gamma=1.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=100, gamma=1.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=100, gamma=1.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=100, gamma=1.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=100, gamma=0.9, kernel=rbf; total time=   0.0s
[CV] END .......................C=100, gamma=0.9,

In [21]:
grid.best_estimator_

In [22]:
# Use the best estimator to make predictions
y_pred = grid.best_estimator_.predict(x_test)

In [23]:
classification_rep = classification_report(y_test, y_pred)
print(classification_rep)

              precision    recall  f1-score   support

           1       0.33      0.06      0.10        17
           2       0.63      0.76      0.69        55
           3       0.32      0.34      0.33        35
           4       0.44      0.50      0.47        40
           5       0.14      0.11      0.12        27

    accuracy                           0.45       174
   macro avg       0.37      0.36      0.34       174
weighted avg       0.42      0.45      0.42       174



### dataset.csv

In [44]:
df2 = pd.read_csv('Dataset/dataset.csv')
df2.head()

Unnamed: 0,X,Y,Z,Mixed,ClassLabel
0,0.125022,0.094986,0.001297,0.157018,1
1,0.15071,0.083282,-0.023514,0.173788,1
2,0.102941,0.111084,0.010075,0.151782,1
3,0.03845,0.049911,0.007511,0.063451,1
4,-0.029148,-0.105423,0.017124,0.110711,1


In [45]:
df2.describe()

Unnamed: 0,X,Y,Z,Mixed,ClassLabel
count,5035.0,5035.0,5035.0,5035.0,5035.0
mean,0.521365,4.6978,-294.7934,0.244225,3.002781
std,318.671126,233.800423,20264.8,0.192936,1.409145
min,-9720.0,-8850.0,-1437653.0,0.001157,1.0
25%,-0.069452,-0.100594,-0.06593132,0.094546,2.0
50%,-0.001627,0.007843,-0.003307343,0.196968,3.0
75%,0.071264,0.124863,0.05086803,0.343273,4.0
max,8960.0,7630.0,9000.0,0.995577,5.0


In [46]:
df2.isnull().sum()

X             0
Y             0
Z             0
Mixed         0
ClassLabel    0
dtype: int64

In [47]:
x = df2.iloc[:, :-1]  # Features
x.head()

Unnamed: 0,X,Y,Z,Mixed
0,0.125022,0.094986,0.001297,0.157018
1,0.15071,0.083282,-0.023514,0.173788
2,0.102941,0.111084,0.010075,0.151782
3,0.03845,0.049911,0.007511,0.063451
4,-0.029148,-0.105423,0.017124,0.110711


In [48]:
y = df2.iloc[:, -1]   # Target
y.head()

0    1
1    1
2    1
3    1
4    1
Name: ClassLabel, dtype: int64

In [49]:
# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)

In [50]:
SD = StandardScaler()
x_train = SD.fit_transform(x_train)
x_test = SD.fit_transform(x_test)

In [51]:
# Define the parameter grid for hyperparameter tuning
param_grid = {
    'C': [100, 150, 200],
    'gamma': [1, 1.1, 0.9],
    'kernel': ['rbf']
}

# Initialize the GridSearchCV object
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
grid.fit(x_train, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=   0.4s
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=   0.4s
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=   0.5s
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=   0.4s
[CV] END .........................C=100, gamma=1, kernel=rbf; total time=   0.4s
[CV] END .......................C=100, gamma=1.1, kernel=rbf; total time=   0.5s
[CV] END .......................C=100, gamma=1.1, kernel=rbf; total time=   0.4s
[CV] END .......................C=100, gamma=1.1, kernel=rbf; total time=   0.5s
[CV] END .......................C=100, gamma=1.1, kernel=rbf; total time=   0.5s
[CV] END .......................C=100, gamma=1.1, kernel=rbf; total time=   0.4s
[CV] END .......................C=100, gamma=0.9, kernel=rbf; total time=   0.4s
[CV] END .......................C=100, gamma=0.9,

In [52]:
grid.best_estimator_

In [53]:
# Use the best estimator to make predictions
y_pred = grid.best_estimator_.predict(x_test)

In [54]:
classification_rep = classification_report(y_test, y_pred)
print(classification_rep)

              precision    recall  f1-score   support

           1       0.50      0.01      0.02       310
           2       0.72      0.81      0.77       319
           3       0.34      0.69      0.45       282
           4       0.31      0.58      0.41       303
           5       0.00      0.00      0.00       297

    accuracy                           0.42      1511
   macro avg       0.37      0.42      0.33      1511
weighted avg       0.38      0.42      0.33      1511



In [57]:
# Define the parameter grid for hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf']
}

# Initialize the GridSearchCV object
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
grid.fit(x_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.2s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.2s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.2s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.2s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.2s
[CV] END ......................C=0.1, gamma=0.01, kernel=rbf; total time=   0.2s
[CV] END ......................C=0.1, gamma=0.01

In [58]:
grid.best_estimator_

In [59]:
# Use the best estimator to make predictions
y_pred = grid.best_estimator_.predict(x_test)

In [60]:
classification_rep = classification_report(y_test, y_pred)
print(classification_rep)

              precision    recall  f1-score   support

           1       0.18      0.02      0.03       310
           2       0.72      0.84      0.77       319
           3       0.34      0.63      0.44       282
           4       0.31      0.58      0.40       303
           5       0.00      0.00      0.00       297

    accuracy                           0.41      1511
   macro avg       0.31      0.41      0.33      1511
weighted avg       0.31      0.41      0.33      1511

