In [1]:
import torch
print("Using torch", torch.__version__)

Using torch 2.0.1+cu117


In [2]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device", device)

Device cuda


## Importing Libraries

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split    # For splitting the dataset
from sklearn import svm
from sklearn.datasets import make_moons
from sklearn.svm import SVC
from sklearn.inspection import DecisionBoundaryDisplay  # For plotting the non liner decision boundaries
from sklearn.model_selection import GridSearchCV        # For hyperperameter tuning

from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

## Data Pre-processing

In [4]:
training_data_95 = pd.read_csv('./csvs/hog_95/extracted_features_hog_95.csv',header = None)
print(training_data_95)

# X_train_df = training_data_95.drop(columns = [0])
# y_train_df = training_data_95[0]

# print(X_train_df)
# print(y_train_df)

                    0         1         2         3         4         5     \
0      Rachel_Leigh_Cook  0.101289  1.330157 -0.964354  0.278769 -0.543510   
1          Roseanne_Barr  0.391125  0.704784  1.217563 -0.780129 -1.414380   
2         Vladimir_Putin -1.246895 -0.680080  0.591671 -2.354134 -1.920589   
3           Carlos_Menem  0.597257 -0.661087  1.356273 -0.878202 -0.588220   
4          Lynne_Thigpen -1.589231 -0.049223 -1.306154 -0.461846  0.250779   
...                  ...       ...       ...       ...       ...       ...   
10581    Rohinton_Mistry  1.724274 -1.189989 -1.052075  1.067114 -0.041331   
10582      George_W_Bush  2.658667  0.404976  0.808148 -0.520925 -0.804796   
10583        Vicente_Fox  0.479659 -0.820659  0.506519  0.026553  1.815414   
10584          Todd_Wike -1.765033 -0.231035  0.285658 -1.902368 -0.086056   
10585         Kofi_Annan  1.083461  0.993570 -0.346959  1.194149 -1.330585   

           6         7         8         9     ...  3239   3240

In [5]:
testing_data_95 = pd.read_csv('./csvs/hog_95/extracted_features_test_hog_95.csv', header = None)
print(testing_data_95)

# X_test_df = testing_data_95.drop(columns = [0])
# y_test_df = testing_data_95[0]

# print(X_test_df)
# print(y_test_df)

                         0         1         2         3         4     \
0              Carolina_Kluft  1.943908 -0.685770  0.135847 -0.574862   
1            Roger_Etchegaray -1.104129  0.259980 -1.581375  0.471127   
2                  JK_Rowling  1.170456 -1.014306 -0.396331  0.125549   
3     Valery_Giscard_dEstaing  1.449810  0.777844  1.786893 -0.590618   
4         Juan_Carlos_Ferrero -0.364156 -1.960419 -0.170895  0.499791   
...                       ...       ...       ...       ...       ...   
2642            Michael_Chang  1.412572 -0.760504  0.483098 -0.281977   
2643              Osrat_Iosef  1.675409  2.553274 -0.504704 -0.422554   
2644              Yann_Martel  1.023609 -0.542472  0.853466  0.449747   
2645             Mitzi_Gaynor -0.901391  0.372586  2.181821  0.809191   
2646                Fujio_Cho  0.938650 -0.126036 -1.422143  0.285539   

          5         6         7         8         9     ...  3239   3240  \
0    -0.959423  0.472946  0.260016  0.167261 -0

### Taking only the images of persons whose number of images are greater than equal to 50

In [6]:
df = pd.concat([training_data_95,testing_data_95],axis = 0)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13233 entries, 0 to 2646
Columns: 3249 entries, 0 to 3248
dtypes: float64(3248), object(1)
memory usage: 328.1+ MB


In [7]:
df_50 = df[df[0].map(df[0].value_counts()) >= 50]  # Reference Stack overflow
df_50.reset_index(drop=True, inplace = True)
df_50

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3239,3240,3241,3242,3243,3244,3245,3246,3247,3248
0,George_W_Bush,-1.824086,-0.401801,-0.521794,-0.587088,-0.509684,0.280968,-1.599484,0.290741,0.713462,...,31.0,359.0,1280.0,349.0,26.0,262.0,456.0,332.0,313.0,16523.0
1,Colin_Powell,0.549831,-0.715630,1.769869,0.222841,0.108078,0.219034,-0.818753,-0.839518,-0.129442,...,44.0,504.0,1817.0,748.0,30.0,501.0,556.0,396.0,362.0,14746.0
2,George_W_Bush,-0.628315,-0.750331,0.547397,0.478640,2.337426,0.526050,0.712348,-1.843158,-0.076518,...,27.0,302.0,1903.0,347.0,13.0,263.0,381.0,313.0,252.0,29224.0
3,George_W_Bush,2.176470,-0.997268,0.699885,-1.213921,0.090995,1.768202,0.787791,-0.212840,0.012334,...,58.0,725.0,2027.0,597.0,39.0,361.0,507.0,334.0,312.0,6742.0
4,Tony_Blair,-2.303506,-0.646863,0.022650,1.529321,0.472518,0.093752,0.199501,-0.097458,-0.722055,...,25.0,374.0,2097.0,485.0,12.0,340.0,622.0,450.0,491.0,16778.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1555,George_W_Bush,-0.754818,-0.751547,-0.307345,0.836765,-0.443559,0.034152,1.786415,0.199164,-0.641788,...,41.0,496.0,1763.0,513.0,22.0,445.0,518.0,527.0,430.0,11888.0
1556,Tony_Blair,0.356975,-0.575622,-1.112081,-0.147580,2.257156,1.439668,-0.556521,0.081855,1.255535,...,45.0,646.0,3227.0,691.0,25.0,573.0,724.0,379.0,567.0,12012.0
1557,George_W_Bush,2.378569,0.299064,-0.343518,1.097499,0.782947,0.991848,-0.635729,-0.408563,-0.150484,...,40.0,353.0,1540.0,480.0,35.0,355.0,396.0,395.0,318.0,27232.0
1558,Tony_Blair,0.732607,-0.158723,-2.044487,0.405323,1.605780,-0.532496,-0.508825,0.240685,0.907796,...,35.0,522.0,1654.0,719.0,30.0,543.0,728.0,603.0,519.0,8109.0


In [8]:
print("Number of persons are ",df_50[0].unique().shape[0])

Number of persons are  12


In [10]:
X_50 = df_50.drop(columns = [0])
y_50 = df_50[0]
X_train_50, X_test_50, y_train_50, y_test_50 = train_test_split(X_50,y_50,train_size=0.8,random_state=42)
X_train_50.reset_index(drop=True, inplace = True)
X_test_50.reset_index(drop=True, inplace = True)
y_train_50.reset_index(drop=True, inplace = True)
y_test_50.reset_index(drop=True, inplace = True)
print(X_50)

          1         2         3         4         5         6         7     \
0    -1.824086 -0.401801 -0.521794 -0.587088 -0.509684  0.280968 -1.599484   
1     0.549831 -0.715630  1.769869  0.222841  0.108078  0.219034 -0.818753   
2    -0.628315 -0.750331  0.547397  0.478640  2.337426  0.526050  0.712348   
3     2.176470 -0.997268  0.699885 -1.213921  0.090995  1.768202  0.787791   
4    -2.303506 -0.646863  0.022650  1.529321  0.472518  0.093752  0.199501   
...        ...       ...       ...       ...       ...       ...       ...   
1555 -0.754818 -0.751547 -0.307345  0.836765 -0.443559  0.034152  1.786415   
1556  0.356975 -0.575622 -1.112081 -0.147580  2.257156  1.439668 -0.556521   
1557  2.378569  0.299064 -0.343518  1.097499  0.782947  0.991848 -0.635729   
1558  0.732607 -0.158723 -2.044487  0.405323  1.605780 -0.532496 -0.508825   
1559 -0.431153 -1.341352 -0.610607 -0.411693  0.667436 -0.542481 -1.090542   

          8         9         10    ...  3239   3240    3241   

# All features concatenated

## LinearSVC

In [23]:
# Training Linear Support Vector Classifier (LinearSVC)
lin_clf = svm.LinearSVC(dual=False)
lin_clf.fit(X_train_50, y_train_50)

In [25]:
pred_y_test_50 = lin_clf.predict(X_test_50)

In [26]:
# Accuracy of linearSVC model
linSVC_accuracy = lin_clf.score(X_test_50, y_test_50)
print("Accuracy of linearSVC is ",linSVC_accuracy)

Accuracy of linearSVC is  0.11295806573479411


In [27]:
# Classification Report of linearSVC model
print(classification_report(y_test_50,pred_y_test_50))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.39      0.64      0.49        14
     Colin_Powell       0.56      0.57      0.57        47
  Donald_Rumsfeld       0.53      0.29      0.38        31
    George_W_Bush       0.67      0.76      0.71        92
Gerhard_Schroeder       0.17      0.19      0.18        21
      Hugo_Chavez       0.14      0.10      0.11        21
   Jacques_Chirac       0.14      0.15      0.15        13
    Jean_Chretien       0.20      0.18      0.19        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       0.15      0.12      0.13        17
  Serena_Williams       0.43      0.38      0.40         8
       Tony_Blair       0.23      0.24      0.24        25

         accuracy                           0.44       312
        macro avg       0.30      0.30      0.30       312
     weighted avg       0.43      0.44      0.42       312



## SVM Models with Linear, polynomial and rbf kernals

In [10]:
svm_linear = SVC(kernel='linear') # linear kernal
svm_poly = SVC(kernel='poly', degree= 3)  # Polynomial kernel of degree 3
svm_rbf = SVC(kernel='rbf')  # RBF kernel

In [11]:
svm_linear.fit(X_train_50, y_train_50)
svm_poly.fit(X_train_50, y_train_50)
svm_rbf.fit(X_train_50, y_train_50)

### Linear Kernel

In [41]:
svm_linear.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'linear',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [27]:
lin_pred_y_50 = svm_linear.predict(X_test_50)

In [28]:
lin_accuracy = accuracy_score(y_test_50,lin_pred_y_50)
print("Accuracy of SVM with Linear kernel is ",lin_accuracy)

Accuracy of SVM with RBF kernel is  0.3557692307692308


In [29]:
print(classification_report(y_test_50,lin_pred_y_50))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.53      0.71      0.61        14
     Colin_Powell       0.41      0.47      0.44        47
  Donald_Rumsfeld       0.52      0.35      0.42        31
    George_W_Bush       0.43      0.57      0.49        92
Gerhard_Schroeder       0.26      0.29      0.27        21
      Hugo_Chavez       0.11      0.05      0.07        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.17      0.09      0.12        11
    John_Ashcroft       0.14      0.08      0.11        12
Junichiro_Koizumi       0.17      0.06      0.09        17
  Serena_Williams       0.38      0.38      0.38         8
       Tony_Blair       0.11      0.12      0.11        25

         accuracy                           0.36       312
        macro avg       0.27      0.26      0.26       312
     weighted avg       0.33      0.36      0.33       312



#### Tunning Hyperparameter

In [32]:
c_values = [0.1, 1, 10, 100]

accuracy_values = []
predicted_values = []

for i in c_values:
    svm_lin_tunned = SVC(kernel='linear', C = i )
    svm_lin_tunned.fit(X_train_50, y_train_50)
    lin_t_pred_y_50 = svm_lin_tunned.predict(X_test_50)
    lin_t_accuracy = accuracy_score(y_test_50,lin_t_pred_y_50)
    
    accuracy_values.append(lin_t_accuracy)
    predicted_values.append(lin_t_pred_y_50)
    
print(accuracy_values)

[0.3557692307692308, 0.3557692307692308, 0.3557692307692308, 0.3557692307692308]


Therefor, value of c (between 0.1 to 100) is not affecting the accuracy.
So, no requirement of hyperparameter tuning.

### Polynomial Kernel

In [42]:
svm_poly.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'poly',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [15]:
poly_pred_y_50 = svm_poly.predict(X_test_50)

In [19]:
poly_accuracy = accuracy_score(y_test_50,poly_pred_y_50)
print("Accuracy of SVM with polynomial kernel (of degree 3) is ",poly_accuracy)

0.3076923076923077
Accuracy of SVM with polynomial kernel (of degree 3) is  0.3076923076923077


In [17]:
print(classification_report(y_test_50,poly_pred_y_50))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        14
     Colin_Powell       0.43      0.13      0.20        47
  Donald_Rumsfeld       0.00      0.00      0.00        31
    George_W_Bush       0.30      0.98      0.46        92
Gerhard_Schroeder       0.00      0.00      0.00        21
      Hugo_Chavez       0.00      0.00      0.00        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       0.00      0.00      0.00        17
  Serena_Williams       0.00      0.00      0.00         8
       Tony_Blair       0.00      0.00      0.00        25

         accuracy                           0.31       312
        macro avg       0.06      0.09      0.05       312
     weighted avg       0.15      0.31      0.17       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


#### Tuning Hyperparameters

In [43]:
# Defining parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
grid_search = GridSearchCV(estimator=svm_poly, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train_50,y_train_50)

# Best parameters
poly_best_params_initial = grid_search.best_params_
poly_best_params_initial

{'C': 0.1, 'gamma': 0.001}

In [44]:
svm_poly_tunned = SVC(kernel='poly', gamma=poly_best_params_initial['gamma'], C =poly_best_params_initial['C'] )
svm_poly_tunned.fit(X_train_50, y_train_50)

In [45]:
poly_t_pred_y_50 = svm_poly_tunned.predict(X_test_50)

In [46]:
poly_t_accuracy = accuracy_score(y_test_50,poly_t_pred_y_50)
print("Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is ",poly_t_accuracy)

Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is  0.3141025641025641


In [47]:
print(classification_report(y_test_50,poly_t_pred_y_50))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.24      0.50      0.33        14
     Colin_Powell       0.32      0.36      0.34        47
  Donald_Rumsfeld       0.33      0.23      0.27        31
    George_W_Bush       0.46      0.57      0.50        92
Gerhard_Schroeder       0.08      0.05      0.06        21
      Hugo_Chavez       0.10      0.05      0.06        21
   Jacques_Chirac       0.27      0.23      0.25        13
    Jean_Chretien       0.11      0.09      0.10        11
    John_Ashcroft       0.08      0.08      0.08        12
Junichiro_Koizumi       0.27      0.18      0.21        17
  Serena_Williams       0.12      0.12      0.12         8
       Tony_Blair       0.19      0.16      0.17        25

         accuracy                           0.31       312
        macro avg       0.22      0.22      0.21       312
     weighted avg       0.29      0.31      0.30       312



### RBF Kernel

In [22]:
rbf_pred_y_50 = svm_rbf.predict(X_test_50)

In [23]:
rbf_accuracy = accuracy_score(y_test_50,rbf_pred_y_50)
print("Accuracy of SVM with RBF kernel is ",rbf_accuracy)

Accuracy of SVM with RBF kernel is  0.2980769230769231


In [24]:
print(classification_report(y_test_50,rbf_pred_y_50))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        14
     Colin_Powell       0.28      0.19      0.23        47
  Donald_Rumsfeld       0.00      0.00      0.00        31
    George_W_Bush       0.30      0.91      0.45        92
Gerhard_Schroeder       0.00      0.00      0.00        21
      Hugo_Chavez       0.00      0.00      0.00        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       0.00      0.00      0.00        17
  Serena_Williams       0.00      0.00      0.00         8
       Tony_Blair       0.00      0.00      0.00        25

         accuracy                           0.30       312
        macro avg       0.05      0.09      0.06       312
     weighted avg       0.13      0.30      0.17       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


#### Tuning Hyperparameteres

In [48]:
# Define parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
grid_search = GridSearchCV(estimator=svm_rbf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train_50, y_train_50)

# Best parameters
rbf_best_params_initial = grid_search.best_params_
rbf_best_params_initial

{'C': 0.1, 'gamma': 0.001}

In [50]:
svm_rbf_tunned = SVC(kernel='rbf', gamma=rbf_best_params_initial['gamma'], C =rbf_best_params_initial['C'] )
svm_rbf_tunned.fit(X_train_50, y_train_50)

In [51]:
rbf_t_pred_y_50 = svm_rbf_tunned.predict(X_test_50)

In [52]:
rbf_t_accuracy = accuracy_score(y_test_50,rbf_t_pred_y_50)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy)

Accuracy of SVM with rbf kernel with tunned hyperparameters is  0.2948717948717949


In [53]:
print(classification_report(y_test_50,rbf_t_pred_y_50))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        14
     Colin_Powell       0.00      0.00      0.00        47
  Donald_Rumsfeld       0.00      0.00      0.00        31
    George_W_Bush       0.29      1.00      0.46        92
Gerhard_Schroeder       0.00      0.00      0.00        21
      Hugo_Chavez       0.00      0.00      0.00        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       0.00      0.00      0.00        17
  Serena_Williams       0.00      0.00      0.00         8
       Tony_Blair       0.00      0.00      0.00        25

         accuracy                           0.29       312
        macro avg       0.02      0.08      0.04       312
     weighted avg       0.09      0.29      0.13       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [54]:
# Defining the starting and ending points for C and gamma, and the number of points
start_C = 0.1
end_C = 10
num_C = 10

start_gamma = 0.001
end_gamma = 0.1
num_gamma = 10

# Parameter grid with specified ranges and number of points
param_grid = {'C': np.linspace(start_C, end_C, num_C),
              'gamma': np.linspace(start_gamma, end_gamma, num_gamma)}


# Performing grid search
grid_search = GridSearchCV(estimator=svm_rbf, param_grid=param_grid, cv=5 , scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train_50, y_train_50)

# Best parameters
rbf_best_params_final = grid_search.best_params_
rbf_best_accuracy_final = grid_search.best_score_
rbf_best_params_final

{'C': 0.1, 'gamma': 0.001}

In [55]:
svm_rbf_tunned_f = SVC(kernel='rbf', gamma=rbf_best_params_final['gamma'], C =rbf_best_params_final['C'] )
svm_rbf_tunned_f.fit(X_train_50, y_train_50)

In [56]:
rbf_t_pred_y_50_f = svm_rbf_tunned_f.predict(X_test_50)

In [57]:
rbf_t_accuracy_f = accuracy_score(y_test_50,rbf_t_pred_y_50_f)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_f)

Accuracy of SVM with rbf kernel with tunned hyperparameters is  0.2948717948717949


In [58]:
print(classification_report(y_test_50,rbf_t_pred_y_50_f))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        14
     Colin_Powell       0.00      0.00      0.00        47
  Donald_Rumsfeld       0.00      0.00      0.00        31
    George_W_Bush       0.29      1.00      0.46        92
Gerhard_Schroeder       0.00      0.00      0.00        21
      Hugo_Chavez       0.00      0.00      0.00        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       0.00      0.00      0.00        17
  Serena_Williams       0.00      0.00      0.00         8
       Tony_Blair       0.00      0.00      0.00        25

         accuracy                           0.29       312
        macro avg       0.02      0.08      0.04       312
     weighted avg       0.09      0.29      0.13       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# CNN HoG LBP Features seperately

In [23]:
X_hog = X_50.loc[:,:944]
X_cnn = X_50.loc[:,945:2992]
X_lbp = X_50.loc[:,2993:]

In [24]:
# X_hog
# X_cnn
# X_lbp

## CNN Features

In [25]:
X_train_50_cnn, X_test_50_cnn, y_train_50_cnn, y_test_50_cnn = train_test_split(X_cnn,y_50,train_size=0.8,random_state=42)
X_train_50_cnn.reset_index(drop=True, inplace = True)
X_test_50_cnn.reset_index(drop=True, inplace = True)
y_train_50_cnn.reset_index(drop=True, inplace = True)
y_test_50_cnn.reset_index(drop=True, inplace = True)

### LinearSVC

In [26]:
# Training Linear Support Vector Classifier (LinearSVC)
lin_clf = svm.LinearSVC(dual=False)
lin_clf.fit(X_train_50_cnn, y_train_50_cnn)

In [27]:
pred_y_test_50_cnn = lin_clf.predict(X_test_50_cnn)

In [28]:
# Accuracy of linearSVC model
linSVC_accuracy = lin_clf.score(X_test_50_cnn, y_test_50_cnn)
print("Accuracy of linearSVC is ",linSVC_accuracy)

Accuracy of linearSVC is  0.7596153846153846


In [29]:
# Classification Report of linearSVC model
print(classification_report(y_test_50_cnn,pred_y_test_50_cnn))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.80      0.86      0.83        14
     Colin_Powell       0.81      0.89      0.85        47
  Donald_Rumsfeld       0.81      0.68      0.74        31
    George_W_Bush       0.80      0.85      0.82        92
Gerhard_Schroeder       0.48      0.48      0.48        21
      Hugo_Chavez       0.79      0.71      0.75        21
   Jacques_Chirac       0.88      0.54      0.67        13
    Jean_Chretien       0.50      0.45      0.48        11
    John_Ashcroft       0.64      0.58      0.61        12
Junichiro_Koizumi       0.88      0.82      0.85        17
  Serena_Williams       0.78      0.88      0.82         8
       Tony_Blair       0.70      0.76      0.73        25

         accuracy                           0.76       312
        macro avg       0.74      0.71      0.72       312
     weighted avg       0.76      0.76      0.76       312



### SVM Models with Linear, polynomial and rbf kernals

In [30]:
svm_linear = SVC(kernel='linear') # linear kernal
svm_poly = SVC(kernel='poly', degree= 3)  # Polynomial kernel of degree 3
svm_rbf = SVC(kernel='rbf')  # RBF kernel

In [31]:
svm_linear.fit(X_train_50_cnn, y_train_50_cnn)
svm_poly.fit(X_train_50_cnn, y_train_50_cnn)
svm_rbf.fit(X_train_50_cnn, y_train_50_cnn)

#### Linear Kernel

In [32]:
svm_linear.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'linear',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [33]:
lin_pred_y_50_cnn = svm_linear.predict(X_test_50_cnn)

In [34]:
lin_accuracy_cnn = accuracy_score(y_test_50_cnn,lin_pred_y_50_cnn)
print("Accuracy of SVM with Linear kernel is ",lin_accuracy_cnn)

Accuracy of SVM with Linear kernel is  0.7660256410256411


In [35]:
print(classification_report(y_test_50_cnn,lin_pred_y_50_cnn))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.60      0.86      0.71        14
     Colin_Powell       0.73      0.79      0.76        47
  Donald_Rumsfeld       0.89      0.77      0.83        31
    George_W_Bush       0.78      0.92      0.85        92
Gerhard_Schroeder       0.62      0.48      0.54        21
      Hugo_Chavez       0.94      0.76      0.84        21
   Jacques_Chirac       0.70      0.54      0.61        13
    Jean_Chretien       0.80      0.36      0.50        11
    John_Ashcroft       0.45      0.42      0.43        12
Junichiro_Koizumi       0.93      0.82      0.88        17
  Serena_Williams       0.89      1.00      0.94         8
       Tony_Blair       0.77      0.68      0.72        25

         accuracy                           0.77       312
        macro avg       0.76      0.70      0.72       312
     weighted avg       0.77      0.77      0.76       312



##### Tunning Hyperparameter

In [37]:
c_values = [0.1, 1, 10, 100]

accuracy_values_cnn = []
predicted_values_cnn = []

for i in c_values:
    svm_lin_tunned_cnn = SVC(kernel='linear', C = i )
    svm_lin_tunned_cnn.fit(X_train_50_cnn, y_train_50_cnn)
    lin_t_pred_y_50_cnn = svm_lin_tunned_cnn.predict(X_test_50_cnn)
    lin_t_accuracy_cnn = accuracy_score(y_test_50_cnn,lin_t_pred_y_50_cnn)
    
    accuracy_values_cnn.append(lin_t_accuracy_cnn)
    predicted_values_cnn.append(lin_t_pred_y_50_cnn)
    
print(accuracy_values_cnn)

[0.7660256410256411, 0.7660256410256411, 0.7660256410256411, 0.7660256410256411]


Therefor, value of c (between 0.1 to 100) is not affecting the accuracy.
So, no requirement of hyperparameter tuning.

#### Polynomial Kernel

In [38]:
svm_poly.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'poly',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [39]:
poly_pred_y_50_cnn = svm_poly.predict(X_test_50_cnn)

In [40]:
poly_accuracy_cnn = accuracy_score(y_test_50_cnn,poly_pred_y_50_cnn)
print("Accuracy of SVM with polynomial kernel (of degree 3) is ",poly_accuracy_cnn)

Accuracy of SVM with polynomial kernel (of degree 3) is  0.6185897435897436


In [41]:
print(classification_report(y_test_50_cnn,poly_pred_y_50_cnn))

                   precision    recall  f1-score   support

     Ariel_Sharon       1.00      0.64      0.78        14
     Colin_Powell       0.63      0.77      0.69        47
  Donald_Rumsfeld       0.84      0.52      0.64        31
    George_W_Bush       0.50      0.99      0.67        92
Gerhard_Schroeder       0.60      0.14      0.23        21
      Hugo_Chavez       1.00      0.43      0.60        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.75      0.25      0.38        12
Junichiro_Koizumi       0.92      0.71      0.80        17
  Serena_Williams       0.88      0.88      0.88         8
       Tony_Blair       1.00      0.28      0.44        25

         accuracy                           0.62       312
        macro avg       0.68      0.47      0.51       312
     weighted avg       0.66      0.62      0.57       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


##### Tuning Hyperparameters

In [42]:
# Defining parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
grid_search_cnn = GridSearchCV(estimator=svm_poly, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_cnn.fit(X_train_50_cnn,y_train_50_cnn)

# Best parameters
poly_best_params_initial_cnn = grid_search_cnn.best_params_
poly_best_params_initial_cnn

{'C': 0.1, 'gamma': 0.1}

In [44]:
svm_poly_tunned_cnn = SVC(kernel='poly', gamma=poly_best_params_initial_cnn['gamma'], C =poly_best_params_initial_cnn['C'] )
svm_poly_tunned_cnn.fit(X_train_50_cnn, y_train_50_cnn)

In [45]:
poly_t_pred_y_50_cnn = svm_poly_tunned_cnn.predict(X_test_50_cnn)

In [46]:
poly_t_accuracy_cnn = accuracy_score(y_test_50_cnn,poly_t_pred_y_50_cnn)
print("Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is ",poly_t_accuracy_cnn)

Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is  0.7403846153846154


In [47]:
print(classification_report(y_test_50_cnn,poly_t_pred_y_50_cnn))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.71      0.86      0.77        14
     Colin_Powell       0.71      0.74      0.73        47
  Donald_Rumsfeld       0.88      0.68      0.76        31
    George_W_Bush       0.71      0.95      0.81        92
Gerhard_Schroeder       0.62      0.48      0.54        21
      Hugo_Chavez       0.94      0.71      0.81        21
   Jacques_Chirac       0.55      0.46      0.50        13
    Jean_Chretien       0.75      0.27      0.40        11
    John_Ashcroft       0.45      0.42      0.43        12
Junichiro_Koizumi       0.93      0.82      0.88        17
  Serena_Williams       0.88      0.88      0.88         8
       Tony_Blair       0.84      0.64      0.73        25

         accuracy                           0.74       312
        macro avg       0.75      0.66      0.69       312
     weighted avg       0.75      0.74      0.73       312



#### RBF Kernel

In [48]:
rbf_pred_y_50_cnn = svm_rbf.predict(X_test_50_cnn)

In [49]:
rbf_accuracy_cnn = accuracy_score(y_test_50_cnn,rbf_pred_y_50_cnn)
print("Accuracy of SVM with RBF kernel is ",rbf_accuracy_cnn)

Accuracy of SVM with RBF kernel is  0.5576923076923077


In [50]:
print(classification_report(y_test_50_cnn,rbf_pred_y_50_cnn))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.90      0.64      0.75        14
     Colin_Powell       0.58      0.74      0.65        47
  Donald_Rumsfeld       0.67      0.26      0.37        31
    George_W_Bush       0.46      0.98      0.63        92
Gerhard_Schroeder       0.60      0.14      0.23        21
      Hugo_Chavez       0.86      0.29      0.43        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       1.00      0.65      0.79        17
  Serena_Williams       1.00      1.00      1.00         8
       Tony_Blair       1.00      0.16      0.28        25

         accuracy                           0.56       312
        macro avg       0.59      0.40      0.43       312
     weighted avg       0.59      0.56      0.49       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


##### Tuning Hyperparameteres

In [51]:
# Define parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
grid_search_cnn = GridSearchCV(estimator=svm_rbf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_cnn.fit(X_train_50_cnn, y_train_50_cnn)

# Best parameters
rbf_best_params_initial_cnn = grid_search_cnn.best_params_
rbf_best_params_initial_cnn

{'C': 100, 'gamma': 0.001}

In [52]:
svm_rbf_tunned_cnn = SVC(kernel='rbf', gamma=rbf_best_params_initial_cnn['gamma'], C =rbf_best_params_initial_cnn['C'] )
svm_rbf_tunned_cnn.fit(X_train_50_cnn, y_train_50_cnn)

In [53]:
rbf_t_pred_y_50_cnn = svm_rbf_tunned_cnn.predict(X_test_50_cnn)

In [54]:
rbf_t_accuracy_cnn = accuracy_score(y_test_50_cnn,rbf_t_pred_y_50_cnn)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_cnn)

Accuracy of SVM with rbf kernel with tunned hyperparameters is  0.7532051282051282


In [55]:
print(classification_report(y_test_50_cnn,rbf_t_pred_y_50_cnn))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.67      0.86      0.75        14
     Colin_Powell       0.69      0.77      0.73        47
  Donald_Rumsfeld       0.89      0.77      0.83        31
    George_W_Bush       0.79      0.90      0.84        92
Gerhard_Schroeder       0.53      0.48      0.50        21
      Hugo_Chavez       0.94      0.76      0.84        21
   Jacques_Chirac       0.64      0.54      0.58        13
    Jean_Chretien       1.00      0.27      0.43        11
    John_Ashcroft       0.42      0.42      0.42        12
Junichiro_Koizumi       0.93      0.82      0.88        17
  Serena_Williams       0.89      1.00      0.94         8
       Tony_Blair       0.71      0.68      0.69        25

         accuracy                           0.75       312
        macro avg       0.76      0.69      0.70       312
     weighted avg       0.76      0.75      0.75       312



In [56]:
# Defining the starting and ending points for C and gamma, and the number of points
start_C = 0.1
end_C = 10
num_C = 10

start_gamma = 0.001
end_gamma = 0.1
num_gamma = 10

# Parameter grid with specified ranges and number of points
param_grid = {'C': np.linspace(start_C, end_C, num_C),
              'gamma': np.linspace(start_gamma, end_gamma, num_gamma)}


# Performing grid search
grid_search_cnn = GridSearchCV(estimator=svm_rbf, param_grid=param_grid, cv=5 , scoring='accuracy', n_jobs=-1)
grid_search_cnn.fit(X_train_50_cnn, y_train_50_cnn)

# Best parameters
rbf_best_params_final_cnn = grid_search_cnn.best_params_
rbf_best_accuracy_final_cnn = grid_search_cnn.best_score_
rbf_best_params_final_cnn

{'C': 8.9, 'gamma': 0.012}

In [57]:
svm_rbf_tunned_f_cnn = SVC(kernel='rbf', gamma=rbf_best_params_final_cnn['gamma'], C =rbf_best_params_final_cnn['C'] )
svm_rbf_tunned_f_cnn.fit(X_train_50_cnn, y_train_50_cnn)

In [58]:
rbf_t_pred_y_50_f_cnn = svm_rbf_tunned_f_cnn.predict(X_test_50_cnn)

In [59]:
rbf_t_accuracy_f_cnn = accuracy_score(y_test_50_cnn,rbf_t_pred_y_50_f_cnn)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_f_cnn)

Accuracy of SVM with rbf kernel with tunned hyperparameters is  0.7275641025641025


In [60]:
print(classification_report(y_test_50_cnn,rbf_t_pred_y_50_f_cnn))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.86      0.86      0.86        14
     Colin_Powell       0.66      0.74      0.70        47
  Donald_Rumsfeld       0.87      0.65      0.74        31
    George_W_Bush       0.66      0.98      0.79        92
Gerhard_Schroeder       0.67      0.38      0.48        21
      Hugo_Chavez       0.87      0.62      0.72        21
   Jacques_Chirac       0.71      0.38      0.50        13
    Jean_Chretien       0.67      0.18      0.29        11
    John_Ashcroft       0.67      0.50      0.57        12
Junichiro_Koizumi       1.00      0.76      0.87        17
  Serena_Williams       1.00      1.00      1.00         8
       Tony_Blair       0.83      0.60      0.70        25

         accuracy                           0.73       312
        macro avg       0.79      0.64      0.68       312
     weighted avg       0.75      0.73      0.71       312



## HoG Features

In [61]:
X_train_50_hog, X_test_50_hog, y_train_50_hog, y_test_50_hog = train_test_split(X_hog,y_50,train_size=0.8,random_state=42)
X_train_50_hog.reset_index(drop=True, inplace = True)
X_test_50_hog.reset_index(drop=True, inplace = True)
y_train_50_hog.reset_index(drop=True, inplace = True)
y_test_50_hog.reset_index(drop=True, inplace = True)

### LinearSVC

In [62]:
# Training Linear Support Vector Classifier (LinearSVC)
lin_clf = svm.LinearSVC(dual=False)
lin_clf.fit(X_train_50_hog, y_train_50_hog)

In [63]:
pred_y_test_50_hog = lin_clf.predict(X_test_50_hog)

In [64]:
# Accuracy of linearSVC model
linSVC_accuracy_hog = lin_clf.score(X_test_50_hog, y_test_50_hog)
print("Accuracy of linearSVC is ",linSVC_accuracy_hog)

Accuracy of linearSVC is  0.8397435897435898


In [65]:
# Classification Report of linearSVC model
print(classification_report(y_test_50_hog,pred_y_test_50_hog))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.71      0.86      0.77        14
     Colin_Powell       0.92      0.96      0.94        47
  Donald_Rumsfeld       0.88      0.74      0.81        31
    George_W_Bush       0.81      0.96      0.88        92
Gerhard_Schroeder       0.78      0.67      0.72        21
      Hugo_Chavez       0.82      0.67      0.74        21
   Jacques_Chirac       0.82      0.69      0.75        13
    Jean_Chretien       0.73      0.73      0.73        11
    John_Ashcroft       0.83      0.83      0.83        12
Junichiro_Koizumi       0.92      0.71      0.80        17
  Serena_Williams       1.00      0.75      0.86         8
       Tony_Blair       0.91      0.84      0.88        25

         accuracy                           0.84       312
        macro avg       0.84      0.78      0.81       312
     weighted avg       0.84      0.84      0.84       312



### SVM Models with Linear, polynomial and rbf kernals

In [66]:
svm_linear = SVC(kernel='linear') # linear kernal
svm_poly = SVC(kernel='poly', degree= 3)  # Polynomial kernel of degree 3
svm_rbf = SVC(kernel='rbf')  # RBF kernel

In [67]:
svm_linear.fit(X_train_50_hog, y_train_50_hog)
svm_poly.fit(X_train_50_hog, y_train_50_hog)
svm_rbf.fit(X_train_50_hog, y_train_50_hog)

#### Linear Kernel

In [68]:
svm_linear.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'linear',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [69]:
lin_pred_y_50_hog = svm_linear.predict(X_test_50_hog)

In [70]:
lin_accuracy_hog = accuracy_score(y_test_50_hog,lin_pred_y_50_hog)
print("Accuracy of SVM with Linear kernel is ",lin_accuracy_hog)

Accuracy of SVM with Linear kernel is  0.8012820512820513


In [71]:
print(classification_report(y_test_50_hog,lin_pred_y_50_hog))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.75      0.86      0.80        14
     Colin_Powell       0.90      0.91      0.91        47
  Donald_Rumsfeld       0.79      0.74      0.77        31
    George_W_Bush       0.71      0.99      0.83        92
Gerhard_Schroeder       0.87      0.62      0.72        21
      Hugo_Chavez       0.87      0.62      0.72        21
   Jacques_Chirac       0.90      0.69      0.78        13
    Jean_Chretien       1.00      0.64      0.78        11
    John_Ashcroft       0.80      0.33      0.47        12
Junichiro_Koizumi       0.92      0.71      0.80        17
  Serena_Williams       1.00      0.62      0.77         8
       Tony_Blair       0.86      0.72      0.78        25

         accuracy                           0.80       312
        macro avg       0.86      0.70      0.76       312
     weighted avg       0.82      0.80      0.79       312



##### Tunning Hyperparameter

In [72]:
c_values = [0.1, 1, 10, 100]

accuracy_values_hog = []
predicted_values_hog = []

for i in c_values:
    svm_lin_tunned_hog = SVC(kernel='linear', C = i )
    svm_lin_tunned_hog.fit(X_train_50_hog, y_train_50_hog)
    lin_t_pred_y_50_hog = svm_lin_tunned_hog.predict(X_test_50_hog)
    lin_t_accuracy_hog = accuracy_score(y_test_50_hog,lin_t_pred_y_50_hog)
    
    accuracy_values_hog.append(lin_t_accuracy_hog)
    predicted_values_hog.append(lin_t_pred_y_50_hog)
    
print(accuracy_values_hog)

[0.8012820512820513, 0.8012820512820513, 0.8012820512820513, 0.8012820512820513]


Therefor, value of c (between 0.1 to 100) is not affecting the accuracy.
So, no requirement of hyperparameter tuning.

#### Polynomial Kernel

In [73]:
svm_poly.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'poly',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [74]:
poly_pred_y_50_hog = svm_poly.predict(X_test_50_hog)

In [75]:
poly_accuracy_hog = accuracy_score(y_test_50_hog,poly_pred_y_50_hog)
print("Accuracy of SVM with polynomial kernel (of degree 3) is ",poly_accuracy_hog)

Accuracy of SVM with polynomial kernel (of degree 3) is  0.34294871794871795


In [76]:
print(classification_report(y_test_50_hog,poly_pred_y_50_hog))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        14
     Colin_Powell       0.74      0.30      0.42        47
  Donald_Rumsfeld       0.00      0.00      0.00        31
    George_W_Bush       0.32      1.00      0.48        92
Gerhard_Schroeder       0.00      0.00      0.00        21
      Hugo_Chavez       0.00      0.00      0.00        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       1.00      0.06      0.11        17
  Serena_Williams       0.00      0.00      0.00         8
       Tony_Blair       0.00      0.00      0.00        25

         accuracy                           0.34       312
        macro avg       0.17      0.11      0.08       312
     weighted avg       0.26      0.34      0.21       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


##### Tuning Hyperparameters

In [77]:
# Defining parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
grid_search_hog = GridSearchCV(estimator=svm_poly, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_hog.fit(X_train_50_hog,y_train_50_hog)

# Best parameters
poly_best_params_initial_hog = grid_search_hog.best_params_
poly_best_params_initial_hog

{'C': 0.1, 'gamma': 0.1}

In [78]:
svm_poly_tunned_hog = SVC(kernel='poly', gamma=poly_best_params_initial_hog['gamma'], C =poly_best_params_initial_hog['C'] )
svm_poly_tunned_hog.fit(X_train_50_hog, y_train_50_hog)

In [79]:
poly_t_pred_y_50_hog = svm_poly_tunned_hog.predict(X_test_50_hog)

In [80]:
poly_t_accuracy_hog = accuracy_score(y_test_50_hog,poly_t_pred_y_50_hog)
print("Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is ",poly_t_accuracy_hog)

Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is  0.3814102564102564


In [81]:
print(classification_report(y_test_50_hog,poly_t_pred_y_50_hog))

                   precision    recall  f1-score   support

     Ariel_Sharon       1.00      0.07      0.13        14
     Colin_Powell       0.83      0.40      0.54        47
  Donald_Rumsfeld       1.00      0.10      0.18        31
    George_W_Bush       0.33      1.00      0.49        92
Gerhard_Schroeder       0.00      0.00      0.00        21
      Hugo_Chavez       0.00      0.00      0.00        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       1.00      0.24      0.38        17
  Serena_Williams       0.00      0.00      0.00         8
       Tony_Blair       0.00      0.00      0.00        25

         accuracy                           0.38       312
        macro avg       0.35      0.15      0.14       312
     weighted avg       0.42      0.38      0.27       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


#### RBF Kernel

In [82]:
rbf_pred_y_50_hog = svm_rbf.predict(X_test_50_hog)

In [83]:
rbf_accuracy_hog = accuracy_score(y_test_50_hog,rbf_pred_y_50_hog)
print("Accuracy of SVM with RBF kernel is ",rbf_accuracy_hog)

Accuracy of SVM with RBF kernel is  0.5737179487179487


In [84]:
print(classification_report(y_test_50_hog,rbf_pred_y_50_hog))

                   precision    recall  f1-score   support

     Ariel_Sharon       1.00      0.43      0.60        14
     Colin_Powell       0.80      0.94      0.86        47
  Donald_Rumsfeld       1.00      0.13      0.23        31
    George_W_Bush       0.43      1.00      0.60        92
Gerhard_Schroeder       0.88      0.33      0.48        21
      Hugo_Chavez       0.00      0.00      0.00        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       1.00      0.47      0.64        17
  Serena_Williams       1.00      0.38      0.55         8
       Tony_Blair       1.00      0.60      0.75        25

         accuracy                           0.57       312
        macro avg       0.59      0.36      0.39       312
     weighted avg       0.61      0.57      0.50       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


##### Tuning Hyperparameteres

In [85]:
# Define parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
grid_search_hog = GridSearchCV(estimator=svm_rbf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_hog.fit(X_train_50_hog, y_train_50_hog)

# Best parameters
rbf_best_params_initial_hog = grid_search_cnn.best_params_
rbf_best_params_initial_hog

{'C': 8.9, 'gamma': 0.012}

In [86]:
svm_rbf_tunned_hog = SVC(kernel='rbf', gamma=rbf_best_params_initial_hog['gamma'], C =rbf_best_params_initial_hog['C'] )
svm_rbf_tunned_hog.fit(X_train_50_hog, y_train_50_hog)

In [87]:
rbf_t_pred_y_50_hog = svm_rbf_tunned_hog.predict(X_test_50_hog)

In [88]:
rbf_t_accuracy_hog = accuracy_score(y_test_50_hog,rbf_t_pred_y_50_hog)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_hog)

Accuracy of SVM with rbf kernel with tunned hyperparameters is  0.7788461538461539


In [90]:
print(classification_report(y_test_50_hog,rbf_t_pred_y_50_hog))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.85      0.79      0.81        14
     Colin_Powell       0.90      0.96      0.93        47
  Donald_Rumsfeld       0.90      0.61      0.73        31
    George_W_Bush       0.62      1.00      0.76        92
Gerhard_Schroeder       1.00      0.62      0.76        21
      Hugo_Chavez       0.91      0.48      0.62        21
   Jacques_Chirac       0.89      0.62      0.73        13
    Jean_Chretien       1.00      0.64      0.78        11
    John_Ashcroft       0.83      0.42      0.56        12
Junichiro_Koizumi       1.00      0.65      0.79        17
  Serena_Williams       1.00      0.50      0.67         8
       Tony_Blair       1.00      0.72      0.84        25

         accuracy                           0.78       312
        macro avg       0.91      0.67      0.75       312
     weighted avg       0.84      0.78      0.77       312



In [91]:
# Defining the starting and ending points for C and gamma, and the number of points
start_C = 0.1
end_C = 10
num_C = 10

start_gamma = 0.001
end_gamma = 0.1
num_gamma = 10

# Parameter grid with specified ranges and number of points
param_grid = {'C': np.linspace(start_C, end_C, num_C),
              'gamma': np.linspace(start_gamma, end_gamma, num_gamma)}


# Performing grid search
grid_search_hog = GridSearchCV(estimator=svm_rbf, param_grid=param_grid, cv=5 , scoring='accuracy', n_jobs=-1)
grid_search_hog.fit(X_train_50_hog, y_train_50_hog)

# Best parameters
rbf_best_params_final_hog = grid_search_cnn.best_params_
rbf_best_accuracy_final_hog = grid_search_cnn.best_score_
rbf_best_params_final_hog

{'C': 8.9, 'gamma': 0.012}

In [92]:
svm_rbf_tunned_f_hog = SVC(kernel='rbf', gamma=rbf_best_params_final_hog['gamma'], C =rbf_best_params_final_hog['C'] )
svm_rbf_tunned_f_hog.fit(X_train_50_hog, y_train_50_hog)

In [93]:
rbf_t_pred_y_50_f_hog = svm_rbf_tunned_f_hog.predict(X_test_50_hog)

In [94]:
rbf_t_accuracy_f_hog = accuracy_score(y_test_50_hog,rbf_t_pred_y_50_f_hog)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_f_hog)

Accuracy of SVM with rbf kernel with tunned hyperparameters is  0.7788461538461539


In [95]:
print(classification_report(y_test_50_hog,rbf_t_pred_y_50_f_hog))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.85      0.79      0.81        14
     Colin_Powell       0.90      0.96      0.93        47
  Donald_Rumsfeld       0.90      0.61      0.73        31
    George_W_Bush       0.62      1.00      0.76        92
Gerhard_Schroeder       1.00      0.62      0.76        21
      Hugo_Chavez       0.91      0.48      0.62        21
   Jacques_Chirac       0.89      0.62      0.73        13
    Jean_Chretien       1.00      0.64      0.78        11
    John_Ashcroft       0.83      0.42      0.56        12
Junichiro_Koizumi       1.00      0.65      0.79        17
  Serena_Williams       1.00      0.50      0.67         8
       Tony_Blair       1.00      0.72      0.84        25

         accuracy                           0.78       312
        macro avg       0.91      0.67      0.75       312
     weighted avg       0.84      0.78      0.77       312



## LBP Features

In [96]:
X_train_50_lbp, X_test_50_lbp, y_train_50_lbp, y_test_50_lbp = train_test_split(X_lbp,y_50,train_size=0.8,random_state=42)
X_train_50_lbp.reset_index(drop=True, inplace = True)
X_test_50_lbp.reset_index(drop=True, inplace = True)
y_train_50_lbp.reset_index(drop=True, inplace = True)
y_test_50_lbp.reset_index(drop=True, inplace = True)

### LinearSVC

In [97]:
# Training Linear Support Vector Classifier (LinearSVC)
lin_clf = svm.LinearSVC(dual=False)
lin_clf.fit(X_train_50_lbp, y_train_50_lbp)

In [98]:
pred_y_test_50_lbp = lin_clf.predict(X_test_50_lbp)

In [99]:
# Accuracy of linearSVC model
linSVC_accuracy_lbp = lin_clf.score(X_test_50_lbp, y_test_50_lbp)
print("Accuracy of linearSVC is ",linSVC_accuracy_lbp)

Accuracy of linearSVC is  0.2692307692307692


In [100]:
# Classification Report of linearSVC model
print(classification_report(y_test_50_lbp,pred_y_test_50_lbp))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.27      0.43      0.33        14
     Colin_Powell       0.34      0.30      0.32        47
  Donald_Rumsfeld       0.35      0.26      0.30        31
    George_W_Bush       0.44      0.43      0.44        92
Gerhard_Schroeder       0.08      0.14      0.11        21
      Hugo_Chavez       0.12      0.10      0.11        21
   Jacques_Chirac       0.13      0.15      0.14        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       0.17      0.12      0.14        17
  Serena_Williams       0.43      0.38      0.40         8
       Tony_Blair       0.13      0.16      0.14        25

         accuracy                           0.27       312
        macro avg       0.21      0.21      0.20       312
     weighted avg       0.28      0.27      0.27       312



### SVM Models with Linear, polynomial and rbf kernals

In [101]:
svm_linear = SVC(kernel='linear') # linear kernal
svm_poly = SVC(kernel='poly', degree= 3)  # Polynomial kernel of degree 3
svm_rbf = SVC(kernel='rbf')  # RBF kernel

In [102]:
svm_linear.fit(X_train_50_lbp, y_train_50_lbp)
svm_poly.fit(X_train_50_lbp, y_train_50_lbp)
svm_rbf.fit(X_train_50_lbp, y_train_50_lbp)

#### Linear Kernel

In [103]:
svm_linear.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'linear',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [104]:
lin_pred_y_50_lbp = svm_linear.predict(X_test_50_lbp)

In [105]:
lin_accuracy_lbp = accuracy_score(y_test_50_lbp,lin_pred_y_50_lbp)
print("Accuracy of SVM with Linear kernel is ",lin_accuracy_lbp)

Accuracy of SVM with Linear kernel is  0.3269230769230769


In [106]:
print(classification_report(y_test_50_lbp,lin_pred_y_50_lbp))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.55      0.79      0.65        14
     Colin_Powell       0.37      0.49      0.42        47
  Donald_Rumsfeld       0.44      0.35      0.39        31
    George_W_Bush       0.40      0.45      0.42        92
Gerhard_Schroeder       0.24      0.24      0.24        21
      Hugo_Chavez       0.17      0.10      0.12        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.17      0.09      0.12        11
    John_Ashcroft       0.14      0.08      0.11        12
Junichiro_Koizumi       0.17      0.06      0.09        17
  Serena_Williams       0.27      0.38      0.32         8
       Tony_Blair       0.11      0.12      0.12        25

         accuracy                           0.33       312
        macro avg       0.25      0.26      0.25       312
     weighted avg       0.30      0.33      0.31       312



##### Tunning Hyperparameter

In [107]:
c_values = [0.1, 1, 10, 100]

accuracy_values_lbp = []
predicted_values_lbp = []

for i in c_values:
    svm_lin_tunned_lbp = SVC(kernel='linear', C = i )
    svm_lin_tunned_lbp.fit(X_train_50_lbp, y_train_50_lbp)
    lin_t_pred_y_50_lbp = svm_lin_tunned_lbp.predict(X_test_50_lbp)
    lin_t_accuracy_lbp = accuracy_score(y_test_50_lbp,lin_t_pred_y_50_lbp)
    
    accuracy_values_lbp.append(lin_t_accuracy_lbp)
    predicted_values_lbp.append(lin_t_pred_y_50_lbp)
    
print(accuracy_values_lbp)

[0.3269230769230769, 0.3269230769230769, 0.3269230769230769, 0.3269230769230769]


Therefor, value of c (between 0.1 to 100) is not affecting the accuracy.
So, no requirement of hyperparameter tuning.

#### Polynomial Kernel

In [108]:
svm_poly.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'poly',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [109]:
poly_pred_y_50_lbp = svm_poly.predict(X_test_50_lbp)

In [110]:
poly_accuracy_lbp = accuracy_score(y_test_50_lbp,poly_pred_y_50_lbp)
print("Accuracy of SVM with polynomial kernel (of degree 3) is ",poly_accuracy_lbp)

Accuracy of SVM with polynomial kernel (of degree 3) is  0.3076923076923077


In [111]:
print(classification_report(y_test_50_lbp,poly_pred_y_50_lbp))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        14
     Colin_Powell       0.40      0.13      0.19        47
  Donald_Rumsfeld       0.00      0.00      0.00        31
    George_W_Bush       0.30      0.98      0.46        92
Gerhard_Schroeder       0.00      0.00      0.00        21
      Hugo_Chavez       0.00      0.00      0.00        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       0.00      0.00      0.00        17
  Serena_Williams       0.00      0.00      0.00         8
       Tony_Blair       0.00      0.00      0.00        25

         accuracy                           0.31       312
        macro avg       0.06      0.09      0.05       312
     weighted avg       0.15      0.31      0.17       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


##### Tuning Hyperparameters

In [112]:
# Defining parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
grid_search_lbp = GridSearchCV(estimator=svm_poly, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_lbp.fit(X_train_50_lbp,y_train_50_lbp)

# Best parameters
poly_best_params_initial_lbp = grid_search_lbp.best_params_
poly_best_params_initial_lbp

{'C': 0.1, 'gamma': 1}

In [113]:
svm_poly_tunned_lbp = SVC(kernel='poly', gamma=poly_best_params_initial_lbp['gamma'], C =poly_best_params_initial_lbp['C'] )
svm_poly_tunned_lbp.fit(X_train_50_lbp, y_train_50_lbp)

In [114]:
poly_t_pred_y_50_lbp = svm_poly_tunned_lbp.predict(X_test_50_lbp)

In [115]:
poly_t_accuracy_lbp = accuracy_score(y_test_50_lbp,poly_t_pred_y_50_lbp)
print("Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is ",poly_t_accuracy_lbp)

Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is  0.3108974358974359


In [116]:
print(classification_report(y_test_50_lbp,poly_t_pred_y_50_lbp))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.24      0.50      0.33        14
     Colin_Powell       0.31      0.36      0.34        47
  Donald_Rumsfeld       0.30      0.19      0.24        31
    George_W_Bush       0.47      0.57      0.51        92
Gerhard_Schroeder       0.08      0.05      0.06        21
      Hugo_Chavez       0.10      0.05      0.06        21
   Jacques_Chirac       0.25      0.23      0.24        13
    Jean_Chretien       0.11      0.09      0.10        11
    John_Ashcroft       0.08      0.08      0.08        12
Junichiro_Koizumi       0.27      0.18      0.21        17
  Serena_Williams       0.11      0.12      0.12         8
       Tony_Blair       0.19      0.16      0.17        25

         accuracy                           0.31       312
        macro avg       0.21      0.22      0.20       312
     weighted avg       0.29      0.31      0.29       312



#### RBF Kernel

In [117]:
rbf_pred_y_50_lbp = svm_rbf.predict(X_test_50_lbp)

In [118]:
rbf_accuracy_lbp = accuracy_score(y_test_50_lbp,rbf_pred_y_50_lbp)
print("Accuracy of SVM with RBF kernel is ",rbf_accuracy_lbp)

Accuracy of SVM with RBF kernel is  0.3076923076923077


In [119]:
print(classification_report(y_test_50_lbp,rbf_pred_y_50_lbp))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        14
     Colin_Powell       0.33      0.26      0.29        47
  Donald_Rumsfeld       0.00      0.00      0.00        31
    George_W_Bush       0.30      0.91      0.46        92
Gerhard_Schroeder       0.00      0.00      0.00        21
      Hugo_Chavez       0.00      0.00      0.00        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       0.00      0.00      0.00        17
  Serena_Williams       0.00      0.00      0.00         8
       Tony_Blair       0.00      0.00      0.00        25

         accuracy                           0.31       312
        macro avg       0.05      0.10      0.06       312
     weighted avg       0.14      0.31      0.18       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


##### Tuning Hyperparameteres

In [120]:
# Define parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
grid_search_lbp = GridSearchCV(estimator=svm_rbf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_lbp.fit(X_train_50_lbp, y_train_50_lbp)

# Best parameters
rbf_best_params_initial_lbp = grid_search_lbp.best_params_
rbf_best_params_initial_lbp

{'C': 0.1, 'gamma': 0.001}

In [121]:
svm_rbf_tunned_lbp = SVC(kernel='rbf', gamma=rbf_best_params_initial_lbp['gamma'], C =rbf_best_params_initial_lbp['C'] )
svm_rbf_tunned_lbp.fit(X_train_50_lbp, y_train_50_lbp)

In [122]:
rbf_t_pred_y_50_lbp = svm_rbf_tunned_lbp.predict(X_test_50_lbp)

In [123]:
rbf_t_accuracy_lbp = accuracy_score(y_test_50_lbp,rbf_t_pred_y_50_lbp)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_lbp)

Accuracy of SVM with rbf kernel with tunned hyperparameters is  0.2948717948717949


In [124]:
print(classification_report(y_test_50_lbp,rbf_t_pred_y_50_lbp))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        14
     Colin_Powell       0.00      0.00      0.00        47
  Donald_Rumsfeld       0.00      0.00      0.00        31
    George_W_Bush       0.29      1.00      0.46        92
Gerhard_Schroeder       0.00      0.00      0.00        21
      Hugo_Chavez       0.00      0.00      0.00        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       0.00      0.00      0.00        17
  Serena_Williams       0.00      0.00      0.00         8
       Tony_Blair       0.00      0.00      0.00        25

         accuracy                           0.29       312
        macro avg       0.02      0.08      0.04       312
     weighted avg       0.09      0.29      0.13       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [125]:
# Defining the starting and ending points for C and gamma, and the number of points
start_C = 0.1
end_C = 10
num_C = 5

start_gamma = 0.001
end_gamma = 0.1
num_gamma = 5

# Parameter grid with specified ranges and number of points
param_grid = {'C': np.linspace(start_C, end_C, num_C),
              'gamma': np.linspace(start_gamma, end_gamma, num_gamma)}


# Performing grid search
grid_search_lbp = GridSearchCV(estimator=svm_rbf, param_grid=param_grid, cv=5 , scoring='accuracy', n_jobs=-1)
grid_search_lbp.fit(X_train_50_lbp, y_train_50_lbp)

# Best parameters
rbf_best_params_final_lbp = grid_search_lbp.best_params_
rbf_best_accuracy_final_lbp = grid_search_lbp.best_score_
rbf_best_params_final_lbp

{'C': 0.1, 'gamma': 0.001}

In [126]:
svm_rbf_tunned_f_lbp = SVC(kernel='rbf', gamma=rbf_best_params_final_lbp['gamma'], C =rbf_best_params_final_lbp['C'] )
svm_rbf_tunned_f_lbp.fit(X_train_50_lbp, y_train_50_lbp)

In [127]:
rbf_t_pred_y_50_f_lbp = svm_rbf_tunned_f_lbp.predict(X_test_50_lbp)

In [128]:
rbf_t_accuracy_f_lbp = accuracy_score(y_test_50_lbp,rbf_t_pred_y_50_f_lbp)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_f_lbp)

Accuracy of SVM with rbf kernel with tunned hyperparameters is  0.2948717948717949


In [129]:
print(classification_report(y_test_50_lbp,rbf_t_pred_y_50_f_lbp))

                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        14
     Colin_Powell       0.00      0.00      0.00        47
  Donald_Rumsfeld       0.00      0.00      0.00        31
    George_W_Bush       0.29      1.00      0.46        92
Gerhard_Schroeder       0.00      0.00      0.00        21
      Hugo_Chavez       0.00      0.00      0.00        21
   Jacques_Chirac       0.00      0.00      0.00        13
    Jean_Chretien       0.00      0.00      0.00        11
    John_Ashcroft       0.00      0.00      0.00        12
Junichiro_Koizumi       0.00      0.00      0.00        17
  Serena_Williams       0.00      0.00      0.00         8
       Tony_Blair       0.00      0.00      0.00        25

         accuracy                           0.29       312
        macro avg       0.02      0.08      0.04       312
     weighted avg       0.09      0.29      0.13       312



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
