# Data Preprocessing

Importing Libraries

In [None]:
from sklearn import svm, datasets
import sklearn.model_selection as model_selection
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd

Loading the dataset

In [None]:
dataset = pd.read_csv('features_resnet_50.csv', on_bad_lines='skip')
X = dataset.iloc[ 1 : , 1 : ].values
y = dataset.iloc[ 1 : , 0 ].values
dataset

Unnamed: 0,category,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_1490,feature_1491,feature_1492,feature_1493,feature_1494,feature_1495,feature_1496,feature_1497,feature_1498,feature_1499
0,NeuralTextures,-208.830640,-51.366436,22.105505,-147.724580,-118.696495,70.681854,-158.035980,-60.336370,-81.641150,...,0.174770,-1.024955,0.550097,-0.667983,-0.249598,-0.086514,0.051813,-0.193782,-0.875143,0.800290
1,NeuralTextures,-206.798250,-51.171688,21.357613,-145.072200,-119.500336,68.893486,-152.899750,-60.237330,-81.942690,...,-0.265358,1.619639,-0.884596,1.297069,0.422281,0.205003,0.507595,0.606314,0.987814,-1.014394
2,NeuralTextures,44.855625,163.526350,28.992188,-0.194592,-166.357700,-21.432076,-11.083757,-74.018010,85.272380,...,0.049644,0.086908,-0.163586,0.283276,-0.606649,0.081803,0.171576,-0.155878,-0.274118,0.340998
3,NeuralTextures,-108.299500,41.846737,-44.416473,21.699093,-28.603456,-30.288048,209.787800,-5.704433,241.421000,...,0.015077,0.121308,-0.113433,-0.103224,0.018173,0.129763,-0.034330,-0.021882,-0.070459,-0.014453
4,NeuralTextures,-109.370780,41.092896,-48.564682,25.477957,-24.968306,-32.800940,216.076190,-5.125932,248.847930,...,-0.190528,-0.182551,0.064347,0.007614,0.026868,-0.092080,0.023531,0.002670,0.140599,0.006854
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,Deepfakes,97.526540,-81.080696,-157.692370,-52.393520,50.411045,30.970957,-65.768430,0.526890,-39.739212,...,-0.859035,0.863212,-0.414095,-0.128424,-0.070445,-0.354402,-0.626486,-0.890920,-0.251958,-0.065793
1596,Deepfakes,98.847750,-81.677690,-158.850280,-51.383717,51.222370,30.486883,-64.718030,0.617533,-39.304176,...,1.220299,-1.267858,0.270246,0.313363,-0.955972,-0.371023,0.307595,0.910387,0.288813,-0.773428
1597,Deepfakes,98.551970,-80.302120,-159.712360,-49.903286,53.196720,30.995779,-63.454690,-0.744103,-36.466087,...,0.128490,0.434769,-0.033786,-0.569766,0.682816,0.234970,-0.380222,0.207625,-0.006370,0.762144
1598,Deepfakes,101.743590,-78.321690,-159.565730,-47.564743,52.495888,30.036093,-59.953506,-0.897982,-36.605667,...,0.082850,0.169240,-0.037573,-0.011322,0.237362,0.003830,0.058504,-0.241213,0.124224,0.186105


Split the dataset into training set and test set.

In [None]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=0.80, test_size=0.20, random_state=101)

Initialise the 5-Fold Cross Validation

In [None]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Support Vector Machine

Predict on Test Set.

In [None]:
poly_accuracy, rbf_accuracy = 0, 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    poly = svm.SVC(kernel='poly', degree=10, C=1).fit(X_train, y_train)
    rbf = svm.SVC(kernel='rbf', gamma=1, C=1).fit(X_train, y_train)
    
    poly_pred = poly.predict(X_test)
    rbf_pred = rbf.predict(X_test)
    
    poly_accuracy += accuracy_score(y_test, poly_pred)
    rbf_accuracy += accuracy_score(y_test, rbf_pred)


Outcomes.

In [None]:
print('Accuracy (Polynomial Kernel): ', "%.2f" % (poly_accuracy/5*100) + " %")
print('Accuracy (RBF Kernel): ', "%.2f" % (rbf_accuracy/5*100) + " %")

poly_f1 = f1_score(y_test, poly_pred, average='weighted')
print('F1 score (Polynomial Kernel): ', "%.2f" % (poly_f1*100))
rbf_f1 = f1_score(y_test, rbf_pred, average='weighted')
print('F1 score (RBF Kernel): ', "%.2f" % (rbf_f1*100))

print('Confusion Matrix (Polynomial Kernel): \n', confusion_matrix(y_test, poly_pred))
print('Confusion Matrix (RBF Kernel): \n',confusion_matrix(y_test, rbf_pred))

Accuracy (Polynomial Kernel):  70.54 %
Accuracy (RBF Kernel):  22.08 %
F1 score (Polynomial Kernel):  68.52
F1 score (RBF Kernel):  8.10
Confusion Matrix (Polynomial Kernel): 
 [[66  0  5 21]
 [ 1 39  3 36]
 [ 4  4 48 21]
 [ 2  3  4 62]]
Confusion Matrix (RBF Kernel): 
 [[ 0  0  0 92]
 [ 0  0  0 79]
 [ 0  0  0 77]
 [ 0  0  0 71]]


# Random Forest

Importing Libraries

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

Define the Classifier

In [None]:
classifier = RandomForestClassifier(n_estimators = 500, criterion = 'entropy', random_state = 42)

Predicting Test Set Results

In [None]:
rf_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)
    
    rf_accuracy += accuracy_score(y_test, y_pred)
    

Outcomes

In [None]:
rf_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (Random Forest): ', "%.2f" % (rf_accuracy/5*100) + " %")
print('F1 (Random Forest): ', "%.2f" % (rf_f1*100))
print("Confustion Matrix (Random Forest):\n", confusion_matrix(y_test, y_pred))

Accuracy (Random Forest):  82.74 %
F1 (Random Forest):  82.77
Confustion Matrix (Random Forest):
 [[78  1 12  1]
 [ 2 61  3 13]
 [10  1 66  0]
 [ 2  6  4 59]]


# Decision Tree

Importing Libraries

In [None]:
from sklearn.tree import DecisionTreeClassifier

Define the Classifier

In [None]:
clf = DecisionTreeClassifier()

Predicting Test Set Results

In [None]:
dt_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    clf.fit(X_train, y_train)
    
    y_pred = clf.predict(X_test)
    
    dt_accuracy += accuracy_score(y_test, y_pred)

Outcomes

In [None]:
dt_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (Decision Tree): ', "%.2f" % (dt_accuracy/5*100) + " %")
print('F1 (Decision Tree): ', "%.2f" % (dt_f1*100))
print("Confustion Matrix (Decision Tree):\n", confusion_matrix(y_test, y_pred))

Accuracy (Decision Tree):  55.29 %
F1 (Decision Tree):  57.22
Confustion Matrix (Decision Tree):
 [[59  7 15 11]
 [10 34 13 22]
 [11  6 48 12]
 [10 12  7 42]]


# MultiClass Logistic Regression

Import Libraries

In [None]:
from sklearn.linear_model import LogisticRegression

Define Classifier.

In [None]:
classifier = LogisticRegression(multi_class='auto', solver='sag')


Predicting Test Set Results.

In [None]:
lr_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)
    
    lr_accuracy += accuracy_score(y_test, y_pred)




Outcomes

In [None]:
lr_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (Logistic Regression): ', "%.2f" % (lr_accuracy/5*100) + " %")
print('F1 (Logistic Regression): ', "%.2f" % (lr_f1*100))
print("Confusion Matrix (Logistic Regression):\n", confusion_matrix(y_test, y_pred))

Accuracy (Logistic Regression):  95.31 %
F1 (Logistic Regression):  94.97
Confusion Matrix (Logistic Regression):
 [[91  0  1  0]
 [ 0 72  1  6]
 [ 2  0 75  0]
 [ 0  6  0 65]]


# KNN

Importing Libraries

In [None]:
from sklearn.neighbors import KNeighborsClassifier

Define the Classifier

In [None]:
knn = KNeighborsClassifier(n_neighbors=5)

Predicting Test Set Results

In [None]:
knn_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    knn.fit(X_train, y_train)
    
    y_pred = knn.predict(X_test)
    
    knn_accuracy += accuracy_score(y_test, y_pred)

Outcomes

In [None]:
knn_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (KNN): ', "%.2f" % (knn_accuracy/5*100) + " %")
print('F1 (KNN): ', "%.2f" % (knn_f1*100))
print("Confustion Matrix (KNN):\n", confusion_matrix(y_test, y_pred))

Accuracy (KNN):  79.11 %
F1 (KNN):  77.73
Confustion Matrix (KNN):
 [[83  1  7  1]
 [ 0 51  1 27]
 [12  1 63  1]
 [ 1 17  2 51]]
