# Data Preprocessing

Importing Libraries

In [23]:
from sklearn import svm, datasets
import sklearn.model_selection as model_selection
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd

Loading the dataset

In [24]:
dataset = pd.read_csv('features_resnet_101.csv', on_bad_lines='skip')
X = dataset.iloc[ 1 : , 1 : ].values
y = dataset.iloc[ 1 : , 0 ].values
dataset

Unnamed: 0,category,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_1490,feature_1491,feature_1492,feature_1493,feature_1494,feature_1495,feature_1496,feature_1497,feature_1498,feature_1499
0,NeuralTextures,-32.093704,146.902530,-17.185703,145.078540,-63.500923,-105.487350,-11.982006,-90.380450,-140.270250,...,0.434148,0.261460,0.466725,0.011658,-0.250158,-0.144494,0.206021,-0.092880,0.568985,0.004353
1,NeuralTextures,-35.860138,144.555700,-18.497292,144.925540,-65.397354,-105.359440,-10.496799,-92.211815,-138.410110,...,-0.912738,-0.295983,-0.639621,0.177329,0.317876,0.365629,-0.490861,-0.023407,-1.234763,0.014360
2,NeuralTextures,298.745330,-61.443790,-207.530320,105.024630,-143.749080,-192.775770,197.092320,356.193540,102.496150,...,-0.056320,-0.098121,-0.340010,0.118510,0.069462,-0.019535,0.074369,-0.157523,-0.260493,0.338494
3,NeuralTextures,-44.937443,72.313560,119.494545,-27.042110,-9.954171,-98.384910,47.641968,-40.619480,110.178560,...,0.079021,0.022073,-0.000436,-0.012982,0.092203,0.056661,-0.061510,0.176381,0.002696,-0.007272
4,NeuralTextures,-45.384537,73.638760,115.877730,-25.753588,-11.652071,-96.958040,45.257343,-39.462980,109.793495,...,-0.130039,-0.281760,-0.009189,-0.104403,-0.219469,-0.072707,0.019448,-0.155787,0.078128,0.179217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,Deepfakes,-94.170420,-15.779587,-53.370056,-200.466720,-2.336628,16.896307,-8.254656,19.890854,-10.412720,...,-0.561939,-1.117823,-2.381083,-0.804908,0.322209,3.105451,-0.425978,1.289405,0.387524,1.206896
1596,Deepfakes,-93.088000,-18.265966,-50.431774,-201.737170,-1.154791,16.313220,-8.137126,18.626867,-8.200521,...,0.445529,1.226872,2.208652,0.827821,-0.177020,-3.034489,0.413817,-1.205983,0.086704,-0.763830
1597,Deepfakes,-94.228950,-17.622774,-51.374363,-201.379970,-4.702398,15.354007,-8.863097,18.517200,-9.760734,...,-0.276811,0.046149,-0.346713,-0.144246,-0.263702,0.502898,-0.193400,0.110160,-0.372153,-0.516195
1598,Deepfakes,-95.468580,-16.348152,-51.249672,-203.380660,-2.945286,15.727937,-7.390561,18.557844,-9.799802,...,0.164427,0.255737,0.296921,0.314879,0.319995,-0.425134,0.006323,-0.264667,-0.054350,-0.382216


Split the dataset into training set and test set.

In [25]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=0.80, test_size=0.20, random_state=101)

Initialise the 5-Fold Cross Validation

In [26]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Support Vector Machine

Predict on Test Set.

In [27]:
poly_accuracy, rbf_accuracy = 0, 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    poly = svm.SVC(kernel='poly', degree=10, C=1).fit(X_train, y_train)
    rbf = svm.SVC(kernel='rbf', gamma=1, C=1).fit(X_train, y_train)
    
    poly_pred = poly.predict(X_test)
    rbf_pred = rbf.predict(X_test)
    
    poly_accuracy += accuracy_score(y_test, poly_pred)
    rbf_accuracy += accuracy_score(y_test, rbf_pred)


Outcomes.

In [28]:
print('Accuracy (Polynomial Kernel): ', "%.2f" % (poly_accuracy/5*100) + " %")
print('Accuracy (RBF Kernel): ', "%.2f" % (rbf_accuracy/5*100) + " %")

poly_f1 = f1_score(y_test, poly_pred, average='weighted')
print('F1 score (Polynomial Kernel): ', "%.2f" % (poly_f1*100))
rbf_f1 = f1_score(y_test, rbf_pred, average='weighted')
print('F1 score (RBF Kernel): ', "%.2f" % (rbf_f1*100))

print('Confusion Matrix (Polynomial Kernel): \n', confusion_matrix(y_test, poly_pred))
print('Confusion Matrix (RBF Kernel): \n',confusion_matrix(y_test, rbf_pred))

Accuracy (Polynomial Kernel):  66.29 %
Accuracy (RBF Kernel):  22.08 %
F1 score (Polynomial Kernel):  64.70
F1 score (RBF Kernel):  8.10
Confusion Matrix (Polynomial Kernel): 
 [[63  0 25  4]
 [ 2 33  2 42]
 [ 8  1 60  8]
 [ 5 10  5 51]]
Confusion Matrix (RBF Kernel): 
 [[ 0  0  0 92]
 [ 0  0  0 79]
 [ 0  0  0 77]
 [ 0  0  0 71]]


# Random Forest

Importing Libraries

In [29]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

Define the Classifier

In [30]:
classifier = RandomForestClassifier(n_estimators = 500, criterion = 'entropy', random_state = 42)

Predicting Test Set Results

In [31]:
rf_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)
    
    rf_accuracy += accuracy_score(y_test, y_pred)
    

Outcomes

In [32]:
rf_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (Random Forest): ', "%.2f" % (rf_accuracy/5*100) + " %")
print('F1 (Random Forest): ', "%.2f" % (rf_f1*100))
print("Confustion Matrix (Random Forest):\n", confusion_matrix(y_test, y_pred))

Accuracy (Random Forest):  83.62 %
F1 (Random Forest):  86.24
Confustion Matrix (Random Forest):
 [[77  4 11  0]
 [ 0 63  4 12]
 [ 3  2 71  1]
 [ 0  5  2 64]]


# Decision Tree

Importing Libraries

In [33]:
from sklearn.tree import DecisionTreeClassifier

Define the Classifier

In [34]:
clf = DecisionTreeClassifier()

Predicting Test Set Results

In [35]:
dt_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    clf.fit(X_train, y_train)
    
    y_pred = clf.predict(X_test)
    
    dt_accuracy += accuracy_score(y_test, y_pred)

Outcomes

In [36]:
dt_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (Decision Tree): ', "%.2f" % (dt_accuracy/5*100) + " %")
print('F1 (Decision Tree): ', "%.2f" % (dt_f1*100))
print("Confustion Matrix (Decision Tree):\n", confusion_matrix(y_test, y_pred))

Accuracy (Decision Tree):  58.72 %
F1 (Decision Tree):  59.88
Confustion Matrix (Decision Tree):
 [[59  8 20  5]
 [ 9 48  6 16]
 [13  5 48 11]
 [ 6 22  7 36]]


# MultiClass Logistic Regression

Import Libraries

In [37]:
from sklearn.linear_model import LogisticRegression

Define Classifier.

In [38]:
classifier = LogisticRegression(multi_class='auto', solver='sag')


Predicting Test Set Results.

In [39]:
lr_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)
    
    lr_accuracy += accuracy_score(y_test, y_pred)




Outcomes

In [40]:
lr_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (Logistic Regression): ', "%.2f" % (lr_accuracy/5*100) + " %")
print('F1 (Logistic Regression): ', "%.2f" % (lr_f1*100))
print("Confusion Matrix (Logistic Regression):\n", confusion_matrix(y_test, y_pred))

Accuracy (Logistic Regression):  94.93 %
F1 (Logistic Regression):  93.73
Confusion Matrix (Logistic Regression):
 [[89  0  2  1]
 [ 1 71  0  7]
 [ 2  0 75  0]
 [ 0  7  0 64]]


# KNN

Importing Libraries

In [41]:
from sklearn.neighbors import KNeighborsClassifier

Define the Classifier

In [42]:
knn = KNeighborsClassifier(n_neighbors=5)

Predicting Test Set Results

In [43]:
knn_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    knn.fit(X_train, y_train)
    
    y_pred = knn.predict(X_test)
    
    knn_accuracy += accuracy_score(y_test, y_pred)

Outcomes

In [44]:
knn_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (KNN): ', "%.2f" % (knn_accuracy/5*100) + " %")
print('F1 (KNN): ', "%.2f" % (knn_f1*100))
print("Confustion Matrix (KNN):\n", confusion_matrix(y_test, y_pred))

Accuracy (KNN):  76.30 %
F1 (KNN):  76.89
Confustion Matrix (KNN):
 [[82  3  7  0]
 [ 4 51  3 21]
 [12  2 62  1]
 [ 4 12  4 51]]
