In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Data Preprocessing

Importing Libraries

In [14]:
from sklearn import svm, datasets
import sklearn.model_selection as model_selection
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn import preprocessing
import numpy as np
import pandas as pd

Loading the dataset

In [15]:
dataset = pd.read_csv('/content/drive/MyDrive/Features_vgg16.csv')
X = dataset.iloc[ : , 2 : ].values
y = dataset.iloc[ : , 0 ].values
print(X)
print(y)


[[0.         0.         0.         ... 0.98208046 0.         1.25845563]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.71421111]
 ...
 [0.         0.         0.         ... 0.04508503 0.         0.23801658]
 [0.         0.         0.         ... 0.19043779 0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.57762998]]
['Faceswap' 'Faceswap' 'Faceswap' ... 'Deepfakes' 'Deepfakes' 'Deepfakes']


Feature scaling of the dataset

In [16]:
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
X = min_max_scaler.fit_transform(X)
print ("\nAfter min max Scaling : \n",X)


After min max Scaling : 
 [[0.         0.         0.         ... 0.85947196 0.         0.86403397]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.49036505]
 ...
 [0.         0.         0.         ... 0.03945636 0.         0.16341808]
 [0.         0.         0.         ... 0.16666246 0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.3965908 ]]


Train test split

In [17]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=0.80, test_size=0.20, random_state=101)

Initialise the 5-Fold Cross Validation

In [18]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Support Vector Machine

Predict on Test Set.

In [19]:
poly_accuracy, rbf_accuracy = 0, 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    poly = svm.SVC(kernel='poly', degree=8, C=1).fit(X_train, y_train)
    rbf = svm.SVC(kernel='rbf', gamma=1, C=1).fit(X_train, y_train)
    
    poly_pred = poly.predict(X_test)
    rbf_pred = rbf.predict(X_test)
    
    poly_accuracy += accuracy_score(y_test, poly_pred)
    rbf_accuracy += accuracy_score(y_test, rbf_pred)


Outcomes.

In [20]:
print('Accuracy (Polynomial Kernel): ', "%.2f" % (poly_accuracy/5*100) + " %")
print('Accuracy (RBF Kernel): ', "%.2f" % (rbf_accuracy/5*100) + " %")

poly_f1 = f1_score(y_test, poly_pred, average='weighted')
print('F1 score (Polynomial Kernel): ', "%.2f" % (poly_f1*100))
rbf_f1 = f1_score(y_test, rbf_pred, average='weighted')
print('F1 score (RBF Kernel): ', "%.2f" % (rbf_f1*100))

print('Confusion Matrix (Polynomial Kernel): \n', confusion_matrix(y_test, poly_pred))
print('Confusion Matrix (RBF Kernel): \n',confusion_matrix(y_test, rbf_pred))

Accuracy (Polynomial Kernel):  81.74 %
Accuracy (RBF Kernel):  62.47 %
F1 score (Polynomial Kernel):  79.24
F1 score (RBF Kernel):  56.94
Confusion Matrix (Polynomial Kernel): 
 [[84  0  8  0]
 [ 0 51  0 28]
 [11  1 59  0]
 [ 1 17  0 59]]
Confusion Matrix (RBF Kernel): 
 [[35  0 57  0]
 [ 0 35 44  0]
 [ 0  0 71  0]
 [ 0  0 43 34]]


# Random Forest

Importing Libraries

In [21]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

Define the Classifier

In [22]:
classifier = RandomForestClassifier(n_estimators = 500, criterion = 'entropy', random_state = 42)

Predicting Test Set Results

In [23]:
rf_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)
    
    rf_accuracy += accuracy_score(y_test, y_pred)
    

Outcomes

In [24]:
rf_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (Random Forest): ', "%.2f" % (rf_accuracy/5*100) + " %")
print('F1 (Random Forest): ', "%.2f" % (rf_f1*100))
print("Confustion Matrix (Random Forest):\n", confusion_matrix(y_test, y_pred))

Accuracy (Random Forest):  97.50 %
F1 (Random Forest):  97.48
Confustion Matrix (Random Forest):
 [[92  0  0  0]
 [ 1 73  0  5]
 [ 0  0 71  0]
 [ 0  2  0 75]]


# MultiClass Logistic Regression

Import Libraries

In [25]:
from sklearn.linear_model import LogisticRegression

Define Classifier.

In [26]:
classifier = LogisticRegression(multi_class='auto', solver='sag')


Predicting Test Set Results.

In [27]:
lr_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)
    
    lr_accuracy += accuracy_score(y_test, y_pred)




Outcomes

In [28]:
lr_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (Logistic Regression): ', "%.2f" % (lr_accuracy/5*100) + " %")
print('F1 (Logistic Regression): ', "%.2f" % (lr_f1*100))
print("Confusion Matrix (Logistic Regression):\n", confusion_matrix(y_test, y_pred))

Accuracy (Logistic Regression):  98.44 %
F1 (Logistic Regression):  97.49
Confusion Matrix (Logistic Regression):
 [[92  0  0  0]
 [ 0 73  0  6]
 [ 0  0 71  0]
 [ 0  2  0 75]]


# Decision Tree

Importing Libraries

In [29]:
from sklearn.tree import DecisionTreeClassifier

Define the Classifier

In [30]:
clf = DecisionTreeClassifier()

Predicting Test Set Results

In [31]:
dt_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    clf.fit(X_train, y_train)
    
    y_pred = clf.predict(X_test)
    
    dt_accuracy += accuracy_score(y_test, y_pred)

Outcomes

In [32]:
dt_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (Decision Tree): ', "%.2f" % (dt_accuracy/5*100) + " %")
print('F1 (Decision Tree): ', "%.2f" % (dt_f1*100))
print("Confustion Matrix (Decision Tree):\n", confusion_matrix(y_test, y_pred))

Accuracy (Decision Tree):  81.11 %
F1 (Decision Tree):  81.45
Confustion Matrix (Decision Tree):
 [[83  3  3  3]
 [ 3 61  2 13]
 [ 7  0 61  3]
 [ 2 16  4 55]]


# KNN

Importing Libraries

In [33]:
from sklearn.neighbors import KNeighborsClassifier

Define the Classifier

In [34]:
knn = KNeighborsClassifier(n_neighbors=5)

Predicting Test Set Results

In [35]:
knn_accuracy = 0

for train_index, test_index in kf.split(X):
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    knn.fit(X_train, y_train)
    
    y_pred = knn.predict(X_test)
    
    knn_accuracy += accuracy_score(y_test, y_pred)

Outcomes

In [36]:
knn_f1 = f1_score(y_test, y_pred, average='weighted')
print('Accuracy (KNN): ', "%.2f" % (knn_accuracy/5*100) + " %")
print('F1 (KNN): ', "%.2f" % (knn_f1*100))
print("Confustion Matrix (KNN):\n", confusion_matrix(y_test, y_pred))

Accuracy (KNN):  84.24 %
F1 (KNN):  82.36
Confustion Matrix (KNN):
 [[82  0  7  3]
 [ 1 64  1 13]
 [ 7  0 64  0]
 [ 2 22  0 53]]
