In [3]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [4]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
import cv2
import os
import pandas as pd

In [5]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("shahzaibshazoo/detect-ai-generated-faces-high-quality-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/shahzaibshazoo/detect-ai-generated-faces-high-quality-dataset?dataset_version_number=1...


100%|██████████| 116M/116M [00:01<00:00, 90.1MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/shahzaibshazoo/detect-ai-generated-faces-high-quality-dataset/versions/1


In [6]:
IMG_WIDTH = 224
IMG_HEIGHT = 224
TEST_SIZE = 0.2
NUM_CATEGORIES = 2
EPOCHS = 10
path

'/root/.cache/kagglehub/datasets/shahzaibshazoo/detect-ai-generated-faces-high-quality-dataset/versions/1'

In [7]:
# Loading all the Image labels through their folder names
label_list = []
path = os.path.join(path, 'AI-face-detection-Dataset')
label_list = (os.listdir(path))
label_list

['real', 'AI']

In [8]:
image_data = []
labels = []
for i in label_list:
  image_list = os.listdir(os.path.join(path, str(i)))
  for image_name in image_list:
    image_path = os.path.join(path, str(i), image_name)
    image_array = cv2.imread(image_path)
    image_array = cv2.resize(image_array, (IMG_WIDTH, IMG_HEIGHT))
    image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2GRAY)
    image_array = cv2.equalizeHist(image_array)
    image_data.append(image_array)
    if i == 'real':
      labels.append(0)
    else:
      labels.append(1)

In [9]:
labels[0]

0

In [10]:
labels = np.array(tf.keras.utils.to_categorical(labels))
#labels = np.array(labels)
image_data = np.array(image_data)
labels[0]

array([1., 0.])

In [11]:
from sklearn.utils import shuffle

image_data, labels = shuffle(image_data, labels, random_state=42)

In [12]:
X_train, X_test, y_train, y_test = train_test_split(
    image_data, labels, test_size=TEST_SIZE
)
y_train.shape
#X_train.shape
y_train[0]

array([1., 0.])

In [13]:
#X_train = X_train/255
#X_test = X_test/255

In [14]:
rescal_and_resize  = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1.0/255),
    tf.keras.layers.Resizing(IMG_HEIGHT, IMG_WIDTH)
]) # Data augmentation is done toe to introduce the data to different orrientations and also increase the size of the dataset

data_augmentation =tf.keras.Sequential([
    tf.keras.layers.RandomFlip('Horizontal_andvertical'),
    tf.keras.layers.RandomRotation(0.5)
])

In [15]:
#input_shape = (BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH, CHANNELS)
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(IMG_WIDTH, IMG_HEIGHT, 1)),
    data_augmentation,
    rescal_and_resize,
    tf.keras.layers.Conv2D(
        32, (3,3), activation="relu", input_shape = (IMG_WIDTH, IMG_HEIGHT, 3)
    ),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(2, activation="softmax")
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

In [17]:
model.fit(X_train, y_train, epochs=EPOCHS)

Epoch 1/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 2s/step - accuracy: 0.7267 - loss: 4.1809
Epoch 2/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 2s/step - accuracy: 0.9588 - loss: 0.1249
Epoch 3/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 1s/step - accuracy: 0.9059 - loss: 0.2211
Epoch 4/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 2s/step - accuracy: 0.9746 - loss: 0.0661
Epoch 5/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 2s/step - accuracy: 0.9329 - loss: 0.1968
Epoch 6/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 2s/step - accuracy: 0.9844 - loss: 0.0554
Epoch 7/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 2s/step - accuracy: 0.9767 - loss: 0.0668
Epoch 8/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 2s/step - accuracy: 0.9853 - loss: 0.0438
Epoch 9/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7b12948deed0>

In [18]:
model.evaluate(X_test, y_test)

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 413ms/step - accuracy: 0.9941 - loss: 0.0152


[0.017726685851812363, 0.9921996593475342]

In [23]:
# f1 score, accuracy, precision,
from sklearn.metrics import confusion_matrix, f1_score

y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

cm = confusion_matrix(y_true_classes, y_pred_classes)
print(cm)

f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')
print(f1)

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 363ms/step
[[439   5]
 [  0 197]]
0.9922264774343389
