In [2]:
#Mount drive to retrieve dataset
from google.colab import drive

drive.mount('/content/drive')



Mounted at /content/drive


In [3]:
import pandas as pd
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report

# Define the directories
train_dir = '/content/drive/MyDrive/unzippedData/rvf10k/train'
valid_dir = '/content/drive/MyDrive/unzippedData/rvf10k/valid'

# Image data generator
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.25)

# Flow from directory for training and validation data
train_generator = datagen.flow_from_directory(train_dir,
                                              target_size=(256, 256),
                                              batch_size=64,
                                              class_mode='categorical',
                                              subset='training')

valid_generator = datagen.flow_from_directory(train_dir,
                                              target_size=(256, 256),
                                              batch_size=64,
                                              class_mode='categorical',
                                              subset='validation')

test_generator = datagen.flow_from_directory(valid_dir,
                                             target_size=(256, 256),
                                             batch_size=64,
                                             class_mode='categorical'
                                            )



# Define the model
model = Sequential()
model.add(Conv2D(32, kernel_size=(5,5), padding='same', activation='relu', input_shape=(256,256,3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(5,5), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dense(2, activation='softmax'))

# Compile model
sgd = SGD(learning_rate=0.01, weight_decay=0.00001, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit model
model_fit = model.fit(train_generator,
                    steps_per_epoch=7000/128,
                    epochs=10,
                    validation_data=valid_generator,
                    validation_steps=3000/128)

# Evaluate model
loss, accuracy = model.evaluate(valid_generator, steps=300)
print("Validation Accuracy:", accuracy)

predictions = model.predict(test_generator)

#Prediction of test (valid folder) with classification_report, got from chatGPT
predicted_classes = predictions.argmax(axis=1)
true_labels = test_generator.classes
report = classification_report(true_labels, predicted_classes)
print("Classification Report:")
print(report)
model.summary()


Found 5250 images belonging to 2 classes.
Found 1750 images belonging to 2 classes.
Found 3000 images belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
 28/300 [=>............................] - ETA: 17:17 - loss: 0.7516 - accuracy: 0.7051



Validation Accuracy: 0.7051428556442261
Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.54      0.52      1500
           1       0.50      0.46      0.48      1500

    accuracy                           0.50      3000
   macro avg       0.50      0.50      0.50      3000
weighted avg       0.50      0.50      0.50      3000

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 256, 256, 32)      2432      
                                                                 
 max_pooling2d (MaxPooling2  (None, 128, 128, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 128, 128, 64)      51264     
                                                              

1. My project is about detecting images of ai generated faces and photos of real humans; the kaggle data set was made to help address the problems of deepfakes online.. https://www.kaggle.com/datasets/sachchitkunichetty/rvf10k
2. All of the project is my own work including design of the model: batch_size, convolutional layers, max pooling layers, dropout, optimizers, training split; except for the prediction of the test data was chatGPT(3.5).
3. pandas, keras: .models, .layers, .preprocessing.image, sklearn: .metrics
4. I used the CNN model, as I have used it succesfully before, and it is the most popular NN for image classification
5. Batch Size crashed at 128 so I wanted to lower it. Different optimizers changed the accuracy:
SGD accuracy = 60, adam = 71. I originally did 5 epochs and then saw that the accuracy was moving up steadily, and so was the val_accuracy. I changed it to 10 to allow it to continue to improve and it did up to about epoch 8. After 8 epcohs the val_accuracy wasn't going up enough to warrant more epochs.
6. I wanted to use ImageDataGenerator which was very helpful for inputting the images from my drive into the CNN. I found examples in the slides 16_DataAugment_TransferLearning.pdf