In [1]:
# Import required libraries
from google.colab import drive
import os
import zipfile
import shutil
import random
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing import image_dataset_from_directory

In [2]:
# Step 1: Connect to Google Drive
drive.mount('/content/drive')

# Define file paths
zip_file_location = '/content/drive/MyDrive/cifake_dataset.zip'  # Path to the CIFAKE dataset zip file
unzip_location = '/content/data'  # Destination folder for extracted files

# Step 2: Extract the ZIP file (may take some time)
# Only extract when the dataset folder is not there yet. Re-extracting after the
# train/validation split has been made would restore the moved images into
# train/ while they still exist in validation/, leaking validation data into
# training. Delete `unzip_location` manually to force a fresh extraction.
extracted_dataset_root = os.path.join(unzip_location, 'cifake_dataset')
if not os.path.isdir(extracted_dataset_root):
    with zipfile.ZipFile(zip_file_location, 'r') as zip_file:
        zip_file.extractall(unzip_location)
else:
    print(f"Dataset already extracted at {extracted_dataset_root}; skipping extraction.")

Mounted at /content/drive


In [3]:
# Step 3: Build the train/validation directory paths from a single dataset root
dataset_root = os.path.join(unzip_location, 'cifake_dataset')
train_root = os.path.join(dataset_root, 'train')

real_train_path = os.path.join(train_root, 'REAL')
fake_train_path = os.path.join(train_root, 'FAKE')

validation_folder_path = os.path.join(dataset_root, 'validation')
real_validation_path, fake_validation_path = (
    os.path.join(validation_folder_path, class_name)
    for class_name in ('REAL', 'FAKE')
)

# Make sure both validation class folders exist (no-op when already present)
for validation_class_dir in (real_validation_path, fake_validation_path):
    os.makedirs(validation_class_dir, exist_ok=True)

In [4]:
# Step 4: Transfer 10,000 images from train/REAL to validation/REAL and from train/FAKE to validation/FAKE
def transfer_images(source_folder, destination_folder, count, seed=None):
    """Move a random sample of files from one folder into another.

    Args:
        source_folder: Directory whose files are sampled.
        destination_folder: Existing directory that receives the files.
        count: Maximum number of files to move. If the source holds fewer
            regular files, all of them are moved; values below zero move
            nothing.
        seed: Optional seed for the selection. With a seed, the same source
            contents always yield the same selection. Without one, the
            module-level ``random`` generator is used.
    """
    # os.listdir returns entries in arbitrary order and includes directories:
    # keep regular files only and sort them so a seed fully determines the pick.
    image_list = sorted(
        entry
        for entry in os.listdir(source_folder)
        if os.path.isfile(os.path.join(source_folder, entry))
    )

    shuffler = random.Random(seed) if seed is not None else random
    shuffler.shuffle(image_list)  # Shuffle images for random selection

    # Clamp at zero: a negative slice bound would select from the end instead.
    for image in image_list[:max(count, 0)]:
        source_image_path = os.path.join(source_folder, image)
        destination_image_path = os.path.join(destination_folder, image)
        shutil.move(source_image_path, destination_image_path)

# Move images
# Top each validation class folder up to the target size instead of always
# moving a fixed amount. Re-running this cell is then a no-op rather than
# draining another 10,000 images per class out of the training set.
validation_images_per_class = 10000
for train_class_path, validation_class_path in (
    (real_train_path, real_validation_path),
    (fake_train_path, fake_validation_path),
):
    shortfall = validation_images_per_class - len(os.listdir(validation_class_path))
    if shortfall > 0:
        transfer_images(train_class_path, validation_class_path, shortfall)

# **PART 2: Build, train, and evaluate the CNN**

In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Load the train / validation / test splits from their directories.
# All three splits share the same image size and batch size.
dataset_root = os.path.join(unzip_location, 'cifake_dataset')
loader_options = dict(image_size=(32, 32), batch_size=32)

# The generator is consumed left to right, so the splits are loaded in the
# order train -> validation -> test.
train_ds, val_ds, test_ds = (
    image_dataset_from_directory(os.path.join(dataset_root, split_name), **loader_options)
    for split_name in ('train', 'validation', 'test')
)



Found 80000 files belonging to 2 classes.
Found 20000 files belonging to 2 classes.
Found 20000 files belonging to 2 classes.


In [6]:
# Define the CNN model based on the architecture
model = models.Sequential([
    # Explicit input layer: 32x32 RGB images. Declaring the shape here instead
    # of passing `input_shape=` to the first layer avoids the Keras UserWarning
    # about `input_shape` on layers inside a Sequential model.
    layers.Input(shape=(32, 32, 3)),

    # Rescaling layer: map pixel values from [0, 255] to [0, 1]
    layers.Rescaling(1.0 / 255),

    # First Conv2D layer with ReLU activation, followed by Max Pooling
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Second Conv2D layer with ReLU activation, followed by Max Pooling
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Flatten the output and add Dense layers
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])

  super().__init__(**kwargs)


In [7]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Define ModelCheckpoint callback to save the best model (lowest validation loss)
checkpoint_path = "best_model.keras"
checkpoint_cb = ModelCheckpoint(checkpoint_path, save_best_only=True, monitor="val_loss")

# Train the model for 30 epochs
history = model.fit(train_ds, validation_data=val_ds, epochs=30, callbacks=[checkpoint_cb])

# Evaluate on the test dataset using the best checkpoint, not the weights left
# in memory after the final epoch: validation loss can start rising well before
# epoch 30, so the last-epoch model may be worse than the one that was saved.
best_model = models.load_model(checkpoint_path)
test_loss, test_accuracy = best_model.evaluate(test_ds)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Epoch 1/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - accuracy: 0.7864 - loss: 0.4385 - val_accuracy: 0.8861 - val_loss: 0.2725
Epoch 2/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 4ms/step - accuracy: 0.8952 - loss: 0.2541 - val_accuracy: 0.9119 - val_loss: 0.2232
Epoch 3/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 5ms/step - accuracy: 0.9129 - loss: 0.2167 - val_accuracy: 0.9212 - val_loss: 0.2002
Epoch 4/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 5ms/step - accuracy: 0.9235 - loss: 0.1904 - val_accuracy: 0.9241 - val_loss: 0.1900
Epoch 5/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.9308 - loss: 0.1734 - val_accuracy: 0.9280 - val_loss: 0.1812
Epoch 6/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 5ms/step - accuracy: 0.9378 - loss: 0.1595 - val_accuracy: 0.9280 - val_loss: 0.1847
Epoch 7/30