In [None]:
# Upload zip folder to Drive from local
#
# Prompts for archive.zip, mounts Google Drive, and moves the uploaded file into
# the DLHW6 folder on Drive so later cells can read it from there.

import os
import shutil

from google.colab import files
from google.colab import drive

# Step 1: Upload the archive.zip file from your local to Colab
print("Please upload your archive.zip file.")
uploaded = files.upload()  # This will prompt to upload the file

# Fail early with a clear message instead of a confusing error from shutil.move
# if the upload was cancelled or the file was saved under a different name.
if not os.path.exists('archive.zip'):
    raise FileNotFoundError(
        "archive.zip was not found in the Colab working directory; "
        "re-run this cell and upload a file named archive.zip."
    )

# Step 2: Mount Google Drive
drive.mount('/content/drive')

# Step 3: Define destination in Google Drive
# Uses the same 'MyDrive' spelling as the cell that later unzips the archive.
drive_path = '/content/drive/MyDrive/DLHW6'

# The move below fails if the destination folder is missing, so create it first.
os.makedirs(drive_path, exist_ok=True)

# Step 4: Move the file to Google Drive
shutil.move('archive.zip', f"{drive_path}/archive.zip")

print(f"File uploaded to Google Drive at {drive_path}/archive.zip")

Please upload your archive.zip file.


Saving archive.zip to archive.zip
Mounted at /content/drive
File uploaded to Google Drive at /content/drive/My Drive/DLHW6/archive.zip


In [None]:
# 1: Split the training dataset into real training dataset and a validation dataset
#
# Unzips the archive from Drive into /content/dataset and moves a fixed number of
# images per class from train/ into validation/. The cell is safe to re-run: the
# archive is not re-extracted over an existing split, and only the images still
# missing from validation/ are moved.

from google.colab import drive
import zipfile
import os
import random
import shutil

VALIDATION_IMAGES_PER_CLASS = 10000  # images moved from train/<class> to validation/<class>
SPLIT_SEED = 42                      # fixes which images are selected


def move_sample_to_validation(src_dir, dst_dir, target_count, seed):
    """Move images from src_dir to dst_dir until dst_dir holds target_count files.

    The candidates are sorted and then sampled with a seeded generator, so the
    selection does not depend on the arbitrary order returned by os.listdir.

    Args:
        src_dir: Folder to take images from.
        dst_dir: Folder to move images into (created if missing).
        target_count: Number of files dst_dir should contain afterwards.
        seed: Seed for the random selection.

    Raises:
        ValueError: If src_dir holds fewer files than are still needed.
    """
    os.makedirs(dst_dir, exist_ok=True)

    # Only top up what is missing, so re-running the cell moves nothing extra.
    missing = target_count - len(os.listdir(dst_dir))
    if missing <= 0:
        return

    candidates = sorted(os.listdir(src_dir))
    if len(candidates) < missing:
        raise ValueError(
            f"Need {missing} more images for {dst_dir}, "
            f"but {src_dir} only contains {len(candidates)}."
        )

    for img in random.Random(seed).sample(candidates, missing):
        shutil.move(os.path.join(src_dir, img), os.path.join(dst_dir, img))


# Step 1: Mount Google Drive
drive.mount('/content/drive')

# Step 2: Define paths
drive_path = '/content/drive/MyDrive/DLHW6'
zip_file_path = f'{drive_path}/archive.zip'
unzip_folder_path = '/content/dataset'

# Step 3: Define paths for train, test, and validation folders
train_path = os.path.join(unzip_folder_path, 'train')
test_path = os.path.join(unzip_folder_path, 'test')
validation_path = os.path.join(unzip_folder_path, 'validation')

# Step 4: Unzip the file
# Re-extracting over an existing split would put the validation images back
# into train/, so skip extraction when train/ is already present. Delete
# /content/dataset to force a clean re-extract.
if os.path.isdir(train_path):
    print("Dataset already extracted, skipping unzip.")
else:
    print("Unzipping dataset, this may take a while...")
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(unzip_folder_path)
    print("Unzipping complete.")

# Step 5: Move 10,000 images from train/REAL to validation/REAL
real_train_path = os.path.join(train_path, 'REAL')
real_validation_path = os.path.join(validation_path, 'REAL')
real_images = sorted(os.listdir(real_train_path))
move_sample_to_validation(
    real_train_path, real_validation_path, VALIDATION_IMAGES_PER_CLASS, SPLIT_SEED
)

# Step 6: Move 10,000 images from train/FAKE to validation/FAKE
fake_train_path = os.path.join(train_path, 'FAKE')
fake_validation_path = os.path.join(validation_path, 'FAKE')
fake_images = sorted(os.listdir(fake_train_path))
move_sample_to_validation(
    fake_train_path, fake_validation_path, VALIDATION_IMAGES_PER_CLASS, SPLIT_SEED
)

print("Dataset has been split successfully:")
print(f"Train folder contains REAL: {len(os.listdir(real_train_path))} images, FAKE: {len(os.listdir(fake_train_path))} images.")
print(f"Validation folder contains REAL: {len(os.listdir(real_validation_path))} images, FAKE: {len(os.listdir(fake_validation_path))} images.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Unzipping dataset, this may take a while...
Unzipping complete.
Dataset has been split successfully:
Train folder contains REAL: 40000 images, FAKE: 40000 images.
Validation folder contains REAL: 10000 images, FAKE: 10000 images.


In [None]:
# 2. Convolutional Neural Network (CNN)
#
# Trains a small CNN on the 32x32 images in /content/dataset, keeps the
# checkpoint with the lowest validation loss, and reports test accuracy for
# that checkpoint.

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint
import os

SEED = 42
IMAGE_SIZE = (32, 32)
BATCH_SIZE = 32
EPOCHS = 30
TARGET_TEST_ACCURACY = 0.92

# Seed Python, NumPy and TensorFlow so weight init and shuffling are repeatable.
tf.keras.utils.set_random_seed(SEED)

# Load the datasets from the "train", "validation", and "test" folders
train_dataset = tf.keras.utils.image_dataset_from_directory(
    '/content/dataset/train',
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    seed=SEED
)

# Evaluation data does not need shuffling; a fixed order keeps runs comparable.
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    '/content/dataset/validation',
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False
)

test_dataset = tf.keras.utils.image_dataset_from_directory(
    '/content/dataset/test',
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False
)

# Define the CNN model architecture based on the paper
model = models.Sequential([
    # An explicit Input layer replaces `input_shape=` on the first layer,
    # which Keras warns about for Sequential models.
    layers.Input(shape=IMAGE_SIZE + (3,)),
    layers.Rescaling(1./255),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # binary classification (REAL vs FAKE)
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# ModelCheckpoint callback to save the best model based on validation loss
checkpoint_path = "best_model.keras"
checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                             monitor='val_loss',
                             save_best_only=True,
                             verbose=1)

# Train the model
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=EPOCHS,
    callbacks=[checkpoint]
)

# Evaluate the best checkpoint on the test dataset.
# `model` holds the weights from the final epoch, which is not necessarily the
# epoch with the lowest validation loss, so reload the saved checkpoint.
best_model = models.load_model(checkpoint_path)
test_loss, test_accuracy = best_model.evaluate(test_dataset)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

# Check if model achieved at least 92% accuracy on test set
if test_accuracy >= TARGET_TEST_ACCURACY:
    print("The model achieved the desired accuracy of 92% or above on the test dataset.")
else:
    print("The model did not achieve the desired accuracy. Consider tuning hyperparameters or model architecture.")

Found 80000 files belonging to 2 classes.
Found 20000 files belonging to 2 classes.
Found 20000 files belonging to 2 classes.
Epoch 1/30


  super().__init__(**kwargs)


[1m2494/2500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.7900 - loss: 0.4366
Epoch 1: val_loss improved from inf to 0.25911, saving model to best_model.keras
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - accuracy: 0.7902 - loss: 0.4364 - val_accuracy: 0.8917 - val_loss: 0.2591
Epoch 2/30
[1m2497/2500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.8942 - loss: 0.2516
Epoch 2: val_loss improved from 0.25911 to 0.25224, saving model to best_model.keras
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - accuracy: 0.8942 - loss: 0.2515 - val_accuracy: 0.8928 - val_loss: 0.2522
Epoch 3/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9114 - loss: 0.2141
Epoch 3: val_loss improved from 0.25224 to 0.18804, saving model to best_model.keras
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 4ms/step - accura