In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("birdy654/cifake-real-and-ai-generated-synthetic-images")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/birdy654/cifake-real-and-ai-generated-synthetic-images?dataset_version_number=3...


100%|██████████| 105M/105M [00:05<00:00, 18.6MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images/versions/3


Prepare Dataset

In [2]:
%cd /root/.cache/kagglehub/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images/versions/3

/root/.cache/kagglehub/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images/versions/3


In [3]:
!ls

test  train


In [4]:
import os
import shutil
from pathlib import Path

# Define paths
base_dir = Path('.')  # The current directory
train_dir = base_dir / 'train'
validation_dir = base_dir / 'validation'

# Create validation directory and subfolders if they don't exist
validation_dir.mkdir(exist_ok=True)
(validation_dir / 'REAL').mkdir(exist_ok=True)
(validation_dir / 'FAKE').mkdir(exist_ok=True)

# Function to move a specified number of files
def move_files(source_dir, target_dir, num_files):
    files = list(source_dir.glob('*'))  # Get all files in source_dir
    files_to_move = files[:num_files]  # Select the first num_files

    for file in files_to_move:
        shutil.move(str(file), str(target_dir))

# Move 10,000 images from each subfolder in train to the validation subfolders
move_files(train_dir / 'REAL', validation_dir / 'REAL', 10000)
move_files(train_dir / 'FAKE', validation_dir / 'FAKE', 10000)

print("Files moved successfully!")

Files moved successfully!


In [5]:
!ls

test  train  validation


In [6]:
import os

print(len(os.listdir("train/REAL")))
print(len(os.listdir("train/FAKE")))
print(len(os.listdir("validation/REAL")))
print(len(os.listdir("validation/FAKE")))

40000
40000
10000
10000


In [13]:
# prompt: Use the image_dataset_from_directory() function to
# load data from the “train”, “validation”, and “test” folders.

import tensorflow as tf

# Define image size and batch size
IMG_WIDTH = 128
IMG_HEIGHT = 128
image_size = (IMG_WIDTH, IMG_HEIGHT)
BATCH_SIZE = 128

# Load datasets using image_dataset_from_directory
train_ds = tf.keras.utils.image_dataset_from_directory(
    "train",
    labels='inferred',
    label_mode='binary',
    image_size=image_size,
    interpolation='nearest',
    batch_size=BATCH_SIZE,
    shuffle=True
)

validation_ds = tf.keras.utils.image_dataset_from_directory(
    "validation",
    labels='inferred',
    label_mode='binary',
    image_size=image_size,
    interpolation='nearest',
    batch_size=BATCH_SIZE,
    shuffle=False
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    "test",
    labels='inferred',
    label_mode='binary',
    image_size=image_size,
    interpolation='nearest',
    batch_size=BATCH_SIZE,
    shuffle=False
)

Found 80000 files belonging to 2 classes.
Found 20000 files belonging to 2 classes.
Found 20000 files belonging to 2 classes.


In [24]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetB0

def create_real_or_fake_model():

    base_model = EfficientNetB0(include_top=False, weights='imagenet', input_shape=(128, 128, 3))
    #base_model.trainable = False  # Freeze the base model

    model = models.Sequential([
        # Pretrained EfficientNetB0 model
        base_model,

        # Global Average Pooling (reduces the spatial dimensions of the output)
        layers.GlobalAveragePooling2D(),

        # Dropout layer to prevent overfitting
        layers.Dropout(0.2),

        # Fully connected (Dense) layers
        layers.Dense(64, activation='relu'),

        # Output layer for binary classification
        layers.Dense(1, activation='sigmoid')  # Sigmoid activation for binary output
    ])

    # Compile the model
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

# Instantiate the model
model = create_real_or_fake_model()
#model.summary()


In [25]:
# prompt: train the model with train and validation sets save the best model into drive.

# Define callbacks for saving the best model
checkpoint_filepath = 'CIFAKE/bonus_model.keras'  # Specify the path on your Google Drive
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,  # Save the entire model
    monitor='val_accuracy',  # Monitor validation accuracy
    mode='max',  # Save the model with the highest validation accuracy
    save_best_only=True  # Save only the best model
)

# Train the model
epochs = 50  # Adjust the number of epochs as needed
history = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=validation_ds,
    callbacks=[model_checkpoint_callback] # Add the callback to the training process
)

Epoch 1/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 59ms/step - accuracy: 0.9313 - loss: 0.1699 - val_accuracy: 0.9681 - val_loss: 0.0879
Epoch 2/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 43ms/step - accuracy: 0.9743 - loss: 0.0686 - val_accuracy: 0.9639 - val_loss: 0.1057
Epoch 3/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 44ms/step - accuracy: 0.9796 - loss: 0.0530 - val_accuracy: 0.9748 - val_loss: 0.0767
Epoch 4/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 43ms/step - accuracy: 0.9855 - loss: 0.0382 - val_accuracy: 0.9744 - val_loss: 0.0830
Epoch 5/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 43ms/step - accuracy: 0.9879 - loss: 0.0337 - val_accuracy: 0.9732 - val_loss: 0.0777
Epoch 6/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 43ms/step - accuracy: 0.9897 - loss: 0.0277 - val_accuracy: 0.9747 - val_loss: 0.0818
Epoch 7/50
[1m

In [26]:
loss, accuracy = model.evaluate(test_ds)

print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.9838 - loss: 0.0811
Test Loss: 0.09747299551963806
Test Accuracy: 0.9811499714851379
