In [1]:
import pandas as pd
import tensorflow as tf
import tensorflow.keras.layers as tfl
from tensorflow.python.framework import ops
import pandas as pd
import matplotlib.pyplot as plt
import h5py
import scipy
%matplotlib inline
from matplotlib.image import imread
import math
import numpy as np
from PIL import Image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
import os
from tensorflow.python.keras import regularizers

In [2]:
# Root directory of the training split; transform_to_arrays() looks for the
# FAKE/ and REAL/ sub-folders directly beneath this path.
folder = "/kaggle/input/cifake-real-and-ai-generated-synthetic-images/train"

In [3]:
def transform_to_arrays(file):
    """Load every image found under ``file/REAL`` and ``file/FAKE``.

    Parameters
    ----------
    file : str
        Directory that contains the ``REAL`` and ``FAKE`` class sub-folders.

    Returns
    -------
    tuple[list, list]
        ``(real, fake)`` -- in that order -- where each element is a list
        holding one array per file, as decoded by ``imread``. A missing or
        empty class folder yields an empty list.
    """
    def _load_images(class_dir):
        # Recursively decode every file below ``class_dir``.
        images = []
        for dirname, subdirs, filenames in os.walk(class_dir):
            # Directory listing order is filesystem-dependent; sorting (in
            # place for sub-directories, which steers os.walk) keeps the
            # result order stable so seeded splits downstream are repeatable.
            subdirs.sort()
            for filename in sorted(filenames):
                # os.path.join instead of ``dirname + filename``: for nested
                # folders ``dirname`` has no trailing separator, so plain
                # concatenation would produce a path that does not exist.
                image = imread(os.path.join(dirname, filename))
                images.append(np.asarray(image))
        return images

    real = _load_images(os.path.join(file, 'REAL'))
    fake = _load_images(os.path.join(file, 'FAKE'))
    return real, fake

In [4]:
# Decode the whole training split into two Python lists of image arrays
# (real first, fake second -- the order transform_to_arrays returns them in).
real, fake = transform_to_arrays(folder)

In [5]:
# Stack each list of per-image arrays into one ndarray per class, then show
# both shapes as the cell's output.
real_array, fake_array = (np.asarray(images) for images in (real, fake))
real_array.shape, fake_array.shape

((50000, 32, 32, 3), (50000, 32, 32, 3))

In [6]:
# Shuffle the combined real/fake samples and split them into train and dev sets.
def custom_split(real_data, fake_data, split_ratio=0.3, num_samples=100000):
    """Shuffle real and fake samples together and split them into train/dev.

    Labels follow this function's own convention: ``0`` for real and ``1`` for
    fake. NOTE(review): the ``train_test_split`` cell further down encodes the
    classes the other way round (1 = real) -- do not mix the two outputs.

    Parameters
    ----------
    real_data, fake_data : array-like
        Samples of each class, stacked along axis 0.
    split_ratio : float
        Fraction of the selected samples that goes to the dev set.
    num_samples : int or None
        Maximum number of samples to use. Values larger than the dataset are
        clamped to the dataset size; ``None`` means "use everything".

    Returns
    -------
    tuple
        ``(train_data, train_labels, dev_data, dev_labels)`` as NumPy arrays.
        Every selected sample appears exactly once, so train and dev never
        share a row.
    """
    real_data = np.asarray(real_data)
    fake_data = np.asarray(fake_data)

    labels = np.concatenate((np.zeros(len(real_data), dtype=int),
                             np.ones(len(fake_data), dtype=int)))
    # Skip an empty class so its 1-D empty array does not clash with the rank
    # of the populated one during concatenation.
    parts = [part for part in (real_data, fake_data) if len(part)]
    data = np.concatenate(parts, axis=0) if parts else real_data

    total = len(data)
    if num_samples is None:
        num_samples = total
    num_samples = max(0, min(int(num_samples), total))

    # A permutation of *all* row indices samples without replacement and
    # covers both classes. The previous randint(num_samples, size=num_samples)
    # drew with replacement from the first ``num_samples`` rows only, which
    # duplicated rows, leaked them across train/dev and could ignore the fake
    # class entirely (or index out of range for small datasets).
    order = np.random.permutation(total)[:num_samples]
    dev_samples = int(num_samples * split_ratio)

    data_shuffle = data[order]
    labels_shuffle = labels[order]

    dev_data = data_shuffle[:dev_samples]
    dev_labels = labels_shuffle[:dev_samples]

    train_data = data_shuffle[dev_samples:]
    train_labels = labels_shuffle[dev_samples:]

    return train_data, train_labels, dev_data, dev_labels


In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the images as a dev set; random_state pins the shuffle.
# Label convention for everything downstream of this cell: 1.0 = real,
# 0.0 = fake. NOTE(review): custom_split() above uses the opposite encoding.
# The already-stacked real_array / fake_array are reused so the Python lists
# of per-image arrays are not converted to ndarrays a second time.
X_train, X_dev, y_train, y_dev = train_test_split(
    np.concatenate([real_array, fake_array]),
    np.concatenate([np.ones(len(real_array)), np.zeros(len(fake_array))]),
    test_size=0.2, random_state=42)

In [8]:
# Build batch iterators that multiply every pixel by 1/255 on the fly
# (assumes 8-bit images with values in 0-255 -- TODO confirm against what
# imread returned for this dataset).
# Imported via tensorflow.keras to match the rest of the notebook rather than
# mixing in the standalone ``keras`` package.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

BATCH_SIZE = 64

datagen = ImageDataGenerator(rescale = 1.0/255.0)
train_it = datagen.flow(X_train, y_train, batch_size=BATCH_SIZE)
# No shuffling for the dev iterator: batches then follow the order of
# X_dev / y_dev, so predictions can be lined up with the labels.
dev_it = datagen.flow(X_dev, y_dev, batch_size=BATCH_SIZE, shuffle=False)

In [9]:
#Model Creation
from tensorflow.keras import layers, Sequential, regularizers

# Small CNN for 32x32 RGB inputs: three Conv(3x3, ReLU) -> MaxPool(2x2) stages
# with 32, 64 and 128 filters, then Flatten -> Dropout(0.2) -> Dense(256, ReLU,
# L2 = 0.01) -> a single sigmoid unit for the binary decision.
model = Sequential([
    layers.Input(shape=(32, 32, 3)),

    # Feature extractor: the stages are generated in place so layers are
    # instantiated in the same order as if they were written out one by one.
    *[
        stage_layer
        for n_filters in (32, 64, 128)
        for stage_layer in (
            layers.Conv2D(n_filters, (3, 3), activation='relu'),
            layers.MaxPooling2D(pool_size=(2, 2)),
        )
    ],

    # Classifier head
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(256, kernel_regularizer=regularizers.l2(0.01), activation='relu'),
    layers.Dense(1, activation='sigmoid')
])


In [10]:
#Model Compiling
# The single sigmoid output is paired with binary cross-entropy; accuracy is
# tracked as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [11]:
# Print each layer's output shape and parameter count as a sanity check of
# the architecture defined above.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 15, 15, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 13, 13, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 6, 6, 64)          0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 4, 4, 128)         73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 2, 2, 128)         0