# GAN Discriminator for AI vs Real Image Detection

This notebook trains a **DCGAN-style discriminator** as a binary classifier that labels images as real or AI-generated.
It also records system performance (CPU, RAM, and disk I/O) on a background thread while training runs.

## 1. Imports and Setup

In [None]:
!pip install datasets pandas pyarrow psutil matplotlib

In [None]:
import os
import time
import psutil
import threading
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from datasets import load_dataset
from PIL import Image
import io

# Report the TensorFlow build in use and how many GPUs it can see.
print("TensorFlow version:", tf.__version__)
_visible_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(_visible_gpus))

## 2. System Performance Monitoring
A background thread samples CPU utilisation, RAM utilisation, and disk I/O (MB read and written per sampling interval) while training runs.

In [None]:
class SystemMonitor:
    """Record CPU, RAM and disk I/O samples on a background thread.

    Samples are appended to ``history``, a dict of parallel lists keyed by
    ``'timestamp'`` (seconds since the sampling loop began), ``'cpu_percent'``,
    ``'ram_percent'``, ``'disk_read'`` and ``'disk_write'`` (MB transferred
    since the previous sample).

    ``history`` is not cleared by ``start()`` and timestamps begin again from
    zero on every run, so use a fresh instance when a clean plot is wanted.
    """

    def __init__(self, interval: float = 1.0):
        """Prepare the monitor without starting it.

        Args:
            interval: Seconds between consecutive samples.
        """
        self.interval = interval
        self.stop_event = threading.Event()
        self.history = {
            'timestamp': [],
            'cpu_percent': [],
            'ram_percent': [],
            'disk_read': [],
            'disk_write': []
        }
        # Unstarted placeholder so ``self.thread`` always exists; start()
        # replaces it because a Thread object can only be started once.
        self.thread = threading.Thread(target=self._monitor_loop, daemon=True)

    @staticmethod
    def _disk_bytes():
        """Return cumulative ``(read_bytes, write_bytes)`` across all disks."""
        counters = psutil.disk_io_counters()
        if counters is None:
            # psutil returns None when the system exposes no disk statistics;
            # report zero traffic instead of crashing the sampler thread.
            return 0, 0
        return counters.read_bytes, counters.write_bytes

    def _monitor_loop(self):
        """Append one sample per interval until ``stop_event`` is set."""
        bytes_per_mb = 1024 * 1024
        prev_read, prev_write = self._disk_bytes()
        # The first non-blocking cpu_percent() call only establishes a
        # baseline and returns a meaningless 0.0, so discard it here.
        psutil.cpu_percent(interval=None)
        started = time.monotonic()

        # Event.wait() doubles as the sleep: it returns True as soon as stop()
        # is called, so shutdown never has to sit out a full interval.
        while not self.stop_event.wait(self.interval):
            elapsed = time.monotonic() - started
            cpu = psutil.cpu_percent(interval=None)
            ram = psutil.virtual_memory().percent
            read_now, write_now = self._disk_bytes()

            self.history['timestamp'].append(elapsed)
            self.history['cpu_percent'].append(cpu)
            self.history['ram_percent'].append(ram)
            self.history['disk_read'].append((read_now - prev_read) / bytes_per_mb)
            self.history['disk_write'].append((write_now - prev_write) / bytes_per_mb)

            prev_read, prev_write = read_now, write_now

    def start(self):
        """Start sampling in the background; a no-op if already running."""
        if self.thread.is_alive():
            # A second sampler would interleave duplicate rows into history.
            print("System monitoring already running.")
            return
        self.stop_event.clear()
        # Daemon thread: a forgotten stop() must not keep the process alive.
        self.thread = threading.Thread(target=self._monitor_loop, daemon=True)
        self.thread.start()
        print("System monitoring started...")

    def stop(self):
        """Signal the sampler to finish and wait for it to exit.

        Safe to call when the monitor was never started or already stopped.
        """
        self.stop_event.set()
        # join() raises RuntimeError on a thread that was never started.
        if self.thread.is_alive():
            self.thread.join()
        print("System monitoring stopped.")

    def plot(self):
        """Show the recorded CPU/RAM percentages and disk I/O side by side."""
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
        timestamps = self.history['timestamp']

        # CPU & RAM
        ax1.plot(timestamps, self.history['cpu_percent'], label='CPU %')
        ax1.plot(timestamps, self.history['ram_percent'], label='RAM %')
        ax1.set_title('CPU & RAM Usage')
        ax1.set_xlabel('Time (s)')
        ax1.set_ylabel('Percentage')
        ax1.legend()
        ax1.grid(True)

        # Disk I/O
        ax2.plot(timestamps, self.history['disk_read'], label='Disk Read (MB)')
        ax2.plot(timestamps, self.history['disk_write'], label='Disk Write (MB)')
        ax2.set_title('Disk I/O (MB per interval)')
        ax2.set_xlabel('Time (s)')
        ax2.set_ylabel('MB')
        ax2.legend()
        ax2.grid(True)

        plt.show()

# Create the monitor instance that the training cell starts and stops;
# it is configured to take one sample per second.
monitor = SystemMonitor(interval=1.0)

## 3. Configuration & Data Loading

In [None]:
# Parquet shard globs, one entry per dataset split.
DATA_FILES = dict(
    train="/storage/AIGeneratedImages_Midjourney/data/train-*.parquet",
    validation="/storage/AIGeneratedImages_Midjourney/data/validation-*.parquet",
    test="/storage/AIGeneratedImages_Midjourney/data/test-*.parquet",
)

# Training hyperparameters.
IMG_SIZE = (224, 224)
BATCH_SIZE = 64
LEARNING_RATE = 2e-4  # Standard GAN LR
BETA_1 = 0.5  # Standard GAN Beta
NUM_EPOCHS = 10
SEED = 42

In [None]:
# Load every split listed in DATA_FILES through the generic parquet builder.
print("Loading dataset from parquet...")
dataset = load_dataset("parquet", data_files=DATA_FILES)

def process_example(example):
    """Decode one dataset row into a normalised image array and its label.

    Any failure while locating, decoding or resizing the image is reported
    with a ``RuntimeWarning`` and a blank (all-zero) image is substituted, so
    a single bad row does not abort iteration over the dataset.

    Args:
        example: Mapping with an ``'image'`` entry and a ``'label'`` entry.
            The image may be a PIL image, encoded image bytes, a file path,
            a NumPy array, or a dict carrying ``'bytes'`` and/or ``'path'``.

    Returns:
        Tuple ``(image, label)``: ``image`` is a float32 array of shape
        ``(IMG_SIZE[0], IMG_SIZE[1], 3)`` scaled to [0, 1]; ``label`` is
        ``example['label']`` unchanged.

    Raises:
        KeyError: If ``example`` has no ``'image'`` or ``'label'`` entry.
    """
    from PIL import Image
    import io
    import warnings

    payload = example['image']
    # IMG_SIZE is used as (height, width) by the TensorSpec in
    # create_tf_dataset, whereas PIL sizes are (width, height).
    pil_size = (IMG_SIZE[1], IMG_SIZE[0])

    try:
        if isinstance(payload, dict):
            # Prefer embedded bytes; fall back to a path when bytes are absent.
            embedded = payload.get('bytes')
            payload = embedded if embedded is not None else payload.get('path')

        if isinstance(payload, Image.Image):
            image = payload
        elif isinstance(payload, (bytes, bytearray)):
            image = Image.open(io.BytesIO(payload))
        elif isinstance(payload, str):
            image = Image.open(payload)
        elif isinstance(payload, np.ndarray):
            image = Image.fromarray(payload)
        else:
            raise TypeError(
                f"unsupported image payload type: {type(payload).__name__}"
            )

        # Image.open() is lazy: pixel data is only decoded here, so the
        # conversion must stay inside the try for corrupt files to be caught.
        image = image.convert("RGB").resize(pil_size)
    except Exception as exc:
        # Deliberate best-effort boundary: keep the pipeline running, but make
        # the substitution visible instead of silently training on blanks.
        warnings.warn(
            f"Could not decode image ({exc!r}); substituting a blank image.",
            RuntimeWarning,
        )
        image = Image.new('RGB', pil_size)

    # Scale to [0, 1]; building the array as float32 avoids a float64 copy.
    img_array = np.asarray(image, dtype=np.float32) / 255.0
    return img_array, example['label']

def tf_data_generator(split_name):
    """Return a zero-argument generator function for one dataset split.

    Each call to the returned function starts a new pass over
    ``dataset[split_name]`` and yields ``process_example(row)`` per row.
    """
    def generator():
        # The split is looked up lazily, when iteration actually begins.
        yield from map(process_example, dataset[split_name])

    return generator

def create_tf_dataset(split_name):
    """Wrap one dataset split in an unbatched ``tf.data.Dataset``.

    Elements are ``(image, label)`` pairs: a float32 tensor of shape
    ``(IMG_SIZE[0], IMG_SIZE[1], 3)`` and a scalar int64 label.
    """
    example_source = tf_data_generator(split_name)
    image_spec = tf.TensorSpec(shape=(IMG_SIZE[0], IMG_SIZE[1], 3), dtype=tf.float32)
    label_spec = tf.TensorSpec(shape=(), dtype=tf.int64)
    return tf.data.Dataset.from_generator(
        example_source,
        output_signature=(image_spec, label_spec),
    )

# Build the batched, prefetched input pipelines for each split.
AUTOTUNE = tf.data.AUTOTUNE

# Only the training split is shuffled. The shuffle is seeded with SEED so the
# example order is reproducible from run to run.
# NOTE(review): a 1000-element buffer only mixes examples locally; if the
# parquet shards are ordered by label, consider a larger buffer — confirm.
train_ds = (
    create_tf_dataset('train')
    .shuffle(1000, seed=SEED)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)
val_ds = create_tf_dataset('validation').batch(BATCH_SIZE).prefetch(AUTOTUNE)
test_ds = create_tf_dataset('test').batch(BATCH_SIZE).prefetch(AUTOTUNE)

## 4. Model Architecture: DCGAN Discriminator
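The network follows the DCGAN discriminator pattern:
- Five 3×3 convolutional layers with stride 2, each halving the spatial resolution (downsampling)
- LeakyReLU activation (slope 0.2) after every convolution
- Dropout (rate 0.25) after every activation
- Global average pooling followed by a single sigmoid unit that outputs the class probability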

In [None]:
def build_discriminator():
    """Build the uncompiled DCGAN-style discriminator.

    The network is a stack of stride-2 3x3 convolutions, each followed by
    LeakyReLU(0.2) and Dropout(0.25), then global average pooling and a
    single sigmoid output unit.

    Returns:
        A ``Sequential`` model named ``"DCGAN_Discriminator"`` that accepts
        inputs of shape ``(IMG_SIZE[0], IMG_SIZE[1], 3)``.
    """
    # Output channels of each downsampling block, in order. The fifth (512)
    # block is the optional deep block for large images.
    block_filters = (32, 64, 128, 256, 512)

    model = models.Sequential(name="DCGAN_Discriminator")
    model.add(layers.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)))

    for n_filters in block_filters:
        # Stride 2 with 'same' padding halves height and width in each block.
        model.add(layers.Conv2D(n_filters, (3, 3), strides=(2, 2), padding='same'))
        model.add(layers.LeakyReLU(alpha=0.2))
        model.add(layers.Dropout(0.25))

    # Classifier head: pool each feature map to one value, then one sigmoid unit.
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(1, activation='sigmoid'))

    return model

# Instantiate the discriminator and print its layer table.
model = build_discriminator()
model.summary()

# Binary cross-entropy matches the single sigmoid output; Adam uses the
# learning rate and beta_1 set in the configuration cell.
model.compile(
    loss='binary_crossentropy',
    optimizer=keras.optimizers.Adam(beta_1=BETA_1, learning_rate=LEARNING_RATE),
    metrics=[
        'accuracy',
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
    ],
)

## 5. Training with Monitoring

In [None]:
# The monitor samples on its own thread, so no Keras callback is needed to
# drive it: start it right before fit() and stop it once fit() returns.
monitor.start()

try:
    training_callbacks = [
        # Keep only the best model seen so far on disk.
        keras.callbacks.ModelCheckpoint("gan_discriminator.keras", save_best_only=True),
        # Stop after 3 epochs without improvement and restore the best weights.
        keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True),
    ]
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=NUM_EPOCHS,
        callbacks=training_callbacks,
    )
finally:
    # Always stop the sampler thread, even if training raises or is interrupted.
    monitor.stop()

In [None]:
# Visualize System Performance: draw the CPU/RAM and disk I/O samples that
# `monitor` has collected.
monitor.plot()

## 6. Evaluation

In [None]:
# Score the trained model on the held-out test split.
print("Evaluating on Test Set...")
results = model.evaluate(test_ds)

# Index 0 is the loss; the remaining entries follow the order of the metrics
# passed to compile(): accuracy, precision, recall.
test_accuracy, test_precision, test_recall = results[1], results[2], results[3]
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")