In [None]:
import pandas as pd


# Read the training CSV and discard its 'Unnamed: 0' column in one chained step.
df = (
    pd.read_csv('/Users/aarohiverma/Documents/ai_vs_real/train.csv')
    .drop(columns=['Unnamed: 0'])
)

In [None]:
# Shuffle all rows once. The fixed seed keeps the row order -- and therefore
# which rows end up in the later stratified train/validation split --
# identical across kernel restarts.
df = df.sample(frac=1, random_state=42)

In [None]:
# Display the first rows of the frame as a quick sanity check.
df.head()

In [None]:

import tensorflow as tf
from tensorflow.keras.applications import EfficientNetV2B3
from tensorflow.keras.layers import (Dense, GlobalAveragePooling2D, Input, 
                                     Dropout, BatchNormalization)
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.metrics import Recall
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd
import numpy as np
import os  

# ----- Constants -----
IMG_SIZE = 256               # images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 64              # batch size of the tf.data pipelines
LEARNING_RATE = 5e-6         # learning rate handed to the Adam optimizer
EPOCHS = 30                  # maximum number of epochs passed to model.fit
AUTOTUNE = tf.data.AUTOTUNE  # lets tf.data choose parallelism / prefetch depth
IMAGE_BASE_PATH = ""         # prefix joined onto each file_name; "" leaves paths unchanged

# ----- Build path / label arrays -----
# `df` must already exist in the kernel when this cell runs.
# The joined paths are computed into a new array rather than written back into
# df["file_name"]: writing them back makes the cell non-idempotent, because
# re-running it with a non-empty IMAGE_BASE_PATH would prepend the prefix again.
file_paths = (
    df["file_name"]
    .apply(lambda name: os.path.join(IMAGE_BASE_PATH, name))
    .values
)
labels = df["label"].values.astype(np.float32)

# ----- Train/Validation Split -----
# 80/20 split stratified on the label; random_state pins the split for a
# given row order of `df`.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    file_paths, labels, test_size=0.2, random_state=42, stratify=labels
)

# ----- Data Augmentation -----
# Random transforms, applied in the listed order. load_image() runs an image
# through this pipeline only when it is called with augment=True.
# NOTE(review): per the Keras docs, RandomBrightness assumes pixel values in
# [0, 255] unless value_range is given -- confirm the images fed in match.
_augmentation_steps = [
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.2),
    tf.keras.layers.RandomBrightness(0.2),
    tf.keras.layers.RandomContrast(0.2),
]
data_augmentation = tf.keras.Sequential(_augmentation_steps)

def load_image(file_path, label, augment=False):
    """Read one JPEG from disk and return it as an (image, label) pair.

    The file is decoded to 3 channels and resized to IMG_SIZE x IMG_SIZE.
    Pixel values are deliberately left on the 0-255 scale: the Keras
    EfficientNetV2 applications rescale their input internally by default
    and expect raw [0, 255] pixels, and RandomBrightness defaults to the
    same range. Dividing by 255 here would hand the backbone inputs that it
    rescales a second time.

    Args:
        file_path: Path of the image file (string or scalar string tensor).
        label: Label associated with the image; returned unchanged.
        augment: When True, pass the image through `data_augmentation`.

    Returns:
        Tuple `(img, label)` where `img` is the resized image tensor.
    """
    raw_bytes = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(raw_bytes, channels=3)
    img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE))
    if augment:
        # This runs inside Dataset.map, outside of model.fit(), so request
        # training behaviour explicitly; in inference mode the random layers
        # leave the image untouched.
        img = data_augmentation(img, training=True)
    return img, label

# ----- TF Datasets -----
# Training pipeline. Shuffling happens BEFORE decoding, on the lightweight
# (path, label) pairs: the buffer can then span the whole training set (a
# uniform shuffle, redone every epoch) without holding thousands of decoded
# float images in memory, which is what shuffling after map() would do.
train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_dataset = train_dataset.shuffle(len(train_paths))
train_dataset = train_dataset.map(lambda x, y: load_image(x, y, True), num_parallel_calls=AUTOTUNE)
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Validation pipeline: no augmentation and no shuffling, so its element order
# follows val_paths / val_labels.
val_dataset = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))
val_dataset = val_dataset.map(lambda x, y: load_image(x, y, False), num_parallel_calls=AUTOTUNE)
val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# ----- Raw CNN Model (Without Self-Attention) -----
# ImageNet-initialised EfficientNetV2B3 without its classification top,
# applied to a fixed-size 3-channel input and pooled to one feature vector.
image_input = Input(name="image_input", shape=(IMG_SIZE, IMG_SIZE, 3))
backbone = EfficientNetV2B3(include_top=False, weights="imagenet")
base_model = backbone(image_input)  # output tensor of the backbone, not the model object
image_features = GlobalAveragePooling2D()(base_model)

# Head: L2-regularised Dense(128, relu) -> BatchNorm -> Dropout -> single sigmoid unit.
x = Dense(units=128, activation="relu", kernel_regularizer=l2(0.0005))(image_features)
x = BatchNormalization()(x)
x = Dropout(rate=0.5)(x)
output = Dense(units=1, activation="sigmoid")(x)

model = Model(inputs=image_input, outputs=output)

adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
tracked_metrics = ["accuracy", Recall(name="recall")]
model.compile(optimizer=adam, loss="binary_crossentropy", metrics=tracked_metrics)

# Stop when val_loss has not improved for 5 epochs and restore the best weights.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)

# ----- Train -----
model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    callbacks=[early_stop],
)

# ----- Evaluate -----
# Predict straight from the validation pipeline instead of first concatenating
# every validation image into one in-memory tensor. val_dataset is built from
# val_paths / val_labels without shuffling, so the predictions line up with
# the existing `val_labels` array and it does not need to be rebuilt (or
# overwritten) from the batches.
pred_probs = model.predict(val_dataset)
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
pred_labels = (pred_probs > 0.5).astype("int32").flatten()

# Metrics
print("Confusion Matrix:")
print(confusion_matrix(val_labels, pred_labels))
print("\nClassification Report:")
print(classification_report(val_labels, pred_labels, digits=4))

