In [3]:
import os
import json

#Load and save kaggle credentials
with open("kaggle.json", "r") as f:
    kaggle_credentials = json.load(f)

os.environ["KAGGLE_USERNAME"] = kaggle_credentials["username"]
os.environ["KAGGLE_KEY"] = kaggle_credentials["key"]

In [3]:
import kaggle

#Download the full dataset
!kaggle datasets download -d alessandrasala79/ai-vs-human-generated-dataset

Dataset URL: https://www.kaggle.com/datasets/alessandrasala79/ai-vs-human-generated-dataset
License(s): apache-2.0
Downloading ai-vs-human-generated-dataset.zip to /content
100% 9.75G/9.76G [01:00<00:00, 255MB/s]
100% 9.76G/9.76G [01:00<00:00, 172MB/s]


In [None]:
!unzip ai-vs-human-generated-dataset.zip -d ai_vs_human_dataset

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: ai_vs_human_dataset/train_data/efd938a800124cf69ec656cd2d518fa5.jpg  
  inflating: ai_vs_human_dataset/train_data/efd97dc6cf2d44b4b635b20415c516c1.jpg  
  inflating: ai_vs_human_dataset/train_data/efd9d70912ee498485cabe62c2a9943e.jpg  
  inflating: ai_vs_human_dataset/train_data/efd9facc4ca04695ad2710f43a33219d.jpg  
  inflating: ai_vs_human_dataset/train_data/efda2fdd29ae4c19ac1ecf6c1a83166b.jpg  
  inflating: ai_vs_human_dataset/train_data/efdc29bae2ba4847aa23f5549ce3e89b.jpg  
  inflating: ai_vs_human_dataset/train_data/efdcd2eab54d4c6f87b6cb25dbaf139a.jpg  
  inflating: ai_vs_human_dataset/train_data/efdceffbac1e445e987ac4e06d8775ec.jpg  
  inflating: ai_vs_human_dataset/train_data/efde74f673c341e09ab5184fe29759da.jpg  
  inflating: ai_vs_human_dataset/train_data/efdf66dfcf4d4e53b7fc9b55cd563d0d.jpg  
  inflating: ai_vs_human_dataset/train_data/efdfd51607ed4ff8ad14ffeeb798f977.jpg  
  inflating: ai_vs_hum

In [1]:
import pandas as pd

#Load the data using pandas
train_df = pd.read_csv('ai_vs_human_dataset/train.csv')

print("Train data size: ",train_df.shape)

Train data size:  (79950, 3)


In [2]:
train_df.head()

Unnamed: 0.1,Unnamed: 0,file_name,label
0,0,train_data/a6dcb93f596a43249135678dfcfc17ea.jpg,1
1,1,train_data/041be3153810433ab146bc97d5af505c.jpg,0
2,2,train_data/615df26ce9494e5db2f70e57ce7a3a4f.jpg,1
3,3,train_data/8542fe161d9147be8e835e50c0de39cd.jpg,0
4,4,train_data/5d81fa12bc3b4cea8c94a6700a477cf2.jpg,1


In [3]:
train_df = train_df[["file_name", "label"]]
train_df.head()

Unnamed: 0,file_name,label
0,train_data/a6dcb93f596a43249135678dfcfc17ea.jpg,1
1,train_data/041be3153810433ab146bc97d5af505c.jpg,0
2,train_data/615df26ce9494e5db2f70e57ce7a3a4f.jpg,1
3,train_data/8542fe161d9147be8e835e50c0de39cd.jpg,0
4,train_data/5d81fa12bc3b4cea8c94a6700a477cf2.jpg,1


In [4]:
import os

#Check how many unique labels exist
print(train_df['label'].value_counts())

label
1    39975
0    39975
Name: count, dtype: int64


In [5]:
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import torch

#Define a dataset for our training data
class AIDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx, 0]
        img_path = os.path.join(self.root_dir, img_name)
        img_path = img_path.replace("train_data/train_data", "train_data")

        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Image file not found: {img_path}")

        image = Image.open(img_path).convert("RGB")
        label = int(self.dataframe.iloc[idx, 1])

        if self.transform:
            image = self.transform(image)

        return image, label

#Define transformations
transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(5),
    transforms.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.02),
    transforms.RandomAffine(degrees=5, translate=(0.02, 0.02), scale=(0.98, 1.02)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_root_dir = "ai_vs_human_dataset"

#Create dataset and dataloader
train_dataset = AIDataset(train_df, root_dir=train_root_dir, transform=transform)

sample_image, sample_label = train_dataset[0]
print(f"Sample Image Shape: {sample_image.shape}, Label: {sample_label}")

Sample Image Shape: torch.Size([3, 224, 224]), Label: 1


In [6]:
import torch.nn as nn
import timm

#Load pretrained CoAtNet-2
model = timm.create_model('coatnet_2_rw_224', pretrained=True)

model.global_pool = nn.AdaptiveAvgPool2d(1)

num_ftrs = model.head.in_features

model.head = nn.Sequential(
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
    nn.LayerNorm(num_ftrs),
    nn.Dropout(0.1),
    nn.Linear(num_ftrs, 512),
    nn.ReLU(),
    nn.Linear(512, 2)
)

#Use CUDA to utilize GPU training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

print(model)

MaxxVit(
  (stem): Stem(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (norm1): BatchNormAct2d(
      64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
  (stages): Sequential(
    (0): MaxxVitStage(
      (blocks): Sequential(
        (0): MbConvBlock(
          (shortcut): Downsample2d(
            (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
            (expand): Identity()
          )
          (pre_norm): BatchNormAct2d(
            128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (down): Identity()
          (conv1_1x1): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm1): BatchNormAct2d(
            512, eps=1e-05

In [7]:
import torch_optimizer as optim

#Define loss function and optimizer
criterion = nn.CrossEntropyLoss(label_smoothing=0.002)
optimizer = optim.Lamb(model.parameters(), lr=2e-4, weight_decay=5e-5)

In [8]:
from sklearn.model_selection import train_test_split

#First, split into 80% train and 20% (validation + test)
train_df, val_test_df = train_test_split(train_df, test_size=0.2, stratify=train_df['label'], random_state=42)

#Now, split the 20% into 10% validation and 10% test
val_df, test_df = train_test_split(val_test_df, test_size=0.5, stratify=val_test_df['label'], random_state=42)

#Print dataset sizes
print(f"Train Size: {train_df.shape}, Validation Size: {val_df.shape}, Test Size: {test_df.shape}")

Train Size: (63960, 2), Validation Size: (7995, 2), Test Size: (7995, 2)


In [9]:
#Check the data distribution
print("Original Dataset Distribution:")
print(train_df['label'].value_counts(normalize=True))
print(val_df['label'].value_counts(normalize=True))
print(test_df['label'].value_counts(normalize=True))

Original Dataset Distribution:
label
0    0.5
1    0.5
Name: proportion, dtype: float64
label
1    0.500063
0    0.499937
Name: proportion, dtype: float64
label
0    0.500063
1    0.499937
Name: proportion, dtype: float64


In [10]:
#Define training and validation datasets
train_dataset = AIDataset(train_df, root_dir="ai_vs_human_dataset", transform=transform)
val_dataset = AIDataset(val_df, root_dir="ai_vs_human_dataset", transform=transform)
test_dataset = AIDataset(test_df, root_dir="ai_vs_human_dataset", transform=transform)

#Define data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=12, prefetch_factor=8, persistent_workers=True, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=6, prefetch_factor=4, persistent_workers=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=6, prefetch_factor=4, persistent_workers=True, pin_memory=True)

#Check if validation data loads correctly
sample_image, sample_label = val_dataset[0]
print(f"Validation Sample Image Shape: {sample_image.shape}, Label: {sample_label}")

Validation Sample Image Shape: torch.Size([3, 224, 224]), Label: 1


In [11]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.backends.cudnn as cudnn
from tqdm import tqdm

#Trains the neural network using mixed precision and OneCycleLR scheduler.
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):

    cudnn.benchmark = True
    scaler = torch.amp.GradScaler("cuda")

    #Use GPU
    model.to(device)

    #Calculate the total number of steps for the scheduler based on epochs and batches
    steps_per_epoch = len(train_loader)
    total_steps = steps_per_epoch * num_epochs

    #Initialize the OneCycleLR scheduler with cosine annealing and learning rate adjustments
    scheduler = optim.lr_scheduler.OneCycleLR(
      optimizer,
      max_lr=2e-4,
      total_steps=total_steps,
      pct_start=0.25,
      anneal_strategy="cos",
      final_div_factor=5
    )

    #Training starts here
    for epoch in range(num_epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        #Progress bar using tqdm
        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")

        #Iterate over each batch of training data
        for batch_idx, (images, labels) in enumerate(loop):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            #Forward pass using mixed precision autocast
            with torch.amp.autocast("cuda"):
                outputs = model(images)
                loss = criterion(outputs, labels)

            #Backward pass with scaled gradients
            scaler.scale(loss).backward()

            #Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            #Update optimizer with scaled gradients
            scaler.step(optimizer)

            #Update the gradient scaler for next iteration
            scaler.update()

            #Update learning rate using the OneCycleLR scheduler
            scheduler.step()

            #Track training loss and accuracy
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            #Update the progress bar with current loss and accuracy
            loop.set_postfix(loss=running_loss / (loop.n + 1), acc=100. * correct / total)

        #Validation starts here
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0

        with torch.no_grad():
            #Loop through each batch of validation data
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)

                with torch.amp.autocast("cuda"):
                    outputs = model(images)
                    loss = criterion(outputs, labels)

                #Track validation loss and accuracy
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()

        #Calculate average validation loss and accuracy for the epoch
        val_loss /= len(val_loader)
        val_acc = 100. * val_correct / val_total

        #Print validation results
        print(f"📊 Epoch {epoch+1}: Val Loss = {val_loss:.4f}, Val Acc = {val_acc:.2f}%")

    print("Training Complete!")

#Train the model using our train and validation data
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=15)

Epoch 1/15: 100%|██████████| 1999/1999 [10:01<00:00,  3.32batch/s, acc=91.9, loss=0.21] 


📊 Epoch 1: Val Loss = 0.0631, Val Acc = 98.82%


Epoch 2/15: 100%|██████████| 1999/1999 [10:31<00:00,  3.17batch/s, acc=98.6, loss=0.0822]


📊 Epoch 2: Val Loss = 0.0795, Val Acc = 98.39%


Epoch 3/15: 100%|██████████| 1999/1999 [10:29<00:00,  3.17batch/s, acc=98.7, loss=0.0756]


📊 Epoch 3: Val Loss = 0.0839, Val Acc = 98.77%


Epoch 4/15: 100%|██████████| 1999/1999 [10:32<00:00,  3.16batch/s, acc=98.9, loss=0.0705]


📊 Epoch 4: Val Loss = 0.3693, Val Acc = 94.47%


Epoch 5/15: 100%|██████████| 1999/1999 [10:30<00:00,  3.17batch/s, acc=99.2, loss=0.0526]


📊 Epoch 5: Val Loss = 0.3898, Val Acc = 94.13%


Epoch 6/15: 100%|██████████| 1999/1999 [10:32<00:00,  3.16batch/s, acc=99.4, loss=0.0414]


📊 Epoch 6: Val Loss = 0.0465, Val Acc = 99.25%


Epoch 7/15: 100%|██████████| 1999/1999 [10:31<00:00,  3.17batch/s, acc=99.5, loss=0.0365]


📊 Epoch 7: Val Loss = 0.0421, Val Acc = 99.36%


Epoch 8/15: 100%|██████████| 1999/1999 [10:30<00:00,  3.17batch/s, acc=99.6, loss=0.0307]


📊 Epoch 8: Val Loss = 0.0441, Val Acc = 99.36%


Epoch 9/15: 100%|██████████| 1999/1999 [10:38<00:00,  3.13batch/s, acc=99.7, loss=0.0264]


📊 Epoch 9: Val Loss = 0.0309, Val Acc = 99.60%


Epoch 10/15: 100%|██████████| 1999/1999 [10:35<00:00,  3.15batch/s, acc=99.7, loss=0.022] 


📊 Epoch 10: Val Loss = 0.0310, Val Acc = 99.62%


Epoch 11/15: 100%|██████████| 1999/1999 [10:33<00:00,  3.15batch/s, acc=99.8, loss=0.0185]


📊 Epoch 11: Val Loss = 0.0256, Val Acc = 99.70%


Epoch 12/15: 100%|██████████| 1999/1999 [10:32<00:00,  3.16batch/s, acc=99.9, loss=0.0156]


📊 Epoch 12: Val Loss = 0.0375, Val Acc = 99.50%


Epoch 13/15: 100%|██████████| 1999/1999 [10:30<00:00,  3.17batch/s, acc=99.9, loss=0.0132]


📊 Epoch 13: Val Loss = 0.0293, Val Acc = 99.62%


Epoch 14/15: 100%|██████████| 1999/1999 [10:32<00:00,  3.16batch/s, acc=99.9, loss=0.011] 


📊 Epoch 14: Val Loss = 0.0230, Val Acc = 99.74%


Epoch 15/15: 100%|██████████| 1999/1999 [10:17<00:00,  3.24batch/s, acc=100, loss=0.00971]


📊 Epoch 15: Val Loss = 0.0223, Val Acc = 99.75%
Training Complete!


In [14]:
import torch
from tqdm import tqdm
import numpy as np
from sklearn.metrics import f1_score

def evaluate_test_set(model, test_loader, criterion, device='cuda'):
    model.to(device)
    model.eval()

    test_loss, test_correct, test_total = 0.0, 0, 0
    test_all_labels, test_all_preds = [], []

    with torch.no_grad():
        loop = tqdm(test_loader, desc="Evaluating Test Set", leave=False)
        for images, labels in loop:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            _, predicted = outputs.max(1)

            test_loss += loss.item()
            test_total += labels.size(0)
            test_correct += predicted.eq(labels).sum().item()

            test_all_labels.extend(labels.cpu().numpy())
            test_all_preds.extend(predicted.cpu().numpy())

    test_loss /= len(test_loader)
    test_accuracy = 100. * test_correct / test_total
    test_f1 = f1_score(test_all_labels, test_all_preds, average='weighted')

    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%, Test F1 Score: {test_f1:.4f}")

    torch.cuda.empty_cache()

print("Evaluating Test Set:")
evaluate_test_set(model, test_loader, criterion)

Evaluating Test Set:


                                                                      

Test Loss: 0.0216, Test Accuracy: 99.76%, Test F1 Score: 0.9976
