In [1]:
"""
Stage 1: AI vs Real Image Classifier - ConvNeXt-Large
Optimized for ConvNeXt architecture with modern training techniques
"""

import os
import json
import time
from glob import glob
from PIL import Image
import matplotlib.pyplot as plt
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from tqdm import tqdm

torch.backends.cudnn.benchmark = True

# ==========================================
# CONFIGURATION - OPTIMIZED FOR 4GB VRAM
# ==========================================
class Config:
    # Paths
    DATASET_DIR = r"E:\AI\AI vs Real Dataset"
    AI_DIR = os.path.join(DATASET_DIR, "AI")
    REAL_DIR = os.path.join(DATASET_DIR, "Real")
    
    # Model - ConvNeXt-Tiny (28M params vs 197M for Large)
    MODEL_NAME = "convnext_tiny"  # Much lighter!
    NUM_CLASSES = 2
    IMG_SIZE = 224
    
    # Training - Optimized for 4GB VRAM
    BATCH_SIZE = 16  # Reduced for memory
    EPOCHS = 30
    LEARNING_RATE = 5e-5
    WEIGHT_DECAY = 0.05
    DROPOUT_RATE = 0.3  # Reduced dropout
    LABEL_SMOOTHING = 0.1
    STOCHASTIC_DEPTH = 0.1
    
    # Data split
    TEST_SIZE = 0.2
    VAL_SIZE = 0.1
    
    # System
    NUM_WORKERS = 0
    PIN_MEMORY = True
    PERSISTENT_WORKERS = False
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    
    # Checkpointing
    SAVE_DIR = "checkpoints_convnext_tiny"
    
    # Early stopping
    PATIENCE = 7
    
    # Mixed precision training
    USE_AMP = True  # Critical for 4GB VRAM!

config = Config()
os.makedirs(config.SAVE_DIR, exist_ok=True)

print("="*60)
print("STAGE 1: AI vs REAL - ConvNeXt-Tiny (4GB VRAM Optimized)")
print("="*60)
print(f"Device: {config.DEVICE}")
print(f"Model: {config.MODEL_NAME}")
print(f"Image Size: {config.IMG_SIZE}x{config.IMG_SIZE}")
print(f"Batch Size: {config.BATCH_SIZE}")
print(f"Learning Rate: {config.LEARNING_RATE}")
print(f"Weight Decay: {config.WEIGHT_DECAY}")
print(f"Dropout: {config.DROPOUT_RATE}")
print(f"Stochastic Depth: {config.STOCHASTIC_DEPTH}")
print(f"Mixed Precision: {config.USE_AMP}")
print("="*60)

# ==========================================
# DATA COLLECTION
# ==========================================
def collect_images_fast(root_dir, max_images=100000):
    print(f"\nScanning {os.path.basename(root_dir)}...")
    valid_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.webp'}
    exclude_keywords = ['metadata', 'label', 'annotation']
    all_images = []
    
    print("  Searching for images recursively...")
    for ext in valid_extensions:
        pattern = os.path.join(root_dir, '**', f'*{ext}')
        files = glob(pattern, recursive=True)
        all_images.extend(files)
    
    filtered_images = []
    for img_path in all_images:
        filename = os.path.basename(img_path).lower()
        if not any(keyword in filename for keyword in exclude_keywords):
            filtered_images.append(img_path)
    
    print(f"  Found {len(all_images):,} total files")
    print(f"  After filtering: {len(filtered_images):,} images")
    
    if len(filtered_images) > max_images:
        print(f"  Sampling {max_images:,} images...")
        filtered_images = random.sample(filtered_images, max_images)
    
    return filtered_images

# ==========================================
# LOAD DATA
# ==========================================
print("\n" + "="*60)
print("LOADING DATASET")
print("="*60)

ai_images = collect_images_fast(config.AI_DIR, max_images=100000)
real_images = collect_images_fast(config.REAL_DIR, max_images=100000)

print(f"\nFinal counts:")
print(f"  AI:   {len(ai_images):,} images")
print(f"  Real: {len(real_images):,} images")

all_images = ai_images + real_images
labels = [1] * len(ai_images) + [0] * len(real_images)

if len(all_images) == 0:
    raise ValueError("No images found!")

# Shuffle
combined = list(zip(all_images, labels))
random.shuffle(combined)
all_images, labels = zip(*combined)
all_images, labels = list(all_images), list(labels)

# Split
train_imgs, test_imgs, train_lbls, test_lbls = train_test_split(
    all_images, labels, test_size=config.TEST_SIZE, random_state=42, stratify=labels
)
train_imgs, val_imgs, train_lbls, val_lbls = train_test_split(
    train_imgs, train_lbls, test_size=config.VAL_SIZE, random_state=42, stratify=train_lbls
)

print(f"\nDataset split:")
print(f"  Train: {len(train_imgs):,} images")
print(f"  Val:   {len(val_imgs):,} images")
print(f"  Test:  {len(test_imgs):,} images")

# ==========================================
# DATASET
# ==========================================
class FastAIDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        path = self.image_paths[idx]
        try:
            with Image.open(path) as img:
                img = img.convert("RGB")
                if self.transform:
                    img = self.transform(img)
        except:
            img = torch.zeros(3, config.IMG_SIZE, config.IMG_SIZE)
        label = self.labels[idx]
        return img, label

# ==========================================
# DATA AUGMENTATION - CONVNEXT OPTIMIZED
# ==========================================
# ConvNeXt uses stronger augmentation strategies
train_transform = transforms.Compose([
    transforms.Resize((config.IMG_SIZE, config.IMG_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.1),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.RandomPerspective(distortion_scale=0.2, p=0.3),
    transforms.RandomGrayscale(p=0.05),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.25, scale=(0.02, 0.2), ratio=(0.3, 3.3))
])

val_transform = transforms.Compose([
    transforms.Resize((config.IMG_SIZE, config.IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# ==========================================
# DATALOADERS
# ==========================================
train_dataset = FastAIDataset(train_imgs, train_lbls, transform=train_transform)
val_dataset = FastAIDataset(val_imgs, val_lbls, transform=val_transform)
test_dataset = FastAIDataset(test_imgs, test_lbls, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE, shuffle=True,
                          num_workers=config.NUM_WORKERS, pin_memory=config.PIN_MEMORY)
val_loader = DataLoader(val_dataset, batch_size=config.BATCH_SIZE, shuffle=False,
                        num_workers=config.NUM_WORKERS, pin_memory=config.PIN_MEMORY)
test_loader = DataLoader(test_dataset, batch_size=config.BATCH_SIZE, shuffle=False,
                         num_workers=config.NUM_WORKERS, pin_memory=config.PIN_MEMORY)

print(f"\nDataloaders: Train={len(train_loader)}, Val={len(val_loader)}, Test={len(test_loader)}")

# ==========================================
# CONVNEXT MODEL WITH CUSTOM HEAD
# ==========================================
class ConvNeXtClassifier(nn.Module):
    def __init__(self, num_classes=2, dropout_rate=0.3, model_size='tiny'):
        super().__init__()
        
        # Load appropriate ConvNeXt model
        if model_size == 'tiny':
            self.backbone = models.convnext_tiny(weights=models.ConvNeXt_Tiny_Weights.IMAGENET1K_V1)
        elif model_size == 'small':
            self.backbone = models.convnext_small(weights=models.ConvNeXt_Small_Weights.IMAGENET1K_V1)
        else:
            raise ValueError(f"Unknown model size: {model_size}")
        
        # Get the input features from the classifier
        in_features = self.backbone.classifier[2].in_features
        
        # Replace classifier with custom head
        # ConvNeXt output is [B, C, 1, 1], need to flatten first
        self.backbone.classifier = nn.Sequential(
            nn.Flatten(1),  # Flatten spatial dimensions first
            nn.LayerNorm(in_features),
            nn.Dropout(dropout_rate),
            nn.Linear(in_features, 512),  # Reduced from 768
            nn.GELU(),
            nn.LayerNorm(512),
            nn.Dropout(dropout_rate * 0.5),
            nn.Linear(512, num_classes)
        )
    
    def forward(self, x):
        return self.backbone(x)

print("\n" + "="*60)
print("CREATING CONVNEXT-TINY MODEL (4GB VRAM OPTIMIZED)")
print("="*60)

model = ConvNeXtClassifier(
    num_classes=config.NUM_CLASSES, 
    dropout_rate=config.DROPOUT_RATE,
    model_size='tiny'  # Can change to 'small' if you have headroom
)
model = model.to(config.DEVICE)

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Model size: ~{total_params * 4 / (1024**2):.2f} MB (FP32)")
print(f"With AMP: ~{total_params * 2 / (1024**2):.2f} MB (FP16)")

# ==========================================
# LABEL SMOOTHING LOSS
# ==========================================
class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing
    
    def forward(self, pred, target):
        confidence = 1.0 - self.smoothing
        logprobs = F.log_softmax(pred, dim=-1)
        nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
        nll_loss = nll_loss.squeeze(1)
        smooth_loss = -logprobs.mean(dim=-1)
        loss = confidence * nll_loss + self.smoothing * smooth_loss
        return loss.mean()

criterion = LabelSmoothingCrossEntropy(smoothing=config.LABEL_SMOOTHING)

# ==========================================
# OPTIMIZER - ADAMW FOR CONVNEXT
# ==========================================
# ConvNeXt uses AdamW with layer-wise learning rate decay
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=config.LEARNING_RATE,
    weight_decay=config.WEIGHT_DECAY,
    betas=(0.9, 0.999),
    eps=1e-8
)

# Cosine annealing scheduler
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, 
    T_max=config.EPOCHS,
    eta_min=1e-6
)

# Mixed precision scaler
scaler = torch.cuda.amp.GradScaler() if config.USE_AMP else None

# ==========================================
# TRAINING FUNCTIONS
# ==========================================
def train_epoch(model, loader, criterion, optimizer, scheduler, device, epoch, scaler=None):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    pbar = tqdm(loader, desc=f"Epoch {epoch+1}/{config.EPOCHS} [Train]")
    
    for images, labels in pbar:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        
        optimizer.zero_grad()
        
        # Mixed precision training
        if scaler is not None:
            with torch.cuda.amp.autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)
            
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
        else:
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
        
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        pbar.set_postfix({
            'loss': f'{running_loss/(pbar.n+1):.4f}',
            'acc': f'{100.*correct/total:.2f}%',
            'lr': f'{optimizer.param_groups[0]["lr"]:.6f}'
        })
    
    epoch_loss = running_loss / len(loader)
    epoch_acc = 100. * correct / total
    
    return epoch_loss, epoch_acc

def validate(model, loader, criterion, device, phase="Val"):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    all_probs = []
    
    with torch.no_grad():
        pbar = tqdm(loader, desc=f"{phase}")
        for images, labels in pbar:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            
            # Use AMP for inference too
            if config.USE_AMP:
                with torch.cuda.amp.autocast():
                    outputs = model(images)
                    loss = criterion(outputs, labels)
            else:
                outputs = model(images)
                loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            probs = torch.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs[:, 1].cpu().numpy())
            
            pbar.set_postfix({
                'loss': f'{running_loss/(pbar.n+1):.4f}',
                'acc': f'{100.*correct/total:.2f}%'
            })
    
    epoch_loss = running_loss / len(loader)
    epoch_acc = 100. * correct / total
    auc = roc_auc_score(all_labels, all_probs)
    
    return epoch_loss, epoch_acc, auc, all_preds, all_labels


STAGE 1: AI vs REAL - ConvNeXt-Tiny (4GB VRAM Optimized)
Device: cuda
Model: convnext_tiny
Image Size: 224x224
Batch Size: 16
Learning Rate: 5e-05
Weight Decay: 0.05
Dropout: 0.3
Stochastic Depth: 0.1
Mixed Precision: True

LOADING DATASET

Scanning AI...
  Searching for images recursively...
  Found 1,479,354 total files
  After filtering: 1,479,354 images
  Sampling 100,000 images...

Scanning Real...
  Searching for images recursively...
  Found 1,017,384 total files
  After filtering: 1,017,384 images
  Sampling 100,000 images...

Final counts:
  AI:   100,000 images
  Real: 100,000 images

Dataset split:
  Train: 144,000 images
  Val:   16,000 images
  Test:  40,000 images

Dataloaders: Train=9000, Val=1000, Test=2500

CREATING CONVNEXT-TINY MODEL (4GB VRAM OPTIMIZED)


Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to C:\Users\-/.cache\torch\hub\checkpoints\convnext_tiny-983f1562.pth
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 109M/109M [01:01<00:00, 1.85MB/s] 


Total parameters: 28,215,906
Trainable parameters: 28,215,906
Model size: ~107.64 MB (FP32)
With AMP: ~53.82 MB (FP16)


  scaler = torch.cuda.amp.GradScaler() if config.USE_AMP else None


In [None]:

# ==========================================
# TRAINING LOOP
# ==========================================
print("\n" + "="*60)
print("STARTING TRAINING")
print("="*60)

history = {
    'train_loss': [], 'train_acc': [],
    'val_loss': [], 'val_acc': [], 'val_auc': []
}

best_val_acc = 0.0
best_val_auc = 0.0
patience_counter = 0
start_time = time.time()

for epoch in range(config.EPOCHS):
    print(f"\n{'='*60}")
    print(f"Epoch {epoch+1}/{config.EPOCHS}")
    print(f"{'='*60}")
    
    train_loss, train_acc = train_epoch(
        model, train_loader, criterion, optimizer, scheduler, config.DEVICE, epoch, scaler
    )
    
    val_loss, val_acc, val_auc, _, _ = validate(
        model, val_loader, criterion, config.DEVICE, phase="Val"
    )
    
    # Step scheduler
    scheduler.step()
    
    # Save history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    history['val_auc'].append(val_auc)
    
    # Calculate generalization gap
    gen_gap = train_acc - val_acc
    
    print(f"\nSummary:")
    print(f"  Train: Loss={train_loss:.4f}, Acc={train_acc:.2f}%")
    print(f"  Val:   Loss={val_loss:.4f}, Acc={val_acc:.2f}%, AUC={val_auc:.4f}")
    print(f"  Generalization Gap: {gen_gap:.2f}%")
    
    # Save best model
    if val_auc > best_val_auc:
        best_val_acc = val_acc
        best_val_auc = val_auc
        patience_counter = 0
        
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'val_acc': val_acc,
            'val_auc': val_auc,
            'gen_gap': gen_gap,
            'config': {
                'model_name': config.MODEL_NAME,
                'img_size': config.IMG_SIZE,
                'num_classes': config.NUM_CLASSES
            }
        }
        
        torch.save(checkpoint, os.path.join(config.SAVE_DIR, 'best_model.pth'))
        print(f"  ‚úì Best model saved! (Val AUC: {val_auc:.4f}, Acc: {val_acc:.2f}%)")
    else:
        patience_counter += 1
        print(f"  No improvement ({patience_counter}/{config.PATIENCE})")
    
    if patience_counter >= config.PATIENCE:
        print(f"\nEarly stopping at epoch {epoch+1}")
        break

training_time = time.time() - start_time
print(f"\n{'='*60}")
print(f"Training Complete!")
print(f"Time: {training_time/60:.2f} minutes")
print(f"Best Val Acc: {best_val_acc:.2f}%")
print(f"Best Val AUC: {best_val_auc:.4f}")
print(f"{'='*60}")



STARTING TRAINING

Epoch 1/30


  with torch.cuda.amp.autocast():
Epoch 1/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [57:14<00:00,  2.62it/s, loss=0.5298, acc=76.25%, lr=0.000050]  
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:40<00:00,  4.54it/s, loss=0.4061, acc=86.38%]



Summary:
  Train: Loss=0.5298, Acc=76.25%
  Val:   Loss=0.4061, Acc=86.38%, AUC=0.9393
  Generalization Gap: -10.13%
  ‚úì Best model saved! (Val AUC: 0.9393, Acc: 86.38%)

Epoch 2/30


  with torch.cuda.amp.autocast():
Epoch 2/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [56:43<00:00,  2.64it/s, loss=0.4482, acc=83.30%, lr=0.000050]
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [04:10<00:00,  3.99it/s, loss=0.3743, acc=88.74%]



Summary:
  Train: Loss=0.4482, Acc=83.30%
  Val:   Loss=0.3743, Acc=88.74%, AUC=0.9621
  Generalization Gap: -5.44%
  ‚úì Best model saved! (Val AUC: 0.9621, Acc: 88.74%)

Epoch 3/30


  with torch.cuda.amp.autocast():
Epoch 3/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [4:56:35<00:00,  1.98s/it, loss=0.4187, acc=85.57%, lr=0.000049]       
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:34<00:00,  4.67it/s, loss=0.3605, acc=89.88%]



Summary:
  Train: Loss=0.4187, Acc=85.57%
  Val:   Loss=0.3605, Acc=89.88%, AUC=0.9640
  Generalization Gap: -4.31%
  ‚úì Best model saved! (Val AUC: 0.9640, Acc: 89.88%)

Epoch 4/30


  with torch.cuda.amp.autocast():
Epoch 4/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [15:32:10<00:00,  6.21s/it, loss=0.4023, acc=86.73%, lr=0.000049]        
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:34<00:00,  4.66it/s, loss=0.3663, acc=89.87%]



Summary:
  Train: Loss=0.4023, Acc=86.73%
  Val:   Loss=0.3663, Acc=89.87%, AUC=0.9638
  Generalization Gap: -3.13%
  No improvement (1/7)

Epoch 5/30


  with torch.cuda.amp.autocast():
Epoch 5/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [55:22<00:00,  2.71it/s, loss=0.3881, acc=87.91%, lr=0.000048]
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [05:09<00:00,  3.23it/s, loss=0.3366, acc=91.70%]



Summary:
  Train: Loss=0.3881, Acc=87.91%
  Val:   Loss=0.3366, Acc=91.70%, AUC=0.9728
  Generalization Gap: -3.79%
  ‚úì Best model saved! (Val AUC: 0.9728, Acc: 91.70%)

Epoch 6/30


  with torch.cuda.amp.autocast():
Epoch 6/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [5:29:06<00:00,  2.19s/it, loss=0.3776, acc=88.61%, lr=0.000047]       
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:19<00:00,  5.01it/s, loss=0.3328, acc=91.71%]



Summary:
  Train: Loss=0.3776, Acc=88.61%
  Val:   Loss=0.3328, Acc=91.71%, AUC=0.9733
  Generalization Gap: -3.10%
  ‚úì Best model saved! (Val AUC: 0.9733, Acc: 91.71%)

Epoch 7/30


  with torch.cuda.amp.autocast():
Epoch 7/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [1:52:08<00:00,  1.34it/s, loss=0.3683, acc=89.26%, lr=0.000045]     
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:27<00:00,  4.81it/s, loss=0.3885, acc=89.42%]



Summary:
  Train: Loss=0.3683, Acc=89.26%
  Val:   Loss=0.3885, Acc=89.42%, AUC=0.9686
  Generalization Gap: -0.17%
  No improvement (1/7)

Epoch 8/30


  with torch.cuda.amp.autocast():
Epoch 8/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [3:02:34<00:00,  1.22s/it, loss=0.3612, acc=89.83%, lr=0.000044]      
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:21<00:00,  4.96it/s, loss=0.3376, acc=91.67%]



Summary:
  Train: Loss=0.3612, Acc=89.83%
  Val:   Loss=0.3376, Acc=91.67%, AUC=0.9733
  Generalization Gap: -1.84%
  ‚úì Best model saved! (Val AUC: 0.9733, Acc: 91.67%)

Epoch 9/30


  with torch.cuda.amp.autocast():
Epoch 9/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [17:51:50<00:00,  7.15s/it, loss=0.3537, acc=90.36%, lr=0.000042]         
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:23<00:00,  4.92it/s, loss=0.3248, acc=92.71%]



Summary:
  Train: Loss=0.3537, Acc=90.36%
  Val:   Loss=0.3248, Acc=92.71%, AUC=0.9762
  Generalization Gap: -2.35%
  ‚úì Best model saved! (Val AUC: 0.9762, Acc: 92.71%)

Epoch 10/30


  with torch.cuda.amp.autocast():
Epoch 10/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [2:49:33<00:00,  1.13s/it, loss=0.3457, acc=91.00%, lr=0.000040]      
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:20<00:00,  4.98it/s, loss=0.3311, acc=92.52%]



Summary:
  Train: Loss=0.3457, Acc=91.00%
  Val:   Loss=0.3311, Acc=92.52%, AUC=0.9768
  Generalization Gap: -1.52%
  ‚úì Best model saved! (Val AUC: 0.9768, Acc: 92.52%)

Epoch 11/30


  with torch.cuda.amp.autocast():
Epoch 11/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [2:57:09<00:00,  1.18s/it, loss=0.3396, acc=91.44%, lr=0.000038]       
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:20<00:00,  4.99it/s, loss=0.3477, acc=91.72%]



Summary:
  Train: Loss=0.3396, Acc=91.44%
  Val:   Loss=0.3477, Acc=91.72%, AUC=0.9723
  Generalization Gap: -0.28%
  No improvement (1/7)

Epoch 12/30


  with torch.cuda.amp.autocast():
Epoch 12/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [11:45:05<00:00,  4.70s/it, loss=0.3338, acc=91.92%, lr=0.000035]        
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:18<00:00,  5.03it/s, loss=0.3592, acc=91.56%]



Summary:
  Train: Loss=0.3338, Acc=91.92%
  Val:   Loss=0.3592, Acc=91.56%, AUC=0.9732
  Generalization Gap: 0.37%
  No improvement (2/7)

Epoch 13/30


  with torch.cuda.amp.autocast():
Epoch 13/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [9:16:22<00:00,  3.71s/it, loss=0.3281, acc=92.38%, lr=0.000033]       
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:27<00:00,  4.82it/s, loss=0.3757, acc=90.58%]



Summary:
  Train: Loss=0.3281, Acc=92.38%
  Val:   Loss=0.3757, Acc=90.58%, AUC=0.9697
  Generalization Gap: 1.80%
  No improvement (3/7)

Epoch 14/30


  with torch.cuda.amp.autocast():
Epoch 14/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [2:53:07<00:00,  1.15s/it, loss=0.3245, acc=92.77%, lr=0.000031]       
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:19<00:00,  5.00it/s, loss=0.3349, acc=92.45%]



Summary:
  Train: Loss=0.3245, Acc=92.77%
  Val:   Loss=0.3349, Acc=92.45%, AUC=0.9774
  Generalization Gap: 0.32%
  ‚úì Best model saved! (Val AUC: 0.9774, Acc: 92.45%)

Epoch 15/30


  with torch.cuda.amp.autocast():
Epoch 15/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [11:56:53<00:00,  4.78s/it, loss=0.3171, acc=93.22%, lr=0.000028]        
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:21<00:00,  4.95it/s, loss=0.3444, acc=92.61%]



Summary:
  Train: Loss=0.3171, Acc=93.22%
  Val:   Loss=0.3444, Acc=92.61%, AUC=0.9772
  Generalization Gap: 0.61%
  No improvement (1/7)

Epoch 16/30


  with torch.cuda.amp.autocast():
Epoch 16/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [48:36<00:00,  3.09it/s, loss=0.3111, acc=93.69%, lr=0.000026]
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [01:41<00:00,  9.86it/s, loss=0.3553, acc=91.83%]



Summary:
  Train: Loss=0.3111, Acc=93.69%
  Val:   Loss=0.3553, Acc=91.83%, AUC=0.9777
  Generalization Gap: 1.86%
  ‚úì Best model saved! (Val AUC: 0.9777, Acc: 91.83%)

Epoch 17/30


  with torch.cuda.amp.autocast():
Epoch 17/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [43:14<00:00,  3.47it/s, loss=0.3021, acc=94.34%, lr=0.000023]
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [57:18<00:00,  3.44s/it, loss=0.3497, acc=92.66%]   



Summary:
  Train: Loss=0.3021, Acc=94.34%
  Val:   Loss=0.3493, Acc=92.66%, AUC=0.9773
  Generalization Gap: 1.68%
  No improvement (1/7)

Epoch 18/30


  with torch.cuda.amp.autocast():
Epoch 18/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [45:03<00:00,  3.33it/s, loss=0.2959, acc=94.76%, lr=0.000020]
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:27<00:00,  4.82it/s, loss=0.3489, acc=93.03%]



Summary:
  Train: Loss=0.2959, Acc=94.76%
  Val:   Loss=0.3489, Acc=93.03%, AUC=0.9793
  Generalization Gap: 1.74%
  ‚úì Best model saved! (Val AUC: 0.9793, Acc: 93.03%)

Epoch 19/30


  with torch.cuda.amp.autocast():
Epoch 19/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [3:50:09<00:00,  1.53s/it, loss=0.2892, acc=95.18%, lr=0.000018]       
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:26<00:00,  4.84it/s, loss=0.3410, acc=93.21%]



Summary:
  Train: Loss=0.2892, Acc=95.18%
  Val:   Loss=0.3410, Acc=93.21%, AUC=0.9795
  Generalization Gap: 1.97%
  ‚úì Best model saved! (Val AUC: 0.9795, Acc: 93.21%)

Epoch 20/30


  with torch.cuda.amp.autocast():
Epoch 20/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [2:01:27<00:00,  1.24it/s, loss=0.2820, acc=95.65%, lr=0.000016]      
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:24<00:00,  4.90it/s, loss=0.3622, acc=92.51%]



Summary:
  Train: Loss=0.2820, Acc=95.65%
  Val:   Loss=0.3622, Acc=92.51%, AUC=0.9780
  Generalization Gap: 3.15%
  No improvement (1/7)

Epoch 21/30


  with torch.cuda.amp.autocast():
Epoch 21/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [2:38:57<00:00,  1.06s/it, loss=0.2760, acc=96.00%, lr=0.000013]     
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:21<00:00,  4.96it/s, loss=0.3428, acc=93.17%]



Summary:
  Train: Loss=0.2760, Acc=96.00%
  Val:   Loss=0.3428, Acc=93.17%, AUC=0.9811
  Generalization Gap: 2.82%
  ‚úì Best model saved! (Val AUC: 0.9811, Acc: 93.17%)

Epoch 22/30


  with torch.cuda.amp.autocast():
Epoch 22/30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9000/9000 [14:49:32<00:00,  5.93s/it, loss=0.2680, acc=96.42%, lr=0.000011]         
  with torch.cuda.amp.autocast():
Val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [03:23<00:00,  4.91it/s, loss=0.3456, acc=93.41%]



Summary:
  Train: Loss=0.2680, Acc=96.42%
  Val:   Loss=0.3456, Acc=93.41%, AUC=0.9795
  Generalization Gap: 3.02%
  No improvement (1/7)

Epoch 23/30


  with torch.cuda.amp.autocast():
Epoch 23/30 [Train]:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 6227/9000 [38:00<2:22:35,  3.09s/it, loss=0.2596, acc=96.92%, lr=0.000009]

In [None]:

# ==========================================
# TEST EVALUATION
# ==========================================
print("\n" + "="*60)
print("FINAL TEST EVALUATION")
print("="*60)

checkpoint = torch.load(os.path.join(config.SAVE_DIR, 'best_model.pth'))
model.load_state_dict(checkpoint['model_state_dict'])

test_loss, test_acc, test_auc, test_preds, test_labels = validate(
    model, test_loader, criterion, config.DEVICE, phase="Test"
)

print(f"\nTest Results:")
print(f"  Loss: {test_loss:.4f}")
print(f"  Accuracy: {test_acc:.2f}%")
print(f"  AUC: {test_auc:.4f}")
print(f"  Best Epoch Generalization Gap: {checkpoint['gen_gap']:.2f}%")

print("\nClassification Report:")
print(classification_report(test_labels, test_preds, target_names=['Real', 'AI']))

cm = confusion_matrix(test_labels, test_preds)
print("\nConfusion Matrix:")
print(f"{'':15} {'Pred Real':>12} {'Pred AI':>12}")
print(f"{'Actual Real':15} {cm[0][0]:>12} {cm[0][1]:>12}")
print(f"{'Actual AI':15} {cm[1][0]:>12} {cm[1][1]:>12}")

# ==========================================
# SAVE & PLOT
# ==========================================
with open(os.path.join(config.SAVE_DIR, 'history.json'), 'w') as f:
    json.dump(history, f, indent=4)

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

axes[0, 0].plot(history['train_loss'], label='Train', linewidth=2)
axes[0, 0].plot(history['val_loss'], label='Val', linewidth=2)
axes[0, 0].set_title('Loss', fontweight='bold')
axes[0, 0].set_xlabel('Epoch')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

axes[0, 1].plot(history['train_acc'], label='Train', linewidth=2)
axes[0, 1].plot(history['val_acc'], label='Val', linewidth=2)
axes[0, 1].set_title('Accuracy', fontweight='bold')
axes[0, 1].set_xlabel('Epoch')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

axes[1, 0].plot(history['val_auc'], label='Val AUC', linewidth=2, color='green')
axes[1, 0].set_title('Validation AUC', fontweight='bold')
axes[1, 0].set_xlabel('Epoch')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

gen_gaps = [t - v for t, v in zip(history['train_acc'], history['val_acc'])]
axes[1, 1].plot(gen_gaps, linewidth=2, color='red')
axes[1, 1].set_title('Generalization Gap', fontweight='bold')
axes[1, 1].set_xlabel('Epoch')
axes[1, 1].set_ylabel('Gap (%)')
axes[1, 1].axhline(y=5, color='orange', linestyle='--', label='5% threshold')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(os.path.join(config.SAVE_DIR, 'training_curves.png'), dpi=200)
print(f"\n‚úì Results saved to {config.SAVE_DIR}/")

print("\n" + "="*60)
print("TRAINING COMPLETE! ‚úì")
print("="*60)

# ==========================================
# INFERENCE FUNCTION
# ==========================================
def predict_image(image_path):
    """Predict if image is AI or Real"""
    model.eval()
    transform = transforms.Compose([
        transforms.Resize((config.IMG_SIZE, config.IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    
    try:
        img = Image.open(image_path).convert("RGB")
        img = transform(img).unsqueeze(0).to(config.DEVICE)
        
        with torch.no_grad():
            if config.USE_AMP:
                with torch.cuda.amp.autocast():
                    output = model(img)
            else:
                output = model(img)
            
            probs = torch.softmax(output, dim=1)
            pred = torch.argmax(probs, 1).item()
            conf = probs[0][pred].item()
        
        return {
            'prediction': 'AI' if pred == 1 else 'Real',
            'confidence': conf * 100,
            'prob_real': probs[0][0].item() * 100,
            'prob_ai': probs[0][1].item() * 100
        }
    except Exception as e:
        return {'error': str(e)}

# Example usage:
# result = predict_image('test_image.jpg')
# print(f"Prediction: {result['prediction']} ({result['confidence']:.2f}% confidence)")

In [1]:
import os
import json
import time
from glob import glob
from PIL import Image
import matplotlib.pyplot as plt
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from tqdm import tqdm

In [None]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from PIL import Image, UnidentifiedImageError
import os

# ==========================================
# 1. DEVICE
# ==========================================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)
torch.backends.cudnn.benchmark = True  # Faster inference

# ==========================================
# 2. TRANSFORMS (same as training)
# ==========================================
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ),
])

# ==========================================
# 3. LOAD MODEL (ConvNeXt tiny, 2 classes)
# ==========================================
from torchvision.models import convnext_tiny

model = convnext_tiny(pretrained=False)
model.classifier[2] = nn.Linear(model.classifier[2].in_features, 2)  # AI vs REAL

# Load saved checkpoint
model_path = r"C:\Users\-\Desktop\.ipynb_checkpoints\Machine_learning\Deep learning\deepfake-detector\ConvNeXt-tiny\checkpoints_convnext_tiny\best_model.pth"
checkpoint = torch.load(model_path, map_location=device)

state_dict = checkpoint["model_state_dict"]
# Remove 'backbone.' prefix if present
new_state_dict = {k.replace("backbone.", "") if k.startswith("backbone.") else k: v for k, v in state_dict.items()}
model.load_state_dict(new_state_dict, strict=False)

model.to(device)
model.eval()
print("‚úÖ Model loaded successfully!")

# ==========================================
# 4. LOAD TEST DATASET
# ==========================================
test_dir = r"C:\Users\-\Desktop\.ipynb_checkpoints\Machine_learning\Deep learning\deepfake-detector\Test_images_Ai-Real"

test_dataset = datasets.ImageFolder(test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
class_names = test_dataset.classes
print("Classes:", class_names)
print("Total test samples:", len(test_dataset))

# ==========================================
# 5. PER-IMAGE PREDICTIONS
# ==========================================
print("\nüìå Per-image predictions:")
for img_path, label in test_dataset.samples:
    try:
        img = Image.open(img_path).convert("RGB")
    except:
        print(f"‚ùå Cannot open: {img_path}")
        continue

    img_tensor = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(img_tensor)
        _, pred = torch.max(output, 1)

    true_class = class_names[label]
    predicted_class = class_names[pred.item()]
    print(f"üñº {os.path.basename(img_path)} | True: {true_class} | Predicted: {predicted_class}")

# ==========================================
# 6. TEST ACCURACY ON FULL TEST SET
# ==========================================
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"\nüî• Test Accuracy: {accuracy:.2f}%")

# ==========================================
# 7. SINGLE IMAGE INFERENCE FUNCTION
# ==========================================
def predict_image(image_path):
    try:
        img = Image.open(image_path).convert("RGB")
    except UnidentifiedImageError:
        print(f"‚ùå Cannot read image: {image_path}")
        return

    img_tensor = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(img_tensor)
        _, pred = torch.max(output, 1)

    print(f"üîç Image: {image_path}")
    print(f"‚û° Prediction: {class_names[pred.item()]}")

# Example usage:
# predict_image(r"C:\Users\-\Desktop\test_image.jpg")


Device: cuda


  checkpoint = torch.load(model_path, map_location=device)


‚úÖ Model loaded successfully!
Classes: ['Ai', 'Real']
Total test samples: 2

üìå Per-image predictions:
üñº Gemini_Generated_Image_poxdu6poxdu6poxd.png | True: Ai | Predicted: Real
üñº WhatsApp Image 2025-12-18 at 9.14.30 PM (1).jpeg | True: Real | Predicted: Ai

üî• Test Accuracy: 0.00%


In [1]:
"""
Test Folder Evaluation Script
Evaluates model on AI and Real folders with detailed metrics
"""

import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image
import os
import json
import numpy as np
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt

# ==========================================
# MODEL DEFINITION
# ==========================================
class ConvNeXtClassifier(nn.Module):
    def __init__(self, num_classes=2, dropout_rate=0.3, model_size='tiny'):
        super().__init__()
        
        if model_size == 'tiny':
            self.backbone = models.convnext_tiny(weights=None)
        elif model_size == 'small':
            self.backbone = models.convnext_small(weights=None)
        else:
            raise ValueError(f"Unknown model size: {model_size}")
        
        in_features = self.backbone.classifier[2].in_features
        
        self.backbone.classifier = nn.Sequential(
            nn.Flatten(1),
            nn.LayerNorm(in_features),
            nn.Dropout(dropout_rate),
            nn.Linear(in_features, 512),
            nn.GELU(),
            nn.LayerNorm(512),
            nn.Dropout(dropout_rate * 0.5),
            nn.Linear(512, num_classes)
        )
    
    def forward(self, x):
        return self.backbone(x)

# ==========================================
# TEST EVALUATOR CLASS
# ==========================================
class TestFolderEvaluator:
    def __init__(self, model_path, device='cuda', img_size=224):
        self.device = device
        self.img_size = img_size
        
        print("="*60)
        print("LOADING MODEL")
        print("="*60)
        
        # Load checkpoint
        checkpoint = torch.load(model_path, map_location=device)
        
        # Initialize model
        self.model = ConvNeXtClassifier(num_classes=2, dropout_rate=0.3, model_size='tiny')
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model = self.model.to(device)
        self.model.eval()
        
        print(f"‚úì Model loaded successfully!")
        print(f"  Training Val Acc: {checkpoint.get('val_acc', 'N/A'):.2f}%")
        print(f"  Training Val AUC: {checkpoint.get('val_auc', 'N/A'):.4f}")
        
        # Transform (EXACT same as validation)
        self.transform = transforms.Compose([
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    
    def load_images_from_folder(self, folder_path, label):
        """Load all images from a folder"""
        valid_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.webp'}
        images = []
        labels = []
        
        for file in os.listdir(folder_path):
            if os.path.splitext(file)[1].lower() in valid_extensions:
                img_path = os.path.join(folder_path, file)
                images.append(img_path)
                labels.append(label)
        
        return images, labels
    
    def preprocess_image(self, image_path):
        """Preprocess single image"""
        try:
            img = Image.open(image_path).convert('RGB')
            return self.transform(img)
        except Exception as e:
            print(f"Error loading {image_path}: {e}")
            return None
    
    def evaluate_folder(self, test_folder_path, num_runs=5):
        """
        Evaluate on test folder with AI and Real subfolders
        num_runs: Number of inference passes per image for stability
        """
        
        print("\n" + "="*60)
        print("LOADING TEST DATA")
        print("="*60)
        
        # Load AI images (label = 1)
        ai_folder = os.path.join(test_folder_path, 'AI')
        ai_images, ai_labels = self.load_images_from_folder(ai_folder, label=1)
        print(f"AI images: {len(ai_images)}")
        
        # Load Real images (label = 0)
        real_folder = os.path.join(test_folder_path, 'Real')
        real_images, real_labels = self.load_images_from_folder(real_folder, label=0)
        print(f"Real images: {len(real_images)}")
        
        # Combine
        all_images = ai_images + real_images
        all_labels = ai_labels + real_labels
        
        print(f"\nTotal test images: {len(all_images)}")
        
        # Evaluate
        print("\n" + "="*60)
        print("RUNNING EVALUATION")
        print("="*60)
        
        predictions = []
        probabilities = []
        uncertainties = []
        
        self.model.eval()
        
        with torch.no_grad():
            for img_path in tqdm(all_images, desc="Processing images"):
                # Preprocess
                img_tensor = self.preprocess_image(img_path)
                
                if img_tensor is None:
                    # If image failed to load, predict randomly
                    predictions.append(0)
                    probabilities.append([0.5, 0.5])
                    uncertainties.append(1.0)
                    continue
                
                img_tensor = img_tensor.unsqueeze(0).to(self.device)
                
                # Multiple runs for stability
                run_probs = []
                for _ in range(num_runs):
                    output = self.model(img_tensor)
                    probs = torch.softmax(output, dim=1)
                    run_probs.append(probs.cpu().numpy()[0])
                
                # Average probabilities
                avg_probs = np.mean(run_probs, axis=0)
                std_probs = np.std(run_probs, axis=0)
                
                pred = int(np.argmax(avg_probs))
                predictions.append(pred)
                probabilities.append(avg_probs)
                uncertainties.append(std_probs[pred])
        
        # Convert to numpy
        predictions = np.array(predictions)
        probabilities = np.array(probabilities)
        all_labels = np.array(all_labels)
        uncertainties = np.array(uncertainties)
        
        # Calculate metrics
        self.print_results(all_labels, predictions, probabilities, uncertainties, 
                          len(ai_images), len(real_images))
        
        # Save detailed results
        self.save_detailed_results(all_images, all_labels, predictions, 
                                   probabilities, uncertainties)
        
        return {
            'labels': all_labels,
            'predictions': predictions,
            'probabilities': probabilities,
            'uncertainties': uncertainties
        }
    
    def print_results(self, labels, predictions, probabilities, uncertainties, 
                     num_ai, num_real):
        """Print detailed results"""
        
        print("\n" + "="*60)
        print("TEST RESULTS")
        print("="*60)
        
        # Overall accuracy
        accuracy = (predictions == labels).mean() * 100
        print(f"\nOverall Accuracy: {accuracy:.2f}%")
        
        # Per-class accuracy
        ai_mask = labels == 1
        real_mask = labels == 0
        
        ai_accuracy = (predictions[ai_mask] == labels[ai_mask]).mean() * 100
        real_accuracy = (predictions[real_mask] == labels[real_mask]).mean() * 100
        
        print(f"AI Detection Accuracy: {ai_accuracy:.2f}% ({sum(predictions[ai_mask] == 1)}/{num_ai})")
        print(f"Real Detection Accuracy: {real_accuracy:.2f}% ({sum(predictions[real_mask] == 0)}/{num_real})")
        
        # AUC
        auc = roc_auc_score(labels, probabilities[:, 1])
        print(f"AUC Score: {auc:.4f}")
        
        # Average confidence
        correct_mask = predictions == labels
        incorrect_mask = predictions != labels
        
        correct_conf = probabilities[correct_mask, predictions[correct_mask]].mean() * 100
        incorrect_conf = probabilities[incorrect_mask, predictions[incorrect_mask]].mean() * 100 if incorrect_mask.sum() > 0 else 0
        
        print(f"\nAverage Confidence:")
        print(f"  Correct predictions: {correct_conf:.2f}%")
        print(f"  Incorrect predictions: {incorrect_conf:.2f}%")
        
        # Uncertainty analysis
        avg_uncertainty = uncertainties.mean() * 100
        print(f"\nAverage Uncertainty: ¬±{avg_uncertainty:.2f}%")
        
        # Confusion Matrix
        cm = confusion_matrix(labels, predictions)
        print("\n" + "="*60)
        print("CONFUSION MATRIX")
        print("="*60)
        print(f"{'':15} {'Predicted Real':>15} {'Predicted AI':>15}")
        print(f"{'Actual Real':15} {cm[0][0]:>15} {cm[0][1]:>15}")
        print(f"{'Actual AI':15} {cm[1][0]:>15} {cm[1][1]:>15}")
        
        # Classification Report
        print("\n" + "="*60)
        print("CLASSIFICATION REPORT")
        print("="*60)
        print(classification_report(labels, predictions, target_names=['Real', 'AI']))
        
        # Misclassification analysis
        print("\n" + "="*60)
        print("MISCLASSIFICATION ANALYSIS")
        print("="*60)
        
        false_positives = sum((labels == 0) & (predictions == 1))
        false_negatives = sum((labels == 1) & (predictions == 0))
        
        print(f"False Positives (Real predicted as AI): {false_positives}")
        print(f"False Negatives (AI predicted as Real): {false_negatives}")
    
    def save_detailed_results(self, image_paths, labels, predictions, 
                             probabilities, uncertainties):
        """Save detailed results to JSON"""
        
        results = []
        for i, img_path in enumerate(image_paths):
            result = {
                'image': os.path.basename(img_path),
                'true_label': 'AI' if labels[i] == 1 else 'Real',
                'predicted_label': 'AI' if predictions[i] == 1 else 'Real',
                'correct': bool(labels[i] == predictions[i]),
                'confidence': float(probabilities[i][predictions[i]] * 100),
                'uncertainty': float(uncertainties[i] * 100),
                'prob_real': float(probabilities[i][0] * 100),
                'prob_ai': float(probabilities[i][1] * 100)
            }
            results.append(result)
        
        # Sort by confidence (lowest first - most uncertain)
        results.sort(key=lambda x: x['confidence'])
        
        output_file = 'test_evaluation_results.json'
        with open(output_file, 'w') as f:
            json.dump(results, f, indent=2)
        
        print(f"\n‚úì Detailed results saved to {output_file}")
        
        # Print most uncertain predictions
        print("\n" + "="*60)
        print("MOST UNCERTAIN PREDICTIONS (Top 10)")
        print("="*60)
        
        for i, result in enumerate(results[:10]):
            status = "‚úì" if result['correct'] else "‚úó"
            print(f"{i+1}. {status} {result['image']}")
            print(f"   True: {result['true_label']}, Predicted: {result['predicted_label']}")
            print(f"   Confidence: {result['confidence']:.2f}% (¬±{result['uncertainty']:.2f}%)")
        
        # Print worst mistakes (high confidence but wrong)
        wrong_results = [r for r in results if not r['correct']]
        wrong_results.sort(key=lambda x: x['confidence'], reverse=True)
        
        if wrong_results:
            print("\n" + "="*60)
            print("WORST MISTAKES (High Confidence but Wrong)")
            print("="*60)
            
            for i, result in enumerate(wrong_results[:10]):
                print(f"{i+1}. ‚úó {result['image']}")
                print(f"   True: {result['true_label']}, Predicted: {result['predicted_label']}")
                print(f"   Confidence: {result['confidence']:.2f}%")

# ==========================================
# MAIN EXECUTION
# ==========================================
if __name__ == "__main__":
    
    # Configuration
    MODEL_PATH = r'C:\Users\-\Desktop\.ipynb_checkpoints\Machine_learning\Deep learning\deepfake-detector\ConvNeXt-tiny\checkpoints_convnext_tiny\best_model.pth'
    TEST_FOLDER = r'C:\Users\-\Desktop\.ipynb_checkpoints\Machine_learning\Deep learning\deepfake-detector\Test_images_Ai-Real'
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    NUM_RUNS = 5  # Number of inference passes per image
    
    print("="*60)
    print("AI vs REAL TEST FOLDER EVALUATION")
    print("="*60)
    print(f"Model: {MODEL_PATH}")
    print(f"Test Folder: {TEST_FOLDER}")
    print(f"Device: {DEVICE}")
    print(f"Inference Runs: {NUM_RUNS}")
    
    # Create evaluator
    evaluator = TestFolderEvaluator(
        model_path=MODEL_PATH,
        device=DEVICE,
        img_size=224
    )
    
    # Run evaluation
    results = evaluator.evaluate_folder(TEST_FOLDER, num_runs=NUM_RUNS)
    
    print("\n" + "="*60)
    print("EVALUATION COMPLETE!")
    print("="*60)
    print("\nCheck 'test_evaluation_results.json' for detailed per-image results")

AI vs REAL TEST FOLDER EVALUATION
Model: C:\Users\-\Desktop\.ipynb_checkpoints\Machine_learning\Deep learning\deepfake-detector\ConvNeXt-tiny\checkpoints_convnext_tiny\best_model.pth
Test Folder: C:\Users\-\Desktop\.ipynb_checkpoints\Machine_learning\Deep learning\deepfake-detector\Test_images_Ai-Real
Device: cuda
Inference Runs: 5
LOADING MODEL


  checkpoint = torch.load(model_path, map_location=device)


‚úì Model loaded successfully!
  Training Val Acc: 93.17%
  Training Val AUC: 0.9811

LOADING TEST DATA
AI images: 49
Real images: 16

Total test images: 65

RUNNING EVALUATION


Processing images: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 65/65 [00:04<00:00, 15.60it/s]


TEST RESULTS

Overall Accuracy: 50.77%
AI Detection Accuracy: 40.82% (20/49)
Real Detection Accuracy: 81.25% (13/16)
AUC Score: 0.7041

Average Confidence:
  Correct predictions: 93.75%
  Incorrect predictions: 92.68%

Average Uncertainty: ¬±0.00%

CONFUSION MATRIX
                 Predicted Real    Predicted AI
Actual Real                  13               3
Actual AI                    29              20

CLASSIFICATION REPORT
              precision    recall  f1-score   support

        Real       0.31      0.81      0.45        16
          AI       0.87      0.41      0.56        49

    accuracy                           0.51        65
   macro avg       0.59      0.61      0.50        65
weighted avg       0.73      0.51      0.53        65


MISCLASSIFICATION ANALYSIS
False Positives (Real predicted as AI): 3
False Negatives (AI predicted as Real): 29

‚úì Detailed results saved to test_evaluation_results.json

MOST UNCERTAIN PREDICTIONS (Top 10)
1. ‚úó download (7).jpg
   Tr


