In [2]:
# === 1. Imports ===
import os
import numpy as np
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torchvision.transforms as transforms
from transformers import AutoImageProcessor, SwinModel

In [3]:
# === 2. Device Setup ===
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [4]:
# === 3. Swin Transformer Setup for Feature Extraction ===
# Single source of truth for the checkpoint so the image processor and the
# backbone can never be loaded from different checkpoints by accident.
SWIN_CHECKPOINT = "microsoft/swin-tiny-patch4-window7-224"

processor = AutoImageProcessor.from_pretrained(SWIN_CHECKPOINT)
swin_model = SwinModel.from_pretrained(SWIN_CHECKPOINT).to(device)

# eval() only switches layers such as dropout to inference behaviour; it does
# not stop gradient tracking. Turn gradients off explicitly so the backbone is
# actually frozen, then put it in inference mode.
swin_model.requires_grad_(False)
swin_model.eval()

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


SwinModel(
  (embeddings): SwinEmbeddings(
    (patch_embeddings): SwinPatchEmbeddings(
      (projection): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
    )
    (norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): SwinEncoder(
    (layers): ModuleList(
      (0): SwinStage(
        (blocks): ModuleList(
          (0): SwinLayer(
            (layernorm_before): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
            (attention): SwinAttention(
              (self): SwinSelfAttention(
                (query): Linear(in_features=96, out_features=96, bias=True)
                (key): Linear(in_features=96, out_features=96, bias=True)
                (value): Linear(in_features=96, out_features=96, bias=True)
                (dropout): Dropout(p=0.0, inplace=False)
              )
              (output): SwinSelfOutput(
                (dense): Linear(in_features=96, out_features=96, bias=True)
            

In [5]:
# === 4. Preprocessing and Feature Extraction ===
def extract_features_from_folder(folder_path, label):
    """Embed every image in ``folder_path`` with the Swin backbone.

    Relies on the module-level ``processor``, ``swin_model`` and ``device``.

    Args:
        folder_path: Directory scanned (non-recursively) for ``.png``, ``.jpg``
            and ``.jpeg`` files; the extension match is case-insensitive.
        label: Class label attached to every image found in the folder.

    Returns:
        A ``(features, labels)`` pair of equal-length lists. ``features`` holds
        one 1-D NumPy vector per image and ``labels`` repeats ``label``.
    """
    features, labels = [], []
    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order of the feature matrix (and so the seeded split) reproducible.
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith((".png", ".jpg", ".jpeg"))
    )

    for file in tqdm(image_files, desc=f"Processing {folder_path}"):
        image_path = os.path.join(folder_path, file)
        # The context manager releases the file handle right away; convert()
        # returns a new image, so it stays usable after the file is closed.
        with Image.open(image_path) as img:
            image = img.convert("RGB")
        inputs = processor(images=image, return_tensors="pt").to(device)

        with torch.no_grad():
            outputs = swin_model(**inputs)
            # NOTE(review): Swin does not prepend a [CLS] token, so index 0 is
            # simply the first token of the final hidden-state sequence.
            # `outputs.pooler_output` is the pooled whole-image representation;
            # switching to it means re-extracting all features and making the
            # same change in predict_from_image.
            first_token = outputs.last_hidden_state[:, 0, :]

        # One image per forward pass, so drop the batch axis explicitly.
        features.append(first_token[0].cpu().numpy())
        labels.append(label)

    return features, labels

In [6]:
# === 5. Extract from Real & Fake Folders ===
# Dataset root with one sub-folder per class ("real" -> 0, "fake" -> 1).
# NOTE(review): this is a machine-specific absolute path; point it at your own
# copy of the dataset before running.
DATA_DIR = r"C:\Users\Shreya Jadhav\Documents\B.Tech7\Project-III\Training\new-custom"
FEATURES_CACHE = "swin_features.npz"

if os.path.exists(FEATURES_CACHE):
    # Extraction is slow (about 90 minutes in the recorded run), so reuse the
    # saved features. Delete the cache file to force a fresh extraction.
    with np.load(FEATURES_CACHE) as cached:
        X, y = cached["X"], cached["y"]
else:
    real_features, real_labels = extract_features_from_folder(os.path.join(DATA_DIR, "real"), label=0)
    fake_features, fake_labels = extract_features_from_folder(os.path.join(DATA_DIR, "fake"), label=1)

    X = np.array(real_features + fake_features)
    y = np.array(real_labels + fake_labels)

    # An empty or ragged result means the folders held no usable images; fail
    # here instead of caching it and crashing later in the split.
    if X.ndim != 2 or len(X) != len(y):
        raise ValueError(f"Unexpected feature matrix shape {X.shape} for {len(y)} labels")

    np.savez(FEATURES_CACHE, X=X, y=y)

print(f"✅ Extracted features shape: {X.shape} | Labels shape: {y.shape}")

Processing C:\Users\Shreya Jadhav\Documents\B.Tech7\Project-III\Training\new-custom\real: 100%|█| 6683/6683 [45:10<00:0
Processing C:\Users\Shreya Jadhav\Documents\B.Tech7\Project-III\Training\new-custom\fake: 100%|█| 6019/6019 [43:54<00:0


✅ Extracted features shape: (12702, 768) | Labels shape: (12702,)


In [7]:
# === 6. Train-Test Split (80/20 Stratified) ===
# Hold out 20% of the samples, keeping the class ratio equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Features and labels all become float32 tensors (BCELoss needs float targets).
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, y_train, X_test, y_test)
)

# Pair features with labels, then batch them; only training data is shuffled.
train_ds = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)

test_ds = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_ds, batch_size=16)

In [8]:
# === 7. Advanced Classifier Model ===
class SwinFeatureClassifier(nn.Module):
    """MLP head that maps a feature vector to a single probability.

    Layout: LayerNorm -> Linear(input_dim, 512) -> GELU -> Dropout(0.3)
    -> Linear(512, 128) -> ReLU -> Dropout(0.3) -> Linear(128, 1) -> Sigmoid.

    Args:
        input_dim: Size of the incoming feature vector (768 by default).
    """

    def __init__(self, input_dim=768):
        super().__init__()
        # The layer order is kept fixed so the state-dict keys ("model.<i>.*")
        # stay compatible with saved checkpoints.
        layers = [
            nn.LayerNorm(input_dim),
            nn.Linear(input_dim, 512),
            nn.GELU(),
            nn.Dropout(0.3),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return sigmoid outputs with a trailing dimension of size 1."""
        probabilities = self.model(x)
        return probabilities

# Classifier head, loss and optimizer used by the training loop.
model = SwinFeatureClassifier().to(device)
# BCELoss expects probabilities, which the model's final Sigmoid provides.
criterion = nn.BCELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
# T_max covers the full 30-epoch budget of the training loop (keep the two in
# sync). With T_max=10 the cosine schedule hit a learning rate of 0 after ten
# epochs and then climbed back up, instead of decaying once over the run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30)

In [9]:
# === 8. Training Loop with Early Stopping ===
# NOTE(review): test_loader doubles as the validation set for early stopping
# and checkpoint selection, so the reported metrics are optimistic. Carve out a
# separate validation split if an unbiased test score is needed.
best_val_loss = float('inf')
patience, counter = 5, 0
epochs = 30

for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)

        # view(-1) keeps predictions 1-D even when the last batch holds a
        # single sample; squeeze() would collapse that case to a 0-d tensor.
        preds = model(xb).view(-1)
        yb = yb.view(-1)
        loss = criterion(preds, yb)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch loss is a true per-sample mean.
        train_loss += loss.item() * xb.size(0)

    train_loss /= len(train_loader.dataset)

    # === Evaluation ===
    model.eval()
    val_loss = 0.0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for xb, yb in test_loader:
            xb, yb = xb.to(device), yb.to(device)
            # Same shape handling as the training step, so a 1-sample batch
            # cannot break the loss or the list extension below.
            preds = model(xb).view(-1)
            yb = yb.view(-1)
            loss = criterion(preds, yb)
            val_loss += loss.item() * xb.size(0)

            all_preds.extend((preds > 0.5).cpu().numpy())
            all_labels.extend(yb.cpu().numpy())

    val_loss /= len(test_loader.dataset)

    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f} | Val Loss = {val_loss:.4f}")
    print(classification_report(all_labels, all_preds, digits=4))

    # === Early stopping ===
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
        torch.save(model.state_dict(), "best_classifier.pt")
        print("✅ Best model saved.")
    else:
        counter += 1
        if counter >= patience:
            print("⛔ Early stopping triggered.")
            break

    scheduler.step()

# Training ends on the last epoch's weights, which are not necessarily the best
# ones. Restore the best checkpoint so later inference uses the selected model.
if best_val_loss < float('inf'):
    model.load_state_dict(torch.load("best_classifier.pt", map_location=device))
model.eval()

Epoch 1: Train Loss = 0.2631 | Val Loss = 0.2277
              precision    recall  f1-score   support

         0.0     0.8314    0.9738    0.8970      1337
         1.0     0.9641    0.7807    0.8628      1204

    accuracy                         0.8823      2541
   macro avg     0.8978    0.8773    0.8799      2541
weighted avg     0.8943    0.8823    0.8808      2541

✅ Best model saved.
Epoch 2: Train Loss = 0.1700 | Val Loss = 0.2469
              precision    recall  f1-score   support

         0.0     0.8557    0.9536    0.9020      1337
         1.0     0.9410    0.8214    0.8772      1204

    accuracy                         0.8910      2541
   macro avg     0.8984    0.8875    0.8896      2541
weighted avg     0.8961    0.8910    0.8902      2541

Epoch 3: Train Loss = 0.1491 | Val Loss = 0.1795
              precision    recall  f1-score   support

         0.0     0.9158    0.9028    0.9092      1337
         1.0     0.8937    0.9078    0.9007      1204

    accuracy   

In [10]:
# === 9. Inference Function ===
def predict_from_image(image_path):
    """Classify a single image file as ``"Fake"`` or ``"Real"``.

    Relies on the module-level ``processor``, ``swin_model``, ``model`` and
    ``device``.

    Args:
        image_path: Path of the image to classify.

    Returns:
        ``"Fake"`` when the classifier output exceeds 0.5, otherwise ``"Real"``.
    """
    # Close the file handle promptly; convert() returns a new image that stays
    # usable after the file is closed.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    inputs = processor(images=image, return_tensors="pt").to(device)

    # Make sure dropout is disabled whatever mode the classifier was left in.
    model.eval()
    with torch.no_grad():
        # Must mirror extract_features_from_folder: token 0 of the last hidden
        # state. The slice is already (1, hidden) and on `device`, so no
        # squeeze/unsqueeze round trip or extra .to(device) is needed.
        features = swin_model(**inputs).last_hidden_state[:, 0, :]
        prob = model(features).item()
    return "Fake" if prob > 0.5 else "Real"

In [11]:
# === Example usage ===
# Classify one sample image and show the predicted class.
sample_image_path = r"C:\Users\Shreya Jadhav\Pictures\image_DEep.png"
result = predict_from_image(sample_image_path)
print("Prediction:", result)

Prediction: Fake
