In [2]:
import os

import numpy as np
import torch
from PIL import Image
from skimage.feature import local_binary_pattern
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from torchvision import transforms as T
from torchvision.datasets import CIFAR10


# Local Binary Pattern configuration shared by every feature-extraction call.
LBP_POINTS = 8          # P: number of neighbour points sampled on the circle
LBP_RADIUS = 1          # R: radius of the sampling circle
LBP_METHOD = 'uniform'  # LBP variant passed to skimage's local_binary_pattern


def extract_lbp_features(image_np):
    """Return a normalised, fixed-length LBP histogram for one image.

    Parameters
    ----------
    image_np : array-like
        2-D grayscale image handed straight to ``local_binary_pattern``.

    Returns
    -------
    numpy.ndarray
        1-D float histogram of LBP codes whose entries sum to ~1.
        Its length depends only on ``LBP_METHOD`` / ``LBP_POINTS``, never on
        the image content, so vectors from different images can be stacked
        into one 2-D feature matrix.
    """
    lbp = local_binary_pattern(image_np, P=LBP_POINTS, R=LBP_RADIUS, method=LBP_METHOD)

    # Number of distinct codes each LBP variant can emit. Sizing the histogram
    # from the configuration (instead of from lbp.max()) keeps the feature
    # length constant even when an image never produces the highest code.
    fixed_bins = {
        'uniform': LBP_POINTS + 2,
        'nri_uniform': LBP_POINTS * (LBP_POINTS - 1) + 3,
        'default': 2 ** LBP_POINTS,
        'ror': 2 ** LBP_POINTS,
    }
    n_bins = fixed_bins.get(LBP_METHOD)
    if n_bins is None:
        # Unknown / continuous-valued variant (e.g. 'var'): fall back to the
        # data-dependent bin count used previously.
        n_bins = int(lbp.max() + 1)

    hist, _ = np.histogram(lbp.ravel(), bins=n_bins, range=(0, n_bins))
    hist = hist.astype('float')
    # The small epsilon guards against division by zero on an empty histogram.
    hist /= (hist.sum() + 1e-7)
    return hist

def convert_to_gray_np(img_pil):
    """Convert an image to mode 'L' and return it as a NumPy array.

    ``img_pil`` must expose a PIL-style ``convert(mode)`` method.
    """
    grayscale = img_pil.convert('L')
    return np.array(grayscale)

def load_images_from_folder(folder, label, max_samples=50, augment=False):
    """Load images from ``folder`` and turn each one into an LBP feature vector.

    Parameters
    ----------
    folder : str
        Directory scanned (non-recursively) for ``.jpg`` / ``.jpeg`` / ``.png``
        files; candidates are taken in sorted filename order.
    label : int
        Class label attached to every image loaded from this folder.
    max_samples : int or None
        Upper bound on the number of candidate files; ``None`` means no limit.
    augment : bool
        When true, a random horizontal flip and colour jitter are applied
        after the resize to 32x32.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Feature matrix (one row per successfully processed image) and the
        matching label vector.

    Raises
    ------
    FileNotFoundError
        If ``folder`` is not an existing directory.
    ValueError
        If the folder holds no image files, or if none of the selected files
        could be processed.
    """
    if not os.path.isdir(folder):
        raise FileNotFoundError(f"Folder not found: {folder}")

    steps = [T.Resize((32, 32))]
    if augment:
        steps += [T.RandomHorizontalFlip(), T.ColorJitter(brightness=0.2, contrast=0.2)]
    transform = T.Compose(steps)

    image_exts = ('.jpg', '.jpeg', '.png')
    files = [f for f in sorted(os.listdir(folder)) if f.lower().endswith(image_exts)]
    if not files:
        raise ValueError(f"No images found in: {folder}")
    # Slicing already clamps to the list length, so no explicit min() is needed.
    files = files[:max_samples]

    X, y = [], []
    for fname in files:
        path = os.path.join(folder, fname)
        try:
            # The context manager releases the file handle deterministically,
            # including when decoding fails part-way through.
            with Image.open(path) as raw:
                img = raw.convert('RGB')
            feats = extract_lbp_features(convert_to_gray_np(transform(img)))
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Could not process {fname}: {e}")
            continue
        X.append(feats)
        y.append(label)
    print(f"Loaded {len(X)} images from {folder} with label {label}")

    if files and not X:
        # Returning an empty 1-D array here would only surface later as an
        # obscure dimension mismatch when the splits are concatenated.
        raise ValueError(f"No images could be processed in: {folder}")
    return np.array(X), np.array(y)

def load_cifar10_real(num_samples=200, augment=False):
    """Build LBP features for the first ``num_samples`` CIFAR-10 training images.

    The dataset is read from (and downloaded to, if missing) ``./data``.
    Every image is resized to 32x32 and, when ``augment`` is true, randomly
    flipped and colour-jittered before feature extraction. The dataset's own
    class labels are ignored: every returned sample is labelled 0 ("real").

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Feature matrix and an all-zero label vector of matching length.
    """
    dataset = CIFAR10(root='./data', train=True, download=True)

    pipeline_steps = [T.Resize((32, 32))]
    if augment:
        pipeline_steps += [T.RandomHorizontalFlip(), T.ColorJitter(brightness=0.2, contrast=0.2)]
    transform = T.Compose(pipeline_steps)

    features = []
    for index, (img, _) in enumerate(dataset):
        if index >= num_samples:
            break
        gray_np = convert_to_gray_np(transform(img))
        features.append(extract_lbp_features(gray_np))

    labels = [0] * len(features)  # label 0 for real
    print(f"Loaded {len(features)} CIFAR-10 real images for training")
    return np.array(features), np.array(labels)


# --- Training data -----------------------------------------------------------
# Both loaders below run with augment=True, i.e. through torchvision's random
# flip / colour-jitter transforms. Seeding torch's global RNG first makes the
# extracted features (and every metric derived from them) repeatable across
# "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# Real class (label 0): first 200 CIFAR-10 training images.
X_train_real, y_train_real = load_cifar10_real(num_samples=200, augment=True)

# Synthetic class (label 1): up to 200 images from the Midjourney FAKE folder.
midjourney_synth_train_folder = '/home/dhanraj/Documents/Midjourney_Exp2/train/FAKE'
X_train_synth, y_train_synth = load_images_from_folder(midjourney_synth_train_folder, 1, max_samples=200, augment=True)

X_train = np.concatenate([X_train_real, X_train_synth], axis=0)
y_train = np.concatenate([y_train_real, y_train_synth], axis=0)
print(f"Training samples: {len(y_train)} | Real: {len(y_train_real)}, Fake: {len(y_train_synth)}")


# --- Evaluation data ---------------------------------------------------------
# Up to 50 images per class from the Midjourney test split, loaded without
# augmentation. FAKE is labelled 1 and REAL is labelled 0.
midjourney_synth_folder = '/home/dhanraj/Documents/Midjourney_Exp2/test/FAKE'
midjourney_real_folder = '/home/dhanraj/Documents/Midjourney_Exp2/test/REAL'

X_test_synth, y_test_synth = load_images_from_folder(
    midjourney_synth_folder, 1, max_samples=50, augment=False
)
X_test_real, y_test_real = load_images_from_folder(
    midjourney_real_folder, 0, max_samples=50, augment=False
)

# Real samples first, then synthetic ones.
X_test = np.concatenate((X_test_real, X_test_synth), axis=0)
y_test = np.concatenate((y_test_real, y_test_synth), axis=0)

# Fail early if either split ended up with a single class.
train_classes = np.unique(y_train)
if train_classes.size < 2:
    raise ValueError(f"Training set must have at least 2 classes! Found: {train_classes}")

test_classes = np.unique(y_test)
if test_classes.size < 2:
    raise ValueError(f"Test set must have at least 2 classes! Found: {test_classes}")


# --- Model -------------------------------------------------------------------
# Standardise the LBP histograms, then fit an RBF-kernel SVM on them.
feature_scaler = StandardScaler()
rbf_svm = SVC(kernel='rbf', C=10.0, gamma='scale', probability=True)
clf = make_pipeline(feature_scaler, rbf_svm)

print("\nTraining SVM (RBF kernel)...")
clf.fit(X_train, y_train)

# --- Evaluation on the held-out Midjourney split -----------------------------
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"\nTest Accuracy on MidJourney real vs synth: {acc*100:.2f}%")

report_text = classification_report(y_test, y_pred, digits=4)
print("Classification report:\n", report_text)

Loaded 200 CIFAR-10 real images for training
Loaded 200 images from /home/dhanraj/Documents/Midjourney_Exp2/train/FAKE with label 1
Training samples: 400 | Real: 200, Fake: 200
Loaded 50 images from /home/dhanraj/Documents/Midjourney_Exp2/test/FAKE with label 1
Loaded 50 images from /home/dhanraj/Documents/Midjourney_Exp2/test/REAL with label 0

Training SVM (RBF kernel)...

Test Accuracy on MidJourney real vs synth: 68.00%
Classification report:
               precision    recall  f1-score   support

           0     0.6875    0.6600    0.6735        50
           1     0.6731    0.7000    0.6863        50

    accuracy                         0.6800       100
   macro avg     0.6803    0.6800    0.6799       100
weighted avg     0.6803    0.6800    0.6799       100



In [3]:
def predict_multiple_images(image_paths, clf_model):
    """Classify each image path as "REAL" or "FAKE" with a fitted model.

    Each image is opened, converted to RGB, resized to 32x32, reduced to an
    LBP histogram and passed to ``clf_model.predict``. A path that raises at
    any stage is reported on stdout and left out of the result, so the
    returned list can be shorter than ``image_paths``.

    Returns
    -------
    list[tuple]
        ``(path, "REAL" | "FAKE")`` pairs in input order.
    """
    label_names = {0: "REAL", 1: "FAKE"}
    resize = T.Compose([T.Resize((32, 32))])  # Same as training

    outcomes = []
    for img_path in image_paths:
        try:
            rgb_image = Image.open(img_path).convert('RGB')
            gray_np = convert_to_gray_np(resize(rgb_image))

            # The classifier expects a 2-D (n_samples, n_features) input.
            feature_row = extract_lbp_features(gray_np).reshape(1, -1)

            predicted = clf_model.predict(feature_row)[0]
            outcomes.append((img_path, label_names[predicted]))
        except Exception as err:
            print(f"Error processing {img_path}: {err}")

    return outcomes


# Ad-hoc spot check on a handful of local images.
# NOTE: person.jpeg appears twice in this list, so it is classified and
# printed twice.
test_image_list = [
    '/home/dhanraj/Downloads/lion.jpg',
    '/home/dhanraj/Downloads/srk.jpg',
    '/home/dhanraj/Downloads/person.jpeg',
    '/home/dhanraj/Downloads/mount.jpg',
    '/home/dhanraj/Downloads/girl.png',
    '/home/dhanraj/Downloads/person.jpeg',
    '/home/dhanraj/Downloads/IMG-20250811-WA0004.jpg',
    '/home/dhanraj/Downloads/IMG-20250811-WA0006.jpg',
    '/home/dhanraj/Downloads/kitty.jpeg',
    '/home/dhanraj/Downloads/ponnu.jpg',
    '/home/dhanraj/Downloads/Gina.png',
    '/home/dhanraj/Downloads/Tom.png',
    '/home/dhanraj/Downloads/Allan.png',
]

predictions = predict_multiple_images(test_image_list, clf)

# One "<path> => <label>" line per image that was processed successfully.
for image_path, verdict in predictions:
    print(f"{image_path} => {verdict}")


/home/dhanraj/Downloads/lion.jpg => FAKE
/home/dhanraj/Downloads/srk.jpg => REAL
/home/dhanraj/Downloads/person.jpeg => REAL
/home/dhanraj/Downloads/mount.jpg => FAKE
/home/dhanraj/Downloads/girl.png => FAKE
/home/dhanraj/Downloads/person.jpeg => REAL
/home/dhanraj/Downloads/IMG-20250811-WA0004.jpg => REAL
/home/dhanraj/Downloads/IMG-20250811-WA0006.jpg => FAKE
/home/dhanraj/Downloads/kitty.jpeg => REAL
/home/dhanraj/Downloads/ponnu.jpg => FAKE
/home/dhanraj/Downloads/Gina.png => REAL
/home/dhanraj/Downloads/Tom.png => REAL
/home/dhanraj/Downloads/Allan.png => REAL
