In [None]:
import os
import shutil
import cv2
import pandas as pd
import numpy as np
from PIL import Image


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader,random_split
from torchvision import models, transforms
from skimage.exposure import match_histograms
import random
from skimage.io import imread
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm


In [None]:
import isCarRealModel as m

In [None]:
# Set device
# Prefer Apple's Metal (MPS) backend when it is usable, otherwise fall back to CPU.
# torch.backends.mps.is_available() is the documented availability check and is
# present on every PyTorch release that ships MPS support, whereas the
# torch.mps.is_available() alias only exists on recent releases.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

Using device: mps


In [None]:
def predict_image(image_path, model, device):
    """Classify a single image file with a binary model.

    Parameters:
    - image_path: path of the image file to classify.
    - model: callable model; it is switched to eval mode and invoked on a
      batch containing the one preprocessed image. Its output must hold
      exactly one element because `.item()` is called on its sigmoid.
    - device: device the image tensor is moved to before the forward pass.

    Returns:
    - (prediction, prob): `prob` is the sigmoid of the model output and
      `prediction` is 1 when `prob >= 0.5`, otherwise 0. According to the
      original author's note the label map is {'not': 0, 'real': 1}.
    - (None, None) when `image_path` does not exist (a message is printed).
    """
    if not os.path.exists(image_path):
        print(f"File not found: {image_path}")
        return None, None

    # Inference only: put the model in evaluation mode.
    model.eval()

    # Per the original author's note, this size must match the input size
    # the model expects.
    preprocess = transforms.Compose([
        transforms.Resize((480, 640)),
        transforms.ToTensor(),
    ])

    # Force three channels, then add a leading batch dimension of size 1.
    rgb_image = Image.open(image_path).convert('RGB')
    batch = preprocess(rgb_image).unsqueeze(0).to(device)

    # No gradients are needed for a forward-only pass.
    with torch.no_grad():
        logit = model(batch)
        prob = torch.sigmoid(logit).item()

    prediction = int(prob >= 0.5)
    return prediction, prob

In [None]:
import json
import pandas as pd

# Annotation file: a JSON object mapping each image path to its label vector.
json_path = "./celeb spoof/archive/CelebA_Spoof_/CelebA_Spoof/metas/intra_test/test_label.json"

with open(json_path, "r") as label_file:
    data = json.load(label_file)

# One record per image: the path plus one "label_<i>" entry per vector position.
rows = [
    {
        "image_path": img_path,
        **{f"label_{i}": value for i, value in enumerate(vector)},
    }
    for img_path, vector in data.items()
]

# Build the frame once from the complete list of records.
df = pd.DataFrame(rows)

In [None]:
# Label implied by the file path: 0 when the path contains "live"
# (case-insensitive), 1 otherwise.
df["expected_label"] = [
    0 if "live" in path.lower() else 1
    for path in df["image_path"]
]

# Annotated label: the value stored at vector index 43.
df["actual_label"] = df["label_43"]

# True where the path-derived label agrees with the annotated one.
df["label_match"] = df["expected_label"].eq(df["actual_label"])

# Rows where the two labels disagree, kept as an independent copy.
mismatched_df = df.loc[~df["label_match"]].copy()

In [None]:
# Keep only the rows whose path-derived label agrees with the annotated label.
df_cleaned = df[df["label_match"]].copy()

# A notebook renders a bare expression only when it is the last statement of
# the cell, so the mismatching rows are shown last; placed first they were
# silently discarded.
mismatched_df


In [None]:
# Show the rows whose label_match flag is True.
df_cleaned

Unnamed: 0,image_path,label_0,label_1,label_2,label_3,label_4,label_5,label_6,label_7,label_8,...,label_37,label_38,label_39,label_40,label_41,label_42,label_43,expected_label,actual_label,label_match
0,Data/test/6964/spoof/494405.png,0,0,0,0,0,0,0,0,0,...,0,0,0,4,2,2,1,1,1,True
1,Data/test/9596/spoof/494406.png,0,0,0,0,0,0,0,0,0,...,0,0,0,9,2,2,1,1,1,True
2,Data/test/9014/spoof/494407.png,0,0,0,0,0,0,0,0,0,...,0,0,0,8,1,2,1,1,1,True
3,Data/test/7607/spoof/494408.png,0,0,0,0,0,0,0,0,0,...,0,0,0,6,1,1,1,1,1,True
4,Data/test/5624/spoof/494409.png,0,0,0,0,0,0,0,0,0,...,0,0,0,2,1,1,1,1,1,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67165,Data/test/6551/spoof/561570.png,0,0,0,0,0,0,0,0,0,...,0,0,0,4,1,1,1,1,1,True
67166,Data/test/6283/spoof/561571.png,0,0,0,0,0,0,0,0,0,...,0,0,0,3,2,1,1,1,1,True
67167,Data/test/4966/spoof/561572.png,0,0,0,0,0,0,0,0,0,...,0,0,0,1,1,1,1,1,1,True
67168,Data/test/5241/spoof/561573.png,0,0,0,0,0,0,0,0,0,...,0,0,0,1,2,1,1,1,1,True


In [None]:
# Copy the value stored at index 40 of each label vector (column "label_40")
# into a descriptively named column; classify_spoof_type consumes it as the
# spoof-type code.
df_cleaned["spoof_type_raw"] = df_cleaned["label_40"]

# Define spoof type category
def classify_spoof_type(label40, path):
    """Map a spoof-type code and an image path to a coarse category.

    Parameters:
    - label40: spoof-type code taken from index 40 of the label vector.
    - path: image path; a case-insensitive "live" substring takes priority
      over the code.

    Returns:
    - "live" when the path contains "live", "printed" for codes 1-3,
      "display" for codes 7-9, and "other" for every remaining code.
    """
    if "live" in path.lower():
        return "live"

    # Checked in order; the first group containing the code wins.
    code_groups = (
        ("printed", (1, 2, 3)),
        ("display", (7, 8, 9)),
    )
    for category, codes in code_groups:
        if label40 in codes:
            return category
    return "other"

# Classify every row from its raw spoof-type code and its image path.
df_cleaned["spoof_type_category"] = [
    classify_spoof_type(raw_code, path)
    for raw_code, path in zip(df_cleaned["spoof_type_raw"], df_cleaned["image_path"])
]


In [None]:
# Show df_cleaned, which now also carries the spoof_type_raw and
# spoof_type_category columns.
df_cleaned

Unnamed: 0,image_path,label_0,label_1,label_2,label_3,label_4,label_5,label_6,label_7,label_8,...,label_39,label_40,label_41,label_42,label_43,expected_label,actual_label,label_match,spoof_type_raw,spoof_type_category
0,Data/test/6964/spoof/494405.png,0,0,0,0,0,0,0,0,0,...,0,4,2,2,1,1,1,True,4,other
1,Data/test/9596/spoof/494406.png,0,0,0,0,0,0,0,0,0,...,0,9,2,2,1,1,1,True,9,display
2,Data/test/9014/spoof/494407.png,0,0,0,0,0,0,0,0,0,...,0,8,1,2,1,1,1,True,8,display
3,Data/test/7607/spoof/494408.png,0,0,0,0,0,0,0,0,0,...,0,6,1,1,1,1,1,True,6,other
4,Data/test/5624/spoof/494409.png,0,0,0,0,0,0,0,0,0,...,0,2,1,1,1,1,1,True,2,printed
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67165,Data/test/6551/spoof/561570.png,0,0,0,0,0,0,0,0,0,...,0,4,1,1,1,1,1,True,4,other
67166,Data/test/6283/spoof/561571.png,0,0,0,0,0,0,0,0,0,...,0,3,2,1,1,1,1,True,3,printed
67167,Data/test/4966/spoof/561572.png,0,0,0,0,0,0,0,0,0,...,0,1,1,1,1,1,1,True,1,printed
67168,Data/test/5241/spoof/561573.png,0,0,0,0,0,0,0,0,0,...,0,1,2,1,1,1,1,True,1,printed


In [None]:
# Keep only the categories under evaluation; rows classified "other" are dropped.
kept_categories = ["live", "printed", "display"]
in_kept_category = df_cleaned["spoof_type_category"].isin(kept_categories)
filtered_df = df_cleaned.loc[in_kept_category].copy()

# Inverted label: 1 where label_43 is 0, and 0 everywhere else.
filtered_df["flip_label"] = (filtered_df["label_43"] == 0).astype("int64")



In [None]:
# Show the live / printed / display rows together with their flip_label.
filtered_df

Unnamed: 0,image_path,label_0,label_1,label_2,label_3,label_4,label_5,label_6,label_7,label_8,...,label_40,label_41,label_42,label_43,expected_label,actual_label,label_match,spoof_type_raw,spoof_type_category,flip_label
1,Data/test/9596/spoof/494406.png,0,0,0,0,0,0,0,0,0,...,9,2,2,1,1,1,True,9,display,0
2,Data/test/9014/spoof/494407.png,0,0,0,0,0,0,0,0,0,...,8,1,2,1,1,1,True,8,display,0
4,Data/test/5624/spoof/494409.png,0,0,0,0,0,0,0,0,0,...,2,1,1,1,1,1,True,2,printed,0
5,Data/test/8965/live/494410.png,0,1,1,0,0,0,1,0,0,...,0,0,0,0,0,0,True,0,live,1
6,Data/test/6407/spoof/494411.png,0,0,0,0,0,0,0,0,0,...,3,1,2,1,1,1,True,3,printed,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67160,Data/test/5596/spoof/561565.png,0,0,0,0,0,0,0,0,0,...,2,1,1,1,1,1,True,2,printed,0
67161,Data/test/9909/live/561566.png,0,0,1,0,0,0,0,0,1,...,0,0,0,0,0,0,True,0,live,1
67166,Data/test/6283/spoof/561571.png,0,0,0,0,0,0,0,0,0,...,3,2,1,1,1,1,True,3,printed,0
67167,Data/test/4966/spoof/561572.png,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,True,1,printed,0


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report, f1_score
from tqdm.auto import tqdm

# The full filtered set is evaluated. To evaluate a random subset instead,
# replace the assignment below with something like
# filtered_df.sample(frac=0.1, random_state=42).
sampled_df = filtered_df


# Class name -> integer label. This matches flip_label, which is 1 for rows
# whose label_43 is 0 and 0 for every other row.
label_map = {'spoof': 0, 'real': 1}

# Frame handed to evaluate_model_predictions by the evaluation cells.
df_to_evaluate = sampled_df

In [None]:
def evaluate_model_predictions(df_to_evaluate, model, device, predict_image, label_map, image_base_path="./celeb spoof/archive/CelebA_Spoof_/CelebA_Spoof/"):
    """Predict every image listed in a DataFrame and print classification metrics.

    Parameters:
    - df_to_evaluate (pd.DataFrame): must provide an "image_path" column
      (appended to `image_base_path`) and a "flip_label" column holding the
      true label of each image.
    - model: passed through unchanged to `predict_image`.
    - device: passed through unchanged to `predict_image`.
    - predict_image (function): called as predict_image(path, model, device);
      must return a (prediction, probability) pair. A prediction of None
      marks the image as skipped and excludes it from the metrics.
    - label_map (dict): class name -> integer label. It fixes the class order
      and the row names of the report and confusion matrix.
    - image_base_path (str): prefix prepended, by plain string concatenation,
      to every image path in the DataFrame.

    Returns:
    - None. The report, confusion matrix, and scalar metrics are printed.
      Precision, recall, and F1 use label 0 as the positive class;
      specificity is the recall of label 1.
    """
    y_true = []
    y_pred = []
    skipped = 0

    # Only two columns are needed, so iterate over them directly instead of
    # materialising a full row object per image with iterrows().
    samples = zip(df_to_evaluate["image_path"], df_to_evaluate["flip_label"])
    for relative_path, true_label in tqdm(samples, total=len(df_to_evaluate), desc="Evaluating Images"):
        pred, _prob = predict_image(image_base_path + relative_path, model, device)

        if pred is None:
            skipped += 1
            continue

        y_true.append(true_label)
        y_pred.append(pred)

    # Make dropped images visible: they silently shrink the evaluated set.
    if skipped:
        print(f"Skipped {skipped} image(s) for which no prediction was returned.")
    if not y_true:
        print("No images could be evaluated; metrics were not computed.")
        return

    # Order classes by their integer label so names line up with the rows of
    # the report and the confusion matrix. The names are the dict keys; the
    # values are the integer labels.
    ordered_classes = sorted(label_map.items(), key=lambda item: item[1])
    class_labels = [label for _, label in ordered_classes]
    class_names = [str(name) for name, _ in ordered_classes]

    # Compute metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, pos_label=0)
    recall = recall_score(y_true, y_pred, pos_label=0)
    f1 = f1_score(y_true, y_pred, pos_label=0)
    specificity = recall_score(y_true, y_pred, pos_label=1)
    # Passing labels explicitly keeps the output shape stable even when one
    # class never occurs in y_true / y_pred.
    conf_matrix = confusion_matrix(y_true, y_pred, labels=class_labels)
    class_report = classification_report(y_true, y_pred, labels=class_labels, target_names=class_names)

    # Print results
    print("Classification Report:\n", class_report)
    print("Confusion Matrix:\n", conf_matrix)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"F1 Score: {f1:.4f}")




# resnet-18

In [None]:
# Build the ResNet-18 variant directly on the target device.
model = m.IsCarRealModel_resnet18(pretrained=True).to(device)

# Restore the best saved weights, mapping stored tensors onto `device`.
weights_path = 'best_car_real_model_resnet18.pth'
model.load_state_dict(torch.load(weights_path, map_location=device))
model.to(device)

# Evaluate
evaluate_model_predictions(
    df_to_evaluate,
    model,
    device,
    predict_image,
    label_map,
)




Evaluating Images:   0%|          | 0/49646 [00:00<?, ?it/s]

Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.93      0.91     29723
           1       0.89      0.85      0.87     19923

    accuracy                           0.90     49646
   macro avg       0.89      0.89      0.89     49646
weighted avg       0.90      0.90      0.90     49646

Confusion Matrix:
 [[27544  2179]
 [ 3014 16909]]
Accuracy: 0.8954
Precision: 0.9014
Recall: 0.9267
Specificity: 0.8487
F1 Score: 0.9139


# resnet-50

In [None]:
# Build the ResNet-50 variant directly on the target device.
model = m.IsCarRealModel_resnet50(pretrained=True).to(device)

# Restore the best saved weights, mapping stored tensors onto `device`.
weights_path = 'best_car_real_model_resnet50.pth'
model.load_state_dict(torch.load(weights_path, map_location=device))
model.to(device)

# Evaluate
evaluate_model_predictions(
    df_to_evaluate,
    model,
    device,
    predict_image,
    label_map,
)



Evaluating Images:   0%|          | 0/49646 [00:00<?, ?it/s]

Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.96      0.89     29723
           1       0.93      0.69      0.79     19923

    accuracy                           0.85     49646
   macro avg       0.87      0.83      0.84     49646
weighted avg       0.86      0.85      0.85     49646

Confusion Matrix:
 [[28650  1073]
 [ 6196 13727]]
Accuracy: 0.8536
Precision: 0.8222
Recall: 0.9639
Specificity: 0.6890
F1 Score: 0.8874


# resnet-101

In [None]:
# Build the ResNet-101 variant directly on the target device.
model = m.IsCarRealModel_resnet101(pretrained=True).to(device)

# Restore the best saved weights, mapping stored tensors onto `device`.
weights_path = 'best_car_real_model_resnet101.pth'
model.load_state_dict(torch.load(weights_path, map_location=device))
model.to(device)

# Evaluate
evaluate_model_predictions(
    df_to_evaluate,
    model,
    device,
    predict_image,
    label_map,
)



Evaluating Images:   0%|          | 0/49646 [00:00<?, ?it/s]

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.92      0.92     29723
           1       0.88      0.87      0.88     19923

    accuracy                           0.90     49646
   macro avg       0.90      0.90      0.90     49646
weighted avg       0.90      0.90      0.90     49646

Confusion Matrix:
 [[27354  2369]
 [ 2516 17407]]
Accuracy: 0.9016
Precision: 0.9158
Recall: 0.9203
Specificity: 0.8737
F1 Score: 0.9180


# resnet-152

In [None]:
# Build the ResNet-152 variant directly on the target device.
model = m.IsCarRealModel_resnet152(pretrained=True).to(device)

# Restore the best saved weights, mapping stored tensors onto `device`.
weights_path = 'best_car_real_model_resnet152.pth'
model.load_state_dict(torch.load(weights_path, map_location=device))
model.to(device)

# Evaluate
evaluate_model_predictions(
    df_to_evaluate,
    model,
    device,
    predict_image,
    label_map,
)



Evaluating Images:   0%|          | 0/49646 [00:00<?, ?it/s]

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.93      0.92     29723
           1       0.89      0.87      0.88     19923

    accuracy                           0.91     49646
   macro avg       0.91      0.90      0.90     49646
weighted avg       0.91      0.91      0.91     49646

Confusion Matrix:
 [[27657  2066]
 [ 2517 17406]]
Accuracy: 0.9077
Precision: 0.9166
Recall: 0.9305
Specificity: 0.8737
F1 Score: 0.9235


# mobilenet_v2

In [None]:
# Build the MobileNetV2 variant directly on the target device.
model = m.IsCarRealModel_mobilenet_v2(pretrained=True).to(device)

# Restore the best saved weights, mapping stored tensors onto `device`.
# NOTE(review): this filename contains "_." unlike the other checkpoints;
# confirm it matches the file on disk before renaming anything.
weights_path = 'best_car_real_model_.mobilenet_v2.pth'
model.load_state_dict(torch.load(weights_path, map_location=device))
model.to(device)

# Evaluate
evaluate_model_predictions(
    df_to_evaluate,
    model,
    device,
    predict_image,
    label_map,
)



Evaluating Images:   0%|          | 0/49646 [00:00<?, ?it/s]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.87      0.89     29723
           1       0.81      0.87      0.84     19923

    accuracy                           0.87     49646
   macro avg       0.86      0.87      0.86     49646
weighted avg       0.87      0.87      0.87     49646

Confusion Matrix:
 [[25744  3979]
 [ 2610 17313]]
Accuracy: 0.8673
Precision: 0.9079
Recall: 0.8661
Specificity: 0.8690
F1 Score: 0.8865


# mobilenet_v3_large

In [None]:
# Build the MobileNetV3-Large variant directly on the target device.
model = m.IsCarRealModel_mobilenet_v3_large(pretrained=True).to(device)

# Restore the best saved weights, mapping stored tensors onto `device`.
weights_path = 'best_car_real_model_mobilenet_v3_large.pth'
model.load_state_dict(torch.load(weights_path, map_location=device))
model.to(device)

# Evaluate
evaluate_model_predictions(
    df_to_evaluate,
    model,
    device,
    predict_image,
    label_map,
)

Evaluating Images:   0%|          | 0/49646 [00:00<?, ?it/s]

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.94      0.93     29723
           1       0.91      0.87      0.89     19923

    accuracy                           0.91     49646
   macro avg       0.91      0.91      0.91     49646
weighted avg       0.91      0.91      0.91     49646

Confusion Matrix:
 [[27990  1733]
 [ 2507 17416]]
Accuracy: 0.9146
Precision: 0.9178
Recall: 0.9417
Specificity: 0.8742
F1 Score: 0.9296


# vgg16

In [None]:
# Build the VGG-16 variant directly on the target device.
model = m.IsCarRealModel_vgg16(pretrained=True).to(device)

# Restore the best saved weights, mapping stored tensors onto `device`.
weights_path = 'best_car_real_model_vgg16.pth'
model.load_state_dict(torch.load(weights_path, map_location=device))
model.to(device)

# Evaluate
evaluate_model_predictions(
    df_to_evaluate,
    model,
    device,
    predict_image,
    label_map,
)



Evaluating Images:   0%|          | 0/49646 [00:00<?, ?it/s]

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.92      0.90     29723
           1       0.87      0.83      0.85     19923

    accuracy                           0.88     49646
   macro avg       0.88      0.87      0.88     49646
weighted avg       0.88      0.88      0.88     49646

Confusion Matrix:
 [[27266  2457]
 [ 3428 16495]]
Accuracy: 0.8815
Precision: 0.8883
Recall: 0.9173
Specificity: 0.8279
F1 Score: 0.9026


# vgg19

In [None]:
# Build the VGG-19 variant directly on the target device.
model = m.IsCarRealModel_vgg19(pretrained=True).to(device)

# Restore the best saved weights, mapping stored tensors onto `device`.
weights_path = 'best_car_real_model_vgg19.pth'
model.load_state_dict(torch.load(weights_path, map_location=device))
model.to(device)

# Evaluate
evaluate_model_predictions(
    df_to_evaluate,
    model,
    device,
    predict_image,
    label_map,
)



Evaluating Images:   0%|          | 0/49646 [00:00<?, ?it/s]

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.94      0.91     29723
           1       0.90      0.81      0.85     19923

    accuracy                           0.89     49646
   macro avg       0.89      0.87      0.88     49646
weighted avg       0.89      0.89      0.89     49646

Confusion Matrix:
 [[27917  1806]
 [ 3778 16145]]
Accuracy: 0.8875
Precision: 0.8808
Recall: 0.9392
Specificity: 0.8104
F1 Score: 0.9091
