# **Import all necessary libraries**

In [1]:
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchmetrics
from sklearn.model_selection import train_test_split
from PIL import Image
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import *
# from tqdm.notebook import tqdm_notebook as tqdm
from transformers import CLIPModel, CLIPProcessor
from torchvision import transforms
import random
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
import torchvision.models as models
from safetensors.torch import load_file
import json
import random

import warnings
warnings.filterwarnings("ignore")

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# **Import required models**

# ***1. Densenet***

In [2]:
class EarlyStopping:
    def __init__(self, model, metric_name="loss", mode="min"):
        self.model = model
        self.metric_name = metric_name
        self.mode = mode
        self.counter = 0
        self.best_metric_value = float("inf") if mode == "min" else 0.0

    def __call__(self, metrics, last_epoch=False):
        metric_value = metrics[self.metric_name]
        delta = metric_value - self.best_metric_value
        improvement = delta > 0 if self.mode == "max" else delta < 0

        if improvement:
            self.counter = 0
            self.best_metric_value = metric_value

            self.model.save("_checkpoint")
        else:
            self.counter += 1

        should_stop = self.counter >= PATIENCE

        if should_stop or last_epoch:
            self.model.load("_checkpoint")

        return should_stop


class CnnBase(nn.Module):
    def __init__(self):
        super().__init__()

        self.initialize()

        self.optimizer = torch.optim.Adam(self.parameters())
        self.loss_function = nn.BCELoss()

        self.to(device)

    def file_name(self, suffix=""):
        return f"{OUTPUT_DIR}/{self.name}{suffix}.pt"

    def get_metrics(self, split, desc_prefix=""):
        with torch.no_grad() if split == "test" else nullcontext():
            if split == "train":
                self.train()
            elif split == "test":
                self.eval()

            desc = {split: f"{desc_prefix}{split.title()}ing" for split in DATA_SPLITS}

            total_loss, total_items = 0.0, 0
            tp, tn, fp, fn = 0, 0, 0, 0

            for images, labels in (
                tqdm(
                    data_loaders[split],
                    desc=desc[split],
                    file=sys.stdout,
                )
                if VERBOSE
                else data_loaders[split]
            ):
                images = images.to(device)
                labels = labels.to(device).unsqueeze(1)
                items = images.shape[0]

                if split == "train":
                    self.optimizer.zero_grad()

                outputs = self(images)
                predictions = (outputs >= 0.5).float()
                loss = self.loss_function(outputs, labels.float())

                if split == "train":
                    loss.backward()
                    self.optimizer.step()

                total_loss += loss.item() * items
                total_items += items

                tp += ((predictions == 1) & (labels == 1)).sum().item()
                tn += ((predictions == 0) & (labels == 0)).sum().item()
                fp += ((predictions == 1) & (labels == 0)).sum().item()
                fn += ((predictions == 0) & (labels == 1)).sum().item()

            loss = total_loss / total_items
            accuracy = (tp + tn) / (tp + tn + fp + fn)
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            f1_score = 2 * (precision * recall) / (precision + recall)

            confusion_matrix = torch.tensor(
                [
                    [tn, fp],
                    [fn, tp],
                ]
            ).numpy()

            return {
                "loss": loss,
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1_score": f1_score,
                "confusion_matrix": confusion_matrix,
            }

    def fit(self):
        early_stopping = EarlyStopping(self)
        history = {}

        for epoch in range(EPOCHS):
            print(f"Epoch {epoch + 1}/{EPOCHS}{':' if VERBOSE else '...'}")

            metrics = {
                split: self.get_metrics(split, desc_prefix="  ")
                for split in DATA_SPLITS
            }

            self.update_history(history, metrics)

            if VERBOSE:
                self.print_results(metrics)

            if early_stopping(metrics["test"], last_epoch=epoch == (EPOCHS - 1)):
                break

        return self.get_metrics(split="test"), history

    def save(self, suffix=""):
        torch.save(
            {
                "model": self.state_dict(),
                "optimizer": self.optimizer.state_dict(),
            },
            self.file_name(suffix),
        )

    def load(self, suffix=""):
        checkpoint = torch.load(self.file_name(suffix))

        self.load_state_dict(checkpoint["model"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])

    def plot_am(self):
        print("\nAblationCAM:")

        tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)

        am = AblationCAM(model=self, target_layers=self.am_target_layers)

        def image_mapper(image):
            grayscale_am = am(
                input_tensor=image.unsqueeze(0),
                targets=[ClassifierOutputTarget(0)],
            )[0]

            return show_cam_on_image(
                image.permute(1, 2, 0).numpy(),
                grayscale_am,
                use_rgb=True,
            )

        show_batches("test", image_mapper, show_label=False)

        tqdm.__init__ = partialmethod(tqdm.__init__, disable=False)

    def run_routine(self):
        print(f"Total Parameters: {self.total_params}\n")
        
        final_metrics, history = self.fit()

        self.save()

        print("\nFinal Metrics:")
        pp(final_metrics)

        print("\nConfusion Matrix:")
        
        with plt.style.context(["science", "ieee", "no-latex"]):
            cm_display = ConfusionMatrixDisplay(
                final_metrics["confusion_matrix"],
                display_labels=LABELS,
            )

            cm_display.plot(cmap=plt.cm.Blues)

            self.plot_history(history)

        self.plot_am()

    def plot_history(self, history):
        history = pd.DataFrame(history)

        for i, metric in enumerate(
            ["loss", "accuracy", "precision", "recall", "f1_score"]
        ):
            columns = [f"{split}_{metric}" for split in DATA_SPLITS]

            plt.figure()
            plt.xlabel("Epoch")
            plt.ylabel(metric.title())

            for column in columns:
                plt.plot(history.index, history[column], label=column.split("_")[0].title())

            plt.legend(loc="best")
            plt.show()

    @staticmethod
    def update_history(history, metrics):
        for split in DATA_SPLITS:
            for key, value in metrics[split].items():
                if not isinstance(value, float):
                    continue

                key = f"{split}_{key}"

                if key not in history:
                    history[key] = []

                history[key].append(value)

    @staticmethod
    def print_results(metrics):
        results = f"  Results:"

        for split in DATA_SPLITS:
            results += f"\n    {split.title()}:"

            for key, value in metrics[split].items():
                if isinstance(value, float):
                    results += f"\n      {key}: {value:.4f}"

        print(results)
        
    @property
    def total_params(self):
        return sum(p.numel() for p in self.parameters())



class CifakeNet(CnnBase):
    def initialize(self):
        self.name = "cifakenet"
        densenet = models.densenet121(pretrained=True)
        self.features = densenet.features
        self.mlp_head = nn.Sequential(
            nn.Linear(densenet.classifier.in_features, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        )
        self.am_target_layers = [self.features]

    def forward(self, x):
        
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.mlp_head(x)

        return x


In [3]:
dense_model = CifakeNet()
dense_model.load_state_dict(torch.load("/kaggle/input/densenet-without-v2/_checkpoint (2).pth", map_location = torch.device('cpu')))
dense_model = dense_model.to(device)
print(f"The parameters of dense_model are on: {next(dense_model.parameters()).device}")

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 134MB/s] 


The parameters of dense_model are on: cpu


# ***2. ViT***

In [4]:
from transformers import ViTImageProcessor
from transformers import ViTForImageClassification
model_str = 'WinKawaks/vit-tiny-patch16-224'
processor = ViTImageProcessor.from_pretrained(model_str)

vit_model = ViTForImageClassification.from_pretrained(model_str, num_labels=2,ignore_mismatched_sizes=True)

vit_model.load_state_dict(torch.load("/kaggle/input/vit-tiny/vit tiny.pt", map_location = torch.device('cpu')))
vit_model = vit_model.to(device)
print(f"The parameters of vit_model are on: {next(vit_model.parameters()).device}")

preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/22.9M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at WinKawaks/vit-tiny-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 192]) in the checkpoint and torch.Size([2, 192]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


The parameters of vit_model are on: cpu


# **Define dataset classes**

In [5]:
class dataset(Dataset):
    def __init__(self, li, transform, train):
        super().__init__()
        self.li = li
        self.transform = transform
        self.train = train

    def __len__(self):
        return len(self.li)

    def __getitem__(self, idx):
        img = self.li[idx][0]
        # img_arr = np.array(self.li[idx][0])
        y = self.li[idx][1]
        if self.train:
            try:
                img = self.transform(img)
            except:
                img = self.transform(np.array(img))
        return img, y

# **Tranformations**

In [6]:
#transformation for densenet
transform_dense = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor()
])

#Transformation for ViT
image_mean, image_std = processor.image_mean, processor.image_std
size = processor.size["height"]
print(f"Size of the input image that ViT processor takes: {size}x{size}")

transform_vit = transforms.Compose([
    transforms.Resize((size, size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=image_mean, std=image_std)
])

Size of the input image that ViT processor takes: 224x224


# **Write all necessary function definitions**

In [7]:
#This is the function to extract images and their respective indices in the format given in the adobe submission guideline.
def extract_test_data(path, l, transform):
    li = []
    for img_path in tqdm(l):
        idx = int(img_path.split(".")[0])
        img = Image.open(os.path.join(path, img_path))
        img = img.convert("RGB")
        li.append([img, idx])

    data = dataset(li, transform, True)
    return DataLoader(data, batch_size=32, shuffle=False, num_workers=os.cpu_count()), li

In [8]:
def dense_model_infer(dataload, prob=False):
    label_dense = []
    pred_dense = []
    dense_model.eval()
    with torch.inference_mode():
        for img, y in tqdm(dataload):
            label_dense.append(y)
            img=img.to(device)
            if prob:
                y_pred = (dense_model(img)).cpu()
            else:
                y_pred = (torch.round(dense_model(img))).cpu()
            pred_dense.append(y_pred)
    return pred_dense, label_dense

In [9]:
def vit_model_infer(dataload, prob=False):
    label_vit = []
    pred_vit = []
    with torch.inference_mode():
        for img, y in tqdm(dataload):
            label_vit.append(y)
            img=img.to(device)
            if prob:
                y_pred = vit_model(img)[0][:,0].cpu().detach().numpy()
            else:
                y_pred = np.argmin(vit_model(img)[0].cpu().detach().numpy(), axis=1)
            pred_vit.append(y_pred)
    return pred_vit, label_vit 

In [10]:
def weighted_soft_ensemble(predictions, weights):
    """
    Compute the weighted soft ensemble for binary classification.
    
    Args:
        predictions (list of array-like): A list where each element is a numpy array or a list
                                          of shape (n_samples,), representing
                                          the predicted probabilities for the positive class.
        weights (list of float): A list of weights corresponding to the models.
                                 The length of this list must match the length of `predictions`.
    
    Returns:
        numpy.ndarray: An array of shape (n_samples,) representing the ensembled probabilities.
    """
    # Ensure weights and predictions have the same length
    if len(predictions) != len(weights):
        raise ValueError("The number of prediction arrays must match the number of weights.")
    
    # Convert predictions to numpy arrays if they are not already
    predictions = [np.array(pred) for pred in predictions]
    
    # Normalize weights to sum to 1
    normalized_weights = np.array(weights) / np.sum(weights)
    
    # Compute the weighted sum of prediction probabilities
    ensembled_probs = np.zeros_like(predictions[0], dtype=float)
    for pred, weight in zip(predictions, normalized_weights):
        ensembled_probs += weight * pred
    
    return ensembled_probs

In [11]:
from collections import Counter
from typing import List

def hard_ensemble(predictions: List[List[int]]) -> List[int]:
    """
    Combine predictions from multiple models using hard voting.

    Args:
        predictions (List[List[int]]): A list of lists where each inner list contains predictions
                                       from one model. All inner lists must have the same length.

    Returns:
        List[int]: A single list of final predictions after applying hard voting.
    """
    if not predictions:
        raise ValueError("The predictions list cannot be empty.")

    # Check that all models provide predictions of the same length
    prediction_length = len(predictions[0])
    if not all(len(pred) == prediction_length for pred in predictions):
        raise ValueError("All prediction lists must have the same length.")

    # Transpose the predictions to group predictions for each instance
    transposed_predictions = zip(*predictions)

    # Perform majority voting for each instance
    ensemble_result = []
    for instance_predictions in transposed_predictions:
        # Count the frequency of each prediction
        vote_counts = Counter(instance_predictions)
        # Choose the most common prediction
        majority_vote = vote_counts.most_common(1)[0][0]
        ensemble_result.append(majority_vote)

    return ensemble_result

In [12]:
def flatten(li, inv=False):
    if inv:
        return [1-j for i in li for j in i]
    else:
        return [j for i in li for j in i]

In [13]:
def return_dict(pred, idx):
    li = [0 for i in range(len(pred))]
    for i, j in enumerate(pred):
        if j==1:
            li[i] = {
                "index": idx[i],
                "prediction": "fake"
            }
        if j==0:
            li[i] = {
                "index": idx[i],
                "prediction": "real"
            }
    return li

In [14]:
def or_gate(li):
    m = len(li[0])
    or_li = [0 for i in range(m)]
    for i in range(m):
        sum=0
        for l in li:
            sum += l[i]
        if sum>=1:
            or_li[i] = 1
    return or_li

# **Convert Images to JPEG format**

In [15]:
import os
from PIL import Image

# Source directory containing the PNG images
source_dir = "/kaggle/input/interiit-test/perturbed_images_32"
# Destination directory to save the converted JPG images
output_dir = "/kaggle/working/perturbed_images_32_jpg"

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

def convert_png_to_jpg(source_folder, output_folder):
    converted_count = 0  # Counter for converted images
    for root, dirs, files in os.walk(source_folder):
        # Recreate the folder structure in the output directory
        relative_path = os.path.relpath(root, source_folder)
        target_folder = os.path.join(output_folder, relative_path)
        os.makedirs(target_folder, exist_ok=True)

        # Convert PNG to JPG
        for file in files:
            if file.endswith(".png"):
                source_file = os.path.join(root, file)
                target_file = os.path.join(target_folder, file.replace(".png", ".jpg"))

                try:
                    # Open the image
                    with Image.open(source_file) as img:
                        # Convert to RGB (required for JPG format)
                        rgb_img = img.convert("RGB")
                        # Save the image as JPG
                        rgb_img.save(target_file, "JPEG")
                    print(f"Converted: {source_file} -> {target_file}")
                    converted_count += 1  # Increment the counter
                except Exception as e:
                    print(f"Error converting {source_file}: {e}")

    return converted_count

# Call the function to convert all PNG images to JPG
total_converted = convert_png_to_jpg(source_dir, output_dir)

# Print the total count of converted images
print(f"Total number of PNG images converted to JPG: {total_converted}")


Converted: /kaggle/input/interiit-test/perturbed_images_32/173.png -> /kaggle/working/perturbed_images_32_jpg/./173.jpg
Converted: /kaggle/input/interiit-test/perturbed_images_32/248.png -> /kaggle/working/perturbed_images_32_jpg/./248.jpg
Converted: /kaggle/input/interiit-test/perturbed_images_32/94.png -> /kaggle/working/perturbed_images_32_jpg/./94.jpg
Converted: /kaggle/input/interiit-test/perturbed_images_32/236.png -> /kaggle/working/perturbed_images_32_jpg/./236.jpg
Converted: /kaggle/input/interiit-test/perturbed_images_32/48.png -> /kaggle/working/perturbed_images_32_jpg/./48.jpg
Converted: /kaggle/input/interiit-test/perturbed_images_32/227.png -> /kaggle/working/perturbed_images_32_jpg/./227.jpg
Converted: /kaggle/input/interiit-test/perturbed_images_32/238.png -> /kaggle/working/perturbed_images_32_jpg/./238.jpg
Converted: /kaggle/input/interiit-test/perturbed_images_32/61.png -> /kaggle/working/perturbed_images_32_jpg/./61.jpg
Converted: /kaggle/input/interiit-test/perturb

# **Infer for Densenet and ViT**

In [16]:
import time

In [17]:
#Load Data
test_path = "/kaggle/working/perturbed_images_32_jpg"
test_li = os.listdir(test_path)
test_li.remove(".ipynb_checkpoints")
len(test_li)

300

In [18]:
#Create DataLoaders
testload_dense, test_dense_li = extract_test_data(test_path, test_li, transform_dense)
testload_vit, test_vit_li = extract_test_data(test_path, test_li, transform_vit)

100%|██████████| 300/300 [00:00<00:00, 4420.47it/s]
100%|██████████| 300/300 [00:00<00:00, 4847.37it/s]


In [26]:
#Infer of Densenet
s_t = time.time()
pred_dense, index_dense = dense_model_infer(testload_dense)

p_dense = flatten(pred_dense)
p_dense_int = [i.item() for i in p_dense]
idx_dense = flatten(index_dense)
idx_dense_int = [i.item() for i in idx_dense]
e_t = time.time()
print(f"Infer time per image: {(e_t - s_t)/len(test_dense_li)*1000} ms")

100%|██████████| 10/10 [00:01<00:00,  9.24it/s]

Infer time per image: 3.643615245819092 ms





In [20]:
print(f"No. of fake images classified by Densenet: {int(np.array(p_dense_int).sum())}")

No. of fake images classified by Densenet: 138


In [27]:
#Infer of ViT
s_t = time.time()
pred_vit, index_vit = vit_model_infer(testload_vit)

p_vit = flatten(pred_vit)
p_vit_int = [i.item() for i in p_vit]
idx_vit = flatten(index_vit)
idx_vit_int = [i.item() for i in idx_vit]
e_t = time.time()
print(f"Infer time per image: {(e_t - s_t)/len(test_vit_li)*1000} ms")

100%|██████████| 10/10 [00:09<00:00,  1.07it/s]

Infer time per image: 31.324137846628826 ms





In [28]:
print(f"No. of fake images classified by ViT: {int(np.array(p_vit_int).sum())}")

No. of fake images classified by ViT: 133


In [29]:
idx_dense_int == idx_vit_int

True

# ***3. PATCHCRAFT***

In [30]:
# Set seed for NumPy
np.random.seed(42)

# Set seed for Python's random module
random.seed(42)

# Set seed for PyTorch
torch.manual_seed(42)
torch.cuda.manual_seed(42)
torch.cuda.manual_seed_all(42)  # For multi-GPU setups

# Ensure deterministic behavior in PyTorch
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Set environment variable for deterministic behavior
os.environ['PYTHONHASHSEED'] = str(42)

In [31]:
# Define the original filter
filters = [
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 1, 0, 0],
    [0, 0, -1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 1, 0, 0, 0],
    [0, 0, -1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 1, -1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, -1, 0, 0],
    [0, 1, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, -1, 0, 0],
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, -1, 0, 0],
    [0, 0, 0, 1, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, -1, 1, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [0, 0, -1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, -1, 0, 0],
    [0, 0, 3, 0, 0],
    [0, 0, -3, 0, 0],
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [-1, 0, 0, 0, 0],
    [0, 3, 0, 0, 0],
    [0, 0, -3, 0, 0],
    [0, 0, 0, 1, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [-1, 3, -3, 1, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [0, 0, -3, 0, 0],
    [0, 3, 0, 0, 0],
    [-1, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 1, 0, 0],
    [0, 0, -3, 0, 0],
    [0, 0, 3, 0, 0],
    [0, 0, -1, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 1, 0, 0, 0],
    [0, 0, -3, 0, 0],
    [0, 0, 0, 3, 0],
    [0, 0, 0, 0, -1]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 1, -3, 3, -1],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, -1],
    [0, 0, 0, 3, 0],
    [0, 0, -3, 0, 0],
    [0, 1, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 1, 0, 0],
    [0, 0, -2, 0, 0],
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 1, 0, 0, 0],
    [0, 0, -2, 0, 0],
    [0, 0, 0, 1, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 1, -2, 1, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [0, 0, -2, 0, 0],
    [0, 1, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, -1, 2, -1, 0],
    [0, 2, -4, 2, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, -1, 2, 0, 0],
    [0, 2, -4, 0, 0],
    [0, -1, 2, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0 ,0, 0, 0],
    [0, 2, -4, 2, 0],
    [0, -1, 2, -1, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 2, -1, 0],
    [0, 0, -4, 2, 0],
    [0, 0, 2, -1, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [-1, 2, -2, 2, -1],
    [2, -6, 8, -6, 2],
    [-2, 8, -12, 8, -2],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [-1, 2, -2, 0, 0],
    [2, -6, 8, 0, 0],
    [-2, 8, -12, 0, 0],
    [2, -6, 8, 0, 0],
    [-1, 2, -2, 0, 0]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [-2, 8, -12, 8, -2],
    [2, -6, 8, -6, 2],
    [-1, 2, -2, 2, -1]]),
    np.array([
    [0, 0, -2, 2, -1],
    [0, 0, 8, -6, 2],
    [0, 0, -12, 8, -2],
    [0, 0, 8, -6, 2],
    [0, 0, -2, 2, -1]]),
    np.array([
    [0, 0, 0, 0, 0],
    [0, -1, 2, -1, 0],
    [0, 2, -4, 2, 0],
    [0, -1, 2, -1, 0],
    [0, 0, 0, 0, 0]]),
    np.array([
    [-1, 2, -2, 2, -1],
    [2, -6, 8, -6, 2],
    [-2, 8, -12, 8, -2],
    [2, -6, 8, -6, 2],
    [-1, 2, -2, 2, -1]])
]

In [32]:
def normal_patches(image, patch_size):
    height, width = image.shape[:2]
    patches = [
        image[i:i+patch_size, j:j+patch_size]
        for i in range(0, height, patch_size)
        for j in range(0, width, patch_size)
    ]
    return patches

In [33]:
def reconstruct(patches):
    row_patches = np.concatenate([patch.reshape(-1, 3) for patch in patches], axis=0)
    combined_image = row_patches.reshape(32, 16, 3)
    return combined_image

In [34]:
def texture_diversity(image):
    M = image.shape[0]
    ldiv = 0

    for channel in range(3):
        patch = image[:, :, channel]
        ldiv += np.abs(patch[:, :-1] - patch[:, 1:]).sum()
        ldiv += np.abs(patch[:-1, :] - patch[1:, :]).sum()
        ldiv += np.abs(patch[:-1, :-1] - patch[1:, 1:]).sum()
        ldiv += np.abs(patch[1:, :-1] - patch[:-1, 1:]).sum()

    return ldiv

In [35]:
def plot_images(images):
    num_images = len(images)  
    plt.figure(figsize=(15, 5))
    
    for i, image in enumerate(images): 
        plt.subplot(1, num_images, i+1)
        plt.imshow(image) 
        plt.axis('off')
    
    plt.show

In [36]:
def smash_reconstruction(image, patch_size=8, rich_ratio=0.5):
    
    patches = normal_patches(image, patch_size)
    patches.sort(key=texture_diversity, reverse=True)
    
    # Split patches into rich and poor texture based on sorted texture diversity
    num_rich = int(len(patches) * rich_ratio)
    rich_texture = [patches[i] for i in range(num_rich)]
    poor_texture = [patches[i] for i in range(num_rich, len(patches))]
    
    rich_image = reconstruct(rich_texture)
    poor_image = reconstruct(poor_texture)
    
    return rich_image, poor_image

In [37]:
def high_pass(image):

    filtered_images = []
    for kernel in filters:
        b_channel, g_channel, r_channel = cv2.split(image)
        
        b_filtered = cv2.filter2D(b_channel, -1, kernel)
        g_filtered = cv2.filter2D(g_channel, -1, kernel)
        r_filtered = cv2.filter2D(r_channel, -1, kernel)
        
        filtered_image = cv2.merge([b_filtered, g_filtered, r_filtered])
        filtered_images.append(filtered_image)

    return filtered_images

In [38]:
def concat_high_pass(filtered_images):
    return np.concatenate(np.array(filtered_images), axis=-1)

In [39]:
def plot_images(image, filtered_images):
    # Set up the figure for a row of subplots
    num_images = len(filtered_images) + 1  # Original image + filtered images
    plt.figure(figsize=(15, 5))
    
    # Plot the original image
    plt.subplot(1, num_images, 1)
    plt.title("Original")
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) 
    plt.axis('off')
    
    # Plot each filtered image in a row
    for i, filtered_image in enumerate(filtered_images, start=2):
        plt.subplot(1, num_images, i)
        plt.imshow(cv2.cvtColor(filtered_image, cv2.COLOR_BGR2RGB)) 
        plt.axis('off')

    plt.show()

In [46]:
test_path = "/kaggle/working/perturbed_images_32_jpg"
test_li = [(os.path.join(test_path, filename), int(filename.split(".")[0])) for filename in os.listdir(test_path) if filename.endswith(('.jpg', '.png', '.jpeg'))]

In [47]:
class eval_dataset(Dataset):
    def __init__(self, li):
        super().__init__()
        self.li = li
    
    def __len__(self):
        return len(self.li)
    
    def __getitem__(self, index):
        image_path, idx = self.li[index]
        img = cv2.imread(image_path)
        rich_img, poor_img = smash_reconstruction(img, patch_size=16)
        rich = torch.from_numpy(concat_high_pass(high_pass(rich_img)))
        poor = torch.from_numpy(concat_high_pass(high_pass(poor_img)))
        
        return{
            'rich' : rich.permute(2, 0, 1),
            'poor' : poor.permute(2, 0, 1),
            'index': idx
        }
inter_iit_test = eval_dataset(test_li)
eval_load = DataLoader(inter_iit_test,shuffle=False, batch_size=32)

In [48]:
batch = next(iter(eval_load))

In [49]:
class Residual(nn.Module):
    def __init__(self):
        super().__init__()
        self.poor_cnn = nn.Sequential(
            nn.Conv2d(90, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.Hardtanh()
        )
        self.rich_cnn = nn.Sequential(
            nn.Conv2d(90, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.Hardtanh()
        )
    
    def forward(self, rich, poor):
        rich = rich.float()
        poor = poor.float()
        rich = self.rich_cnn(rich)
        poor = self.poor_cnn(poor)
        residual = rich - poor
        return residual
        

In [50]:
class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Sequential layers for the model
        self.features = nn.Sequential(
            # Convolutional Block 1
            nn.Conv2d(64, 32, kernel_size=3, padding=1),  # Input channels = 64, Output channels = 32
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            
            # Average Pooling
            nn.AvgPool2d(kernel_size=2),

            # Convolutional Block 2
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            
            # Average Pooling
            nn.AvgPool2d(kernel_size=2),

            # Convolutional Block 3
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            
            # Average Pooling
            nn.AvgPool2d(kernel_size=2),

            # Convolutional Block 4
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            
            nn.MaxPool2d(kernel_size=2)
        )
        
        # Fully Connected Layer
        self.fc = nn.Linear(64, 2)
        
    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x


In [51]:
residual = Residual().to(device)
classifier = Classifier().to(device)

In [52]:
dict = torch.load('/kaggle/input/patchcraft/patchcraft_sd.pth', map_location = torch.device('cpu'))
dict.keys()

dict_keys(['residual_state_dict', 'classifier_state_dict'])

In [53]:
residual.load_state_dict(dict['residual_state_dict'])
classifier.load_state_dict(dict['classifier_state_dict'])

<All keys matched successfully>

In [54]:
def eval(test_loader):
    residual.eval()
    classifier.eval()

    pred = []
    label = []
    with torch.no_grad():
        for batch in tqdm(test_loader):
            label.append(batch['index'])
            rich = batch['rich'].to(device)
            poor = batch['poor'].to(device)

            residual_output = residual(rich, poor)
            predictions = classifier(residual_output).detach().cpu()
            pred.append(predictions)

    pred = torch.argmax(torch.nn.functional.softmax(torch.cat(pred,axis =0),dim = 1),dim=1)
    idx = [j for i in label for j in i]
    return pred, idx

# **Infer of patchcraft**

In [55]:
s_t = time.time()
preds, idx_pc = eval(eval_load)
e_t = time.time()
print(f"Infer time per image: {((e_t - s_t)/300)*1000} ms")

100%|██████████| 10/10 [00:01<00:00,  7.94it/s]

Infer time per image: 4.227723280588786 ms





In [56]:
p_patchcraft = list(preds.numpy())
idx_pc_int = [i.item() for i in idx_pc]

In [57]:
print(f"No. of fake images classified by patchcraft: {np.array(p_patchcraft).sum()}")

No. of fake images classified by patchcraft: 156


In [58]:
idx_pc_int == idx_dense_int

True

In [59]:
final_pred = hard_ensemble([p_patchcraft, p_dense_int, p_vit_int])

In [61]:
print(f"No. of fake images classified by the Ensemble: {int(np.array(final_pred).sum())}")

No. of fake images classified by the Ensemble: 138


In [62]:
or_final_pred = or_gate([p_patchcraft, p_dense_int, p_vit_int])
print(f"No. of fake images classified by the or gate ensemble: {int(np.array(or_final_pred).sum())}")

No. of fake images classified by the or gate ensemble: 212


In [63]:
pred_dict = return_dict(or_final_pred, idx_dense_int)
final_pred_dict = sorted(pred_dict, key=lambda x: x['index'])

print(final_pred_dict)

[{'index': 1, 'prediction': 'fake'}, {'index': 2, 'prediction': 'fake'}, {'index': 3, 'prediction': 'real'}, {'index': 4, 'prediction': 'real'}, {'index': 5, 'prediction': 'fake'}, {'index': 6, 'prediction': 'real'}, {'index': 7, 'prediction': 'fake'}, {'index': 8, 'prediction': 'fake'}, {'index': 9, 'prediction': 'real'}, {'index': 10, 'prediction': 'real'}, {'index': 11, 'prediction': 'fake'}, {'index': 12, 'prediction': 'fake'}, {'index': 13, 'prediction': 'fake'}, {'index': 14, 'prediction': 'fake'}, {'index': 15, 'prediction': 'real'}, {'index': 16, 'prediction': 'real'}, {'index': 17, 'prediction': 'real'}, {'index': 18, 'prediction': 'fake'}, {'index': 19, 'prediction': 'fake'}, {'index': 20, 'prediction': 'fake'}, {'index': 21, 'prediction': 'fake'}, {'index': 22, 'prediction': 'fake'}, {'index': 23, 'prediction': 'fake'}, {'index': 24, 'prediction': 'fake'}, {'index': 25, 'prediction': 'fake'}, {'index': 26, 'prediction': 'fake'}, {'index': 27, 'prediction': 'fake'}, {'index':

In [64]:
# Path to save the JSON file
output_path = '/kaggle/working/53_task1.json'

# Save the list of dictionaries as a JSON file
with open(output_path, 'w') as json_file:
    json.dump(final_pred_dict, json_file, indent=1)

print(f"File saved to: {output_path}")

File saved to: /kaggle/working/53_task1.json
