# Adding a new dataset
## CocoGlide

We will create a wrapper for the CocoGlide dataset using PhotoHolmes by subclassing `BaseDataset`. First, we download the dataset archive and unzip it into the `dataset/` directory.

In [None]:
# Download the CocoGlide archive (-q silences wget's output), then extract it
# into the dataset/ directory.
!wget https://www.grip.unina.it/download/prog/TruFor/CocoGlide.zip  -q
!unzip CocoGlide.zip -d dataset/

In [None]:
import glob
import os
from typing import List, Optional, Tuple

from photoholmes.datasets.base import BaseDataset
from torch import Tensor

class CocoGlide(BaseDataset):
    """Dataset wrapper for CocoGlide.

    Layout expected under ``dataset_path`` (these are the folders this class
    globs):

    - ``fake/``: tampered images, whose file names start with ``glide``.
    - ``real/``: pristine images; they have no mask.
    - ``mask/``: masks, matched to a fake image through the image id, i.e.
      the second-to-last ``_``-separated token of the image file name.
    """

    IMAGE_EXTENSION = ".png"
    MASK_EXTENSION = ".png"

    def _get_paths(
        self, dataset_path: str, tampered_only: bool = False
    ) -> Tuple[List[str], List[Optional[str]]]:
        """Collect the image paths and their corresponding mask paths.

        Args:
            dataset_path: Root directory holding the ``fake``, ``real`` and
                ``mask`` folders.
            tampered_only: When True, only the images under ``fake`` are
                returned; otherwise the ``real`` images are appended.

        Returns:
            A tuple ``(image_paths, mask_paths)`` of equal length. The mask
            entry is ``None`` for images that are not fake.

        Raises:
            FileNotFoundError: If a fake image has no matching mask.
        """
        # glob returns entries in arbitrary order; sorting keeps the sample
        # order reproducible across runs and machines.
        image_paths = sorted(
            glob.glob(os.path.join(dataset_path, "fake", f"*{self.IMAGE_EXTENSION}"))
        )
        # Honour the argument instead of reading ``self.tampered_only``, so
        # the result does not depend on what the base class has already set
        # on the instance when this method is called.
        if not tampered_only:
            image_paths += sorted(
                glob.glob(
                    os.path.join(dataset_path, "real", f"*{self.IMAGE_EXTENSION}")
                )
            )
        # Report how many images were found.
        print(len(image_paths))

        mask_paths = [self._get_mask_path(image_path) for image_path in image_paths]

        return image_paths, mask_paths

    def _get_mask_path(self, image_path: str) -> Optional[str]:
        """Return the mask path for ``image_path``, or ``None`` if it is not fake.

        Args:
            image_path: Path to an image located in the ``fake`` or ``real``
                folder of the dataset.

        Returns:
            The path of the matching file in the ``mask`` folder, or ``None``
            when the image file name does not start with ``glide``.

        Raises:
            ValueError: If a fake image file name has no ``_``-separated id.
            FileNotFoundError: If no mask matches the image id.
        """
        # Inspect the file name only: a directory whose name happens to
        # contain "glide" must not make real images look like fake ones.
        file_name = os.path.basename(image_path)
        if not file_name.startswith("glide"):  # fake images all start with "glide"
            return None

        name_tokens = file_name.split("_")
        if len(name_tokens) < 2:
            raise ValueError(f"Cannot extract an image id from '{image_path}'")
        image_id = name_tokens[-2]

        # The image lives in <root>/fake/, so the masks are in the sibling
        # <root>/mask/ folder.
        dataset_root = os.path.dirname(os.path.dirname(image_path))
        candidates = sorted(
            glob.glob(
                os.path.join(
                    dataset_root, "mask", f"*_{image_id}_*{self.MASK_EXTENSION}"
                )
            )
        )
        if not candidates:
            raise FileNotFoundError(f"No mask found for image '{image_path}'")
        return candidates[0]

    def _binarize_mask(self, mask_image: Tensor) -> Tensor:
        """Return a float tensor that is 1.0 where the mask equals 255, else 0.0."""
        return (mask_image == 255).float()


# Instantiate the wrapper on the directory the archive was extracted into.
dataset = CocoGlide('dataset/')

In [None]:
from photoholmes.methods.dq import DQ, dq_preprocessing
from photoholmes.benchmark import Benchmark
from photoholmes.metrics import AUROC

# Metrics to evaluate during the benchmark run.
metrics = [AUROC()]
# Method under evaluation, built with its default arguments.
method = DQ()
benchmark = Benchmark()
# Re-create the dataset, this time with the preprocessing pipeline imported
# alongside DQ (presumably so samples are prepared the way that method
# expects -- confirm against the PhotoHolmes documentation).
dataset = CocoGlide('dataset/', preprocessing_pipeline=dq_preprocessing)

# Run `method` over `dataset` and score it with `metrics`.
benchmark.run(method=method, dataset=dataset, metrics=metrics)

