<a href="https://colab.research.google.com/github/ArstmN/Pytorch_DL_FaraDars/blob/main/Custom_datasets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Data Retrieval**

In [1]:
import requests
from pathlib import Path
import zipfile

# Download and unpack the cats-vs-dogs image archive into the current working
# directory, skipping the work when the image directory is already present.
data_path = Path.cwd()
image_path = data_path / "cats_dogs_images"
zip_path = data_path / "cats_dogs.zip"
url = "https://github.com/ArstmN/Pytorch_DL_FaraDars/raw/main/data/cats_dogs.zip"

if image_path.is_dir():
    print(f"{image_path} directory exist")
else:
    print(f"No {image_path} directory, making one...")

    # Fetch the archive BEFORE creating the image directory: the directory's
    # existence is what the guard above uses to decide the data is ready, so
    # it must not appear if the download fails.
    print("Downloading image data")
    request = requests.get(url, timeout=60)  # seconds; avoid hanging forever
    request.raise_for_status()  # do not save an HTTP error page as a zip file
    with open(zip_path, "wb") as f:
        f.write(request.content)

    # ZipFile() validates the archive on open, so the directory is only
    # created once there is a readable zip to extract into it.
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        print("Extracting cats and dogs images...")
        image_path.mkdir(parents=True, exist_ok=True)
        zip_ref.extractall(image_path)


No /content/cats_dogs_images directory, making one...
Downloading image data
Extracting cats and dogs images...


# **Custom Dataset**

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd 
from pathlib import Path
from skimage import io

In [14]:
class MyDataset(Dataset):
    """Image dataset described by a CSV annotation table.

    For each row, column 0 of the CSV is joined onto ``image_dir`` to locate
    the image file, and column 1 is returned as the label.
    """

    def __init__(self, csv_file, image_dir, transform=None):
        """Read the annotation table and store the dataset settings.

        Args:
            csv_file: CSV file to load with ``pd.read_csv``.
            image_dir: Directory the file names from column 0 are joined onto.
            transform: Optional callable applied to every loaded image.
        """
        self.transform = transform
        self.image_dir = image_dir
        self.annotations = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.annotations.index)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair for row ``index``.

        The image is read with ``io.imread``; when a truthy ``transform`` was
        supplied it is applied to the image before returning.
        """
        file_name = self.annotations.iloc[index, 0]
        img = io.imread(Path('.') / self.image_dir / file_name)
        target = self.annotations.iloc[index, 1]

        # Guard clause: nothing more to do when no transform is configured.
        if not self.transform:
            return img, target

        return self.transform(img), target

In [15]:
# Build the dataset from `image_path`, the extraction directory defined in the
# download cell, rather than a hard-coded /content/... path, so the notebook
# also works when the working directory is not /content.
# No transform is passed, so samples are returned exactly as loaded.
train_data = MyDataset(csv_file=image_path / 'annotations.csv',
                       image_dir=image_path / 'cats_dogs')

In [16]:
# Wrap the dataset in a loader that serves one sample per batch in shuffled order.
train_dataloader = DataLoader(dataset=train_data, batch_size=1, shuffle=True)

In [17]:
# Pull a single batch from the loader and split it into features and labels.
first_batch = next(iter(train_dataloader))
train_feature, train_label = first_batch

In [18]:
# Show the image tensor of the fetched batch. The dataset was built without a
# transform and the loader uses batch_size=1, so this is one image as loaded
# plus a leading batch dimension.
train_feature

tensor([[[[ 91, 121,  67],
          [ 92, 123,  66],
          [ 93, 124,  67],
          ...,
          [ 43,  67,  54],
          [ 42,  66,  53],
          [ 41,  65,  52]],

         [[ 91, 121,  67],
          [ 92, 123,  66],
          [ 93, 124,  67],
          ...,
          [ 43,  67,  54],
          [ 42,  66,  53],
          [ 41,  65,  52]],

         [[ 91, 121,  67],
          [ 92, 123,  66],
          [ 93, 124,  67],
          ...,
          [ 44,  68,  55],
          [ 43,  67,  54],
          [ 42,  66,  53]],

         ...,

         [[ 90, 123,  78],
          [ 90, 123,  76],
          [ 88, 125,  74],
          ...,
          [192, 194, 145],
          [182, 191, 138],
          [172, 187, 130]],

         [[ 93, 129,  81],
          [ 94, 131,  80],
          [ 94, 131,  79],
          ...,
          [197, 194, 149],
          [187, 191, 141],
          [178, 188, 135]],

         [[ 99, 136,  85],
          [ 99, 136,  85],
          [ 99, 136,  84],
         

In [19]:
# Show the label tensor of the same batch (a single entry, since batch_size=1).
train_label

tensor([1])