In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd

import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.io import read_image
from torchvision.utils import make_grid

In [None]:
class CustomImageDataset(Dataset):
    """Map-style dataset pairing a collection of images with their targets.

    Args:
        data: Indexable, sized collection of images. An entry that is a
            ``str`` is treated as a file path and loaded with ``read_image``
            when it is accessed; any other entry is used as-is.
        targets: Indexable, sized collection of labels, aligned with ``data``.
        transform: Optional callable applied to each image before it is returned.
        target_transform: Optional callable applied to each label before it
            is returned.

    Raises:
        ValueError: If ``data`` and ``targets`` have different lengths.
    """

    def __init__(self, data, targets, transform=None, target_transform=None):
        if len(data) != len(targets):
            raise ValueError(
                f"data and targets must have the same length, "
                f"got {len(data)} and {len(targets)}"
            )
        self.imgs = data
        self.targets = targets
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of samples in the dataset."""
        # The images are stored in `self.imgs`; no `img_labels` attribute exists.
        return len(self.imgs)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at position ``idx``."""
        img = self.imgs[idx]
        if isinstance(img, str):
            # String entries are file paths: load them lazily on access.
            img = read_image(img)
        label = self.targets[idx]
        # Compare against None so a falsy-but-valid callable is still applied.
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return img, label

In [None]:
# Download (if needed) and load the CIFAR-10 training split without any transform.
data = torchvision.datasets.CIFAR10(root='./data', train=True, download=True)

# SimCLR data-augmentation pipeline.
size = 32  # side length of the random crop; CIFAR-10 images are 32x32 pixels
s = 1      # colour-jitter strength multiplier
color_jitter = transforms.ColorJitter(0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s)
# torchvision's GaussianBlur only accepts odd, positive kernel sizes; `| 1`
# rounds an even value up to the next odd one (and maps 0 to 1).
blur_kernel_size = int(0.1 * size) | 1
transform = transforms.Compose([transforms.RandomResizedCrop(size=size),
                                transforms.RandomHorizontalFlip(),
                                transforms.RandomApply([color_jitter], p=0.8),
                                transforms.RandomGrayscale(p=0.2),
                                transforms.GaussianBlur(kernel_size=blur_kernel_size),
                                transforms.ToTensor()])

# create training set from CustomDataset
# TODO (Exercise 1): replace the placeholder with a CustomImageDataset instance.
# NOTE(review): the pipeline ends with ToTensor, so it presumably expects PIL
# images as input — confirm the image type passed to the dataset matches.
trainset = ...

In [None]:
# Serve the training set in shuffled mini-batches of 64 samples.
dataloader = DataLoader(
    dataset=trainset,
    batch_size=64,
    shuffle=True,
)

In [None]:
# Display the first image of a freshly drawn mini-batch together with its label.
train_features, train_labels = next(iter(dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0]
label = train_labels[0]
# imshow expects channels-last (H, W, C) data.
# NOTE(review): assumes each feature is a channels-first (C, H, W) tensor — confirm.
# `cmap` only affects single-channel images; it is ignored for RGB data.
plt.imshow(img.permute(1, 2, 0), cmap="gray")
plt.show()
print(f"Label: {label}")


# Reference copy of the CIFAR-10 training split with tensor conversion only
# (no augmentation), used as the first row of the comparison grid.
notransform = transforms.Compose([transforms.ToTensor()])
non_augmented = torchvision.datasets.CIFAR10(root='./data', train=True, transform=notransform)


# Fetch every augmented sample once, so the two entries shown in a column come
# from the same `trainset[i]` access rather than two independent ones.
n_examples = 10
augmented_samples = [trainset[i] for i in range(n_examples)]
# NOTE(review): stacking requires `trainset[i][0]` and `trainset[i][1]` to both
# be image tensors of the same shape (presumably the two views from
# Exercise 2) — this fails if the second element is a label.
imgs = torch.stack((*[non_augmented[i][0] for i in range(n_examples)],
                    *[sample[0] for sample in augmented_samples],
                    *[sample[1] for sample in augmented_samples]))
# One row per source: originals, then element 0, then element 1 of each sample.
grid = make_grid(imgs, nrow=n_examples)

transforms.ToPILImage()(grid)

## Exercise 1
Create the custom training set (`trainset`) from the CIFAR-10 images and targets using `CustomImageDataset`.

## Exercise 2
Modify the custom dataset so that it returns two different augmented views of each input image, then write a training loop like the following example.

In [None]:
dataloader = DataLoader(trainset, batch_size=64, shuffle=True)

# Peek at the shapes of the first four mini-batches only.
# The batch is unpacked in the loop header so the loop does not rebind `data`,
# which holds the CIFAR-10 dataset object earlier in the notebook.
for idx, (images, targets) in enumerate(dataloader):
    print(images.shape)
    print(targets.shape)
    if idx == 3:
        break

## Exercise 3

Plot a positive pair and a negative pair from a mini-batch of samples.