## Custom Image Data: DataLoader, Transforms

Return `image` and `label` from a given `index`

In [1]:
#import os
import numpy as np
import pandas as pd
from PIL import Image
from skimage import io

import torch
from torch.utils.data import Dataset,DataLoader,random_split

#import torchvision
import torchvision.transforms as transforms

#import torch.nn as nn
#import torch.optim as optim

### Load metadata file

In [2]:
#metadata_file = "metadata_liver_skin.csv"
#df = pd.read_csv(metadata_file)
#df

In [3]:
# Define a set of transformations to augment the data
data_transform = transforms.Compose([
        #transforms.RandomResizedCrop(size=512, scale=(0.8, 1.0)),
        #transforms.RandomRotation(degrees=15),
        #transforms.RandomHorizontalFlip(),
        transforms.Resize(256),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

In [4]:
class CancerDataLoader(Dataset):
    
    def __init__(self, metadata_file, transform=None):
        
        ## Load metadata file
        df = pd.read_csv(metadata_file)
        self.metadata = df.loc[:,['Image_name','Cancer_type']]
        print("metadata:", self.metadata)
        
        ## Create index for image and target
        self.metadata_dict = self.metadata.to_dict()
        #print("metadata_dict:", self.metadata_dict)
        
        ## list of unique class names
        self.classes = self.metadata["Cancer_type"].unique()
        print("classes:", self.classes)

        ## class to index
        self.class_to_idx = {j: i for i, j in enumerate(self.classes)}
        print("class_to_idx:", self.class_to_idx)
        
        ## image_path
        self.folder_name = "image_data"
        
        self.transform = transform
                
    def __len__(self):
        return len(self.metadata)
    
    def __getitem__(self, idx):
        ## file name without .png
        file_name0 = self.metadata_dict["Image_name"][idx]
        #print(file_name0)

        ## file name with .png
        file_name = f"/{file_name0}.png"
        #print(file_name)
        
        image_path = self.folder_name + file_name
        #print("image_path:",image_path)

        ## load image
        image = Image.open(image_path)
        #print("image.size:",image.size)
        #image = io.imread(image_path)

        ## load target
        target = self.metadata_dict["Cancer_type"][idx]
        #print("target:", target)
        
        label = self.class_to_idx[target]
        #print("label:", label)
        
        if self.transform:
            image = self.transform(image)
        
        return (image, torch.tensor(label))

In [5]:
metadata_file = "metadata_liver_skin.csv"

dataset = CancerDataLoader(metadata_file, transform=data_transform)

metadata:   Image_name Cancer_type
0        a11       liver
1        a12       liver
2        a13       liver
3        a14       liver
4        a15       liver
5        b11        skin
6        b12        skin
7        b13        skin
8        b14        skin
9        b15        skin
classes: ['liver' 'skin']
class_to_idx: {'liver': 0, 'skin': 1}


### Dataloader

In [6]:
batch_size = 2
data_loader = DataLoader(dataset=dataset,batch_size=batch_size, shuffle=True)

In [7]:
len(data_loader)

5

In [8]:
for i in data_loader:
    print(i)

[tensor([[[[ 1.9749,  1.9749,  1.9749,  ...,  1.5982,  1.7009,  1.7352],
          [ 1.9407,  1.9407,  1.9578,  ..., -0.1314,  0.2453,  0.5193],
          [ 1.9578,  1.9407,  1.9749,  ..., -0.8164, -0.3712,  0.0741],
          ...,
          [-1.0390, -0.7822, -0.4739,  ..., -0.4397, -0.4739, -0.2171],
          [-1.1760, -0.9705, -0.5082,  ..., -1.1418, -1.2274, -0.7822],
          [-1.2445, -1.0219, -0.4397,  ..., -0.4397, -0.3883, -0.3712]],

         [[ 2.1485,  2.1485,  2.1485,  ...,  1.6408,  1.7108,  1.7458],
          [ 2.0959,  2.0959,  2.0959,  ..., -0.8277, -0.4951, -0.2850],
          [ 2.0959,  2.0959,  2.0959,  ..., -1.3529, -1.0028, -0.6702],
          ...,
          [-1.4930, -1.3880, -1.1779,  ..., -0.8978, -1.1779, -0.9853],
          [-1.5630, -1.4755, -1.2129,  ..., -1.3880, -1.5280, -1.2829],
          [-1.6155, -1.4930, -1.1253,  ..., -1.0903, -0.9328, -0.9853]],

         [[ 2.3611,  2.3611,  2.3611,  ...,  1.8731,  1.9254,  2.0474],
          [ 2.3611,  2.3437, 

[tensor([[[[ 1.9235,  1.9235,  1.9235,  ...,  2.0434,  2.0434,  2.0434],
          [ 1.9235,  1.9235,  1.9235,  ...,  2.0434,  2.0092,  2.0263],
          [ 1.9235,  1.9235,  1.9235,  ...,  2.0434,  2.0434,  2.0434],
          ...,
          [-0.0629,  0.9988,  1.9235,  ..., -0.0801,  0.2111,  0.1597],
          [ 0.4166,  0.8276,  1.8379,  ...,  0.1083,  0.3481,  0.4508],
          [ 0.2796,  0.1939,  1.3755,  ...,  0.5878,  1.2214,  1.4954]],

         [[ 2.0959,  2.0959,  2.0959,  ...,  2.2185,  2.2185,  2.2185],
          [ 2.0959,  2.0959,  2.0959,  ...,  2.2185,  2.1835,  2.2185],
          [ 2.0959,  2.0959,  2.0959,  ...,  2.2185,  2.2185,  2.2185],
          ...,
          [-0.3725,  0.9055,  2.0609,  ..., -0.6176, -0.3550, -0.4076],
          [ 0.0651,  0.6254,  1.9384,  ..., -0.3901, -0.1275,  0.1352],
          [-0.1275, -0.1800,  1.4307,  ...,  0.3102,  1.0980,  1.3957]],

         [[ 2.3611,  2.3611,  2.3611,  ...,  2.4308,  2.4308,  2.4308],
          [ 2.3611,  2.3611, 

In [9]:
n_epochs = 3

total_samples = len(dataset)
n_iterations = int(total_samples/batch_size)
print(total_samples, n_iterations)

10 5


In [10]:
for epoch in range(n_epochs):
    for i, (inputs, labels) in enumerate(data_loader):
        ## forward, backward
        #if (i+1) % 5 ==0:
        print(f"epoch {epoch+1}/{n_epochs}, step {i+1}/{n_epochs}, inputs {inputs.shape}, output {labels}")

epoch 1/3, step 1/3, inputs torch.Size([2, 3, 256, 256]), output tensor([0, 1])
epoch 1/3, step 2/3, inputs torch.Size([2, 3, 256, 256]), output tensor([1, 1])
epoch 1/3, step 3/3, inputs torch.Size([2, 3, 256, 256]), output tensor([0, 0])
epoch 1/3, step 4/3, inputs torch.Size([2, 3, 256, 256]), output tensor([0, 1])
epoch 1/3, step 5/3, inputs torch.Size([2, 3, 256, 256]), output tensor([0, 1])
epoch 2/3, step 1/3, inputs torch.Size([2, 3, 256, 256]), output tensor([0, 1])
epoch 2/3, step 2/3, inputs torch.Size([2, 3, 256, 256]), output tensor([0, 0])
epoch 2/3, step 3/3, inputs torch.Size([2, 3, 256, 256]), output tensor([1, 1])
epoch 2/3, step 4/3, inputs torch.Size([2, 3, 256, 256]), output tensor([1, 0])
epoch 2/3, step 5/3, inputs torch.Size([2, 3, 256, 256]), output tensor([1, 0])
epoch 3/3, step 1/3, inputs torch.Size([2, 3, 256, 256]), output tensor([0, 0])
epoch 3/3, step 2/3, inputs torch.Size([2, 3, 256, 256]), output tensor([1, 1])
epoch 3/3, step 3/3, inputs torch.Size([

### Training and Validation datasets

In [11]:
## training and validation datasets
train_size = int(len(dataset)*0.7)
val_size = len(dataset) - train_size

train_set, val_set = random_split(dataset, [train_size, val_size])
print(len(train_set), len(val_set))

7 3


In [12]:
## dataloader
train_loader = DataLoader(dataset=train_set,batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_set,batch_size=batch_size)