In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
import scipy
import torch
import torch.nn as nn
from PIL import Image
from scipy import ndimage
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
import pytorch_lightning as pl
import flash 
from flash.image import ImageClassifier
from flash.core.data.data_module import DataModule

%matplotlib inline
np.random.seed(1)

In [2]:
arr = np.random.randn(10, 5, 3)
arr = arr.transpose(2, 0, 1)
arr.shape

(3, 10, 5)

In [3]:
# Loading the data (signs)
def get_imgs_labels(h5_file_path):
    f = h5py.File(h5_file_path, "r")
    ds_keys = [key for key in f.keys()]
    imgs = np.array(f[ds_keys[1]])    
    labels = np.array(f[ds_keys[2]])
    list_classes = np.array(f[ds_keys[0]])
    #imgs = np.transpose(imgs, (0, 3, 1, 2))
    return imgs, labels, list_classes

train_x, train_y, train_classes = get_imgs_labels("./datasets/train_signs.h5")
test_x, test_y, test_classes = get_imgs_labels("./datasets/test_signs.h5")
print(train_x.shape, train_y.shape)
print(test_x.shape, test_y.shape)

(1080, 64, 64, 3) (1080,)
(120, 64, 64, 3) (120,)


In [4]:
to_tensor = transforms.ToTensor()
img_tensor_arr = [to_tensor(img) for img in train_x]
# stack will arrange the tensors one over the other with dim=0 being the new dimension that  
# stores the number of tensors stacked. This new dimension can be placed at any index
img_tensor_arr = torch.stack(img_tensor_arr)
img_tensor_arr.shape

torch.Size([1080, 3, 64, 64])

In [5]:
# CONSTANTS

NUM_FOLDS = 5
BATCH_SIZE = 64
NUM_WORKERS = 4

In [6]:
class NpArrayImageDataset(Dataset):
    def __init__(self, img_arr, label_arr, transform):
        self.img_arr = img_arr
        self.label_arr = label_arr
        self.transform = transform

    def __len__(self):
        return len(self.label_arr)

    def __getitem__(self, index):
        if self.transform:            
            tfmd_img = self.transform(self.img_arr[index])
            # ToTensor transformation causes the RGB channel dimension to shift from index 2 to 0
            # we interchange the dimensions at index 0 and 2 to move channel dim back to index 2
        #return (tfmd_img.transpose(0,2), self.label_arr[index])
        return (tfmd_img, self.label_arr[index])

In [7]:
# for a training and label data in form of numpy arrays, return a fold_index array whose elements
# represent the fold index. The length of this fold_index array is same as length of input dataset
# and the items for which fold_index array value == cv iteration count are to be used for validation 
# in the corresponding cross validation iteration with rest of the items ( for which fold_index 
# array value != cv iteration count ) being used for training (typical ration being 80:20)
def get_skf_index(num_folds, X, y):
    skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state = 42)
    train_fold_index = np.zeros(len(y))
    for fold, (train_index, val_index) in enumerate(skf.split(X=X, y=y)):
        train_fold_index[val_index] = [fold + 1] * len(val_index)
    return train_fold_index

k_folds = get_skf_index(num_folds=NUM_FOLDS, X=train_x, y=train_y)

In [8]:
def get_imgs_mean_stddev(imgs, axis=None):    
    """Get the mean and standard deviation for images in a dataset / mini-batch
    Args:
        imgs ([2d or 3d numpy array]): images in collection (with no to_tensor transformation applied)
        axis ([tuple of ints], optional): Axis along which mean and std dev is to be calculated.
        Defaults to None.
    Returns:
        [tuple]: tuple of tensors with mean and std.dev. of the imgs
    """
    to_tensor = transforms.ToTensor()
    img_tensor_arr = [to_tensor(img) for img in train_x]
    # stack will arrange the tensors one over the other with dim=0 being the new dimension that  
    # stores the number of tensors stacked. This new dimension can be placed at any index
    img_tensor_arr = torch.stack(img_tensor_arr)
    if axis is not None:
        return torch.mean(img_tensor_arr, axis=axis), torch.std(img_tensor_arr, axis=axis)
    else:            
        return torch.mean(img_tensor_arr, axis=(0, 2, 3)), torch.std(img_tensor_arr, axis=(0,2,3))
    
train_img_mean, train_img_std = get_imgs_mean_stddev(train_x)        
print(train_img_mean, train_img_std)

tensor([0.7630, 0.7105, 0.6634]) tensor([0.1538, 0.1998, 0.2221])


In [9]:
img_transforms = transforms.Compose([transforms.ToTensor(), 
                                     transforms.Normalize(train_img_mean, train_img_std)])

# Get the train and validation data loaders for a specific fold. 
# X: numpy array of input features
# y: numpy array of target labels
# fold: fold index for which to create data loaders                                     
# kfolds: Array that marks each of the data items as belonging to a specific fold
def get_fold_ds(fold, kfolds, X, y):                         
    train_X = X[kfolds != fold]        
    train_y = y[kfolds != fold]    
    val_X = X[kfolds == fold]
    val_y = y[kfolds == fold]
    ds_train_signs = NpArrayImageDataset(train_X, train_y, transform=img_transforms)
    ds_val_signs = NpArrayImageDataset(val_X, val_y, transform=img_transforms)    
    return ds_train_signs, ds_val_signs

In [10]:
from flash.image.classification.transforms import default_transforms
from flash.core.data.transforms import ApplyToKeys
from flash.core.data.data_source import DefaultDataKeys

ds_train, ds_val = get_fold_ds(0, k_folds, train_x, train_y)

train_transform = {        
        "post_tensor_transform": ApplyToKeys(DefaultDataKeys.INPUT, transforms.Normalize(torch.tensor([0.485, 0.456, 0.406]), torch.tensor([0.229, 0.224, 0.225])))
    }

data_module = DataModule.from_datasets(
    train_dataset=ds_train,
    val_dataset=ds_val, 
    train_transform=train_transform, 
    batch_size=BATCH_SIZE
)

In [11]:
img0 = data_module.train_dataset[0]['input']
img0.shape

torch.Size([3, 64, 64])

In [12]:
from flash.image import ImageClassifier
model = ImageClassifier(backbone="resnet18", num_classes=6 )

trainer = flash.Trainer(max_epochs=1, gpus=torch.cuda.device_count())
trainer.finetune(model, datamodule=data_module, strategy="freeze")

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_warn(f"you defined a {step_name} but have no {loader_name}. Skipping {stage} loop")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type           | Params
-------------------------------------------------
0 | train_metrics | ModuleDict     | 0     
1 | val_metrics   | ModuleDict     | 0     
2 | adapter       | DefaultAdapter | 11.2 M
-------------------------------------------------
12.7 K    Trainable params
11.2 M    Non-trainable params
11.2 M    Total params
44.718    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: -1it [00:00, ?it/s]