# Deep learning

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim

from tqdm.auto import tqdm
import numpy as np


DATA_DIR = "data/office"
OUTPUT_DIR = "output"
SOURCE_DATASET = "amazon"
VAL_SPLIT = .15
TEST_SPLIT = .1
RANDOM_SEED = 42
BATCH_SIZE = 2

np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [2]:
import sklearn.preprocessing
import imageio.v2 as imageio
from torchvision.transforms import v2

import os
from typing import Callable


def resnet_preprocessor(image: np.ndarray) -> np.ndarray:
    """
    Preprocesses an image for ResNet model.

    :param numpy.ndarray image: The input image.
    :return: Preprocessed image.
    :rtype: numpy.ndarray
    """
    preprocess = torchvision.transforms.Compose(
        [    
            v2.ToImage(),
            v2.ToDtype(torch.uint8, scale=True),  # optional, most input are already uint8 at this point
            v2.RandomResizedCrop(size=(224, 224), antialias=True),  # Or Resize(antialias=True)
            v2.ToDtype(torch.float32, scale=True),  # Normalize expects float input
            v2.ToTensor(),
            v2.Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            ),
        ]
    )

    image = preprocess(image)
    return image


def image_read_func(image_path):
    return imageio.imread(image_path, pilmode='RGB')


class ImageDataset(torch.utils.data.Dataset):
    """
    Lazily loads images from a root directory.
    Directory is assumed to be of shape "<root>/<class_name>/<instance_file>".
    Allows custom functions for reading, preprocessing each image and setting the label encodings.
    """

    def __init__(
        self,
        data_dir: str,
        parser_func: Callable = image_read_func,
        preprocessing_func: Callable[[np.ndarray], np.ndarray] = resnet_preprocessor,
        label_encoder=None,
    ):
        """
        Initializes the ImageDataset.

        :param str data_dir: Root directory containing the dataset.
        :param parser_func: Function to parse images.
        :type parser_func: Callable, optional
        :param preprocessing_func: Function to preprocess images.
        :type preprocessing_func: Callable[[numpy.ndarray], numpy.ndarray], optional
        :param label_encoder: Encoder for label encoding.
        :type label_encoder: sklearn.preprocessing.LabelEncoder or None, optional
        """
        self.parser_func = parser_func
        self.preprocessing_func = preprocessing_func
        self.label_encoder = label_encoder
        self.samples = self._load_dataset_paths(data_dir)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        image_path, label = self.samples[idx]
        image = self.parser_func(image_path)
        image = self.preprocessing_func(image)

        if not torch.is_tensor(image):
            image = torch.tensor(image)

        return image, label

    def _load_dataset_paths(self, data_dir):
        """
        Loads paths of images in the dataset.

        :param str data_dir: Root directory containing the dataset.
        :return: List of tuples containing image paths and their corresponding labels.
        :rtype: List[Tuple[str, int]]
        """
        class_names = os.listdir(data_dir)

        if self.label_encoder is None:
            self.label_encoder = sklearn.preprocessing.LabelEncoder()
            self.label_encoder.fit(class_names)

        samples = []
        for class_name in tqdm(class_names):
            class_data_dir = os.path.join(data_dir, class_name)

            for file_name in os.listdir(class_data_dir):
                samples.append(
                    (
                        os.path.join(class_data_dir, file_name),
                        self.label_encoder.transform([class_name])[0],
                    )
                )

        return samples

In [3]:
def collate_pad(batch):
    # Sort the batch by image height in descending order
    batch = sorted(batch, key=lambda x: x[0].shape[1], reverse=True)

    # Get the maximum height and width among all images in the batch
    max_height = max(img.shape[1] for img, _ in batch)
    max_width = max(img.shape[2] for img, _ in batch)

    # Pad each image to match the maximum height and width
    padded_batch = []
    for img, label in batch:
        # Calculate padding sizes
        pad_height = max_height - img.shape[1]
        pad_width = max_width - img.shape[2]

        # Pad the image
        padded_img = torch.nn.functional.pad(img, (0, pad_width, 0, pad_height))

        padded_batch.append((padded_img, label))

    # Stack images and labels into tensors
    images = torch.stack([img for img, _ in padded_batch])
    labels = torch.tensor([label for _, label in padded_batch])

    return images, labels

In [4]:
from torch.utils.data.sampler import SubsetRandomSampler


def train_val_test_loaders(
    dataset,
    batch_size: int,
    val_split_perc: float,
    test_split_perc: float,
    collate_func: Callable,
):
    dataset_size = len(dataset)

    # Create indices for the dataset
    indices = list(range(dataset_size))
    np.random.shuffle(indices)

    # Calculate split indices
    val_split = int(np.floor(val_split_perc * dataset_size))
    test_split = int(np.floor(test_split_perc * dataset_size))

    # Split indices for train, validation, and test
    train_indices = indices[val_split + test_split :]
    val_indices = indices[:val_split]
    test_indices = indices[val_split : (val_split + test_split)]

    # Create PT data samplers and loaders
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)
    test_sampler = SubsetRandomSampler(test_indices)

    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, sampler=train_sampler, collate_fn=collate_func
    )
    val_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, sampler=val_sampler, collate_fn=collate_func
    )
    test_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, sampler=test_sampler, collate_fn=collate_func
    )

    return train_loader, val_loader, test_loader

In [5]:
source_dataset = ImageDataset(os.path.join(DATA_DIR, SOURCE_DATASET))
source_train_loader, source_val_loader, source_test_loader = train_val_test_loaders(
    source_dataset, BATCH_SIZE, VAL_SPLIT, TEST_SPLIT, collate_pad
)

  0%|          | 0/31 [00:00<?, ?it/s]

In [6]:
import pickle


def try_load_weights(model, weights_path: str):
    try:
        model.load_state_dict(torch.load(weights_path))
    except Exception as e:
        print("Cannot load proper weights: ", e)
    return model


def try_load_history(history_path):
    try:
        with open(history_path, 'rb') as handle:
            history = pickle.load(handle)
    except:
        print("No history found in path ", history_path)
        history = None

    return history

In [7]:
import torchinfo


output_path = os.path.join(OUTPUT_DIR, SOURCE_DATASET)

model = torch.hub.load(
    "pytorch/vision:v0.10.0", "resnet18", weights="DEFAULT"
).to(device)
model = try_load_weights(model, os.path.join(output_path, "model.pt"))

torchinfo.summary(model, input_size=(BATCH_SIZE, 3, 300, 300))

Using cache found in /home/dimits/.cache/torch/hub/pytorch_vision_v0.10.0


Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [2, 1000]                 --
├─Conv2d: 1-1                            [2, 64, 150, 150]         9,408
├─BatchNorm2d: 1-2                       [2, 64, 150, 150]         128
├─ReLU: 1-3                              [2, 64, 150, 150]         --
├─MaxPool2d: 1-4                         [2, 64, 75, 75]           --
├─Sequential: 1-5                        [2, 64, 75, 75]           --
│    └─BasicBlock: 2-1                   [2, 64, 75, 75]           --
│    │    └─Conv2d: 3-1                  [2, 64, 75, 75]           36,864
│    │    └─BatchNorm2d: 3-2             [2, 64, 75, 75]           128
│    │    └─ReLU: 3-3                    [2, 64, 75, 75]           --
│    │    └─Conv2d: 3-4                  [2, 64, 75, 75]           36,864
│    │    └─BatchNorm2d: 3-5             [2, 64, 75, 75]           128
│    │    └─ReLU: 3-6                    [2, 64, 75, 75]           --
│

In [8]:
import time
import pickle

# code adapted from https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html


class EpochResults:
    def __init__(self, train_loss, train_acc, val_loss, val_acc) -> None:
        self.train_loss = train_loss
        self.train_acc = train_acc
        self.val_loss = val_loss
        self.val_acc = val_acc


def train_model(
    model: nn.Module,
    criterion,
    optimizer,
    scheduler,
    device: str,
    train_dataloader: torch.utils.data.DataLoader,
    val_dataloader: torch.utils.data.DataLoader,
    output_dir: str,
    num_epochs: int = 25,
    patience: int = 1,
    previous_history: dict[str, list[float]] = None,
) -> tuple[nn.Module, dict[str, np.ndarray]]:

    dataloaders = {"train": train_dataloader, "val": val_dataloader}
    dataset_sizes = {
        "train": len(train_dataloader.dataset),
        "val": len(val_dataloader.dataset),
    }
    output_model_path = os.path.join(output_dir, "model.pt")
    output_history_path = os.path.join(output_dir, "history.pickle")

    if previous_history is None:
        history = {
            "train_loss": [],
            "train_acc": [],
            "val_loss": [],
            "val_acc": [],
        }
    else:
        history = previous_history

    since = time.time()
    torch.save(model.state_dict(), output_model_path)
    best_acc = 0.0
    # early stopping counter
    epochs_no_progress = 0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch}/{num_epochs - 1}")
        print("-" * 10)
        res = run_epoch(
            model,
            optimizer,
            criterion,
            scheduler,
            dataloaders,
            dataset_sizes,
            device,
        )
        print(
            f"Train Loss: {res.train_loss:.4f} Train Acc: {res.train_acc:.4f}\n"
            f"Val Loss: {res.val_loss:.4f} Val Acc: {res.val_acc:.4f}"
        )

        history = update_save_history(history, res, output_history_path)

        # deep copy the model
        if res.val_acc > best_acc:
            best_acc = res.val_acc
            torch.save(model.state_dict(), output_model_path)
            epochs_no_progress = 0
        else:
            epochs_no_progress += 1

        # early stopping mechanism
        if epochs_no_progress >= patience:
            break

    time_elapsed = time.time() - since
    print(f"Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
    print(f"Best val Acc: {best_acc:4f}")

    # load best model weights
    model.load_state_dict(torch.load(output_model_path))
    return model, history


def update_save_history(
    history: dict, res: EpochResults, hist_output_path: str
) -> dict:
    history["train_loss"].append(res.train_acc)
    history["train_acc"].append(res.train_acc)
    history["val_loss"].append(res.val_loss)
    history["val_acc"].append(res.val_acc)

    try:
        with open(hist_output_path, "wb") as handle:
            pickle.dump(history, handle)
    except Exception as e:
        print("WARNING: Error while saving training history: ", e)

    return history


def run_epoch(
    model: nn.Module,
    optimizer,
    criterion,
    scheduler,
    dataloaders,
    dataset_sizes,
    device: str,
) -> EpochResults:
    train_loss, train_acc = train_epoch(
        model,
        optimizer,
        criterion,
        scheduler,
        dataloaders["train"],
        dataset_sizes["train"],
        device,
    )
    val_loss, val_acc = val_epoch(
        model, criterion, dataloaders["val"], dataset_sizes["val"], device
    )
    return EpochResults(
        train_loss=train_loss,
        train_acc=train_acc,
        val_loss=val_loss,
        val_acc=val_acc,
    )


def train_epoch(
    model: nn.Module,
    optimizer,
    criterion,
    scheduler,
    dataloader,
    dataset_size,
    device: str,
) -> EpochResults:
    # Each epoch has a training and validation phase

    model.train()  # Set model to training mode

    running_loss = 0.0
    running_corrects = 0

    # Iterate over data.
    for inputs, labels in tqdm(dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        # track history if only in train
        with torch.set_grad_enabled(True):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

        scheduler.step()

        # statistics
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / dataset_size
    epoch_acc = running_corrects.double().cpu() / dataset_size

    train_loss = epoch_loss
    train_acc = epoch_acc

    return train_loss, train_acc


def val_epoch(
    model: nn.Module, criterion, dataloader, dataset_size, device: str
) -> tuple[float, float]:

    model.eval()  # Set model to evaluate mode

    running_loss = 0.0
    running_corrects = 0

    # Iterate over data.
    for inputs, labels in tqdm(dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

        # statistics
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / dataset_size
    epoch_acc = running_corrects.double().cpu() / dataset_size

    return epoch_loss, epoch_acc

In [9]:
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer_ft = optim.Adam(model.parameters(), lr=0.0005)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
previous_history = try_load_history(os.path.join(output_path, "history.pickle"))

model, history = train_model(
    model,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    device,
    source_train_loader,
    source_val_loader,
    output_dir=output_path,
    num_epochs=25,
    patience=6,
    previous_history=previous_history
)

Epoch 0/24
----------


  0%|          | 0/1163 [00:00<?, ?it/s]



  0%|          | 0/233 [00:00<?, ?it/s]

Train Loss: 3.2284 Train Acc: 0.0277
Val Loss: 0.6946 Val Acc: 0.0081
Epoch 1/24
----------


  0%|          | 0/1163 [00:00<?, ?it/s]

  0%|          | 0/233 [00:00<?, ?it/s]

Train Loss: 3.2132 Train Acc: 0.0316
Val Loss: 0.6897 Val Acc: 0.0103
Epoch 2/24
----------


  0%|          | 0/1163 [00:00<?, ?it/s]

  0%|          | 0/233 [00:00<?, ?it/s]

Train Loss: 3.2146 Train Acc: 0.0335
Val Loss: 0.6510 Val Acc: 0.0097
Epoch 3/24
----------


  0%|          | 0/1163 [00:00<?, ?it/s]

  0%|          | 0/233 [00:00<?, ?it/s]

Train Loss: 3.2166 Train Acc: 0.0297
Val Loss: 0.6493 Val Acc: 0.0110
Epoch 4/24
----------


  0%|          | 0/1163 [00:00<?, ?it/s]

  0%|          | 0/233 [00:00<?, ?it/s]

Train Loss: 3.2187 Train Acc: 0.0310
Val Loss: 0.6763 Val Acc: 0.0081
Epoch 5/24
----------


  0%|          | 0/1163 [00:00<?, ?it/s]

  0%|          | 0/233 [00:00<?, ?it/s]

Train Loss: 3.2121 Train Acc: 0.0313
Val Loss: 0.6902 Val Acc: 0.0097
Epoch 6/24
----------


  0%|          | 0/1163 [00:00<?, ?it/s]

  0%|          | 0/233 [00:00<?, ?it/s]

Train Loss: 3.2214 Train Acc: 0.0316
Val Loss: 0.6676 Val Acc: 0.0106
Epoch 7/24
----------


  0%|          | 0/1163 [00:00<?, ?it/s]

In [None]:
def test(model, test_dataloader):
    model.eval()

    actual = []
    preds = []

    # Iterate over batches
    for inputs, labels in tqdm(test_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        with torch.no_grad():
            outputs = model(inputs)
        
        # Get and store predictions
        _, predicted = torch.max(outputs, 1)

        for label, pred in zip(labels, predicted):
            actual.append(label.cpu())
            preds.append(pred.cpu())
        
    return np.array(actual), np.array(preds)

In [None]:
actual, predicted = test(model, source_test_loader)

In [None]:
import sklearn.metrics


class_names = source_dataset.label_encoder.classes_
print(
    sklearn.metrics.classification_report(
        actual, predicted, zero_division=0, target_names=class_names
    )
)

In [None]:
# code from https://towardsdatascience.com/neural-network-calibration-using-pytorch-c44b7221a61
def T_scaling(logits, temperature):
    return torch.div(logits, temperature)


temperature = nn.Parameter(torch.ones(1).cuda())
criterion = nn.CrossEntropyLoss()
optimizer = optim.LBFGS([temperature], lr=0.001, max_iter=10000, line_search_fn='strong_wolfe')

logits_list = []
labels_list = []

for i, data in enumerate(tqdm(val_loader, 0)):
    images, labels = data[0].to(device), data[1].to(device)

    net.eval()
    with torch.no_grad():
      logits_list.append(net(images))
      labels_list.append(labels)

# Create tensors
logits_list = torch.cat(logits_list).to(device)
labels_list = torch.cat(labels_list).to(device)

def _eval():
  loss = criterion(T_scaling(logits_list, temperature), labels_list)
  loss.backward()
  return loss

optimizer.step(_eval)