# Deep learning

In [1]:
%load_ext autoreload
%autoreload 2

import tasks.data
import tasks.torch_train_eval
import tasks.adaptive_train_eval
import tasks.calibration

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm
import numpy as np
import matplotlib.pyplot as plt
import sklearn.metrics

import random
import os


DATA_DIR = "data/office"
OUTPUT_DIR = "output"

SOURCE_DATASET = "amazon"
SOURCE_VAL_SPLIT = .15
SOURCE_TEST_SPLIT = .1

TARGET_VAL_SPLIT = .15
TARGET_TEST_SPLIT = .15
TARGET_DATASET = "webcam"

FINETUNED_MODEL_DIR = os.path.join(OUTPUT_DIR, "classifier")
UNSUPERVISED_MODEL_DIR = os.path.join(OUTPUT_DIR, "unsupervised")
SEMI_SUPERVISED_FINETUNED_MODEL_DIR = os.path.join(OUTPUT_DIR, "semi-supervised-finetuned")
SEMI_SUPERVISED_ADAPTIVE_MODEL_DIR = os.path.join(OUTPUT_DIR, "semi-supervised-adaptive")

FINETUNE_MODEL = False
TRAIN_UNSUPERVISED_MODEL = False
FINETUNE_SEMI_SUPERVISED_MODEL = False
TRAIN_SEMI_SUPERVISED_MODEL = True

RANDOM_SEED = 42
BATCH_SIZE = 2

random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


## Modern Office Dataset

In [2]:
import imageio.v2 as imageio
from torchvision.transforms import v2


def resnet_preprocessor(image: np.ndarray) -> np.ndarray:
    """
    Preprocesses an image for ResNet model.

    :param numpy.ndarray image: The input image.
    :return: Preprocessed image.
    :rtype: numpy.ndarray
    """
    preprocess = torchvision.transforms.Compose(
        [    
            v2.ToImage(),
            v2.ToDtype(torch.float32, scale=True),  # Normalize expects float input
            v2.Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            ),
        ]
    )

    image = preprocess(image)
    return image


def image_read_func(image_path):
    return imageio.imread(image_path, pilmode='RGB')


In [3]:
source_dataset = tasks.data.ImageDataset(
    parser_func=image_read_func,
    preprocessing_func=resnet_preprocessor,
)
source_dataset.load_from_directory(os.path.join(DATA_DIR, SOURCE_DATASET))

source_train_dataset, source_val_dataset, source_test_dataset = tasks.data.train_val_test_split(
    source_dataset, SOURCE_VAL_SPLIT, SOURCE_TEST_SPLIT
)

  0%|          | 0/31 [00:00<?, ?it/s]

In [4]:
def create_padded_dataloader(
    dataset: tasks.data.ImageDataset,
    shuffle: bool = True,
    sampler = None
):
    # sampler and shuffle are mutually exclusive
    if sampler is None:
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=BATCH_SIZE,
            shuffle=shuffle,
            collate_fn=tasks.data.collate_pad,
        )
    else:
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=BATCH_SIZE,
            sampler=sampler,
            collate_fn=tasks.data.collate_pad,
        )


source_train_loader = create_padded_dataloader(source_train_dataset, shuffle=True)
source_val_loader = create_padded_dataloader(source_val_dataset, shuffle=False)
source_test_loader = create_padded_dataloader(source_test_dataset, shuffle=False)

In [17]:
import pickle


def try_load_weights(model, weights_path: str):
    try:
        model.load_state_dict(torch.load(weights_path))
    except:
        print("No weights found in path ", weights_path)
    return model


def try_load_history(history_path):
    try:
        with open(history_path, 'rb') as handle:
            history = pickle.load(handle)
    except:
        print("No history found in path ", history_path)
        history = None

    return history

In [6]:
target_dataset = tasks.data.ImageDataset(
    parser_func=image_read_func,
    preprocessing_func=resnet_preprocessor,
    label_encoder=source_dataset.label_encoder # use same classes
)
target_dataset.load_from_directory(os.path.join(DATA_DIR, TARGET_DATASET))

target_train_dataset, target_val_dataset, target_test_dataset = train_val_test_split(
    target_dataset, TARGET_VAL_SPLIT, TARGET_TEST_SPLIT
)

target_train_loader = create_padded_dataloader(target_train_dataset, shuffle=True)
target_test_loader = create_padded_dataloader(target_test_dataset, shuffle=False)

  0%|          | 0/31 [00:00<?, ?it/s]

In [7]:
unlabeled_dataset = tasks.data.UnlabeledImageDataset(
    parser_func=image_read_func,
    preprocessing_func=resnet_preprocessor,
)
unlabeled_dataset.load_from_image_dataset(target_train_dataset)

source_history = try_load_history(os.path.join(UNSUPERVISED_MODEL_DIR, "source_history.pickle"))
target_history = try_load_history(os.path.join(UNSUPERVISED_MODEL_DIR, "target_history.pickle"))

In [9]:
to_be_unlabeled_dataset, labeled_dataset = tasks.data.stratified_split(
    target_train_dataset, test_size=0.2
)

len(unlabeled_dataset), len(labeled_dataset), len(target_train_dataset)

(556, 112, 556)

In [10]:
unlabeled_dataset = tasks.data.UnlabeledImageDataset(
    parser_func=labeled_dataset.parser_func,
    preprocessing_func=labeled_dataset.preprocessing_func,
)
unlabeled_dataset.load_from_image_dataset(to_be_unlabeled_dataset)

# combine data from both domain and target datasets
for sample_img, sample_label in source_train_dataset.samples:
    labeled_dataset.add(sample_img, sample_label)

len(labeled_dataset), len(source_train_dataset)

(2224, 2112)

## Source-only model

In [None]:
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer_ft = optim.Adam(model.parameters(), lr=0.0005)
# disable lr for adam
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=100000, gamma=0.05)

In [11]:
import torchinfo

def get_default_model():
    return torch.hub.load(
        "pytorch/vision:v0.10.0", "resnet18", weights="DEFAULT"
    ).to(device)

model = get_default_model()

torchinfo.summary(model, input_size=(BATCH_SIZE, 3, 300, 300))

Using cache found in /home/dimits/.cache/torch/hub/pytorch_vision_v0.10.0


Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [2, 1000]                 --
├─Conv2d: 1-1                            [2, 64, 150, 150]         9,408
├─BatchNorm2d: 1-2                       [2, 64, 150, 150]         128
├─ReLU: 1-3                              [2, 64, 150, 150]         --
├─MaxPool2d: 1-4                         [2, 64, 75, 75]           --
├─Sequential: 1-5                        [2, 64, 75, 75]           --
│    └─BasicBlock: 2-1                   [2, 64, 75, 75]           --
│    │    └─Conv2d: 3-1                  [2, 64, 75, 75]           36,864
│    │    └─BatchNorm2d: 3-2             [2, 64, 75, 75]           128
│    │    └─ReLU: 3-3                    [2, 64, 75, 75]           --
│    │    └─Conv2d: 3-4                  [2, 64, 75, 75]           36,864
│    │    └─BatchNorm2d: 3-5             [2, 64, 75, 75]           128
│    │    └─ReLU: 3-6                    [2, 64, 75, 75]           --
│

In [None]:
history = try_load_history(os.path.join(FINETUNED_MODEL_DIR, "history.pickle"))


if FINETUNE_MODEL:
    model, history = tasks.torch_train_eval.train_model(
        model,
        criterion,
        optimizer_ft,
        exp_lr_scheduler,
        device,
        source_train_loader,
        source_val_loader,
        output_dir=FINETUNED_MODEL_DIR,
        num_epochs=1,
        patience=5,
        warmup_period=1,
        previous_history=history
    )
else:
    model = try_load_weights(model, os.path.join(FINETUNED_MODEL_DIR, "model.pt"))

In [None]:
plt.plot(np.array(range(len(history["train_loss"]))), history["train_loss"])
plt.plot(np.array(range(len(history["val_loss"]))), history["val_loss"])
plt.xlabel("Epoch")
plt.ylabel("Cross Entropy Loss")
plt.title("Training loss")
plt.show()

In [None]:
# validation accuracy has been calculated wrong here, ignore it for now
plt.plot(np.array(range(len(history["train_acc"]))), history["train_acc"])
plt.plot(np.array(range(len(history["val_acc"]))), history["val_acc"])

plt.xlabel("Epoch")
plt.ylabel("Cross Entropy Loss")
plt.title("Training Accuracy")
plt.show()

In [None]:
# save logits for later calibration
source_actual, source_predicted = tasks.torch_train_eval.test(
    model, source_test_loader, device
)
class_names = source_dataset.label_encoder.classes_

print(
    sklearn.metrics.classification_report(
        source_actual,
        source_predicted,
        zero_division=0,
        target_names=class_names,
        labels=np.arange(0, len(class_names), 1),
    )
)

In [None]:
source_cf_matrix = sklearn.metrics.confusion_matrix(source_actual, source_predicted)
display = sklearn.metrics.ConfusionMatrixDisplay(
    confusion_matrix=source_cf_matrix, display_labels=class_names
)
display.plot()
plt.xticks(rotation=90)
plt.show()

In [None]:
target_actual, target_predicted = tasks.torch_train_eval.test(model, target_test_loader, device)
class_names = target_dataset.label_encoder.classes_

print(
    sklearn.metrics.classification_report(
        target_actual,
        target_predicted,
        zero_division=0,
        target_names=class_names,
        labels=np.arange(0, len(class_names), 1),
    )
)

In [None]:
target_cf_matrix = sklearn.metrics.confusion_matrix(target_actual, target_predicted, labels=np.arange(0, len(class_names), 1))
display = sklearn.metrics.ConfusionMatrixDisplay(
    confusion_matrix=target_cf_matrix, display_labels=class_names
)
display.plot()
plt.xticks(rotation=90)
plt.show()

## Unsupervised Domain Adaptation

https://webcache.googleusercontent.com/search?q=cache:https://towardsdatascience.com/pseudo-labeling-to-deal-with-small-datasets-what-why-how-fd6f903213af

https://stats.stackexchange.com/questions/364584/why-does-using-pseudo-labeling-non-trivially-affect-the-results

https://www.sciencedirect.com/science/article/abs/pii/S1077314222001102

In [None]:
if TRAIN_UNSUPERVISED_MODEL:
    model, source_history, target_history = (
        tasks.adaptive_train_eval.train_adaptive_model(
            model=model,
            criterion=criterion,
            optimizer=optimizer_ft,
            scheduler=exp_lr_scheduler,
            device=device,
            source_train_dataset=source_train_dataset,
            source_val_dataset=source_val_dataset,
            labeled_dataloader_initializer=lambda dataset, sampler=None: create_padded_dataloader(
                dataset,
                sampler=sampler,
            ),
            unlabeled_dataloader_initializer=lambda dataset: torch.utils.data.DataLoader(
                dataset, batch_size=1, shuffle=True
            ),
            unlabeled_target_train_dataset=unlabeled_dataset,
            target_val_dataset=target_val_dataset,
            output_dir=UNSUPERVISED_MODEL_DIR,
            num_epochs=20,
            previous_source_history=source_history,
            previous_target_history=target_history,
        )
    )

In [None]:
plt.plot(np.array(range(len(target_history["train_acc"]))), target_history["train_acc"])
plt.plot(np.array(range(len(target_history["val_acc"]))), target_history["val_acc"])
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Training loss")
plt.show()

In [None]:
target_actual, target_predicted = tasks.torch_train_eval.test(model, target_test_loader, device)
class_names = target_dataset.label_encoder.classes_

print(
    sklearn.metrics.classification_report(
        target_actual,
        target_predicted,
        zero_division=0,
        target_names=class_names,
        labels=np.arange(0, len(class_names), 1),
    )
)

In [None]:
target_cf_matrix = sklearn.metrics.confusion_matrix(target_actual, target_predicted, labels=np.arange(0, len(class_names), 1))
display = sklearn.metrics.ConfusionMatrixDisplay(
    confusion_matrix=target_cf_matrix, display_labels=class_names
)
display.plot()
plt.xticks(rotation=90)
plt.show()

## Semi-supervised domain adaptation

In [None]:
if FINETUNE_SEMI_SUPERVISED_MODEL:
    print("Starting fine-tuning on mixed dataset...")
    model, history = tasks.torch_train_eval.train_model(
        model=model,
        criterion=criterion,
        optimizer=optimizer_ft,
        scheduler=exp_lr_scheduler,
        device=device,
        #train_dataloader=source_train_loader,
        train_dataloader=create_padded_dataloader(labeled_dataset, shuffle=True),
        val_dataloader=source_val_loader,
        output_dir=SEMI_SUPERVISED_FINETUNED_MODEL_DIR,
        num_epochs=25,
        patience=5,
        warmup_period=5,
        previous_history=history
    )

In [18]:
model = try_load_weights(model, os.path.join(SEMI_SUPERVISED_FINETUNED_MODEL_DIR, "model.pt"))

In [27]:
if TRAIN_SEMI_SUPERVISED_MODEL:
    print("Starting pseudo-labeling task...")
    model, source_history, target_history, label_history = (
        tasks.adaptive_train_eval.train_adaptive_model(
            model=model,
            criterion=criterion,
            optimizer=optimizer_ft,
            scheduler=exp_lr_scheduler,
            device=device,
            source_train_dataset=labeled_dataset,
            source_val_dataset=source_val_dataset,
            labeled_dataloader_initializer=lambda dataset, sampler=None: create_padded_dataloader(
                dataset,
                sampler=sampler,
            ),
            unlabeled_dataloader_initializer=lambda dataset: torch.utils.data.DataLoader(
                dataset, batch_size=1, shuffle=True
            ),
            unlabeled_target_train_dataset=unlabeled_dataset,
            target_val_dataset=target_val_dataset,
            output_dir=SEMI_SUPERVISED_ADAPTIVE_MODEL_DIR,
            num_epochs=20,
            previous_source_history=source_history,
            previous_target_history=target_history,
        )
    )

Starting pseudo-labeling task...
Epoch 0/19
----------


  0%|          | 0/211 [00:00<?, ?it/s]

  0%|          | 0/444 [00:00<?, ?it/s]

Selected 169/444 images to be included in next epoch
[('data/office/webcam/punchers/frame_0025.jpg', 23), ('data/office/webcam/ring_binder/frame_0037.jpg', 30), ('data/office/webcam/paper_notebook/frame_0010.jpg', 18), ('data/office/webcam/monitor/frame_0038.jpg', 30), ('data/office/webcam/headphones/frame_0018.jpg', 10), ('data/office/webcam/mobile_phone/frame_0009.jpg', 16), ('data/office/webcam/file_cabinet/frame_0008.jpg', 9), ('data/office/webcam/monitor/frame_0010.jpg', 15), ('data/office/webcam/ring_binder/frame_0014.jpg', 30), ('data/office/webcam/pen/frame_0004.jpg', 19), ('data/office/webcam/phone/frame_0012.jpg', 20), ('data/office/webcam/printer/frame_0002.jpg', 23), ('data/office/webcam/punchers/frame_0008.jpg', 28), ('data/office/webcam/monitor/frame_0015.jpg', 15), ('data/office/webcam/mouse/frame_0004.jpg', 16), ('data/office/webcam/desk_lamp/frame_0010.jpg', 8), ('data/office/webcam/stapler/frame_0017.jpg', 23), ('data/office/webcam/punchers/frame_0018.jpg', 28), ('dat

  0%|          | 0/85 [00:00<?, ?it/s]

  0%|          | 0/60 [00:00<?, ?it/s]

Target dataset Val Loss: 4.8788 Val Acc: 0.3445
Epoch 1/19
----------


  0%|          | 0/85 [00:00<?, ?it/s]

  0%|          | 0/211 [00:00<?, ?it/s]

Source dataset Train Loss: 1.3494 Train Acc: 0.9290
Source dataset Val Loss: 2.7688 Val Acc: 0.8104



  0%|          | 0/275 [00:00<?, ?it/s]

Selected 22/275 images to be included in next epoch
[('data/office/webcam/bottle/frame_0009.jpg', 4), ('data/office/webcam/scissors/frame_0022.jpg', 26), ('data/office/webcam/back_pack/frame_0002.jpg', 0), ('data/office/webcam/monitor/frame_0011.jpg', 15), ('data/office/webcam/bottle/frame_0007.jpg', 4), ('data/office/webcam/keyboard/frame_0015.jpg', 11), ('data/office/webcam/calculator/frame_0028.jpg', 5), ('data/office/webcam/phone/frame_0007.jpg', 20), ('data/office/webcam/phone/frame_0003.jpg', 20), ('data/office/webcam/pen/frame_0026.jpg', 19), ('data/office/webcam/keyboard/frame_0007.jpg', 5), ('data/office/webcam/bike/frame_0007.jpg', 1), ('data/office/webcam/keyboard/frame_0017.jpg', 11), ('data/office/webcam/keyboard/frame_0009.jpg', 11), ('data/office/webcam/keyboard/frame_0016.jpg', 11), ('data/office/webcam/keyboard/frame_0013.jpg', 11), ('data/office/webcam/punchers/frame_0014.jpg', 23), ('data/office/webcam/back_pack/frame_0011.jpg', 0), ('data/office/webcam/ruler/frame_0

  0%|          | 0/96 [00:00<?, ?it/s]

## Dustbin