In [None]:
import pathlib
import sys

import PIL
import PIL.Image  # `import PIL` alone does not load the Image submodule
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.metrics as sklm

import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger
from torchvision.io import read_image

import dataset
import neural_network

# Display the installed PyTorch version as this cell's output.
torch.__version__

In [None]:
# Print the version of the Python interpreter running this kernel.
print(sys.version)

## Parameters

In [None]:
# Report how many CUDA devices PyTorch detects on this machine.
print(f"Num GPUs Available: {torch.cuda.device_count()}")

In [None]:
# Absolute path of the current working directory; the data folder is a
# sibling of it, so every path below is resolved from `CWD.parent`.
CWD = pathlib.Path().absolute()

# Ground-truth label CSV for each split.
# Note that the "test" split points at the ISIC *Validation* files.
LABELS_CSV = {
    "train": CWD.parent.joinpath(
        "data/ISIC2018_Task3_Training_GroundTruth/ISIC2018_Task3_Training_GroundTruth.csv"
    ),
    "test": CWD.parent.joinpath(
        "data/ISIC2018_Task3_Validation_GroundTruth/ISIC2018_Task3_Validation_GroundTruth.csv"
    ),
}

# Image folder for each split, keyed by the same split names as LABELS_CSV.
IMG_DIR = {
    "train": CWD.parent.joinpath("data/ISIC2018_Task3_Training_Input/"),
    "test": CWD.parent.joinpath("data/ISIC2018_Task3_Validation_Input/"),
}

## Classes

In [None]:
# Only the header row is needed to recover the class names, so read zero data
# rows instead of evaluating a Python callback for every line of the CSV.
train_classes = pd.read_csv(LABELS_CSV["train"], nrows=0)

# The first column stores the image name; every remaining column is one class.
CLASSES = np.array(train_classes.columns[1:].to_list())
CLASSES, CLASSES.size

## Preprocessing Dataset

In [None]:
# Sort the matches so the sample image is the same on every run
# (`glob` yields files in arbitrary, filesystem-dependent order).
train_filenames = iter(sorted(IMG_DIR["train"].glob("*.jpg")))

# Use the first training image as a representative sample; fail with a clear
# message instead of a bare StopIteration when the folder holds no images.
filename = next(train_filenames, None)
if filename is None:
    raise FileNotFoundError(f"No .jpg images found in {IMG_DIR['train']}")

# Display the sample image as the cell output.
PIL.Image.open(filename)

In [None]:
# Read the sample image as a tensor and unpack its size into the three
# dimension constants that are later handed to the network module.
# NOTE(review): assumes every image in the dataset shares these dimensions — confirm.
IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH = read_image(str(filename)).size()

IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH

In [None]:
# Training dataset built from the "train" label CSV and image folder.
# Named `train_dataset` (not `train`) because the metrics section later binds
# `train` to a DataFrame of logged training metrics.
train_dataset = dataset.SkinCancerDataset(LABELS_CSV["train"], IMG_DIR["train"])

# Preview a few samples.
# NOTE(review): the leading 2, 2 are presumably the grid rows and columns — confirm in dataset.py.
dataset.plot_some_samples(2, 2, train_dataset, CLASSES)

In [None]:
# Held-out dataset built from the "test" label CSV and image folder.
test = dataset.SkinCancerDataset(LABELS_CSV["test"], IMG_DIR["test"])
# Preview a few samples.
# NOTE(review): the leading 2, 2 are presumably the grid rows and columns — confirm in dataset.py.
dataset.plot_some_samples(2, 2, test, CLASSES)

## Modeling the CNN

In [None]:
# Fixed seed so that repeated runs of the notebook are comparable.
SEED = 0

# Seed the random number generators through Lightning; `workers=True` also
# asks it to seed the dataloader worker processes.
pl.seed_everything(SEED, workers=True)

In [None]:
# Batch size and dataloader worker count handed to the data module.
BATCH_SIZE = 16
DATALOADER_NUM_WORKERS = 4

# TODO: decide whether pixel values should be rescaled here through `transform`.
# For reference, the Keras way of doing it as a model layer:
# model.add(layers.experimental.preprocessing.Rescaling(1./255, input_shape=(img_height, img_width, 3)))
data_module = dataset.SkinCancerDataModule(
    LABELS_CSV,
    IMG_DIR,
    BATCH_SIZE,
    DATALOADER_NUM_WORKERS,
    transform=None  # no transform is supplied for now (see TODO above)
)

In [None]:
# Dropout rate handed to the network constructor.
DROPOUT_RATE = 0.25

# Build the convolutional network from the number of classes and the dropout rate.
model = neural_network.ConvNetwork(CLASSES.size, DROPOUT_RATE)

# Print the model so its structure is visible in the cell output.
print(model)

In [None]:
# Learning rate handed to the network module.
LEARNING_RATE = 2E-4

# Wrap the network in the project's module, giving it the image dimensions
# measured from the sample image, the number of classes and the learning rate.
# NOTE(review): presumably a LightningModule, since it is passed to `trainer.fit` — confirm in neural_network.py.
model_module = neural_network.NetworkModule(
    model,
    IMG_CHANNELS,
    IMG_HEIGHT,
    IMG_WIDTH,
    CLASSES.size,
    LEARNING_RATE
)

In [None]:
# Early stopping monitors the logged "val_loss" metric.
# Named `early_stopping` rather than `validation`: it is a callback, and the
# metrics section later binds `validation` to a DataFrame of validation metrics.
early_stopping = EarlyStopping("val_loss")
progress_bar = TQDMProgressBar()

# Callbacks handed to the trainer.
trainer_callbacks = [early_stopping, progress_bar]

In [None]:
# Training metrics are written as CSV files under the project's logs/ folder.
LOG_DIR = CWD.parent / "logs/"

logger = CSVLogger(LOG_DIR)

In [None]:
# Configure the training loop: between 5 and 20 epochs, hardware picked
# automatically, CSV logging and the callbacks defined earlier.
trainer = pl.Trainer(
    min_epochs=5,
    max_epochs=20,
    accelerator="auto",
    devices="auto",
    logger=logger,
    callbacks=trainer_callbacks,
    deterministic=True,
    # The parameters below shrink the run for quick experiments; most of
    # them can be removed when we want to fully train the network.
    limit_train_batches=0.05,
    limit_val_batches=0.1,
    log_every_n_steps=25,
)

# Train the network module on the data module's data.
trainer.fit(model=model_module, datamodule=data_module)

## Metrics

In [None]:
# Each training run logs to a new version folder, so read the version the
# logger used for this run instead of hard-coding 0, which would keep loading
# the metrics of the very first run. Replace it with an explicit number to
# inspect an older run.
version = logger.version

METRICS_PATH = LOG_DIR / f"lightning_logs/version_{version}" / "metrics.csv"

metrics = pd.read_csv(METRICS_PATH).set_index(["epoch", "step"])

# Keep, for each group of columns, only the rows where those columns have values.
# `train` and `validation` are read by the accuracy plot that follows.
train = metrics[["train_loss_epoch", "train_acc_epoch"]].dropna()
validation = metrics[["val_loss", "val_acc"]].dropna()

validation

In [None]:
# Training vs. validation accuracy per epoch.
# Each curve is drawn against the epoch values of its own rows, so the plot
# still works when the two frames do not have the same number of rows.
# (The metrics DataFrame is no longer overwritten here with an unused dict.)
epochs = train.index.get_level_values("epoch")
validation_epochs = validation.index.get_level_values("epoch")

fig, ax = plt.subplots()
ax.plot(epochs, train["train_acc_epoch"], label="accuracy")
ax.plot(validation_epochs, validation["val_acc"], label="val_accuracy")
ax.set_title("Accuracy per epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend(loc="lower right");

## Confusion Matrix

In [None]:
# Evaluate on the test split. Keyword arguments mirror the `trainer.fit` call
# and hand the data module over as `datamodule` explicitly rather than
# positionally, where it would land in the `dataloaders` slot.
trainer.test(model=model_module, datamodule=data_module)

In [None]:
# Join the tensors collected on the module during testing and move them to the CPU.
# NOTE(review): presumably one tensor of class indices per test batch — confirm in neural_network.py.
true_labels_pos, predicted_labels_pos = (
    torch.cat(collected).cpu()
    for collected in (model_module.test_expected, model_module.test_prediction)
)

# Translate the class positions into class names.
true_labels, predicted_labels = CLASSES[true_labels_pos], CLASSES[predicted_labels_pos]

In [None]:
# Cross-tabulate actual vs. predicted class names. `crosstab` only lists the
# labels it actually saw, so reindex to the full class list: every class gets
# a row and a column (filled with 0), in the same order as `CLASSES`.
confusion_matrix = (
    pd.crosstab(true_labels, predicted_labels, rownames=["Actual"], colnames=["Predicted"])
    .reindex(index=CLASSES, columns=CLASSES, fill_value=0)
    .rename_axis(index="Actual", columns="Predicted")
)
confusion_matrix

In [None]:
# Pass the full list of class positions so the report always has one row per
# class. Without `labels`, scikit-learn infers the classes from the data and
# raises when one is absent from both the true and predicted labels, because
# `target_names` then no longer lines up.
# `zero_division=0` reports 0, without warnings, for classes with no samples
# or no predictions.
report = sklm.classification_report(
    true_labels_pos,
    predicted_labels_pos,
    labels=np.arange(CLASSES.size),
    target_names=CLASSES,
    zero_division=0,
)
print(report)