In [None]:
from __future__ import annotations

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

from tqdm.notebook import tqdm
from collections import OrderedDict
import glob
import cv2
import random
import time, datetime
import copy

from torchvision.io import read_image
import matplotlib.pyplot as plt

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from torch import nn
import torch
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.optim import lr_scheduler
from torch.utils.data.sampler import WeightedRandomSampler
from torch.utils.data.dataset import Subset

import json

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
from collections import Counter

# Fix random seed

In [None]:
def fix_seed(seed):
    """Seed the RNGs used by this notebook and request deterministic kernels.

    Args:
        seed: Integer seed applied to Python's ``random`` module and to
            PyTorch (CPU generator and the current CUDA device).

    NumPy and TensorFlow seeding remain disabled, as in the original cell.
    """
    # random
    random.seed(seed)
#     # Numpy
#     np.random.seed(seed)
    # Pytorch
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    # `use_deterministic_algorithms` is a function: assigning `True` to it
    # (as the previous code did) replaced the function with a bool and never
    # enabled determinism. `warn_only=True` makes operations that lack a
    # deterministic implementation emit a warning instead of raising.
    torch.use_deterministic_algorithms(True, warn_only=True)
#     # Tensorflow
#     tf.random.set_seed(seed)

SEED = 3407
fix_seed(SEED)

# Load dataset

In [None]:
# Train data: label table for the training images. Column 0 is later used as
# the image file stem and the 'diagnosis' column as the class label.
train_target = pd.read_csv('/kaggle/input/aptos2019-blindness-detection/train.csv')

# Optional class-balance check (disabled):
# train_target['diagnosis'].value_counts().plot(kind='bar');
# plt.title('Class counts');

# does this affect git?

In [None]:
# paths = glob.glob(r'/kaggle/input/aptos2019-blindness-detection/train_images/*.png')
# widths = []
# heights = []

# for path in tqdm(paths):
#     img = cv2.imread(path)
#     h, w = img.shape[:2]
    
#     widths.append(w)
#     heights.append(h)
    
# heights, widths = zip(*[cv2.imread(path).shape[:2] for path in tqdm(paths)])

In [None]:
# plt.hist(heights, bins = 10)
# plt.title('heights')
# plt.show()

In [None]:
# plt.hist(widths, bins = 10)
# plt.title('widths')
# plt.show()

# Define dataset and dataloader

In [None]:
# Target spatial size (height, width) used when resizing images.
resize_h = resize_w = 224
input_shape = (resize_h, resize_w)
# Earlier eager cv2-based resizing, kept for reference (disabled):
# # train_images = list()
# # for path in paths:
# #     img = cv2.imread(path)
# #     img = cv2.resize(img, dsize=(resize_h, resize_w))
# #     train_images.append(img)
print(resize_h, resize_w)

In [None]:
# Define dataset class

class CustomImageDataset(Dataset):
    """Dataset that reads ``<img_dir>/<id>.png`` images listed in a label table.

    Column 0 of ``img_labels`` supplies the file stem and column 1 the label.
    Each item is the triple ``(image, label, idx)`` so callers can track which
    rows were visited.
    """

    def __init__(self, img_labels: pd.DataFrame, img_dir, transform=None, target_transform=None):
        """Store the label table, the image directory and the optional transforms."""
        self.img_labels = img_labels
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load row ``idx``: read the image, fetch its label, apply transforms."""
        file_name = self.img_labels.iloc[idx, 0] + ".png"
        image = read_image(os.path.join(self.img_dir, file_name))
        label = self.img_labels.iloc[idx, 1]
        # Transforms are optional; a falsy value leaves the data untouched.
        image = self.transform(image) if self.transform else image
        label = self.target_transform(label) if self.target_transform else label
        return image, label, idx

In [None]:
# pre_transforms = transforms.Compose([
#     transforms.ToPILImage(),
#     transforms.ToTensor(),
#     transforms.Resize(input_shape)
# ])

# tensor_aptos = CustomImageDataset(train_target, "/kaggle/input/aptos2019-blindness-detection/train_images", pre_transforms)


# imgs = torch.stack([img_t for img_t, _ in tqdm(tensor_aptos)], dim=3)
# imgs.shape

In [None]:
# Per-channel mean handed to transforms.Normalize.
# Presumably precomputed once with the expression kept here for reference:
# mean = imgs.view(3, -1).mean(dim=1)
mean = (0.4138, 0.2210, 0.0737)
print(mean)

In [None]:
# Per-channel standard deviation handed to transforms.Normalize.
# Presumably precomputed once with the expression kept here for reference:
# std = imgs.view(3, -1).std(dim=1)
std = (0.2745, 0.1499, 0.0808)
print(std)

In [None]:
# Same value as defined in the resize cell; repeated so this cell can be re-run on its own.
input_shape = (resize_h, resize_w)

# Random flips and rotation; applied inside train_model when augumented=True.
augment_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=180)
])

# Per-image preprocessing run by the dataset: tensor -> PIL -> float tensor,
# resize to input_shape, then normalize with the per-channel mean/std above.
preprocess_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Resize(input_shape),
    transforms.Normalize(mean, std)
])

# train_img_path = "/kaggle/input/aptos2019-blindness-detection/train_images"
train_img_path = "/kaggle/input/aptos2019-blindness-detection/train_images"
# Dataset over every labelled image; no train/validation split is active.
trainval_data = CustomImageDataset(train_target, train_img_path, preprocess_transforms)
# Disabled stratified 80/20 split, kept for reference:
# X_train, X_val, y_train, y_val = train_test_split(train_target.index.values.tolist(), train_target['diagnosis'], test_size=0.2, shuffle=True, stratify=train_target['diagnosis'].values, random_state=3407)
# val_size = round(len(trainval_data) * 0.2)
# train_size = len(trainval_data) - val_size
# # train_data, val_data = torch.utils.data.random_split(trainval_data, [train_size, val_size], generator=torch.manual_seed(3407))
# train_data, val_data = Subset(trainval_data, X_train), Subset(trainval_data, X_val)

In [None]:
# Sampler that balances classes: each sample is weighted by the inverse of its
# class frequency and drawn with replacement.
# NOTE(review): the DataLoader cell passes shuffle=True and leaves
# `sampler=sampler` commented out, so this sampler is currently unused.
labels = train_target['diagnosis'].values
class_counts = torch.bincount(torch.tensor(labels))  # number of samples per class id
weights = 1. / class_counts.float()                  # inverse-frequency weight per class
sample_weights = weights[labels]                     # weight of each individual sample
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

In [None]:
# labels_map = {
#     0: "No",
#     1: "Mi",
#     2: "Mo",
#     3: "Se",
#     4: "Pr"
# }

In [None]:
# def display_images(dataset):
#     figure = plt.figure(figsize=(10, 10))
#     cols, rows = 10, 10
#     for i in range(1, cols * rows + 1):
#         sample_idx = torch.randint(len(dataset), size=(1,)).item()
#         img, label = trainval_data[sample_idx]
#         figure.add_subplot(rows, cols, i)
#         plt.title(labels_map[label])
#         plt.axis("off")
#         plt.imshow(img.squeeze().permute(1,2,0))
#     plt.show()

In [None]:
# TrainVal images
# display_images(trainval_data)

In [None]:
# Test images
# display_images(test_data)

In [None]:
# Define dataloader

batch_size = 64

# NOTE(review): `method` is not referenced anywhere else in the visible notebook.
method = "None"

# Disabled loaders for a separate train/validation split:
# dataloader_train = torch.utils.data.DataLoader(
#     train_data,
#     batch_size=batch_size,
#     # sampler=sampler
#     shuffle=True
# )

# dataloader_valid = torch.utils.data.DataLoader(
#     val_data,
#     batch_size=batch_size,
#     shuffle=True
# )

# Single shuffled loader over the whole labelled set.
dataloader_trainval = torch.utils.data.DataLoader(
    trainval_data,
    batch_size=batch_size,
    # sampler=sampler
    shuffle=True
)

# Phase name -> loader mapping consumed by train_model; 'TrainVal' is treated
# there as a training phase.
# dataloaders_dict = {'Train': dataloader_train, 'Validation': dataloader_valid}
# dataloader_fine_tune = {'Train': dataloader_train}
dataloader_dict = {'TrainVal': dataloader_trainval}

# Define CNN model

In [None]:
# Prefer the first CUDA device when one is available; otherwise use the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# # modified code from https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html
# def set_parameter_requires_grad(model, train):
#     for param in model.parameters():
#         param.requires_grad = train

In [None]:
def weight_loss(weight, cost_sensitive=False):
    """Build the cross-entropy criterion, optionally weighted per class.

    Args:
        weight: Per-class weight tensor; only used when ``cost_sensitive`` is truthy.
        cost_sensitive: Whether to pass ``weight`` to the loss.

    Returns:
        ``(criterion, cost_sensitive)`` - the loss module and the flag as given.
    """
    # Only the cost-sensitive variant receives explicit arguments.
    loss_kwargs = {'weight': weight, 'reduction': 'mean'} if cost_sensitive else {}
    return nn.CrossEntropyLoss(**loss_kwargs), cost_sensitive

In [None]:
# 'balanced' per-class weights computed from the diagnosis labels, converted to
# a float tensor so they can be handed to weight_loss.
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(train_target['diagnosis'].values), y=train_target['diagnosis'].values)
class_weights=torch.tensor(class_weights,dtype=torch.float)

In [None]:
# DenseNet-121 initialised with torchvision's DEFAULT pretrained weights; the
# classifier head is replaced by a 5-output linear layer.
model = torchvision.models.densenet121(weights=torchvision.models.DenseNet121_Weights.DEFAULT)
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, 5)

optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, weight_decay=0.0001)
# cost_sensitive=False: weight_loss returns an unweighted CrossEntropyLoss, so
# class_weights is not used by the criterion.
criterion, cost_sensitive = weight_loss(class_weights, cost_sensitive=False) # according to an article
# NOTE(review): T_max=32 is larger than the 17 epochs passed to train_model
# later in the notebook, so only part of the cosine curve is traversed -
# confirm this is intended.
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=32, eta_min=1e-4)

# Define the training function

In [None]:
def train_model(model, device, dataloaders: dict, criterion, optimizer, scheduler=None, num_epochs=25, is_inception=False, augumented=False):
    """Run ``num_epochs`` epochs over every phase in ``dataloaders``.

    Args:
        model: Network to optimise; it is moved to ``device``.
        device: Device that the model, the loss and every batch are moved to.
        dataloaders: Maps a phase name to an iterable yielding
            ``(images, targets, indices)`` batches. Phases named ``'Train'``
            or ``'TrainVal'`` are optimised; every other phase is evaluated
            in eval mode with gradients disabled.
        criterion: Loss module called as ``criterion(outputs, targets)``.
        optimizer: Optimizer stepped after every training batch.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
        num_epochs: Number of epochs to run.
        is_inception: If True, the model must return ``(outputs, aux_outputs)``
            in training phases; the auxiliary loss is added with weight 0.4.
        augumented: If True, ``augment_transforms`` is applied to each training
            image before the forward pass.

    Returns:
        ``(model, num_epochs, best_epoch, histories, augumented, best_score,
        time_elapsed, f1_macro, classification_reports)``. ``histories`` maps
        ``'Accuracy'``/``'Loss'`` to per-phase lists with one entry per epoch.
        ``best_epoch``, ``best_score``, ``f1_macro`` and
        ``classification_reports`` are returned with their initial values;
        nothing in this function updates them.
    """
    training_phases = ('Train', 'TrainVal')
    since = time.time()

    model = model.to(device)
    criterion = criterion.to(device)

    histories = {
        'Accuracy': {phase: list() for phase in dataloaders.keys()},
        'Loss': {phase: list() for phase in dataloaders.keys()},
    }

    # Placeholders kept only so the returned tuple keeps its shape.
    f1_macro = list()
    classification_reports = list()
    best_epoch = 20
    best_score = 0

    # Diagnostics accumulated over the whole run: how many targets of each
    # class were trained on, and which dataset indices have been seen.
    class_counts = Counter()
    indices_set = set()

    for epoch in range(num_epochs):
        print('EPOCH: {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        for phase in dataloaders.keys():
            is_training = phase in training_phases
            # Train mode only for phases that are actually optimised, so that
            # evaluation phases never update batch-norm statistics.
            model.train(is_training)

            losses = []
            num = 0
            true_num = 0

            for x, t, idx in tqdm(dataloaders[phase]):
                model.zero_grad()  # reset gradients accumulated by the previous batch
                if is_training and augumented:
                    # Augment image by image: calling the transform on the
                    # batch tensor would draw a single flip/rotation and apply
                    # it to every image in the batch.
                    x = torch.stack([augment_transforms(image) for image in x])
                x, t = x.to(device), t.to(device)

                # Track gradients only in training phases.
                with torch.set_grad_enabled(is_training):
                    if is_inception and is_training:
                        # Inception returns an auxiliary output in train mode; see
                        # https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        y, aux_outputs = model(x)
                        loss = criterion(y, t) + 0.4 * criterion(aux_outputs, t)
                    else:
                        y = model(x)
                        loss = criterion(y, t)

                    # The class with the highest score is the predicted label.
                    pred = y.argmax(dim=1)

                    if is_training:
                        loss.backward()
                        optimizer.step()
                        class_counts.update(t.tolist())
                        indices_set.update(torch.as_tensor(idx).tolist())

                losses.append(loss.item())
                num += t.size(0)
                true_num += (pred == t).sum().item()

            # An empty loader yields NaN metrics instead of a ZeroDivisionError.
            epoch_loss = np.mean(losses) if losses else float('nan')
            epoch_acc = true_num / num if num else float('nan')

            histories['Loss'][phase].append(epoch_loss)
            histories['Accuracy'][phase].append(epoch_acc)

            print('{} [Loss: {:.4f}, Accuracy: {:.4f}]'.format(phase, epoch_loss, epoch_acc))
            if is_training:
                # Reported once per phase rather than once per batch to keep
                # the cell output readable.
                seen = ', '.join('[{}: {}]'.format(label, class_counts[label]) for label in sorted(class_counts))
                print(f'{seen}, [data seen: {len(indices_set)}]')
            print()

        # Step once per epoch (not once per phase), and only when a scheduler
        # was supplied; the default scheduler=None used to raise here.
        if scheduler is not None:
            scheduler.step()

        print()
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    return model, num_epochs, best_epoch, histories, augumented, best_score, time_elapsed, f1_macro, classification_reports

In [None]:
def plot(n_epochs, histories: dict):
    """Draw the training curves side by side, one panel per metric.

    Args:
        n_epochs: Number of epochs; the x axis runs from 1 to ``n_epochs``.
        histories: Maps a metric name to ``{phase: values}``. The first two
            metrics are plotted, each phase as its own labelled line.
    """
    x_values = np.arange(1, n_epochs + 1)

    _, [left_ax, right_ax] = plt.subplots(1, 2, figsize=(8, 3))

    for axis, metric_name in zip((left_ax, right_ax), histories):
        axis.set_title(metric_name)
        # One curve per phase recorded for this metric.
        for phase_name, values in histories[metric_name].items():
            axis.plot(x_values, values, label=phase_name)
        axis.set_xlabel("Epoch")
        axis.legend()

    plt.show()

# Save the initial model

In [None]:
# torch.save(model.state_dict(), 'initial_weight.pth')

# Train the model

In [None]:
# Fine tuning
# set_parameter_requires_grad(model, False)
# model, num_epochs, best_epoch, histories = train_model(model, device, dataloaders_dict, criterion, optimizer, exp_lr_scheduler, num_epochs=100, is_inception=False)

In [None]:
# plot(num_epochs, histories)

In [None]:
# torch.save(model.state_dict(), 'fine_tuned.pth')

In [None]:
# set_parameter_requires_grad(model, True)
# Train on the single 'TrainVal' loader for 17 epochs with augmentation enabled;
# the returned tuple is unpacked into notebook-level names used by later cells.
model, num_epochs, best_epoch, histories, augumented, best_score, time_elapsed, f1_macro, classification_reports = train_model(model, device, dataloader_dict, criterion, optimizer, scheduler, num_epochs=17, is_inception=False, augumented=True)

In [None]:
# Show the accuracy and loss curves recorded during training.
plot(num_epochs, histories)

# Define dataset class

# NOTE(review): this repeats the CustomImageDataset definition from the
# "Define dataset class" cell earlier in the notebook; consider removing one copy.
class CustomImageDataset(Dataset):
    """Dataset that reads ``<img_dir>/<id>.png`` images listed in a label table.

    Column 0 of ``img_labels`` supplies the file stem and column 1 the label.
    Each item is the triple ``(image, label, idx)`` so callers can track which
    rows were visited.
    """

    def __init__(self, img_labels: pd.DataFrame, img_dir, transform=None, target_transform=None):
        """Store the label table, the image directory and the optional transforms."""
        self.img_labels = img_labels
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load row ``idx``: read the image, fetch its label, apply transforms."""
        file_name = self.img_labels.iloc[idx, 0] + ".png"
        image = read_image(os.path.join(self.img_dir, file_name))
        label = self.img_labels.iloc[idx, 1]
        # Transforms are optional; a falsy value leaves the data untouched.
        image = self.transform(image) if self.transform else image
        label = self.target_transform(label) if self.target_transform else label
        return image, label, idx

# Save the model

In [None]:
# Save the entire model object (architecture and weights), trained on the full
# 'TrainVal' loader.
# NOTE(review): torch.save(model, ...) pickles the whole module, so loading it
# needs the same class definitions to be importable; saving model.state_dict()
# is the more portable option - confirm what downstream code expects.
torch.save(model, "densenet121-augmented.pth") # save the entire model with weight parameters, trained with train data

In [None]:
# NOTE(review): train_model initialises best_epoch to 20 and never updates it,
# so this prints that initial value rather than a measured best epoch.
print(best_epoch)