# Imports and Helper Functions

In [22]:
import torch
import torchvision
from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split

In [23]:
# helper function to show an image
def matplotlib_imshow(img, one_channel=False, normalized=False):
    """Draw an image tensor with ``plt.imshow``.

    Args:
        img: Tensor providing ``mean(dim=0)`` and ``numpy()``. The colour
            branch transposes it with axes ``(1, 2, 0)``, so it is presumably
            channel-first (C, H, W) -- confirm against callers.
        one_channel: When true, average over dim 0 and draw the result with
            the "Greys" colormap.
        normalized: When true, apply ``img / 2 + 0.5`` before drawing (the
            inverse of a mean=0.5 / std=0.5 normalisation).
    """
    pixels = img.mean(dim=0) if one_channel else img
    if normalized:
        pixels = pixels / 2 + 0.5     # unnormalize
    array = pixels.numpy()

    if not one_channel:
        # imshow expects the channel axis last.
        plt.imshow(np.transpose(array, (1, 2, 0)))
        return
    plt.imshow(array, cmap="Greys")

#list full file paths from a directory
def list_file_paths(dirs):
    """Return the sorted paths of every entry directly inside ``dirs``.

    Args:
        dirs: An iterable of directory paths, or a single directory given as
            a ``str`` / ``os.PathLike``.

    Returns:
        A sorted list holding ``os.path.join(directory, entry)`` for every
        entry that ``os.listdir`` reports in each directory. Sub-directories
        are listed as entries but not descended into.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. a missing directory).
    """
    # A bare path would otherwise be iterated one character at a time.
    if isinstance(dirs, (str, os.PathLike)):
        dirs = [dirs]

    all_files_paths = []
    # ``directory`` rather than ``dir`` so the builtin is not shadowed.
    for directory in dirs:
        all_files_paths.extend(
            os.path.join(directory, entry) for entry in os.listdir(directory)
        )

    return sorted(all_files_paths)

def train_val_split(frac_train, image_paths, seed=None):
    """Randomly split ``image_paths`` into a training and a validation subset.

    Args:
        frac_train: Fraction of the paths assigned to the training subset;
            must lie in ``[0, 1]``.
        image_paths: Sequence of image paths to split.
        seed: Optional seed for the shuffle; pass a value to make the split
            reproducible.

    Returns:
        A ``(train_paths, val_paths)`` tuple of NumPy arrays that together
        contain every input path exactly once.

    Raises:
        ValueError: If ``frac_train`` is outside ``[0, 1]``.
    """
    if not 0.0 <= frac_train <= 1.0:
        raise ValueError(f"frac_train must be in [0, 1], got {frac_train!r}")

    image_paths = np.array(image_paths)
    n_images = len(image_paths)

    # round() rather than truncation so e.g. 0.29 * 100 does not lose a sample
    # to floating-point error.
    n_train = int(round(frac_train * n_images))
    order = np.random.default_rng(seed).permutation(n_images)

    return image_paths[order[:n_train]], image_paths[order[n_train:]]

#freeze model parameters
def freeze(module: torch.nn.Module):
    """Disable gradient tracking for every parameter of ``module``.

    Only ``requires_grad`` is changed; the module's train/eval mode is left
    untouched.
    """
    module.requires_grad_(False)

#unfreeze model parameters
def unfreeze(module: torch.nn.Module):
    """Enable gradient tracking for every parameter of ``module``.

    Only ``requires_grad`` is changed; the module's train/eval mode is left
    untouched.
    """
    module.requires_grad_(True)

def accuracy(y_pred, y):
    """Return the fraction of positions at which ``y_pred`` equals ``y``.

    The element-wise matches are summed with ``torch.sum`` and divided by
    ``len(y)``, so the result is a tensor.
    """
    matches = y == y_pred
    n_correct = torch.sum(matches)
    return n_correct / len(y)

# Datasets

In [25]:
class CoreSlices (torch.utils.data.Dataset):
    """Dataset of image files whose label is encoded in the file name.

    The label is the text after the last underscore of the file's base name
    with the extension removed, e.g. ``"core_01_3.png"`` -> ``"3"``.

    Args:
        imgs: Sequence of image file paths.
        transform: Callable applied to the ``PIL.Image`` opened from each path.
        class_to_idx: Optional mapping from label string to integer class
            index. When omitted the raw label string is returned.
            NOTE(review): string labels are not batched into a tensor by the
            default DataLoader collate, so a training loop that calls
            ``y.to(device)`` needs this mapping (or numeric labels).
    """

    def __init__(self, imgs, transform, class_to_idx=None):
        self.imgs = imgs
        self.transform = transform
        self.class_to_idx = class_to_idx

    def __len__(self):
        return len(self.imgs)

    @staticmethod
    def _label_from_path(path):
        """Return the text after the last "_" of ``path``'s extension-less base name."""
        stem = os.path.splitext(os.path.basename(path))[0]
        return stem.split("_")[-1]

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the sample at ``idx``.

        Raises:
            KeyError: If ``class_to_idx`` was given and lacks the parsed label.
        """
        path = self.imgs[idx]
        img = self.transform(Image.open(path))

        #get the root name of the file (no file extension) and extract the label
        label = self._label_from_path(path)
        if self.class_to_idx is not None:
            label = self.class_to_idx[label]

        return img, label



In [26]:
# Both pipelines convert to a tensor and normalise each channel with the
# mean/std values documented for torchvision's ImageNet-pretrained models.
train_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]), #TODO get values from dataset
    #augmentation
])

test_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]), #TODO get values from dataset
])

#get list of all images in directories
image_dirs = ["data/slices/"]#,"data/Core Images/21-20a- 1/","data/Core Images/42-29- 6/","data/Core Images/206-01- 2/"]

images = list_file_paths(image_dirs)

# 80% train; the remaining 20% is halved into validation and test.
# NOTE: no random_state is passed, so the split differs between runs.
images_train, images_val = train_test_split(images, train_size=0.8)
images_val, images_test = train_test_split(images_val, train_size=0.5)

# Each dataset is built from its own split; previously all three wrapped
# images_train, so validation and test only ever saw training images.
# Validation uses the augmentation-free test transform so its metrics stay
# comparable once augmentation is added to train_transform.
slices = {}
slices["train"] = CoreSlices(images_train, train_transform)
slices["val"] = CoreSlices(images_val, test_transform)
slices["test"] = CoreSlices(images_test, test_transform)



# Dataloaders

In [27]:
# NOTE: BATCH_SIZE must already be defined when this cell runs (the Config
# cell assigns it).
dataloaders = {}
dataloaders["train"] = torch.utils.data.DataLoader(slices["train"], batch_size=BATCH_SIZE, shuffle=True, pin_memory=True, drop_last=True)
# Evaluation loaders keep every sample in a fixed order: dropping the last
# partial batch would exclude samples from the metrics, and a split smaller
# than BATCH_SIZE would yield no batches at all.
dataloaders["val"] = torch.utils.data.DataLoader(slices["val"], batch_size=BATCH_SIZE, shuffle=False, pin_memory=True, drop_last=False)
dataloaders["test"] = torch.utils.data.DataLoader(slices["test"], batch_size=BATCH_SIZE, shuffle=False, pin_memory=True, drop_last=False)

# Model

In [28]:
# NOTE: N_CLASSES must already be defined when this cell runs (the Config
# cell assigns it).
model = torchvision.models.resnet152(pretrained=True)

# torchvision's ResNet uses ``fc`` as its classification head. Assigning to
# ``model.classifier`` only attaches an unused module and leaves the original
# head in the forward path, so the new head has to replace ``fc`` itself.
model.fc = torch.nn.Sequential(
    torch.nn.Linear(in_features=model.fc.in_features, out_features=N_CLASSES),
)

# Freeze the pretrained backbone; the new ``fc`` head stays trainable.
freeze(model.conv1)
freeze(model.bn1)
freeze(model.maxpool)
freeze(model.layer1)
freeze(model.layer2)
freeze(model.layer3)
freeze(model.layer4)
freeze(model.avgpool)

# Runner

In [31]:
class Runner():
    """Drive training and validation of a classification model.

    Args:
        model: Module to train; it is moved to ``device``.
        optimizer: Optimizer stepped once per training batch.
        criterion: Callable ``criterion(outputs, y)`` returning the loss.
        device: Device that the model and every batch are moved to.
        summarywriter: Optional writer whose ``add_scalar`` receives the
            per-epoch loss and accuracy.
        epoch_scheduler: Optional scheduler stepped once per training epoch.
        batch_scheduler: Optional scheduler stepped once per training batch.

    ``metrics["train"]`` / ``metrics["val"]`` hold the mean loss and accuracy
    of the most recently completed epoch for that split.

    Batches must unpack into ``(X, y)`` where both provide ``.to(device)``.
    """

    def __init__(self, model, optimizer, criterion, device, summarywriter=None, epoch_scheduler=None, batch_scheduler=None):
        # Use the ``model`` argument: ``self.model`` does not exist yet.
        self.model = model.to(device)
        self.optimizer = optimizer
        self.criterion = criterion
        self.device = device
        self.summarywriter = summarywriter
        self.epoch_scheduler = epoch_scheduler
        self.batch_scheduler = batch_scheduler

        #TODO implement the class based metrics used in the AiCourse notebooks
        self.metrics = {
            "train": {"loss": 0.0, "accuracy": 0.0},
            "val": {"loss": 0.0, "accuracy": 0.0},
        }

    def predict(self, outputs):
        """Return the index of the largest value along dim 1 of ``outputs``."""
        return torch.argmax(outputs, dim=1)

    def predict_proba(self, outputs):
        """Return the softmax of ``outputs`` along dim 1."""
        return torch.nn.functional.softmax(outputs, dim=1)

    def _reset_metrics(self, split):
        """Zero the running totals of ``split`` so epochs do not bleed together."""
        self.metrics[split] = {"loss": 0.0, "accuracy": 0.0}

    def _finish_epoch(self, split, tag, n_steps, epoch):
        """Turn the running totals of ``split`` into means and log them."""
        if n_steps == 0:
            # Empty dataloader: nothing to average or log.
            return

        self.metrics[split]["loss"] /= n_steps
        self.metrics[split]["accuracy"] /= n_steps

        #output to tensorboard
        if self.summarywriter is not None:
            self.summarywriter.add_scalar(f"loss/{tag}", self.metrics[split]["loss"], epoch)
            self.summarywriter.add_scalar(f"accuracy/{tag}", self.metrics[split]["accuracy"], epoch)

    def train(self, dataloader, epoch):
        """Train for one epoch, yielding ``(step, loss)`` after every batch.

        This is a generator: the epoch averages, the TensorBoard logging and
        the epoch scheduler step only run once it has been fully consumed.
        """
        #switch to train mode
        self.model.train()
        self._reset_metrics("train")
        train_step = 0

        #loop over each batch
        for X, y in dataloader:
            train_step += 1

            X, y = X.to(self.device), y.to(self.device)
            outputs = self.model(X)

            y_pred = self.predict(outputs)
            self.metrics["train"]["accuracy"] += float(accuracy(y_pred, y))

            # back prop; clear first so stale gradients never leak into a step
            loss = self.criterion(outputs, y)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            #run scheduler per step
            if self.batch_scheduler is not None:
                self.batch_scheduler.step()

            # Accumulate a plain float so the autograd graph is not kept alive.
            self.metrics["train"]["loss"] += loss.item()

            yield train_step, loss.detach()

        #calculate final metrics and log them
        self._finish_epoch("train", "training", train_step, epoch)

        #run scheduler per epoch
        if self.epoch_scheduler is not None:
            self.epoch_scheduler.step()

    def evaluate(self, dataloader, epoch):
        """Evaluate for one epoch, yielding ``(step, loss)`` after every batch.

        This is a generator: the epoch averages and the TensorBoard logging
        only run once it has been fully consumed.
        """
        self.model.eval()
        self._reset_metrics("val")
        val_step = 0

        for X, y in dataloader:
            val_step += 1

            X, y = X.to(self.device), y.to(self.device)

            # Keep no_grad scoped to the forward pass: yielding from inside
            # the context would leave gradients disabled for the consumer.
            with torch.no_grad():
                outputs = self.model(X)
                loss = self.criterion(outputs, y)

            y_pred = self.predict(outputs)
            self.metrics["val"]["accuracy"] += float(accuracy(y_pred, y))
            self.metrics["val"]["loss"] += loss.item()

            yield val_step, loss

        #calculate final metrics and log them
        self._finish_epoch("val", "evaluation", val_step, epoch)

    def fit(self, epochs, dataloaders):
        """Run ``epochs`` rounds of training followed by validation.

        Args:
            epochs: Number of epochs to run.
            dataloaders: Mapping with a "train" and a "val" dataloader.
        """
        for epoch in range(epochs):
            print(f"EPOCH {epoch+1}")
            #TRAIN
            for step, loss in self.train(dataloaders["train"], epoch):
                print(f"Training - Step: {step} Loss: {loss.item()}")

            #EVALUATE
            for step, loss in self.evaluate(dataloaders["val"], epoch):
                print(f"Validation - Step: {step}| Loss: {loss.item()}")
        

# Config

In [None]:
# Hyper-parameters first. The Dataloaders and Model cells read BATCH_SIZE and
# N_CLASSES, so these assignments must be executed before those cells.
N_CLASSES = 7
BATCH_SIZE = 32

criterion = torch.nn.CrossEntropyLoss() #BCEwithLogits?
optimizer = torch.optim.Adam(model.parameters())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
summarywriter = SummaryWriter()

runner = Runner(model, optimizer, criterion, device, summarywriter)

# Run and Tensorboard

In [None]:
# Train and validate for a fixed number of epochs.
epochs = 10
runner.fit(epochs=epochs, dataloaders=dataloaders)

In [None]:
# IPython magics: load the TensorBoard notebook extension, then open
# TensorBoard on the "runs" log directory.
%load_ext tensorboard
%tensorboard --logdir runs