In [None]:
import pandas as pd
import torch
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Index every frame found in the images folder.
# File names follow: patientid_examid_spotnumber_framenumber_score.png
# i.e. "patient_id exam_id spot[1-14] frame_number score"
import glob
filename_fields = ["patient_id", "exam_id", "spot", "frame_number", "score"]
images_paths = glob.glob("images/*.png", recursive=True)

# Drop the 7-character "images/" prefix and the 4-character ".png" suffix,
# then split what is left on "_" to get one row of fields per image.
images_df = pd.DataFrame(
    [path[7:-4].split("_") for path in images_paths],
    columns=filename_fields,
)
# Every field is kept as text so a row can be joined back into its file name.
images_df = images_df.astype({field: str for field in filename_fields})

# Keep a copy of the index on disk as a spreadsheet.
images_df.to_excel("images_df.xlsx")


In [None]:
# PREPARING DATA SPLITTING

# Unique patient ids, sorted so the list starts in the same order on every run
# (the iteration order of a set of strings changes between interpreter sessions).
patients_ids = sorted(set(images_df["patient_id"]))

# Sanity check: number of frames per score for patient 1050, exam 1127, spot 1.
p = "1050"
e = "1127"
s = "1"
print(images_df[(images_df["patient_id"] == p) & (images_df["exam_id"] == e) & (images_df["spot"] == s)].groupby("score").count())



import random

# Patient-level 70/30 split: all frames of one patient end up on the same side.
# A dedicated, seeded generator makes the split identical after a kernel restart
# and leaves the global `random` state untouched.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.7
random.Random(SPLIT_SEED).shuffle(patients_ids)
split_index = int(len(patients_ids) * TRAIN_FRACTION)
train_patients = patients_ids[:split_index]
test_patients = patients_ids[split_index:]
print("train patients: ", train_patients, "test patients: ", test_patients)

In [None]:
from torch.utils.data import Dataset
import numpy as np
import torch
from torchvision.io import read_image
import os
class MIDataset(Dataset):
    """Frames of the selected patients, served as (image, integer score) pairs.

    Args:
        dataset: frame index with the columns "patient_id", "exam_id", "spot",
            "frame_number" and "score"; one row per image file.
        patient_list: patient ids whose frames belong to this dataset.

    Attributes:
        images_paths: numpy array of "images/<fields joined by _>.png" paths.
        images_scores: numpy int64 array with the score of each image.

    Raises:
        ValueError: if a selected score cannot be converted to an integer.
    """

    # Order of the fields inside a file name.
    _NAME_FIELDS = ["patient_id", "exam_id", "spot", "frame_number", "score"]

    def __init__(self, dataset: pd.DataFrame, patient_list: list):
        selected = dataset[dataset["patient_id"].isin(patient_list)]
        # Rebuild each file path from the named columns, so extra columns or a
        # different column order in `dataset` cannot corrupt the paths.
        self.images_paths = np.array([
            "images/" + "_".join(map(str, fields)) + ".png"
            for fields in selected[self._NAME_FIELDS].values
        ])
        # Scores arrive as text; the loss and accuracy code need integer class
        # indices. NOTE(review): assumes scores are already 0-based class ids
        # (0..num_classes-1) - confirm against the labelling scheme.
        self.images_scores = selected["score"].astype("int64").to_numpy()

    def __len__(self):
        """Number of frames in the dataset."""
        return len(self.images_paths)

    def __getitem__(self, idx):
        """Return the decoded image tensor and its score as a Python int."""
        image = read_image(str(self.images_paths[idx]))
        # A plain int lets the default collate function build an int64 label tensor.
        label = int(self.images_scores[idx])
        return image, label

In [None]:
import torch
import torch.nn as nn
# fine tune resnet18 to work with 4 output classes
class MyModel(nn.Module):
    """Pretrained ResNet-18 backbone followed by a linear classification layer.

    Args:
        num_classes: number of output logits.

    Attributes:
        resnet18: the backbone loaded through torch.hub, with its own final
            `fc` layer replaced by an identity so it returns pooled features.
        shrink: linear layer mapping those features to `num_classes` logits.
    """

    def __init__(self, num_classes=4):
        super(MyModel, self).__init__()
        self.resnet18 = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
        # The stock `fc` head emits ImageNet logits, not the features feeding it.
        # Read the feature width from that head, then bypass it so `shrink`
        # receives inputs of the size it was built for.
        feature_dim = self.resnet18.fc.in_features
        self.resnet18.fc = nn.Identity()
        self.shrink = nn.Linear(feature_dim, num_classes)

    def forward(self, x):
        """Return the logits for a batch of images.

        The batch is concatenated with itself three times along dim 1, so the
        backbone only gets 3 channels when `x` has exactly one.
        NOTE(review): presumably the PNG frames are single-channel - confirm.
        """
        # set image back to 3 channels
        x = torch.cat((x, x, x), 1)
        # the loader yields byte tensors; the convolutions need floats
        # NOTE(review): values are not rescaled or normalised here - confirm intended.
        x = x.float()
        features = self.resnet18(x)
        return self.shrink(features)

# define the training loop
def train(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    """Train `model` and checkpoint it whenever validation accuracy improves.

    Args:
        model: network to optimise; moved to CUDA when available, else CPU.
        train_loader: iterable of (data, target) tensor batches used for updates.
        val_loader: iterable of (data, target) batches passed to `evaluate`
            after every epoch.
        criterion: loss called as `criterion(output, target)`.
        optimizer: optimizer already bound to the model's parameters.
        num_epochs: number of passes over `train_loader`.

    Returns:
        The same model object, holding the weights of the last epoch. The
        weights of the best epoch are written to "best_model.pt" in the
        current directory (only when accuracy rises above the best so far,
        which starts at 0).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    best_accuracy = 0
    for epoch in range(num_epochs):
        # evaluate() leaves the model in eval mode, so training mode has to be
        # restored at the start of every epoch, not just once.
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # inputs and labels must live on the same device as the model
            data = data.to(device)
            target = target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            print("Epoch: ", epoch, "Batch: ", batch_idx, "Loss: ", loss.item())
        # validation accuracy decides whether this epoch is worth keeping
        accuracy = evaluate(model, val_loader)
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), "best_model.pt")
            best_accuracy = accuracy
            print("Best accuracy: ", best_accuracy)
    return model

# define the evaluation loop
def evaluate(model, val_loader):
    """Compute the classification accuracy of `model` over `val_loader`.

    The model is switched to eval mode (and left in it) and moved to CUDA when
    available, else CPU.

    Args:
        model: network returning one row of class scores per sample.
        val_loader: iterable of (data, target) tensor batches.

    Returns:
        Accuracy as a percentage (0-100); 0.0 when the loader yields no samples.
    """
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    correct = 0
    total = 0
    # no gradients are needed for scoring; skipping them saves memory and time
    with torch.no_grad():
        for data, target in val_loader:
            # predictions are compared with the labels, so both go to the device
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            # index of the highest score along dim 1 is the predicted class
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    # an empty loader would otherwise divide by zero
    accuracy = 100 * correct / total if total else 0.0
    print("Accuracy: ", accuracy)
    return accuracy

# define the test loop
def test(model, test_loader):
    # set model to evaluation mode
    model.eval()
    # set the device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # move model to device
    model.to(device)
    # set the number of correct predictions to 0
    correct = 0
    # set the number of total predictions to 0
    total = 0
    # loop over the batches
    for batch_idx, (data, target) in enumerate(test_loader):
        # move data and target to device
        # data, target = data.to(device), target.to(device)
        # forward pass
        output = model(data)
        # get the predictions
        _, predicted = torch.max(output.data, 1)
        # update the total number of predictions
        total += target.size(0)
        # update the number of correct predictions
        correct += (predicted == target).sum().item()
    # calculate the accuracy
    accuracy = 100 * correct / total
    # print the accuracy
    print("Accuracy: ", accuracy)
    return accuracy

# define the dataset
# Hold out a slice of the training patients for validation, so the checkpoint
# kept by train() is chosen on frames the optimizer never saw (validating on
# the training loader would only reward memorisation).
VAL_FRACTION = 0.2
if len(train_patients) >= 2:
    num_val_patients = max(1, int(len(train_patients) * VAL_FRACTION))
    val_patients = train_patients[:num_val_patients]
    fit_patients = train_patients[num_val_patients:]
else:
    # too few patients to hold any out: validate on the training patients
    val_patients = fit_patients = train_patients

# define the training dataset and its dataloader
dataset = MIDataset(images_df, fit_patients)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
# define the validation dataset and its dataloader (order is irrelevant for accuracy)
val_dataset = MIDataset(images_df, val_patients)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False)
# define the model
model = MyModel()
# define the loss function
criterion = nn.CrossEntropyLoss()
# define the optimizer SGD
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# train the model
model = train(model, dataloader, val_dataloader, criterion, optimizer, num_epochs=10)

# define the test dataset
test_dataset = MIDataset(images_df, test_patients)
# define the test dataloader (no shuffling needed to compute an accuracy)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)
# test the model
# NOTE(review): `model` holds the last-epoch weights; the best-epoch weights
# are the ones saved in best_model.pt - confirm which should be reported.
result = test(model, test_dataloader)
print("Test accuracy: ", result)






In [None]:
import torch
# Loss shared by the training and test steps.
loss_function = torch.nn.CrossEntropyLoss()

# One dataset per side of the patient split ...
train_dataset, test_dataset = (
    MIDataset(images_df, patients) for patients in (train_patients, test_patients)
)

# ... each wrapped in a shuffling loader that yields two frames per batch.
train_loader, test_loader = (
    torch.utils.data.DataLoader(split, batch_size=2, shuffle=True)
    for split in (train_dataset, test_dataset)
)

In [None]:
import torchvision
def initialize_resnet18(num_classes = 4):
    """Build a pretrained torchvision ResNet-18 with a fresh output layer.

    Args:
        num_classes: number of outputs of the new final linear layer.

    Returns:
        The network, with `fc` replaced by a linear layer that keeps the
        original input width and emits `num_classes` values.
    """
    model = torchvision.models.resnet18(pretrained=True)
    # Reuse the input width of the existing head for its replacement.
    model.fc = torch.nn.Linear(in_features=model.fc.in_features,
                               out_features=num_classes)
    return model

def get_optimizer(model, lr=0.001):
    """Create an Adam optimizer with separate groups for the body and the head.

    Parameters whose name starts with 'fc' form the second group; all others
    form the first. Both groups currently use the same learning rate `lr`
    (the head sets it explicitly, the body inherits the optimizer default).

    Args:
        model: module whose named parameters are optimised.
        lr: learning rate applied to both groups.

    Returns:
        A `torch.optim.Adam` instance with two parameter groups.
    """
    named_params = list(model.named_parameters())
    backbone_params = [weights for name, weights in named_params if not name.startswith('fc')]
    head_params = [weights for name, weights in named_params if name.startswith('fc')]

    param_groups = [
        {'params': backbone_params},
        {'params': head_params, 'lr': lr},
    ]
    return torch.optim.Adam(param_groups, lr=lr)

In [None]:
def training_step(net, dataloader, optimizer, loss_function, device='cuda'):
    """Run one optimisation pass over `dataloader`.

    Args:
        net: network to train; it must already live on `device`.
        dataloader: iterable of (inputs, targets) tensor batches.
        optimizer: optimizer bound to the parameters of `net`.
        loss_function: called as `loss_function(outputs, targets)`.
        device: device the batches are moved to.

    Returns:
        A tuple `(loss, accuracy)`: the sum of the per-batch loss values divided
        by the number of samples seen, and the percentage of samples whose
        arg-max prediction equals the target.

    Raises:
        ZeroDivisionError: if `dataloader` yields no samples.
    """
    samples = 0.
    cumulative_loss = 0.
    cumulative_accuracy = 0.

    net.train()

    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        # decoded images arrive as byte tensors; the network weights are floats
        if not inputs.is_floating_point():
            inputs = inputs.float()
        # keep the whole label vector: one target per sample in the batch
        targets = targets.to(device)

        # start from clean gradients, whatever state the optimizer was left in
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

        samples += inputs.shape[0]
        cumulative_loss += loss.item()
        # predicted class = index of the highest score along dim 1
        _, predicted = outputs.max(dim=1)
        cumulative_accuracy += predicted.eq(targets).sum().item()

    return cumulative_loss/samples, cumulative_accuracy/samples*100

In [None]:
def test_step(net, dataloader, loss_function, device='cuda'):
    
    samples = 0.
    cumulative_loss = 0.
    cumulative_accuracy = 0.
    
    net.eval()
    
    with torch.no_grad():
        
        for batch_idx, (inputs, targets) in enumerate(dataloader):
            inputs = inputs.to(device)
            targets = targets.to(device)
            
            outputs = net(inputs)
            loss = loss_function(outputs, targets)
            
            samples += inputs.shape[0]
            cumulative_loss += loss.item()
            _, predicted = outputs.max(dim=1)
            
            cumulative_accuracy += predicted.eq(targets).sum().item()
    
    return cumulative_loss/samples, cumulative_accuracy/samples*100

In [None]:
# Fine-tune a torchvision ResNet-18 with 4 outputs on the patient-level split.
net = initialize_resnet18(4).to(device)
optimizer = get_optimizer(net, lr=0.001)

NUM_EPOCHS = 20
# `epoch` (not `e`) so the exam id stored in `e` by an earlier cell is not overwritten.
for epoch in range(NUM_EPOCHS):
    train_loss, train_accuracy = training_step(net, train_loader, optimizer, loss_function, device)
    test_loss, test_accuracy = test_step(net, test_loader, loss_function, device)
    # report the metrics of every epoch instead of silently discarding them
    print("Epoch: ", epoch + 1, "/", NUM_EPOCHS,
          "Train loss: ", train_loss, "Train accuracy: ", train_accuracy,
          "Test loss: ", test_loss, "Test accuracy: ", test_accuracy)