In [1]:
import pandas as pd

# "patient_id exam_id spot[1-14] frame_number score"
# load patients data in a dataframe from images folder
import glob
images_paths = glob.glob("images/*.png", recursive=True)
# images are named as: patientid_examid_spotnumber_framenumber_score.png
# create a dataframe with the data removing "images/"
images_df = pd.DataFrame([path[7:-4].split("_") for path in images_paths], columns=["patient_id", "exam_id", "spot", "frame_number", "score"])
images_df["score"] = images_df["score"].astype(str)
images_df["frame_number"] = images_df["frame_number"].astype(str)
images_df["spot"] = images_df["spot"].astype(str)
images_df["patient_id"] = images_df["patient_id"].astype(str)
images_df["exam_id"] = images_df["exam_id"].astype(str)
images_df.tail()


Unnamed: 0,patient_id,exam_id,spot,frame_number,score
47716,1069,1158,9,96,0
47717,1069,1158,9,97,0
47718,1069,1158,9,98,0
47719,1069,1158,9,99,0
47720,1069,1158,9,9,0


In [2]:
# PREPARING DATA SPLITTING

# get the set of patients id
patients_ids = set(images_df["patient_id"])
patients_ids = list(patients_ids)
import random
random.shuffle(patients_ids)
train_patients = patients_ids[:int(len(patients_ids)*0.7)]
test_patients = patients_ids[int(len(patients_ids)*0.7):]
print("train patients: ", train_patients, "test patients: ", test_patients)

train patients:  ['1067', '1051', '1066', '1047', '1069', '1048', '1052'] test patients:  ['1045', '1017', '1068', '1050']


In [9]:
from torch.utils.data import Dataset
import numpy as np
import torch
class MIDataset(Dataset):
    def __init__(self, dataset: pd.DataFrame, patient_list: list):
        tmp = dataset[dataset["patient_id"].isin(patient_list)]
        # create a list with the path of the images
        self.images_paths = ["images/" + "_".join(row) + ".png" for row in tmp.values]
        # self.images_paths to np array
        self.images_paths = np.array(self.images_paths)
        # create a list with the score of the images
        self.images_scores = tmp["score"].values

    
    def __len__(self):
        return len(self.images_paths)
    
    def __getitem__(self, idx):
        return (self.images_paths, self.images_scores)

In [4]:
import torch
train_dataset = MIDataset(images_df, train_patients)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = MIDataset(images_df, test_patients)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)

loss_function = torch.nn.CrossEntropyLoss()

In [5]:
import torchvision
def initialize_resnet18(num_classes = 4):
    resnet18 = torchvision.models.resnet18(pretrained=True)
    
    in_features = resnet18.fc.in_features
    
    resnet18.fc = torch.nn.Linear(in_features=in_features,
                                  out_features=num_classes)
    return resnet18

def get_optimizer(model, lr=0.001):
    final_layer_weights = []
    rest_of_the_net_weights = []
    
    for name, param in model.named_parameters():
        if name.startswith('fc'):
            final_layer_weights.append(param)
        else:
            rest_of_the_net_weights.append(param)
    
    optimizer = torch.optim.Adam([
            {'params': rest_of_the_net_weights},
            {'params': final_layer_weights, 'lr': lr}
        ], lr=lr)
    
    return optimizer

  device: torch.device = torch.device("cpu"),


In [6]:
def training_step(net, dataloader, optimizer, loss_function, device='cuda'):
    
    samples = 0.
    cumulative_loss = 0.
    cumulative_accuracy = 0.
    
    net.train()
    
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        
        inputs = inputs.to(device)
        targets = targets.to(device)
        
        outputs = net(inputs)
        
        loss = loss_function(outputs, targets)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        samples += inputs.shape[0]
        cumulative_loss += loss.item()
        _, predicted = outputs.max(dim=1)
        
        cumulative_accuracy += predicted.eq(targets).sum().item()
    
    return cumulative_loss/samples, cumulative_accuracy/samples*100

In [7]:
def test_step(net, dataloader, loss_function, device='cuda'):
    
    samples = 0.
    cumulative_loss = 0.
    cumulative_accuracy = 0.
    
    net.eval()
    
    with torch.no_grad():
        
        for batch_idx, (inputs, targets) in enumerate(dataloader):
            inputs = inputs.to(device)
            targets = targets.to(device)
            
            outputs = net(inputs)
            loss = loss_function(outputs, targets)
            
            samples += inputs.shape[0]
            cumulative_loss += loss.item()
            _, predicted = outputs.max(dim=1)
            
            cumulative_accuracy += predicted.eq(targets).sum().item()
    
    return cumulative_loss/samples, cumulative_accuracy/samples*100

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = initialize_resnet18(4).to(device)
optimizer = get_optimizer(net, lr=0.001)

for e in range(20):   
        train_loss, train_accuracy = training_step(net, train_loader, optimizer, loss_function, device)
        test_loss, test_accuracy = test_step(net, test_loader, loss_function, device)



TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <U29