In [1]:
# Train a tiny convolutional neural network to rate the spline interpolations 
import torch
import torch.nn as nn
import torch.optim as optim

import sys

WORKING_DIR = "/home/daniel/Documents/Uni/practical-sose23/castellvi/3D-Castellvi-Prediction/"

sys.path.append(WORKING_DIR + "src/")

from dataset.Splines import Splines, ConvexHullDataset
from utils._prepare_data import DataHandler

dataset = [WORKING_DIR  + 'data/dataset-verse19',  WORKING_DIR + 'data/dataset-verse20', WORKING_DIR + 'data/dataset-tri']
data_types = ['rawdata',"derivatives"]
image_types = ["ct"]
master_list = WORKING_DIR + 'src/dataset/Castellvi_list_v3.xlsx'
processor = DataHandler(master_list=master_list ,dataset=dataset, data_types=data_types, image_types=image_types)

dataset = Splines(processor=processor, binary=True)

# Split dataset into train and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size], generator=torch.Generator().manual_seed(42))

####################################
  from .autonotebook import tqdm as notebook_tqdm
  File "/home/daniel/anaconda3/envs/dev-castellvi/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/daniel/anaconda3/envs/dev-castellvi/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/daniel/.local/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/home/daniel/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
    app.start()
  File "/home/daniel/.local/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 725, in start
    self.io_loop.start()
  File "/home/daniel/.local/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start
    self.asyncio_loop.run_forever()
  File "/home/daniel/anaconda3/envs/dev-castellvi/lib/python3.10/asyncio/base_events.py", line

[!] subreg is not in list of legal keys. This name 'sub-verse015_seg-subreg_subreg-castcorr_msk.nii.gz' is invalid. Legal keys are: ['sub', 'ses', 'sequ', 'acq', 'task', 'chunk', 'hemi', 'sample', 'ce', 'trc', 'stain', 'rec', 'proc', 'mod', 'recording', 'res', 'dir', 'echo', 'flip', 'inv', 'mt', 'part', 'space', 'seg', 'source', 'snapshot', 'ovl', 'run', 'label', 'split', 'den', 'desc', 'ct']. 
For use see https://bids-specification.readthedocs.io/en/stable/99-appendices/09-entities.html
[!] Unknown format seg-ano in file sub-verse602_dir-iso_seg-ano.nii.gz
[!] Unknown format iso-ctd in file sub-verse616_dir-iso_iso-ctd.json
[!] "verse549" is not a valid key/value pair. Expected "KEY-VALUE" in verse549_CT-iso_seg-ano.nii.gz
[!] "template" is not a valid key/value pair. Expected "KEY-VALUE" in sub-verse519_template_sacrum_msk.nii.gz
[!] "sacrum" is not a valid key/value pair. Expected "KEY-VALUE" in sub-verse519_template_sacrum_msk.nii.gz
[!] cortex is not in list of legal keys. This na

In [2]:
# Define a tiny convolutional neural network to rate the spline interpolations
class SplineRatingNet(nn.Module):
    def __init__(self):
        super(SplineRatingNet, self).__init__()
        # Input shape: (Batch_Size, 128, 3)
        self.conv1 = nn.Conv1d(3, 16, 3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv1d(16, 32, 3, padding=1)
        self.act2 = nn.ReLU()

        self.flat = nn.Flatten()

        self.fc1 = nn.Linear(32 * 128, 128)
        self.act3 = nn.ReLU()

        self.fc2 = nn.Linear(128, 3)

    def forward(self, x):
        # Switch shape to (Batch_Size, 3, 128)
        x = x.permute(0, 2, 1)
        x = self.conv1(x)
        x = self.act1(x)

        x = self.conv2(x)
        x = self.act2(x)

        x = self.flat(x)

        x = self.fc1(x)
        x = self.act3(x)

        x = self.fc2(x)

        return x

In [3]:
# Train the network with the Adam optimizer and cross entropy loss
net = SplineRatingNet()
optimizer = optim.Adam(net.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Define a function to train the network
def train(net, optimizer, criterion, train_loader, test_loader, epochs=10):
    for epoch in range(epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader):
            inputs, labels = data

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if i % 10 == 9:
                print(f'Epoch {epoch + 1}, batch {i + 1}: loss {running_loss / 10}')
                running_loss = 0.0

        correct = 0
        total = 0
        with torch.no_grad():
            for data in test_loader:
                inputs, labels = data
                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f'Epoch {epoch + 1}: accuracy {100 * correct / total}')

In [4]:
def get_weighted_sampler(dataset):
    true_labels = []
    for index in range(len(dataset)):
        true_labels.append(dataset[index][1])


    true_labels = torch.tensor(true_labels)

    # Count the occurrences of each true label
    label_counts = torch.bincount(true_labels)

    # Compute the inverse of the label counts to get the weights
    per_label_weights = 1.0 / label_counts.float()

    weights = torch.zeros(size = true_labels.size())

    for i in range(len(label_counts)):
        weights[true_labels == i] = per_label_weights[i] / len(label_counts)

    weights = weights.tolist()

    sampler = torch.utils.data.WeightedRandomSampler(weights, 2*len(dataset), replacement = True)

    return sampler

In [5]:
# Define data loaders with weighted random sampling
sampler = get_weighted_sampler(train_dataset)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, sampler=sampler)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

# Train the network
train(net, optimizer, criterion, train_loader, test_loader, epochs=10)

Epoch 1, batch 10: loss 1.0425883084535599
Epoch 1, batch 20: loss 0.6006335884332656
Epoch 1, batch 30: loss 0.58604736328125
Epoch 1, batch 40: loss 0.5605399966239929
Epoch 1: accuracy 80.21978021978022
Epoch 2, batch 10: loss 0.6155560255050659
Epoch 2, batch 20: loss 0.46444404125213623
Epoch 2, batch 30: loss 0.5027242839336395
Epoch 2, batch 40: loss 0.5209060341119767
Epoch 2: accuracy 70.32967032967034
Epoch 3, batch 10: loss 0.5297072112560273
Epoch 3, batch 20: loss 0.5244044423103332
Epoch 3, batch 30: loss 0.46232715249061584
Epoch 3, batch 40: loss 0.49078837037086487
Epoch 3: accuracy 79.67032967032966
Epoch 4, batch 10: loss 0.444505113363266
Epoch 4, batch 20: loss 0.49460318088531496
Epoch 4, batch 30: loss 0.5337766319513321
Epoch 4, batch 40: loss 0.4740148395299911
Epoch 4: accuracy 78.02197802197803
Epoch 5, batch 10: loss 0.5190467864274979
Epoch 5, batch 20: loss 0.49424166679382325
Epoch 5, batch 30: loss 0.4706708490848541
Epoch 5, batch 40: loss 0.48066582381

In [6]:
# Calculate confusion matrix, F1 score for test dataset
from sklearn.metrics import confusion_matrix, f1_score, matthews_corrcoef, cohen_kappa_score, classification_report
import numpy as np

# Get predictions for test dataset
predictions = []
with torch.no_grad():
    for data in torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True):
        inputs, labels = data
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        predictions.append(predicted)

# Calculate confusion matrix
y_pred = np.concatenate(predictions)
y_true = np.array([label for _, label in test_dataset])

# Print metrics and report
print("Confusion Matrix: \n", confusion_matrix(y_true, y_pred))
print("F1 Score: ", f1_score(y_true, y_pred, average='macro'))
print("MCC: ", matthews_corrcoef(y_true, y_pred))
print("Cohens Kappa: ", cohen_kappa_score(y_true, y_pred))
print("Classification Report: \n", classification_report(y_true, y_pred))


Confusion Matrix: 
 [[87 57]
 [25 13]]
F1 Score:  0.46021412037037035
MCC:  -0.04488655290372549
Cohens Kappa:  -0.041015625
Classification Report: 
               precision    recall  f1-score   support

           0       0.78      0.60      0.68       144
           1       0.19      0.34      0.24        38

    accuracy                           0.55       182
   macro avg       0.48      0.47      0.46       182
weighted avg       0.65      0.55      0.59       182



That is not convincing at all. Let's see if the convex hull is more helpful.

In [7]:
dataset = ConvexHullDataset(processor=processor, binary = True)

# Split dataset into train and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size], generator=torch.Generator().manual_seed(42))


In [8]:
# Define tiny convolutional neural network to rate the convex vertices
class ConvexHullRatingNet(nn.Module):
    def __init__(self):
        super(ConvexHullRatingNet, self).__init__()
        # Input shape: (Batch_Size, 256, 6)
        self.conv1 = nn.Conv1d(6, 16, 3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv1d(16, 32, 3, padding=1)
        self.act2 = nn.ReLU()

        self.flat = nn.Flatten()

        self.out = nn.Linear(32 * 256, 3)

    def forward(self, x):
        # Switch shape to (Batch_Size, 6, 256)
        x = x.permute(0, 2, 1)
        x = self.conv1(x)
        x = self.act1(x)

        x = self.conv2(x)
        x = self.act2(x)

        x = self.flat(x)

        x = self.out(x)

        return x

In [10]:
#Train the network with the Adam optimizer and cross entropy loss
net = ConvexHullRatingNet()
optimizer = optim.Adam(net.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

sampler = get_weighted_sampler(train_dataset)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, sampler=sampler)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

train(net, optimizer, criterion, train_loader, test_loader, epochs=10)

Epoch 1, batch 10: loss 4.9730241894721985
Epoch 1, batch 20: loss 1.5269003033638
Epoch 1, batch 30: loss 0.8526908785104752
Epoch 1, batch 40: loss 0.46973847299814225
Epoch 1: accuracy 68.13186813186813
Epoch 2, batch 10: loss 0.2773231193423271
Epoch 2, batch 20: loss 0.20079188272356988
Epoch 2, batch 30: loss 0.20724923387169839
Epoch 2, batch 40: loss 0.1413572683930397
Epoch 2: accuracy 82.96703296703296
Epoch 3, batch 10: loss 0.10857817120850086
Epoch 3, batch 20: loss 0.06848738603293895
Epoch 3, batch 30: loss 0.0799126025289297
Epoch 3, batch 40: loss 0.0567275395616889
Epoch 3: accuracy 87.91208791208791
Epoch 4, batch 10: loss 0.0410504998639226
Epoch 4, batch 20: loss 0.03731065336614847
Epoch 4, batch 30: loss 0.029471299797296523
Epoch 4, batch 40: loss 0.019559367652982475
Epoch 4: accuracy 87.36263736263736
Epoch 5, batch 10: loss 0.02080434951931238
Epoch 5, batch 20: loss 0.014101482182741164
Epoch 5, batch 30: loss 0.010628015641123057
Epoch 5, batch 40: loss 0.0

In [11]:
# Get predictions for test dataset
predictions = []
with torch.no_grad():
    for data in torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True):
        inputs, labels = data
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        predictions.append(predicted)

# Calculate confusion matrix
y_pred = np.concatenate(predictions)
y_true = np.array([label for _, label in test_dataset])

# Print metrics and report
print("Confusion Matrix: \n", confusion_matrix(y_true, y_pred))
print("F1 Score: ", f1_score(y_true, y_pred, average='macro'))
print("MCC: ", matthews_corrcoef(y_true, y_pred))
print("Cohens Kappa: ", cohen_kappa_score(y_true, y_pred))
print("Classification Report: \n", classification_report(y_true, y_pred))

Confusion Matrix: 
 [[123  21]
 [ 34   4]]
F1 Score:  0.4721299372462164
MCC:  -0.047902711776139334
Cohens Kappa:  -0.04641438427764988
Classification Report: 
               precision    recall  f1-score   support

           0       0.78      0.85      0.82       144
           1       0.16      0.11      0.13        38

    accuracy                           0.70       182
   macro avg       0.47      0.48      0.47       182
weighted avg       0.65      0.70      0.67       182

