# Prepare Fine Tuning
Fine-tune the segmentation model on Lina's annotations.

We have annotations for 31 subjects, with three sections each. The subjects are split into:
- train set: 25 subjects
- validation set: 6 subjects

For each section we have four different Dixon sequences. In total, this gives:
- train set: 300 scans
- validation set: 72 scans

In [1]:
import pandas as pd
import os
import random

# private libraries
import sys

if "../scripts" not in sys.path:
    sys.path.insert(1, "../scripts")
import config

In [2]:
# Look for annotated scans: every "*.nii.gz" file in the annotations folder.
# The context manager releases the directory handle as soon as listing is done.
with os.scandir(config.ukbb + "annotations") as entries:
    annotations = [entry.name for entry in entries if entry.name.endswith(".nii.gz")]
# The subject id is the integer prefix before the first underscore of the file name.
patients = {int(name.split("_")[0]) for name in annotations}

# Load manifest and keep only rows of annotated subjects in sections 1-3
data = pd.read_csv(config.ukbb + "manifest.csv")
keep = data["eid"].isin(patients) & data["section"].isin([1, 2, 3])
data = data.loc[keep].reset_index(drop=True)


# Map water only labels to other sequence types
def _water_label(image_path):
    """Derive the label name from a manifest image path.

    "/" becomes "_" and the substrings "in", "opp" and "F" each become "W",
    applied in that order.
    """
    # NOTE(review): the replacements act on the whole path, not only on the
    # sequence component -- assumes no other part of the path contains
    # "in", "opp" or "F"; confirm against the manifest.
    label = image_path.replace("/", "_")
    for sequence_tag in ("in", "opp", "F"):
        label = label.replace(sequence_tag, "W")
    return label


data["label"] = data["image"].apply(_water_label)

print(f"Found {len(data)} scans for a total of {len(patients)} subjects.")

Found 372 scans for a total of 31 subjects.


In [3]:
# Train and validation split
# Draw the validation subjects from a *sorted* list. `patients` is a set, and
# its iteration order is not a stable contract (it can vary with the order in
# which ids were inserted), so sampling from `list(patients)` could give a
# different split for the same seed. Sorting makes the seeded draw depend only
# on the subject ids themselves.
# NOTE: this may select different validation subjects than a previous run that
# sampled from the unsorted set -- regenerate both CSV files together.
random.seed(42)
valid_patients = set(random.sample(sorted(patients), 6))
train_patients = patients - valid_patients
print(
    f"Put {len(train_patients)} subjects in the train set and {len(valid_patients)} subjects in the validation set."
)

Put 25 subjects in the train set and 6 subjects in the validation set.


In [4]:
# Save as csv
# Select the manifest rows belonging to each subject group.
in_train = data["eid"].isin(train_patients)
in_valid = data["eid"].isin(valid_patients)
train_set = data[in_train].reset_index(drop=True)
valid_set = data[in_valid].reset_index(drop=True)

# Validation rows are written in manifest order.
valid_path = config.ukbb + "valid_finetuning.csv"
valid_set.to_csv(valid_path, index=False)

# Training rows are shuffled with a fixed seed before being written.
train_path = config.ukbb + "train_finetuning.csv"
shuffled_train = train_set.sample(frac=1, random_state=13)
shuffled_train.to_csv(train_path, index=False)