In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

import pickle

# Constants

In [2]:
# --- Input locations -------------------------------------------------------
# CSV read into the `organs` table, and the folder whose file names are listed
# to find out which latents exist.
DATASET_FOLDER = "./dataset_obj/"
DATASET_CSV = "organs_selected.csv"

# --- Split parameters ------------------------------------------------------
# TEST_SIZE is the fraction held out by the first split; VAL_SIZE is the
# fraction of that held-out part which the second split assigns to validation
# (0.2 * 0.5 -> 10% validation, 10% test, 80% train).
VAL_SIZE = 0.5
TEST_SIZE = 0.2
# Seed value reserved for the randomised steps of the notebook.
RANDOM_STATE = 42

# Load data

In [3]:
# Read the organ metadata table from the CSV configured in DATASET_CSV.
organs = pd.read_csv(filepath_or_buffer=DATASET_CSV)

In [4]:
# Names of the latent files found in DATASET_FOLDER, with the ".pt" extension
# removed so they can be matched against organs["ID"] further down.
# Only a *trailing* ".pt" is stripped: str.replace(".pt", "") would also delete
# ".pt" occurring in the middle of a file name, yielding a name that silently
# matches no ID. Entries without the suffix are kept unchanged, as before.
LATENT_SUFFIX = ".pt"
available_latents = [
    fname[: -len(LATENT_SUFFIX)] if fname.endswith(LATENT_SUFFIX) else fname
    for fname in os.listdir(DATASET_FOLDER)
]

In [5]:
# Display the loaded metadata table (bare last expression -> rich notebook output).
organs

Unnamed: 0,Name,Category,URL,SubCategory
0,s1273_liver.nii.g_1.stl,liver,https://uni-duisburg-essen.sciebo.de/s/8wGC9bf...,liver
1,s0001_aorta.nii.g_1.stl,aorta,https://uni-duisburg-essen.sciebo.de/s/8wGC9bf...,aorta
2,s0001_kidney_right.nii.g_1.stl,kidney right,https://uni-duisburg-essen.sciebo.de/s/8wGC9bf...,kidney
3,s0001_liver.nii.g_1.stl,liver,https://uni-duisburg-essen.sciebo.de/s/8wGC9bf...,liver
4,s0004_aorta.nii.g_1.stl,aorta,https://uni-duisburg-essen.sciebo.de/s/8wGC9bf...,aorta
...,...,...,...,...
3584,099283_liver.stl,liver,https://uni-duisburg-essen.sciebo.de/s/65svBkR...,liver
3585,099815_aorta.stl,aorta,https://uni-duisburg-essen.sciebo.de/s/65svBkR...,aorta
3586,099815_heart.stl,heart,https://uni-duisburg-essen.sciebo.de/s/65svBkR...,heart
3587,099815_kidneyright.stl,kidney right,https://uni-duisburg-essen.sciebo.de/s/65svBkR...,kidney


In [6]:
# Number of metadata rows next to the number of latent files found on disk.
n_rows, n_latents = len(organs), len(available_latents)
n_rows, n_latents

(3589, 3586)

In [7]:
# Build the per-row identifier "<row index>_<mesh name up to the first '.stl'>".
# This is the string later compared against the latent file names.
mesh_stem = organs['Name'].map(lambda name: name.split('.stl')[0])
row_prefix = organs.index.astype(str) + '_'
organs['ID'] = row_prefix + mesh_stem

In [8]:
# Keep only the rows whose ID has a matching latent file, then show both
# counts side by side.
has_latent = organs["ID"].isin(available_latents)
organs = organs[has_latent]
len(organs), len(available_latents)

(3586, 3586)

In [9]:
# Stage 1: hold out TEST_SIZE of the rows, stratified by organ category.
# random_state pins the shuffle so that re-running the notebook regenerates the
# same split; without it every run produced different train/test/val ID lists
# (RANDOM_STATE was defined in the constants cell but never passed).
organs_train, organs_holdout = train_test_split(
    organs,
    test_size=TEST_SIZE,
    stratify=organs["Category"],
    random_state=RANDOM_STATE,
)
# Stage 2: divide the held-out rows into test and validation; VAL_SIZE is the
# fraction of the held-out rows that goes to validation.
organs_test, organs_val = train_test_split(
    organs_holdout,
    test_size=VAL_SIZE,
    stratify=organs_holdout["Category"],
    random_state=RANDOM_STATE,
)

# The three splits together must account for every row.
assert len(organs_train) + len(organs_test) + len(organs_val) == len(organs)

organs_train.shape, organs_test.shape, organs_val.shape

((2868, 5), (359, 5), (359, 5))

In [10]:
# Plain Python lists of the IDs belonging to each split.
lista_ids_train = organs_train["ID"].tolist()
lista_ids_test = organs_test["ID"].tolist()
lista_ids_val = organs_val["ID"].tolist()

In [11]:
# Persist each split's ID list to its own pickle file in the working directory.
ids_by_pickle = {
    'train_organs.pickle': lista_ids_train,
    'test_organs.pickle': lista_ids_test,
    'val_organs.pickle': lista_ids_val,
}
for pickle_path, split_ids in ids_by_pickle.items():
    with open(pickle_path, 'wb') as file:
        pickle.dump(split_ids, file)

In [12]:
# Export the (ID, Category) pairs of the retained rows as a CSV without a
# header row and without the index column.
csv_organs = organs.loc[:, ['ID', 'Category']]
csv_organs.to_csv('organs_selected_complete.csv', header=False, index=False)