# Settings

In [1]:
# True when the notebook runs on Google Colab: enables the Drive mount/copy
# cell, the Colab-specific paths and the install-from-GitHub branch.
COLAB = False
# Seed passed to hydrocephalus.seed.random_seed.
SEED = 42
# Split number looked up in the split CSV to pick the validation patients;
# a value above the largest split number in the CSV trains on the whole dataset.
CV_SPLIT_NUM = 1

## Note
Half precision (FP16) is not supported on CPU.

# Get the data

## Note
If you are running this notebook on Colab or any other GPU provider, you need to download your own dataset.
<br>
We used Google Drive to store the dataset. You can find a few useful commands below.

In [2]:
# Colab-only setup: mount Google Drive, then pull the split CSV and the dataset
# archive into the current working directory and unpack the archive.
# NOTE(review): the relative ./drive/... paths presumably assume the working
# directory is /content (the parent of the mount point) — confirm.
if COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    
    # Copy the CSV that defines the cross-validation splits.
    !cp ./drive/My\ Drive/Code/CV/split.csv .
    
    # Copy the dataset archive and extract it here.
    !cp ./drive/My\ Drive/data2.tar.gz .
    !tar xzf data2.tar.gz


# Setup consts

In [3]:
# Locations of the split CSV, the scans, the model checkpoints and the result
# files, plus the batch size — chosen according to where the notebook runs.
if COLAB:
    SPLIT_PATH = 'split.csv'
    DATA_PATH = './data/'
    MODELS_PATH = 'models'
    RESULTS_PATH = './drive/My Drive/Code/CV/results/'
    BATCH_SIZE = 10
else:
    SPLIT_PATH = '../split/sample_split.csv'
    DATA_PATH = '../data/'
    MODELS_PATH = '../models'
    RESULTS_PATH = '../results/'
    BATCH_SIZE = 2

## Note
Colab requires a runtime restart after installation.

In [4]:
if COLAB:
    !pip install -e git+https://github.com/fast-radiology/hydrocephalus.git#egg=hydrocephalus\&subdirectory=src
else:
    !pip install -e ../src

Obtaining file:///Users/nozdi/python/hydrocephalus/src
Installing collected packages: hydrocephalus
  Found existing installation: hydrocephalus 1.0.0
    Uninstalling hydrocephalus-1.0.0:


      Successfully uninstalled hydrocephalus-1.0.0
  Running setup.py develop for hydrocephalus
Successfully installed hydrocephalus


In [5]:
import os

import pandas as pd
import numpy as np
import fastai.vision
from fastai.metrics import dice

import hydrocephalus


# Seed the run before anything random happens.
hydrocephalus.seed.random_seed(SEED)

# Notebook-level aliases for the project's image / mask openers (presumably
# DICOM readers, judging by their names).
open_image = hydrocephalus.dicom.open_dcm_image
open_mask = hydrocephalus.dicom.open_dcm_mask

# Replace fastai's own openers with the project ones in both fastai modules
# (presumably because each module holds its own reference — TODO confirm).
fastai.vision.image.open_image = fastai.vision.data.open_image = open_image
fastai.vision.image.open_mask = fastai.vision.data.open_mask = open_mask


def iou(preds, target):
    """Metric wrapper: evaluate fastai's ``dice`` with ``iou=True``.

    Kept as a named function because its ``__name__`` ("iou") is used as the
    column name when the per-epoch scores are tabulated.
    """
    score = dice(preds, target, iou=True)
    return score


# Metrics handed to the learner; their __name__ values become result columns.
metrics = [dice, iou]


cv_df = pd.read_csv(SPLIT_PATH)

# A split number above the largest one in the CSV means "no validation set":
# the model is then trained on the whole dataset.
if CV_SPLIT_NUM <= cv_df['split'].max():
    split_rows = cv_df[cv_df['split'] == CV_SPLIT_NUM]
    # Fail early with a readable message when the split id is missing or
    # duplicated, instead of an obscure attribute error further down.
    if len(split_rows) != 1:
        raise ValueError(
            f"Expected exactly one row with split == {CV_SPLIT_NUM} "
            f"in {SPLIT_PATH}, found {len(split_rows)}"
        )
    split_series = split_rows.iloc[0]
    print(f"RUNNING:\n{split_series}")
    # Drop surrounding whitespace and empty tokens: an empty patient id would
    # become the directory "/" and match every scan path.
    validation_patients = [
        patient.strip()
        for patient in split_series['patients'].split(',')
        if patient.strip()
    ]
    validation_patient_dirs = [f"{patient}/" for patient in validation_patients]
else:
    print("RUNNING TRAIN ON THE WHOLE DATASET")
    validation_patients = []
    validation_patient_dirs = []

scans = hydrocephalus.data.get_scans(DATA_PATH)
# In-place shuffle of the scan list.
np.random.shuffle(scans)

# The first scan (after shuffling) determines the size passed to get_data.
size = hydrocephalus.dicom.get_shape(scans[0])


def valid_func(img_src):
    """Return True when the path text contains any validation patient directory.

    NOTE(review): this is a plain substring test on ``"<patient>/"``, so an id
    that is a suffix of another directory name would also match — confirm the
    patient ids and directory layout make that impossible.
    """
    path_text = str(img_src)
    return any(patient_dir in path_text for patient_dir in validation_patient_dirs)


data = hydrocephalus.data.get_data(
    scans,
    valid_func=valid_func,
    bs=BATCH_SIZE,
    size=size,
)

# Re-seed immediately before the learner is built (presumably so that model
# initialisation and training do not depend on the randomness consumed by the
# data-preparation steps — TODO confirm).
hydrocephalus.seed.random_seed(SEED)

learn = hydrocephalus.learner.get_learner(
    data,
    metrics=metrics,
    model_dir=MODELS_PATH,
)

# One-cycle training: number of epochs and maximum learning rate.
n_epochs, max_lr = 5, 1e-4
learn.fit_one_cycle(n_epochs, max_lr, pct_start=0.3, wd=1e-7)

# Persist the trained weights first, so a failure while writing the CSV
# (e.g. an unwritable results directory) cannot lose a finished training run.
learn.save(f'{CV_SPLIT_NUM}')

metric_names = [metric.__name__ for metric in metrics]

# One record per epoch. `learn.recorder.losses` holds one entry per batch, so
# the training loss reported for an epoch is the entry of its last batch.
scores = []
batches_seen = 0
for epoch_idx, epoch_batches in enumerate(learn.recorder.nb_batches):
    batches_seen += epoch_batches
    score = {
        'split': CV_SPLIT_NUM,
        'val_patients': ','.join(validation_patients),
        'epoch': epoch_idx + 1,
        'train_loss': learn.recorder.losses[batches_seen - 1].item(),
    }
    # Validation loss and metrics are only read when a validation set exists.
    if validation_patients:
        score['val_loss'] = learn.recorder.val_losses[epoch_idx].item()
        score.update(
            (name, value.item())
            for name, value in zip(metric_names, learn.recorder.metrics[epoch_idx])
        )
    scores.append(score)

columns = ['split', 'val_patients', 'epoch', 'train_loss']
if validation_patients:
    columns += ['val_loss'] + metric_names

scores_df = pd.DataFrame(scores, columns=columns)

# Create the results directory if needed and build the file path with
# os.path.join so it does not depend on RESULTS_PATH ending in a slash.
os.makedirs(RESULTS_PATH, exist_ok=True)
scores_df.to_csv(os.path.join(RESULTS_PATH, f'{CV_SPLIT_NUM}.csv'), index=False)

# Copy the checkpoint for this split into the results location.
# NOTE(review): assumes the `{CV_SPLIT_NUM}.pth` file lives directly under
# MODELS_PATH relative to the working directory — confirm against how the
# learner resolves model_dir.
!cp "{MODELS_PATH}/{CV_SPLIT_NUM}.pth" "{RESULTS_PATH}"
# Bare last expression: the notebook renders the per-epoch scores table.
scores_df

RUNNING:
num_examinations      2,1
num_samples             3
patients            P1,P3
split                   1
Name: 1, dtype: object


epoch,train_loss,valid_loss,dice,iou,time
0,0.878447,0.869752,0.028337,0.014399,03:32
1,0.800474,0.870337,0.124878,0.066598,02:59
2,0.821161,0.856889,0.137489,0.07382,02:42
3,0.78833,0.766701,0.0,0.0,02:30
4,0.684027,0.821083,0.0,0.0,02:30


Unnamed: 0,split,val_patients,epoch,train_loss,val_loss,dice,iou
0,1,"P1,P3",1,0.878447,0.869752,0.028337,0.014399
1,1,"P1,P3",2,0.800474,0.870337,0.124878,0.066598
2,1,"P1,P3",3,0.821161,0.856889,0.137489,0.07382
3,1,"P1,P3",4,0.78833,0.766701,0.0,0.0
4,1,"P1,P3",5,0.684027,0.821083,0.0,0.0
