# Settings

In [1]:
# Seed handed to hydrocephalus.seed.random_seed wherever the notebook seeds a run.
SEED: int = 42
# When True, predictions are passed through hydrocephalus.postprocess.postprocess
# and the result file name gets a "_postprocess" suffix.
POSTPROCESS: bool = True
# Set to True when running on Google Colab: switches the data/model paths and
# installs the package from GitHub instead of the local checkout.
COLAB: bool = False

## Note
CPU does not support half precision

# Get the data

## Note
If you are running the notebook on Colab or any other GPU provider, you need to download your own dataset.
<br>
We used Google Drive to store the dataset. You can find a few useful commands below.

In [2]:
if COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    
    # Copy models created before
    !cp -r ./drive/My\ Drive/Code/CV/results/ .

    # Copy your csv with splits 
    !cp ./drive/My\ Drive/Code/CV/split.csv .
    
    # Copy your data 
    !cp ./drive/My\ Drive/data2.tar.gz .
    !tar xzf data2.tar.gz
    
    # Create useful directories
    !mkdir models
    for i in range(10):
        !results/cp {i}.pth models/

## Set up constants

In [3]:
# Choose the filesystem layout that matches the current environment.
if COLAB:
    # Colab: split file, data and models sit in the working directory,
    # results are written to the mounted Drive folder.
    SPLIT_PATH = 'split.csv'
    DATA_PATH = './data/'
    MODELS_PATH = 'models'
    RESULTS_PATH = './drive/My Drive/Code/CV/results/'
else:
    # Local run: everything is addressed relative to the parent directory.
    SPLIT_PATH = '../split/sample_split.csv'
    DATA_PATH = '../data/'
    MODELS_PATH = '../models'
    RESULTS_PATH = '../results/'

## Note
Colab requires a runtime restart after the installation.

In [4]:
if COLAB:
    !pip install -e git+https://github.com/fast-radiology/hydrocephalus.git#egg=hydrocephalus\&subdirectory=src
else:
    !pip install -e ../src

Obtaining file:///Users/nozdi/python/hydrocephalus/src
Installing collected packages: hydrocephalus
  Found existing installation: hydrocephalus 1.0.0
    Uninstalling hydrocephalus-1.0.0:


      Successfully uninstalled hydrocephalus-1.0.0
  Running setup.py develop for hydrocephalus
Successfully installed hydrocephalus


In [5]:
import os

import fastai.vision
import numpy as np
import pandas as pd

import hydrocephalus


def _path_contains(fragment):
    """Build a predicate that tests whether `fragment` occurs in ``str(path)``.

    The fragment is bound when the predicate is created, so the predicate keeps
    returning the same answers even after the loop variable it was derived from
    has moved on (a plain lambda in a loop would look the variable up late).
    """

    def predicate(img_src):
        return fragment in str(img_src)

    return predicate


# Seed through the project helper before any data is loaded.
hydrocephalus.seed.random_seed(SEED)

# Replace fastai's image/mask openers with the project's DICOM openers, both in
# the fastai modules and under the local names used in this notebook.
fastai.vision.image.open_image = hydrocephalus.dicom.open_dcm_image
fastai.vision.image.open_mask = hydrocephalus.dicom.open_dcm_mask
fastai.vision.data.open_image = hydrocephalus.dicom.open_dcm_image
fastai.vision.data.open_mask = hydrocephalus.dicom.open_dcm_mask
open_image = hydrocephalus.dicom.open_dcm_image
open_mask = hydrocephalus.dicom.open_dcm_mask


# Metric callables forwarded to hydrocephalus.metrics.get_result.
metrics = [
    hydrocephalus.metrics.dice,
    hydrocephalus.metrics.iou,
    hydrocephalus.metrics.accuracy,
    hydrocephalus.metrics.tp,
    hydrocephalus.metrics.fp,
    hydrocephalus.metrics.fn,
]


cv_df = pd.read_csv(SPLIT_PATH)

# Batch size passed to get_data; it does not change between splits.
bs = 10

# Create the output directory up front so a missing folder does not surface
# only at to_csv time, after all predictions have already been computed.
os.makedirs(RESULTS_PATH or '.', exist_ok=True)

# One iteration per split number 0..max; each split is expected to be exactly
# one row of the CSV so that squeeze() yields a Series.
for cv_split_num in range(cv_df.split.max() + 1):
    split_series = cv_df[cv_df.split == cv_split_num].squeeze()
    print(f"RUNNING:\n{split_series}")

    scans = hydrocephalus.data.get_scans(DATA_PATH)

    # The 'patients' cell holds a comma-separated list of patient names.
    validation_patients = split_series['patients'].split(',')
    validation_patient_dirs = [f"{patient}/" for patient in validation_patients]

    size = hydrocephalus.dicom.get_shape(scans[0])

    results = []

    for val_patient_dir in validation_patient_dirs:
        val_filtering_func = _path_contains(val_patient_dir)
        val_patient_scans = list(filter(val_filtering_func, scans))
        # NOTE(review): index 3 assumes scan paths shaped like
        # '<x>/<y>/<patient>/<examination>/...' (true for both DATA_PATH
        # values set in this notebook) - confirm if DATA_PATH changes depth.
        examinations = pd.Series(val_patient_scans).str.split('/').str[3].unique()

        for examination in examinations:
            val_examination_filtering_func = _path_contains(
                val_patient_dir + examination
            )
            val_patient_examination_scans = list(
                filter(val_examination_filtering_func, scans)
            )

            # The examination predicate is passed as the validation selector.
            data = hydrocephalus.data.get_data(
                scans, valid_func=val_examination_filtering_func, bs=bs, size=size
            )

            # Re-seed before building each learner.
            hydrocephalus.seed.random_seed(SEED)

            learn = hydrocephalus.learner.get_learner(data, model_dir=MODELS_PATH)

            # Load the weights saved under the split number (e.g. '1').
            learn.load(f'{cv_split_num}')
            preds, true = learn.get_preds()
            true = true.squeeze()
            # Collapse dimension 1 of the predictions to the arg-max index.
            preds = preds.argmax(1)

            if POSTPROCESS:
                preds = hydrocephalus.postprocess.postprocess(preds)

            results.append(hydrocephalus.metrics.get_result(
                preds=preds,
                true=true,
                scans=val_patient_examination_scans,
                metrics=metrics,
                split=cv_split_num,
                examination=examination,
            ))

    # Join with os.path so the file lands inside RESULTS_PATH whether or not
    # the constant ends with a path separator.
    suffix = "_postprocess" if POSTPROCESS else ""
    csv_name = os.path.join(RESULTS_PATH, f'{cv_split_num}_proper{suffix}.csv')
    df = pd.DataFrame(results)
    df.to_csv(csv_name, index=False)
    print(df)
    # Per-split average of the headline columns.
    print(
        df[
            ['accuracy', 'dice', 'iou', 'precision', 'recall', 'volumetric_similarity']
        ].mean()
    )


   accuracy      dice examination   fn   fp       iou  precision  \
0  0.999466  0.995206        P1B1  171  109  0.990458   0.996264   
1  0.999504  0.995157        P1B2  163   97  0.990361   0.996382   
2  0.999466  0.995206        P3B1  171  109  0.990458   0.996264   

   preds_volume    recall  split     tp  true_volume  volumetric_similarity  
0   2901.708678  0.994151      1  29064  2907.875543               0.998939  
1   2666.870499  0.993936      1  26715  2673.435226               0.998771  
2   2901.708678  0.994151      1  29064  2907.875543               0.998939  
accuracy                 0.999479
dice                     0.995190
iou                      0.990426
precision                0.996303
recall                   0.994079
volumetric_similarity    0.998883
dtype: float64
