In [None]:
from project.dataset import Dataset
import pandas as pd
import nibabel as nib

In [None]:
# Project dataset helper; the cells below call it to obtain the raw MRI and CMB mask entries.
ds = Dataset()

### Load all cases


Generate the slice count for each case with a list comprehension and store it in `slices`.


In [None]:
# Entries for every case; each MRI entry is handed to `nib.load` below.
mri = ds.load_raw_mri()
masks = ds.load_cmb_masks()
# The slice count is the size of the third axis. `img.shape` is taken from the
# image header, so the voxel data is not read into memory just to count slices.
slices = [nib.load(x).shape[2] for x in mri]

Create `standard_df`, one row per case, for the unprocessed data.


In [None]:
# One row per case: the MRI entry, its CMB mask entry, and its slice count.
standard_df = pd.DataFrame(dict(mri=mri, masks=masks, slices=slices))

In [None]:
# Load the first mask's voxel data as an array and display it.
sample_mask = nib.load(masks[0]).get_fdata()
sample_mask

In [None]:
# Dimensions of the sample mask; the third axis is the one this notebook treats as the slice axis.
sample_mask.shape

In [None]:
# First slice of the sample mask along the third axis.
sample_mask[:, :, 0]

Define a function that generates one row per target slice for each case.


In [None]:
def generate_target_slice(mri, masks, slices, cohort=0):
    """Build a per-slice metadata table for a set of MRI cases.

    Every case contributes one row per slice index. Each row records the MRI
    and mask entries, the slice index, whether the case's mask has any
    positive voxel, whether that particular mask slice has any positive
    voxel, the cohort label, and the maximum value of the MRI volume.

    Parameters
    ----------
    mri : sequence
        MRI entries accepted by ``nib.load``, one per case.
    masks : sequence
        Mask entries accepted by ``nib.load``, positionally aligned with ``mri``.
    slices : sequence of int
        Number of slice indices (along the third axis) to emit for each case.
    cohort : int, default 0
        Label copied into the ``cohort`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``mri``, ``masks``, ``target``, ``has_microbleed_case``,
        ``has_microbleed_slice``, ``cohort`` and ``max_value``.

    Raises
    ------
    ValueError
        If ``mri``, ``masks`` and ``slices`` do not all have the same length.
    """
    # Fail fast: pairing is positional, so unequal lengths would either crash
    # mid-loop with an unrelated IndexError or silently leave entries unpaired.
    if len(mri) != len(masks):
        raise ValueError(f'Unequal amount of mri cases to cmb masks\t{len(mri)} to {len(masks)}')
    if len(mri) != len(slices):
        raise ValueError(f'Unequal amount of mri cases to case slice counts\t{len(mri)} to {len(slices)}')

    columns = [
        'mri',
        'masks',
        'target',
        'has_microbleed_case',
        'has_microbleed_slice',
        'cohort',
        'max_value',
    ]
    rows = []

    for mri_entry, mask_entry, slice_count in zip(mri, masks, slices):
        # Both values are per-case, so compute them once outside the slice loop.
        maxval = nib.load(mri_entry).get_fdata().max()
        mask_data = nib.load(mask_entry).get_fdata()
        has_microbleed_case = 1 if mask_data.max() > 0 else 0

        for target in range(slice_count):
            # A slice is positive when any voxel in that third-axis plane is > 0.
            has_microbleed_slice = 1 if mask_data[:, :, target].max() > 0 else 0
            rows.append((
                mri_entry,
                mask_entry,
                target,
                has_microbleed_case,
                has_microbleed_slice,
                cohort,
                maxval,
            ))

    return pd.DataFrame(rows, columns=columns)

In [None]:
# Per-slice table for all cases at once; `cohort` is left at its default of 0.
df = generate_target_slice(mri, masks, slices)

### Load per cohort

In [None]:
# Entries for cohort 1 (presumably the argument selects the cohort; confirm against `Dataset`).
ch1mri = ds.load_raw_mri(1)
ch1masks = ds.load_cmb_masks(1)
# Third-axis size read from the image header; avoids loading each volume's
# voxel data just to count slices.
ch1slices = [nib.load(x).shape[2] for x in ch1mri]

ch1df = generate_target_slice(ch1mri, ch1masks, ch1slices, cohort=1)

In [None]:
# Entries for cohort 2 (presumably the argument selects the cohort; confirm against `Dataset`).
ch2mri = ds.load_raw_mri(2)
ch2masks = ds.load_cmb_masks(2)
# Third-axis size read from the image header; avoids loading each volume's
# voxel data just to count slices.
ch2slices = [nib.load(x).shape[2] for x in ch2mri]

ch2df = generate_target_slice(ch2mri, ch2masks, ch2slices, cohort=2)

In [None]:
# Entries for cohort 3 (presumably the argument selects the cohort; confirm against `Dataset`).
ch3mri = ds.load_raw_mri(3)
ch3masks = ds.load_cmb_masks(3)
# Third-axis size read from the image header; avoids loading each volume's
# voxel data just to count slices.
ch3slices = [nib.load(x).shape[2] for x in ch3mri]

ch3df = generate_target_slice(ch3mri, ch3masks, ch3slices, cohort=3)

### Check that all data except the cohort number matches the old version `df`

In [None]:
# Stack the three cohort tables into one, renumbering the rows from 0.
ff = pd.concat([ch1df, ch2df, ch3df], ignore_index=True)

In [None]:
# Element-wise equality between the all-cases table and the concatenated
# per-cohort table. `cohort` is part of the comparison: `df` was built with the
# default cohort of 0 while `ff` carries 1-3, so that column is expected to differ.
compared_columns = [
    'mri',
    'masks',
    'target',
    'has_microbleed_case',
    'has_microbleed_slice',
    'max_value',
    'cohort',
]
check = df[compared_columns] == ff[compared_columns]

In [None]:
# Visual check: heatmap of the boolean comparison table.
# NOTE(review): `sns` is not imported in the import cell visible at the top of this
# notebook; add `import seaborn as sns` there, otherwise this cell raises NameError
# on a fresh kernel.
sns.heatmap(check)

If all is well, make `ff` the new `df`.

In [None]:
# Rebind `df` to the per-cohort table; both names now refer to the same DataFrame object.
df = ff

---

In [None]:
# Peek at five randomly chosen rows (no seed is passed, so the selection changes between runs).
df.sample(5)

This should return no rows: a slice cannot contain a microbleed when its case contains none.


In [None]:
# Rows flagged as a microbleed slice inside a case flagged as microbleed-free.
df.loc[df['has_microbleed_case'].eq(0) & df['has_microbleed_slice'].eq(1)]

Check how many cases have microbleeds


In [None]:
# Number of distinct mask entries in the table.
df['masks'].drop_duplicates().shape[0]

In [None]:
# Number of distinct mask entries among rows whose case is flagged as having a microbleed.
df.loc[df['has_microbleed_case'] == 1, 'masks'].nunique()

In [None]:
# Highest slice index recorded for each MRI entry.
ar = df.groupby('mri')['target'].max()
ar

In [None]:
# Largest slice count recorded for each MRI entry in the raw per-case table.
arr = standard_df.groupby('mri')['slices'].max()
# Display the series computed in this cell (`arr`, not `ar` from the previous cell).
arr

### Check if all slices match with the raw dataframe count


In [None]:
# `target` is a 0-based slice index, so each case's highest target should equal its slice count minus one.
(ar == (arr - 1)).all()

### Export as metadata


In [None]:
# Write the per-slice table to `targets.csv` in the working directory, without the index column.
df.to_csv('targets.csv', index=False)