# Setup

In [1]:
from IPython import get_ipython
from tqdm.notebook import tqdm
import pickle
import os

# Custom modules
from SliceViewer import ImageSliceViewer3D
from investigate import *

# Custom functions
def save_object(obj, filename):
    """Serialise ``obj`` to ``filename`` with pickle's highest protocol.

    The file is opened in binary write mode, so an existing file at that
    path is replaced.
    """
    with open(filename, mode='wb') as sink:
        pickler = pickle.Pickler(sink, pickle.HIGHEST_PROTOCOL)
        pickler.dump(obj)

def load_object(filename):
    """Read ``filename`` in binary mode and return the object pickled in it.

    Unpickling can execute arbitrary code, so only use this on files the
    notebook itself produced.
    """
    with open(filename, mode='rb') as source:
        restored = pickle.load(source)
    return restored

In [2]:
# Programmatic form of the `%tb` line magic, which re-displays the most recent traceback.
# NOTE(review): looks like a leftover debugging aid — confirm it is still needed.
get_ipython().run_line_magic('tb', '')

No traceback available to show.


In [3]:
# Directory the kernel was started in; the JSON config path is built relative to it.
cwd = os.getcwd()
# Root folder that is walked for the per-patient sub-folders.
# NOTE(review): absolute path is hard-coded — adjust when running elsewhere.
data = '/tf/data'
# Folder where the intermediate pickles are written and read back.
output = '/tf/output'

## Section 1

In [None]:
# Load valid patient list
# `pd` and `display` are imported explicitly so this cell does not depend on
# names leaking in through `from investigate import *`.
import pandas as pd
from IPython.display import display

infile  = 'patlist_with_validBMI.csv'
df_P = pd.read_csv(infile, index_col=False)
# Drop columns whose header starts with "Unnamed".
df_P = df_P.loc[:, ~df_P.columns.str.contains('^Unnamed')]
# Restrict the frame to the GIVEN_MRN, PAT_ID and ACC columns.
df_P = df_P[['GIVEN_MRN','PAT_ID','ACC']]
print('Columns of df_P: ', list(df_P))
print('Length of df_P: ', len(df_P))
display(df_P.head(10))
#print('# of Unique patients: ', len(df_P.subject_id.unique()))

In [None]:
# The first os.walk() entry describes `data` itself as
# (dirpath, dirnames, filenames); its dirnames are the immediate sub-folders.
top_level = next(os.walk(data))
pats = top_level[1]
print('Total patient folders in data dir: ',len(pats))

In [None]:
# The patient ID is the part of each folder name before the first '-'.
patids = [pat.split('-')[0] for pat in pats]

# Build the lookup set once instead of scanning the PAT_ID column for every folder.
known_ids = set(df_P.PAT_ID.values)

# Pair each ID with its folder a single time, then derive both result sets
# from the same matched pairs so they cannot drift apart.
matched = [(pid, folder) for pid, folder in zip(patids, pats) if pid in known_ids]

valid_ids = {pid for pid, _folder in matched}
valid_dirs = {os.path.join(data, folder) for _pid, folder in matched}

In [None]:
print('valid ids: ',len(valid_ids))

In [None]:
# Import modules and config file
import json
from run_sma_experiment import find_l3_images, output_images
import pprint

pp = pprint.PrettyPrinter(indent=1)

# The config file is addressed relative to the directory the kernel started in.
configfile = os.path.join(cwd, 'config/debug_ES/series_filter_ds1.json')
with open(configfile, "r") as config_fh:
    config = json.loads(config_fh.read())

print('Current config dict: ')
pp.pprint(config)

In [None]:
# Narrow the loaded JSON to its "series_filter" section.
# The membership check keeps the cell re-runnable: after the first run `config`
# already is the narrowed section, and an unconditional lookup of
# "series_filter" on it would raise KeyError.
if "series_filter" in config:
    config = config["series_filter"]
config["new_tim_dicom_dir_structure"] = True
# `valid_ids` is stored as-is (a set), so this dict is no longer JSON-serialisable.
config["valid_ids"] = valid_ids

In [None]:
# Debug
from L3_finder import *

In [None]:
# Debug
print("Finding subjects")

# Materialise whatever find_subjects yields so it can be counted and reused.
subjects = [
    *find_subjects(
        config["dicom_dir"],
        new_tim_dir_structure=config["new_tim_dicom_dir_structure"],
    )
]

print('Subjects found: ', len(subjects))

In [None]:
# Keep only the subjects whose ID is in the validated set, then report both counts.
subjects = list(filter(lambda subject: subject.id_ in valid_ids, subjects))
print('Subjects found: ', len(subjects))
print('Valid Subjects: ', len(valid_ids))

## Section 2

In [None]:
# Find Duplicate Subjects
# `seen_ids` gives constant-time membership tests; the two lists keep the
# first-seen order of unique IDs and every repeated occurrence respectively.
seen_ids = set()
unique_subjects = []
duplicate_subjects = []
for subject in subjects:
    if subject.id_ in seen_ids:
        duplicate_subjects.append(subject.id_)
    else:
        seen_ids.add(subject.id_)
        unique_subjects.append(subject.id_)

# This cell only reports duplicates; it does not remove them from `subjects`.
print(duplicate_subjects)

In [None]:
print_subject_paths(subjects)

In [None]:
print_subject_series('Z619766','/tf/data/Z619766-19070630-Z619766-_')

In [None]:
# Pass `subjects` explicitly, matching the other get_subject_series calls in
# this notebook (the executed ones all supply it as the third argument).
imseries = get_subject_series('Z619766','Z619766-SE-1-2.0',subjects)
print(imseries.orientation,' ' , imseries.slice_thickness)
# Kept in `imdata` for the slice viewer cell that follows.
imdata = imseries.pixel_data

In [None]:
%matplotlib inline
print(imdata.shape)
ImageSliceViewer3D(imdata)

In [None]:
# Persist the filtered subject list so later sections can reload it.
# NOTE(review): the file name says "noduplicates", but no visible cell removes
# duplicates from `subjects` (the duplicate check only prints repeated IDs) —
# confirm the list is duplicate-free before relying on the name.
save_object(subjects, os.path.join(output,'subjects_noduplicates.pkl'))

## Section 3

In [None]:
subjects = load_object(os.path.join(output,'subjects_noduplicates.pkl'))

In [None]:
%%time
from l3finder.ingest import *
from multiprocessing import get_context
from multiprocessing import set_start_method
#set_start_method("spawn")

if __name__=='__main__':
    # Find series images
    print("Finding series")
    series = list(flatten(s.find_series() for s in subjects))

    # Separate series
    print("Separating series")
    #sagittal_series, axial_series, excluded_series = separate_series(series)
    
    excluded_series = []

    sag_filter = functools.partial(
        same_orientation,
        orientation='sagittal',
        excluded_series=excluded_series
    )
    
    axial_filter = functools.partial(
        same_orientation,
        orientation='axial',
        excluded_series=excluded_series
    )

    def pool_filter(pool, func, candidates):
        return [
            c for c, keep
            in zip(candidates, tqdm(pool.imap(func, candidates),total=len(candidates)))
            if keep]
    
    print('Filtering series using ', multiprocessing.cpu_count(), ' cores:')
    with get_context("spawn").Pool() as p:
        sagittal_series = pool_filter(p, sag_filter, series)
        print("Processed Sagittals")
        axial_series = pool_filter(p, axial_filter, series)
        print("Processed Axials")

    
    
    print("Series seperated")

#remove_start_method("spawn")

In [None]:
print("Length of valid pats: ", len(subjects))
print("Length of sagittal series", len(sagittal_series))
print("Length of axial series", len(axial_series))
#print("Length of excluded series", len(excluded_series))
#print("Length of all series in dataset", len(series))

In [None]:
# Save required objects
save_object(axial_series, os.path.join(output,'axial_series.pkl'))
save_object(sagittal_series, os.path.join(output,'sagittal_series.pkl'))

## Section 4 - Create query dataframes before filtering the series

In [None]:
axial_series = load_object(os.path.join(output,'axial_series.pkl'))
sagittal_series = load_object(os.path.join(output,'sagittal_series.pkl'))
subjects = load_object(os.path.join(output,'subjects_noduplicates.pkl'))

In [None]:
df_a = get_summary_dfs(axial_series,sagittal_series,subjects)
save_object(df_a, os.path.join(output,'df_a.pkl'))

In [None]:
df_a_axials = get_summary_by_serieslength(axial_series)
df_a_sags = get_summary_by_serieslength(sagittal_series)
save_object(df_a_axials, os.path.join(output,'df_a_axials.pkl'))
save_object(df_a_sags, os.path.join(output,'df_a_sags.pkl'))

## Section 5 - Investigate subjects and series

In [None]:
df_a_axials = load_object(os.path.join(output,'df_a_axials.pkl'))
df_a_sags = load_object(os.path.join(output,'df_a_sags.pkl'))
df_a = load_object(os.path.join(output,'df_a.pkl'))

In [None]:
print("Length of subjects with atleast 1 axial or sagittal series: ", len(df_a))
print("Length of subjects with atleast 1 axial series: ", len(df_a_axials['ID'].unique()))
print("Length of subjects with atleast 1 sagittal series: ", len(df_a_sags['ID'].unique()))

In [None]:
# Patients without Axial
pats = [pat for pat in df_a['ID'].values if pat not in df_a_axials['ID'].values]
print(len(pats))
print(pats)

In [None]:
# Patients without Sagittal
pats = [pat for pat in df_a['ID'].values if pat not in df_a_sags['ID'].values]
print(len(pats))

In [None]:
imseries = get_subject_series('Z837620','Z837620-SE-6-Vol_Body_Vol._0.5',subjects)
print(imseries.orientation,' ' , imseries.slice_thickness)
imdata = imseries.pixel_data

In [None]:
%matplotlib inline
print(imdata.shape)
ImageSliceViewer3D(imdata)

In [None]:
print_summary_by_serieslength(df_a_axials)

In [None]:
print_summary_by_serieslength(df_a_sags)

## Section 6

In [None]:
# Cut-off for the length filter: only axial series whose `number_of_dicoms`
# is strictly greater than this value are kept.
MIN_AXIAL_DICOMS = 20
axial_series = [a_s for a_s in axial_series if a_s.number_of_dicoms > MIN_AXIAL_DICOMS]

In [None]:
len(axial_series)

In [None]:
save_object(axial_series, os.path.join(output,'axial_series_gt20.pkl'))

In [None]:
df_b_axials = get_summary_by_serieslength(axial_series)

In [None]:
print_summary_by_serieslength(df_b_axials)

## Section 7

In [4]:
from L3_finder import *

Using TensorFlow backend.


In [5]:
axial_series = load_object(os.path.join(output,'axial_series_gt20.pkl'))
sagittal_series = load_object(os.path.join(output,'sagittal_series.pkl'))
subjects = load_object(os.path.join(output,'subjects_noduplicates.pkl'))

In [6]:
# Accumulates the exclusion records returned by both filtering passes,
# axial first and sagittal second.
exclusions = []
if __name__ == '__main__':
    axial_series, ax_exclusions = filter_axial_series(axial_series)
    print('Axial series filtered')
    exclusions += ax_exclusions
    # Disabled step (kept for reference): reconstructing sagittals for
    # subjects that have none.
    #constructed_sagittals = construct_series_for_subjects_without_sagittals(
    #    subjects, sagittal_series, axial_series
    #)
    #print('Missing Sagittals reconstructed')
    #sagittal_series.extend(constructed_sagittals)
    sagittal_series, sag_exclusions = filter_sagittal_series(sagittal_series)
    print('Sagittal series filtered')
    exclusions += sag_exclusions

Axial series filtered
Sagittal series filtered


In [7]:
save_object(axial_series, os.path.join(output,'axial_series_filtered.pkl'))
save_object(sagittal_series, os.path.join(output,'sagittal_series_filtered.pkl'))
save_object(sag_exclusions, os.path.join(output,'sag_exclusions.pkl'))
save_object(ax_exclusions, os.path.join(output,'ax_exclusions.pkl'))

In [8]:
df_b = get_summary_dfs(axial_series,sagittal_series,subjects)
save_object(df_b, os.path.join(output,'df_b.pkl'))

In [9]:
df_b_axials = get_summary_by_serieslength(axial_series)
df_b_sags = get_summary_by_serieslength(sagittal_series)
save_object(df_b_axials, os.path.join(output,'df_b_axials.pkl'))
save_object(df_b_sags, os.path.join(output,'df_b_sags.pkl'))

## Section 8 - Investigate 

In [4]:
df_a_axials = load_object(os.path.join(output,'df_a_axials.pkl'))
df_a_sags = load_object(os.path.join(output,'df_a_sags.pkl'))
df_a = load_object(os.path.join(output,'df_a.pkl'))

df_b_axials = load_object(os.path.join(output,'df_b_axials.pkl'))
df_b_sags = load_object(os.path.join(output,'df_b_sags.pkl'))
df_b = load_object(os.path.join(output,'df_b.pkl'))

In [5]:
subjects = load_object(os.path.join(output,'subjects_noduplicates.pkl'))

axial_series = load_object(os.path.join(output,'axial_series_filtered.pkl'))
sagittal_series = load_object(os.path.join(output,'sagittal_series_filtered.pkl'))

# Later cells in this section index into `exclusions`, which is otherwise only
# built in the filtering cell and is lost on a kernel restart. Rebuild it from
# the two exclusion pickles, in the same axial-then-sagittal order.
ax_exclusions = load_object(os.path.join(output,'ax_exclusions.pkl'))
sag_exclusions = load_object(os.path.join(output,'sag_exclusions.pkl'))
exclusions = [*ax_exclusions, *sag_exclusions]

In [6]:
print_summary_by_serieslength(df_b_axials)
#print_summary_by_serieslength(df_b_sags)

interactive(children=(Dropdown(description='ptype', options=('equal', 'equal_greater', 'lesser'), value='equal…

In [7]:
print_subject_paths(subjects)

interactive(children=(Text(value='Z1243452', description='ID'), Output()), _dom_classes=('widget-interact',))

In [8]:
print_subject_series('Z416634','/tf/data/Z416634-19160310-Z416634-CT_ABDOMEN_PELVIS_W_CONTRAST',subjects,axial_series)

interactive(children=(Text(value='Z416634', description='ID'), Text(value='/tf/data/Z416634-19160310-Z416634-C…

In [13]:
# Visualize
imseries = get_subject_series('Z416634','Z416634-SE-4-Axial_Body_5.0',subjects)
print(imseries.orientation,' ' , imseries.slice_thickness)
imdata = imseries.pixel_data

%matplotlib inline
print(imdata.shape)
ImageSliceViewer3D(imdata)

axial   5.0
(70, 512, 512)


interactive(children=(Output(),), _dom_classes=('widget-interact',))

<SliceViewer.ImageSliceViewer3D at 0x7f5422762be0>

In [16]:
a=  axial_series[0]

In [18]:
# Is the first axial series repeated anywhere in the rest of the list?
a in axial_series[1:]

False

In [None]:
# NOTE(review): within this notebook `constructed_sagittals` is only assigned
# in code that is commented out in the Section 7 filtering cell, so this cell
# raises NameError unless that step is re-enabled.
len(constructed_sagittals)

In [None]:
len(df_b)

In [None]:
print_summary_df(df_b)

In [None]:
print_summary_counts(df_a)

In [None]:
print_summary_counts(df_b)

In [None]:
exclusions[0][0]

In [None]:
print_summary_exclusions(exclusions)

In [None]:
# Check if df_a and df_b filters for axial are right
l1 = get_patientsbycount(df_a,'Axials',0)
l2 = [l for l in get_patientsbycount(df_b,'Axials',0) if l not in l1]

In [None]:
len(l2)

In [None]:
print(l2)

In [None]:
df_e =  get_exclusion_df()

In [None]:
print_exclusions_subject(df_e,l2)

In [None]:
exclusions[0][0].series.id_#.slice_thickness

In [None]:
display(df_e.head(100))

In [None]:
len(subjects)

In [None]:
len(set(subjects))

In [None]:
subjects.index('Z1243452')

In [None]:
# `valid_ids` is a set, which has no .index(); look the ID up in a sorted
# list so the position is well defined.
sorted(valid_ids).index('Z1243452')

In [None]:
# `valid_ids` is a set and cannot be subscripted; index a sorted list instead.
sorted(valid_ids)[352]

In [None]:
# `valid_ids` is a set and cannot be subscripted; index a sorted list instead.
sorted(valid_ids)[353]

In [None]:
len(valid_ids)

In [None]:
len(set(valid_ids))