# Setup

In [None]:
from IPython.display import display, HTML

In [None]:
# from IPython import get_ipython
from tqdm.notebook import tqdm
import pickle
import os


# Custom modules
from SliceViewer import ImageSliceViewer3D, ImageSliceViewer3D_1view,ImageSliceViewer3D_2views
from investigate import *

#pd.set_option("display.max_rows", 10)

# Custom functions
def save_object(obj, filename):
    """Pickle ``obj`` to ``filename`` using the highest available protocol.

    The file is opened in ``'wb'`` mode, so an existing file at that path
    is replaced.
    """
    with open(filename, mode='wb') as sink:
        pickle.dump(obj, sink, protocol=pickle.HIGHEST_PROTOCOL)

def load_object(filename):
    """Return the object unpickled from ``filename``.

    NOTE: unpickling can execute arbitrary code; only load files this
    notebook (or another trusted source) wrote.
    """
    with open(filename, mode='rb') as source:
        restored = pickle.load(source)
    return restored

In [None]:
# Script-export form of the `%tb` line magic, which re-prints the most recent exception traceback.
# NOTE(review): looks like leftover debugging — on a fresh kernel there is no traceback to show;
# consider deleting this cell.
get_ipython().run_line_magic('tb', '')

In [None]:
# Working directory of the kernel; used later to locate the JSON config file.
cwd = os.getcwd()
# Root directory whose immediate sub-folders are listed as patient folders.
data = '/tf/data'
# Directory where the intermediate pickles of this notebook are written and read back.
# NOTE(review): `data` and `output` are hard-coded absolute paths — presumably container
# mounts; confirm before running in another environment.
output = '/tf/output'

## Section 1

In [None]:
# Read the valid-BMI patient list and keep only the three identifier columns.
infile = 'patlist_with_validBMI.csv'
patient_table = pd.read_csv(infile, index_col=False)
# Drop columns whose header starts with "Unnamed" before selecting the identifiers.
is_named = ~patient_table.columns.str.contains('^Unnamed')
df_P = patient_table.loc[:, is_named][['GIVEN_MRN','PAT_ID','ACC']]
print('Columns of df_P: ', df_P.columns.tolist())
print('Length of df_P: ', len(df_P))
# NOTE(review): this preview renders GIVEN_MRN values into the saved notebook output.
display(df_P.head(10))

In [None]:
# The first tuple yielded by os.walk describes `data` itself; its second field holds
# the names of the immediate sub-directories.
top_level = next(os.walk(data))
pats = top_level[1]
print('Total patient folders in data dir: ',len(pats))

In [None]:
# The text before the first '-' in a folder name is compared against PAT_ID.
patids = [pat.split('-')[0] for pat in pats]

# Hash the PAT_ID column once instead of scanning the numpy array twice per folder.
known_pat_ids = set(df_P.PAT_ID.values)

# Single pass over (id, folder) pairs; both result sets are derived from the same matches.
matched = [(pat_id, folder) for pat_id, folder in zip(patids, pats) if pat_id in known_pat_ids]

valid_ids = {pat_id for pat_id, _folder in matched}
valid_dirs = {os.path.join(data, folder) for _pat_id, folder in matched}

In [None]:
print('valid ids: ',len(valid_ids))

In [None]:
# Import modules and config file
import json
from run_sma_experiment import find_l3_images,output_images
import pprint
pp = pprint.PrettyPrinter(indent=1)


# The config file is resolved relative to the kernel's working directory.
configfile = os.path.join(cwd,'config/debug_ES/series_filter_ds1.json')
with open(configfile, "r") as config_file:
    config = json.loads(config_file.read())

# Show the full parsed configuration for inspection.
print('Current config dict: ')
pp.pprint(config)

In [None]:
# Narrow to the "series_filter" section. Guarded so the cell can be re-run: after the
# first run `config` already is that section, and indexing it again would raise KeyError.
if "series_filter" in config:
    config = config["series_filter"]
# Overrides applied on top of the file contents.
config["new_tim_dicom_dir_structure"] = True
config["valid_ids"] = valid_ids

In [53]:
# Debug
from L3_finder import *

Using TensorFlow backend.


In [None]:
# Debug: enumerate every subject under the configured DICOM directory.
print("Finding subjects")

subject_iter = find_subjects(
    config["dicom_dir"],
    new_tim_dir_structure=config["new_tim_dicom_dir_structure"],
)
# Materialise the result so it can be counted and re-used by later cells.
subjects = list(subject_iter)

print('Subjects found: ', len(subjects))

In [None]:
# Keep only the subjects whose id belongs to the validated patient set.
subjects = list(filter(lambda candidate: candidate.id_ in valid_ids, subjects))
print('Subjects found: ', len(subjects))
print('Valid Subjects: ', len(valid_ids))

## Section 2

In [None]:
# Find Duplicate Subjects
# `unique_subjects` keeps the first occurrence of every id in encounter order;
# `duplicate_subjects` gets one entry per repeated occurrence.
unique_subjects = []
duplicate_subjects = []
# Set used only for the membership test, so the scan is linear instead of quadratic.
seen_ids = set()
for subject in subjects:
    if subject.id_ not in seen_ids:
        seen_ids.add(subject.id_)
        unique_subjects.append(subject.id_)
    else:
        duplicate_subjects.append(subject.id_)

# This cell only reports duplicates; it does not remove them from `subjects`.
print(duplicate_subjects)

In [None]:
print_subject_paths(subjects)

In [None]:
print_subject_series('Z619766','/tf/data/Z619766-19070630-Z619766-_')

In [None]:
# Fetch one series of one subject for inspection. `subjects` is passed explicitly,
# matching the other get_subject_series calls in this notebook.
imseries = get_subject_series('Z619766','Z619766-SE-1-2.0',subjects)
print(imseries.orientation,' ' , imseries.slice_thickness)
# Pixel data shown by the slice viewer in the next cell.
imdata = imseries.pixel_data

In [None]:
%matplotlib inline
print(imdata.shape)
ImageSliceViewer3D(imdata)

In [None]:
# Persist the filtered subject list; later sections reload it from this file.
# NOTE(review): the file name says "noduplicates", but no visible cell removes duplicates
# from `subjects` (the duplicate check in this section only prints them) — confirm the
# list is duplicate-free before relying on the name.
save_object(subjects, os.path.join(output,'subjects_noduplicates.pkl'))

## Section 3

In [None]:
subjects = load_object(os.path.join(output,'subjects_noduplicates.pkl'))

In [None]:
%%time
# Debug
from L3_finder import *
from l3finder.ingest import *
from multiprocessing import get_context
from multiprocessing import set_start_method
#set_start_method("spawn")

if __name__=='__main__':
    # Collect every series of every subject into one flat list.
    print("Finding series")
    series = list(flatten(s.find_series() for s in subjects))

    # Split the series by orientation using a process pool.
    print("Separating series")
    #sagittal_series, axial_series, excluded_series = separate_series(series)
    
    # NOTE(review): this list is bound into the partials below, which are pickled and
    # sent to the "spawn" workers; anything same_orientation adds to it presumably stays
    # in the worker's copy and never reaches this parent-side list — confirm.
    excluded_series = []

    # One predicate per orientation; each is called with a single series.
    sag_filter = functools.partial(
        same_orientation,
        orientation='sagittal',
        excluded_series=excluded_series
    )
    
    axial_filter = functools.partial(
        same_orientation,
        orientation='axial',
        excluded_series=excluded_series
    )

    def pool_filter(pool, func, candidates):
        """Return the candidates for which ``func`` (evaluated in ``pool``) is truthy.

        ``pool.imap`` yields results in input order, so zipping them with
        ``candidates`` pairs each candidate with its own result.
        """
        return [
            c for c, keep
            in zip(candidates, tqdm(pool.imap(func, candidates),total=len(candidates)))
            if keep]
    
    print('Filtering series using ', multiprocessing.cpu_count(), ' cores:')
    # Pool() with no argument picks its own worker count; the same pool serves both passes.
    with get_context("spawn").Pool() as p:
        sagittal_series = pool_filter(p, sag_filter, series)
        print("Processed Sagittals")
        axial_series = pool_filter(p, axial_filter, series)
        print("Processed Axials")

    
    
    print("Series seperated")

#remove_start_method("spawn")

In [None]:
print("Length of valid pats: ", len(subjects))
print("Length of sagittal series", len(sagittal_series))
print("Length of axial series", len(axial_series))
#print("Length of excluded series", len(excluded_series))
#print("Length of all series in dataset", len(series))

In [None]:
# Save required objects
save_object(axial_series, os.path.join(output,'axial_series.pkl'))
save_object(sagittal_series, os.path.join(output,'sagittal_series.pkl'))

## Section 4 - Create query dataframes before filtering the series

In [None]:
axial_series = load_object(os.path.join(output,'axial_series.pkl'))
sagittal_series = load_object(os.path.join(output,'sagittal_series.pkl'))
subjects = load_object(os.path.join(output,'subjects_noduplicates.pkl'))

In [None]:
df_a = get_summary_dfs(axial_series,sagittal_series,subjects)
save_object(df_a, os.path.join(output,'df_a.pkl'))

In [None]:
df_a_axials = get_summary_by_serieslength(axial_series)
df_a_sags = get_summary_by_serieslength(sagittal_series)
save_object(df_a_axials, os.path.join(output,'df_a_axials.pkl'))
save_object(df_a_sags, os.path.join(output,'df_a_sags.pkl'))

## Section 5 - Investigate subjects and series

In [None]:
df_a_axials = load_object(os.path.join(output,'df_a_axials.pkl'))
df_a_sags = load_object(os.path.join(output,'df_a_sags.pkl'))
df_a = load_object(os.path.join(output,'df_a.pkl'))

In [None]:
print("Length of subjects with atleast 1 axial or sagittal series: ", len(df_a))
print("Length of subjects with atleast 1 axial series: ", len(df_a_axials['ID'].unique()))
print("Length of subjects with atleast 1 sagittal series: ", len(df_a_sags['ID'].unique()))

In [None]:
# Patients without Axial
# Hash the ids that have an axial series once; the original scanned the array per patient.
ids_with_axial = set(df_a_axials['ID'].values)
pats = [pat for pat in df_a['ID'].values if pat not in ids_with_axial]
print(len(pats))
print(pats)

In [None]:
# Patients without Sagittal
# Hash the ids that have a sagittal series once; the original scanned the array per patient.
ids_with_sagittal = set(df_a_sags['ID'].values)
pats = [pat for pat in df_a['ID'].values if pat not in ids_with_sagittal]
print(len(pats))

In [None]:
imseries = get_subject_series('Z837620','Z837620-SE-6-Vol_Body_Vol._0.5',subjects)
print(imseries.orientation,' ' , imseries.slice_thickness)
imdata = imseries.pixel_data

In [None]:
%matplotlib inline
print(imdata.shape)
ImageSliceViewer3D(imdata)

In [None]:
print_summary_by_serieslength(df_a_axials)

In [None]:
print_summary_by_serieslength(df_a_sags)

## Section 6

In [None]:
axial_series = [a_s for a_s in axial_series if a_s.number_of_dicoms > 20]

In [None]:
len(axial_series)

In [None]:
save_object(axial_series, os.path.join(output,'axial_series_gt20.pkl'))

In [None]:
df_b_axials = get_summary_by_serieslength(axial_series)

In [None]:
print_summary_by_serieslength(df_b_axials)

## Section 7

In [None]:
from L3_finder import *

In [None]:
axial_series = load_object(os.path.join(output,'axial_series_gt20.pkl'))
sagittal_series = load_object(os.path.join(output,'sagittal_series.pkl'))
subjects = load_object(os.path.join(output,'subjects_noduplicates.pkl'))

In [None]:
# Combined list of everything the two filters below report as excluded.
exclusions = []
if __name__=='__main__':
    # filter_axial_series returns the kept series plus the excluded ones; the kept
    # list replaces `axial_series` in place.
    axial_series, ax_exclusions = filter_axial_series(axial_series)
    print('Axial series filtered')
    exclusions.extend(ax_exclusions)
    # Reconstruction of missing sagittals is currently disabled:
    #constructed_sagittals = construct_series_for_subjects_without_sagittals(
    #    subjects, sagittal_series, axial_series
    #)
    #print('Missing Sagittals reconstructed')
    #sagittal_series.extend(constructed_sagittals)
    # Same pattern for the sagittal series.
    sagittal_series, sag_exclusions = filter_sagittal_series(sagittal_series)
    print('Sagittal series filtered')
    exclusions.extend(sag_exclusions)

In [None]:
save_object(axial_series, os.path.join(output,'axial_series_filtered.pkl'))
save_object(sagittal_series, os.path.join(output,'sagittal_series_filtered.pkl'))
save_object(sag_exclusions, os.path.join(output,'sag_exclusions.pkl'))
save_object(ax_exclusions, os.path.join(output,'ax_exclusions.pkl'))

In [None]:
df_b = get_summary_dfs(axial_series,sagittal_series,subjects)
save_object(df_b, os.path.join(output,'df_b.pkl'))

In [None]:
df_b_axials = get_summary_by_serieslength(axial_series)
df_b_sags = get_summary_by_serieslength(sagittal_series)
save_object(df_b_axials, os.path.join(output,'df_b_axials.pkl'))
save_object(df_b_sags, os.path.join(output,'df_b_sags.pkl'))

## Section 8 - Create df with best pairvalidity scores to select optimal axial and sagittal series for each subject

In [None]:
df_a_axials = load_object(os.path.join(output,'df_a_axials.pkl'))
df_a_sags = load_object(os.path.join(output,'df_a_sags.pkl'))
df_a = load_object(os.path.join(output,'df_a.pkl'))

df_b_axials = load_object(os.path.join(output,'df_b_axials.pkl'))
df_b_sags = load_object(os.path.join(output,'df_b_sags.pkl'))
df_b = load_object(os.path.join(output,'df_b.pkl'))

In [None]:
subjects = load_object(os.path.join(output,'subjects_noduplicates.pkl'))

axial_series = load_object(os.path.join(output,'axial_series_filtered.pkl'))
sagittal_series = load_object(os.path.join(output,'sagittal_series_filtered.pkl'))

In [None]:
ax_exclusions = load_object(os.path.join(output,'ax_exclusions.pkl'))

In [None]:
print_summary_by_serieslength(df_b_axials)

In [None]:
print_summary_by_subject(df_b_axials)

In [None]:
print_summary_by_subject(df_b_sags)

In [None]:
len(df_b_axials['ID'].unique())

In [None]:
len(df_b_sags['ID'].unique())

In [None]:
df_pairs = get_finalpairs_df(df_b_axials,df_b_sags,subjects)

In [None]:
save_object(df_pairs, os.path.join(output,'df_pairs.pkl'))

# Section 9 - Save subjects with valid axial, sagittal pairs

In [None]:
df_pairs = load_object(os.path.join(output,'df_pairs.pkl'))

subjects = load_object(os.path.join(output,'subjects_noduplicates.pkl'))

In [None]:
axial_series = load_object(os.path.join(output,'axial_series_filtered.pkl'))
sagittal_series = load_object(os.path.join(output,'sagittal_series_filtered.pkl'))
df_b_axials = load_object(os.path.join(output,'df_b_axials.pkl'))
df_b_sags = load_object(os.path.join(output,'df_b_sags.pkl'))
df_b = load_object(os.path.join(output,'df_b.pkl'))

In [None]:
print('Length Pairs: ', len(df_pairs))
print('Length Subjects: ',len(subjects))

In [None]:
# Presence flags for the selected axial / sagittal series, computed once.
has_axial = df_pairs['Axial'].notnull()
has_sagittal = df_pairs['Sagittal'].notnull()

# Neither series selected.
df_nopairs = df_pairs[~has_axial & ~has_sagittal]
print('Missing axial and sagittals: ',len(df_nopairs))
# Sagittal selected, axial missing.
df_noaxials_only = df_pairs[~has_axial & has_sagittal]
print('Missing axials only :', len(df_noaxials_only))

# Axial selected, sagittal missing.
df_nosags_only = df_pairs[has_axial & ~has_sagittal]
print('Missing Sagittals only :',len(df_nosags_only))


# Both series selected.
df_validpairs = df_pairs[has_axial & has_sagittal]
print('valid pairs :',len(df_validpairs))

In [None]:
# incomplete df: every row missing at least one of the two series.
# pd.concat replaces the chained DataFrame.append calls (append was removed in pandas 2.0);
# row order and original index labels are kept.
df_incomp = pd.concat([df_nopairs, df_noaxials_only, df_nosags_only])
print(len(df_incomp))

In [None]:
# Save valid pairs and throw remaining series
save_object(df_validpairs, os.path.join(output,'df_validpairs.pkl'))
save_object(df_incomp, os.path.join(output,'df_incomp.pkl'))

In [None]:
# Save valid subjects
# Hash the ids of the valid pairs once; the original scanned a 2-D array per subject.
valid_pair_ids = set(df_validpairs['ID'].values)

valid_subjects = []
incomp_subjects = []
for s in subjects:
    if s.id_ in valid_pair_ids:
        valid_subjects.append(s)
    else:
        incomp_subjects.append(s)

print('Valid subjects: ', len(valid_subjects))
print('Incomplete subjects: ', len(incomp_subjects))

save_object(valid_subjects, os.path.join(output,'valid_subjects.pkl'))
save_object(incomp_subjects, os.path.join(output,'incomp_subjects.pkl'))

In [None]:
# Save valid series and incomplete series
# Hash the selected series ids once; the original scanned a 2-D array per series.
valid_axial_ids = set(df_validpairs['Axial'].values)
valid_sagittal_ids = set(df_validpairs['Sagittal'].values)

valid_axials = []
valid_sags = []
incomp_axials = []
incomp_sags = []
for a in axial_series:
    if a.id_ in valid_axial_ids:
        valid_axials.append(a)
    else:
        incomp_axials.append(a)

for s in sagittal_series:
    if s.id_ in valid_sagittal_ids:
        valid_sags.append(s)
    elif s.number_of_dicoms > 5:
        # NOTE(review): only sagittals with more than 5 DICOMs are kept as incomplete;
        # axials have no such cut-off — confirm the asymmetry is intended.
        incomp_sags.append(s)

In [None]:
print('axial_series: ', len(axial_series))
print('valid_axials: ', len(valid_axials))
print('incomp_axials: ', len(incomp_axials))

In [None]:
print('sagittal_series: ', len(sagittal_series))
print('valid_sags: ', len(valid_sags))
print('incomp_sags: ', len(incomp_sags))

In [None]:
save_object(valid_axials, os.path.join(output,'valid_axials.pkl'))
save_object(valid_sags, os.path.join(output,'valid_sags.pkl'))

# Section 10 - Investigate incomplete subjects


In [None]:
# Load incomplete subjects
df_incomp= load_object(os.path.join(output,'df_incomp.pkl'))
subjects = load_object(os.path.join(output,'incomp_subjects.pkl'))

In [None]:
# Load unfiltered series
axial_series = load_object(os.path.join(output,'axial_series.pkl'))
sagittal_series = load_object(os.path.join(output,'sagittal_series.pkl'))

df_a_axials = load_object(os.path.join(output,'df_a_axials.pkl'))
df_a_sags = load_object(os.path.join(output,'df_a_sags.pkl'))

In [None]:
print(" No of incomplete subjects: ", len(df_incomp))

In [None]:
print(len(axial_series))
print(len(sagittal_series))

In [None]:
# select series belonging to incomplete subjects alone
# Hash the incomplete ids once; the original extracted and scanned the array per series.
incomp_ids = set(df_incomp['ID'].values)
axial_series = [series for series in axial_series if series.subject.id_ in incomp_ids]
sagittal_series = [series for series in sagittal_series if series.subject.id_ in incomp_ids]

# similarly for dfs
df_i_axials = df_a_axials.loc[df_a_axials['ID'].isin(df_incomp['ID'].values)]
df_i_sags = df_a_sags.loc[df_a_sags['ID'].isin(df_incomp['ID'].values)]

In [None]:
display(df_i_axials.head(10))

In [None]:
print(len(axial_series))
print(len(sagittal_series))

In [None]:
df_incomp_pairs = get_finalpairs_df(df_i_axials,df_i_sags,subjects)

In [None]:
save_object(df_incomp_pairs, os.path.join(output,'df_incomp_pairs.pkl'))

save_object(df_i_axials, os.path.join(output,'df_i_axials.pkl'))
save_object(df_i_sags, os.path.join(output,'df_i_sags.pkl'))

save_object(axial_series, os.path.join(output,'axial_series_i.pkl'))
save_object(sagittal_series, os.path.join(output,'sagittal_series_i.pkl'))

# Section 11 - Build new filter to handle incomplete subjects

In [28]:
#Load incomplete subjects
subjects = load_object(os.path.join(output,'incomp_subjects.pkl'))

In [29]:
df_incomp_pairs = load_object(os.path.join(output,'df_incomp_pairs.pkl'))

In [30]:
len(df_incomp_pairs)

905

In [31]:
# Load unfiltered series
axial_series = load_object(os.path.join(output,'axial_series_i.pkl'))
sagittal_series = load_object(os.path.join(output,'sagittal_series_i.pkl'))

df_i_axials = load_object(os.path.join(output,'df_i_axials.pkl'))
df_i_sags = load_object(os.path.join(output,'df_i_sags.pkl'))

In [32]:
df_incomp_pairs.head(10)

Unnamed: 0,ID,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices
0,Z1000273,Z1000273-SE-3-Vol_Std_0.5,,,1.672,,1284.0,
1,Z1000800,Z1000800-SE-4-Body_5.0_CE,,,2.345,,124.0,
2,Z1002072,,,,,,,
3,Z1003148,,,,,,,
4,Z1004562,,,,,,,
5,Z1005788,Z1005788-SE-9-DELAY_BLADDER,,,1.058,,18.0,
6,Z1009243,Z1009243-SE-5-Vol_Std_0.5_CE,,,1.668,,1251.0,
7,Z1009248,Z1009248-SE-5-Vol_Std_0.5_CE,,,1.669,,1676.0,
8,Z1009393,Z1009393-SE-3-Vol_Std_0.5,,,1.668,,1051.0,
9,Z1010426,Z1010426-SE-3-Standard_Vol,,,1.667,,817.0,


In [None]:
test = filter_finalpairs('Z1000273',df_i_axials,df_i_sags,subjects)
print(test)

In [None]:
axial_series[0].slice_thickness

In [None]:
%%time
# Debug
from L3_finder import *
from l3finder.ingest import *
from multiprocessing import get_context
from multiprocessing import set_start_method
#set_start_method("spawn")

if __name__=='__main__':
    # Collect the id of every incomplete subject; each id is one unit of work.
    print("Finding IDs")
    
    IDs = [s.id_ for s in subjects]
    
    #IDs = IDs[:20]
    
    # Bind the shared inputs so the pool only has to pass the subject id.
    pair_filter = functools.partial(
        filter_finalpairs,
        df_ax=df_i_axials,
        df_sag=df_i_sags,
        subjects=subjects
    )
    
    def pool_filter(pool, func, candidates):
        """Apply ``func`` to every candidate in ``pool`` and return the results as a list.

        ``imap_unordered`` yields results as they complete, so the returned list is
        not in the order of ``candidates``.
        """
        return [result for result in tqdm(pool.imap_unordered(func, candidates),total=len(candidates))]
    
    # NOTE(review): the message reports cpu_count(), but the pool below is created with
    # a fixed 14 worker processes.
    print('Filtering series using ', multiprocessing.cpu_count(), ' cores:')
        
    with get_context("spawn").Pool(processes=14) as p:
        out_params = pool_filter(p, pair_filter, IDs)
        # Stop accepting work and wait for the workers before leaving the block.
        p.close()
        p.join()
    
    print('parallel processing over')
    # Assemble one DataFrame row per result; row i is the i-th completed result.
    df  = pd.DataFrame(columns=['ID','Axial','Sagittal','Overlap','MissingScore','PairValidity', 
                                'AxSlices','SagSlices','AxThick','SagThick'])
    for i,op in enumerate(out_params):
        df.loc[i] = op

    print("Processed")

In [None]:
display(df.head(100))

In [None]:
save_object(df, os.path.join(output,'df_incomp_pairs_gt10filter.pkl'))

In [None]:
%%time
# Single core
if __name__=='__main__':
    # Serial smoke test of filter_finalpairs on the first five subjects.
    print("Finding IDs")

    IDs = [s.id_ for s in subjects]

    IDs = IDs[:5]

    # The frame needs one column per field in a filter_finalpairs row. The parallel cell
    # in this section, and the frame saved from it, use these ten columns; the previous
    # eight-column list omitted 'AxThick' and 'SagThick', so a ten-field row could not
    # be assigned with `dfs.loc[i] = ...`.
    dfs  = pd.DataFrame(columns=['ID','Axial','Sagittal','Overlap','MissingScore','PairValidity',
                                 'AxSlices','SagSlices','AxThick','SagThick'])
    for i,ID in tqdm(enumerate(IDs),total=len(IDs)):
        dfs.loc[i] = filter_finalpairs(ID,df_i_axials,df_i_sags,subjects)

    display(dfs)

# Section 11b - Investigate old filter vs new 

In [5]:
# Incomplete subjects from old filter
df_incomp_1= load_object(os.path.join(output,'df_incomp.pkl'))
subjects = load_object(os.path.join(output,'incomp_subjects.pkl'))

In [6]:
# Results from new filter
df_incomp_2 = load_object(os.path.join(output,'df_incomp_pairs_gt10filter.pkl'))

axial_series = load_object(os.path.join(output,'axial_series_i.pkl'))
sagittal_series = load_object(os.path.join(output,'sagittal_series_i.pkl'))


ex_subjects = []
def remove_subjects(ID):
    """Remove one subject from the notebook-level subject and series collections.

    Delegates to ``exclude_subjects`` and rebinds the globals ``subjects``,
    ``axial_series``, ``sagittal_series`` and ``ex_subjects`` to the four
    values it returns, then prints a confirmation line.
    """
    global subjects, ex_subjects, axial_series, sagittal_series
    updated = exclude_subjects(subjects, axial_series, sagittal_series, ex_subjects, ID)
    subjects, axial_series, sagittal_series, ex_subjects = updated
    print(ID, ' Removed from subject and series objects')

In [7]:
print('Subjects: ', len(subjects))
print('df_incomp_1: ', len(df_incomp_1))
print('df_incomp_2: ', len(df_incomp_2))

Subjects:  905
df_incomp_1:  905
df_incomp_2:  905


In [8]:
# drop unknown IDs in df2
df_incomp_2 = df_incomp_2.dropna(subset=['ID'])

In [9]:
# Presence flags for the selected series, computed once per frame
# (df 1 = old filter, df 2 = new filter).
ax_present_1 = df_incomp_1['Axial'].notnull()
sag_present_1 = df_incomp_1['Sagittal'].notnull()
ax_present_2 = df_incomp_2['Axial'].notnull()
sag_present_2 = df_incomp_2['Sagittal'].notnull()

# Axial selected, sagittal missing
df_ax_nosag_1 = df_incomp_1[ax_present_1 & ~sag_present_1]
print('Missing Sagittals only in df 1:',len(df_ax_nosag_1))

df_ax_nosag_2 = df_incomp_2[ax_present_2 & ~sag_present_2]
print('Missing Sagittals only in df 2 :',len(df_ax_nosag_2))


# Both selected
df_ax_sag_1 = df_incomp_1[ax_present_1 & sag_present_1]
print('complete pairs in df 1 :',len(df_ax_sag_1))

df_ax_sag_2 = df_incomp_2[ax_present_2 & sag_present_2]
print('complete pairs in df 2 :',len(df_ax_sag_2))


# Sagittal selected, axial missing
df_noax_1 = df_incomp_1[~ax_present_1 & sag_present_1]
print('Missing axials only in df 1 :',len(df_noax_1))

df_noax_2 = df_incomp_2[~ax_present_2 & sag_present_2]
print('Missing axials only in df 2 :',len(df_noax_2))

# Both missing
df_noax_nosag_1 = df_incomp_1[~ax_present_1 & ~sag_present_1]
print('Missing axials and sags in df 1 :',len(df_noax_nosag_1))

df_noax_nosag_2 = df_incomp_2[~ax_present_2 & ~sag_present_2]
print('Missing axials and sags in df 2 :',len(df_noax_nosag_2))

Missing Sagittals only in df 1: 842
Missing Sagittals only in df 2 : 784
complete pairs in df 1 : 0
complete pairs in df 2 : 119
Missing axials only in df 1 : 12
Missing axials only in df 2 : 0
Missing axials and sags in df 1 : 51
Missing axials and sags in df 2 : 0


In [10]:
# Final complete valid dataframe; call it df_valid_lq ("low quality", since the first valid df is higher quality)

In [11]:
df_valid_lq = df_incomp_1.copy()

### Remove Cases not in df_incomp_2:

In [12]:
# Remove cases not in df_2
missingID = df_incomp_1[~df_incomp_1['ID'].isin(df_incomp_2['ID'].values)]['ID'].values.tolist()
df_valid_lq = df_valid_lq[~df_valid_lq['ID'].isin(missingID)]
print(len(df_valid_lq))

903


### Keep complete cases from df_incomp_2 when df_incomp_1 does not have axial series types.

In [13]:
completeIDs = df_incomp_2[df_incomp_2['Axial'].notnull() & df_incomp_2['Sagittal'].notnull()]['ID'].values.tolist()

# Print missing axials in df1
display(df_noax_1)

Unnamed: 0,ID,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices
65,Z1041413,,Z1041413-SE-7-Stnd_Pediatric_2.0_CE,,1.007,,,139
194,Z1140026,,Z1140026-SE-602-L_Spine_Sagittal_2mmx2mm,,1.006,,,167
235,Z1174241,,Z1174241-SE-8-Body_Std._Axial_AIDR_-_H_Sagitta...,,0.908,,,79
318,Z1224134,,Z1224134-SE-602-SAG,,1.013,,,160
437,Z1302641,,"Z1302641-SE-80325-MPR,_SAGITTAL,_Sagittal",,0.003,,,64
633,Z1687886,,Z1687886-SE-6-Stnd_Pediatric_3.0_CE,,1.011,,,93
1055,Z465714,,Z465714-SE-12-Sagittal_Body_Sagittal_3.000,,1.925,,,102
1142,Z505334,,Z505334-SE-602-Sagital_Abdomen,,0.144,,,72
1456,Z651305,,Z651305-SE-23-Sagittal_Body_Sagittal_3.000,,4.581,,,47
1680,Z846542,,Z846542-SE-602-L_Spine_Sagittal_2mmx2mm,,1.011,,,184


In [14]:
# print corresponding df2 pairs
df_2_dfnoax1 = df_incomp_2[df_incomp_2['ID'].isin(df_noax_1['ID'].values)]
display(df_2_dfnoax1)

Unnamed: 0,ID,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices,AxThick,SagThick
34,Z1041413,Z1041413-SE-4-Stnd_Pediatric_2.0_CE,Z1041413-SE-9-Stnd_Pediatric_2.0_CE,0.662,1.0,1.662,213,135,2.0,2.0
71,Z1140026,Z1140026-SE-2-ABD_PELVIS,Z1140026-SE-602-L_Spine_Sagittal_2mmx2mm,0.0,1.0,1.0,144,167,2.5,2.0
114,Z1174241,Z1174241-SE-6-Body_Std._Axial_AIDR_-_H_Vol._0.5,Z1174241-SE-13-Bone_Sharp_Sagittal_3.000,0.993,0.959,1.952,1992,63,0.5,3.0
130,Z1224134,Z1224134-SE-2-ABD_PELVIS,Z1224134-SE-602-SAG,0.996,1.0,1.996,195,160,2.5,2.0
180,Z1302641,Z1302641-SE-2-2MM_ID_4,"Z1302641-SE-80325-MPR,_SAGITTAL,_Sagittal",0.003,0.009,0.012,188,64,2.0,3.0
266,Z1687886,Z1687886-SE-4-Stnd_Pediatric_0.5_CE,Z1687886-SE-6-Stnd_Pediatric_3.0_CE,1.0,1.0,2.0,1317,93,0.5,3.0
411,Z465714,Z465714-SE-4-Axial_Body_2.0,Z465714-SE-12-Sagittal_Body_Sagittal_3.000,0.995,1.0,1.995,200,102,2.0,3.0
437,Z505334,Z505334-SE-2-ABD_PELVIS,Z505334-SE-602-Sagital_Abdomen,0.0,0.616,0.616,175,72,2.5,0.703125
584,Z651305,Z651305-SE-7-Vol_Body_Vol._0.5,Z651305-SE-9005-Sag-MIP_Body_Sag-MIP_4.000,0.763,1.0,1.763,458,77,0.5,4.0
675,Z846542,Z846542-SE-2-ABD_PELVIS,Z846542-SE-602-L_Spine_Sagittal_2mmx2mm,0.0,1.0,1.0,168,184,2.5,2.0


In [15]:
# Subjects that had no axial in df 1: copy the df 2 pair across. When the pair's overlap
# is below 0.7, only the axial is kept and Overlap is reset to 0.
copied_columns = ('Axial', 'Sagittal', 'Overlap', 'MissingScore',
                  'PairValidity', 'AxSlices', 'SagSlices')
for ind, row in df_2_dfnoax1.iterrows():
    # Rows of df_valid_lq that belong to this subject.
    same_subject = df_valid_lq['ID'] == row['ID']
    for column in copied_columns:
        df_valid_lq.loc[same_subject, column] = row[column]
    if row['Overlap'] < 0.7:
        df_valid_lq.loc[same_subject, 'Sagittal'] = None
        df_valid_lq.loc[same_subject, 'SagSlices'] = None
        df_valid_lq.loc[same_subject, 'Overlap'] = 0

display(df_valid_lq[df_valid_lq['ID'].isin(df_noax_1['ID'].values)])

Unnamed: 0,ID,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices
65,Z1041413,Z1041413-SE-4-Stnd_Pediatric_2.0_CE,,0.0,1.0,1.662,213,
194,Z1140026,Z1140026-SE-2-ABD_PELVIS,,0.0,1.0,1.0,144,
235,Z1174241,Z1174241-SE-6-Body_Std._Axial_AIDR_-_H_Vol._0.5,Z1174241-SE-13-Bone_Sharp_Sagittal_3.000,0.993,0.959,1.952,1992,63.0
318,Z1224134,Z1224134-SE-2-ABD_PELVIS,Z1224134-SE-602-SAG,0.996,1.0,1.996,195,160.0
437,Z1302641,Z1302641-SE-2-2MM_ID_4,,0.0,0.009,0.012,188,
633,Z1687886,Z1687886-SE-4-Stnd_Pediatric_0.5_CE,Z1687886-SE-6-Stnd_Pediatric_3.0_CE,1.0,1.0,2.0,1317,93.0
1055,Z465714,Z465714-SE-4-Axial_Body_2.0,Z465714-SE-12-Sagittal_Body_Sagittal_3.000,0.995,1.0,1.995,200,102.0
1142,Z505334,Z505334-SE-2-ABD_PELVIS,,0.0,0.616,0.616,175,
1456,Z651305,Z651305-SE-7-Vol_Body_Vol._0.5,Z651305-SE-9005-Sag-MIP_Body_Sag-MIP_4.000,0.763,1.0,1.763,458,77.0
1680,Z846542,Z846542-SE-2-ABD_PELVIS,,0.0,1.0,1.0,168,


### Keep complete cases from df_incomp_2 when df_incomp_1 does not have axials and sagittals

In [16]:
display(df_noax_nosag_1.head(10))
print(len(df_noax_nosag_1))

Unnamed: 0,ID,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices
64,Z1041077,,,,,,,
91,Z1056573,,,,,,,
109,Z1066955,,,,,,,
114,Z1068362,,,,,,,
132,Z1085469,,,,,,,
153,Z1104715,,,,,,,
206,Z1152452,,,,,,,
210,Z1155066,,,,,,,
247,Z1182990,,,,,,,
276,Z1199673,,,,,,,


51


In [17]:
# print corresponding df2 pairs
df_2_dfnoaxsag1 = df_incomp_2[df_incomp_2['ID'].isin(df_noax_nosag_1['ID'].values)]
display(df_2_dfnoaxsag1.head(10))

Unnamed: 0,ID,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices,AxThick,SagThick
35,Z1056573,Z1056573-SE-5-Venous_Phase_Standard_Axial,,,1.0,,81,,5.0,
36,Z1068362,Z1068362-SE-3-KIDNEY_STONE,,,1.0,,124,,2.5,
44,Z1066955,Z1066955-SE-4-Venous_Phase_Standard_Axial,,,1.0,,79,,5.0,
61,Z1085469,Z1085469-SE-3-Body_Std._Axial_AIDR_-_H_5.0,Z1085469-SE-6-Body_Std._Axial_AIDR_-_H_Sagitta...,0.988,1.0,1.988,68,43.0,5.0,3.0
62,Z1104715,Z1104715-SE-4-Venous_Phase_Pediatric_Axial,,,1.0,,93,,5.0,
85,Z1155066,Z1155066-SE-5-Venous_Phase_Standard_Axial,,,1.0,,76,,5.0,
86,Z1152452,Z1152452-SE-5-Venous_Phase_Standard_Axial,,,1.0,,91,,5.0,
91,Z1182990,Z1182990-SE-2-Body_5.0_Venous_Phase_CE,,,1.0,,79,,5.0,
109,Z1199673,Z1199673-SE-3-CT_ABDPEL_5MM,Z1199673-SE-603-SAGITTAL,0.984,1.0,1.984,85,310.0,5.0,1.25
117,Z1211258,Z1211258-SE-6-AX,,,1.0,,144,,4.0,


In [18]:
# Subjects that had neither series in df 1: copy the df 2 result across. When an overlap
# value exists and is below 0.7, the sagittal is dropped and Overlap is reset to 0.
copied_columns = ('Axial', 'Sagittal', 'Overlap', 'MissingScore',
                  'PairValidity', 'AxSlices', 'SagSlices')
for ind, row in df_2_dfnoaxsag1.iterrows():
    # Rows of df_valid_lq that belong to this subject.
    same_subject = df_valid_lq['ID'] == row['ID']
    for column in copied_columns:
        df_valid_lq.loc[same_subject, column] = row[column]
    overlap = row['Overlap']
    # Test for "missing" explicitly. The previous truthiness check (`if row['Overlap']:`)
    # also skipped an overlap of exactly 0.0, so zero-overlap sagittals were never removed.
    if pd.notnull(overlap) and overlap < 0.7:
        df_valid_lq.loc[same_subject, 'Sagittal'] = None
        df_valid_lq.loc[same_subject, 'SagSlices'] = None
        df_valid_lq.loc[same_subject, 'Overlap'] = 0

display(df_valid_lq[df_valid_lq['ID'].isin(df_noax_nosag_1['ID'].values)].head(10))

Unnamed: 0,ID,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices
91,Z1056573,Z1056573-SE-5-Venous_Phase_Standard_Axial,,,1,,81,
109,Z1066955,Z1066955-SE-4-Venous_Phase_Standard_Axial,,,1,,79,
114,Z1068362,Z1068362-SE-3-KIDNEY_STONE,,,1,,124,
132,Z1085469,Z1085469-SE-3-Body_Std._Axial_AIDR_-_H_5.0,Z1085469-SE-6-Body_Std._Axial_AIDR_-_H_Sagitta...,0.988,1,1.988,68,43.0
153,Z1104715,Z1104715-SE-4-Venous_Phase_Pediatric_Axial,,,1,,93,
206,Z1152452,Z1152452-SE-5-Venous_Phase_Standard_Axial,,,1,,91,
210,Z1155066,Z1155066-SE-5-Venous_Phase_Standard_Axial,,,1,,76,
247,Z1182990,Z1182990-SE-2-Body_5.0_Venous_Phase_CE,,,1,,79,
276,Z1199673,Z1199673-SE-3-CT_ABDPEL_5MM,Z1199673-SE-603-SAGITTAL,0.984,1,1.984,85,310.0
296,Z1211258,Z1211258-SE-6-AX,,,1,,144,


### Keep sagittals alone from df_incomp_2 when df_incomp_1 does not have sagittals

In [19]:
print(len(df_ax_nosag_1))
display(df_ax_nosag_1.head(10))

842


Unnamed: 0,ID,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices
0,Z1000273,Z1000273-SE-2-Axial_Std_5.0,,,1.013,,78,
1,Z1000800,Z1000800-SE-4-Body_5.0_CE,,,2.345,,124,
5,Z1002072,Z1002072-SE-4-Standard_Axial,,,1.015,,66,
6,Z1003148,Z1003148-SE-4-Axial_Body_5.0,,,1.011,,92,
10,Z1004562,Z1004562-SE-4-Axial_Body_5.0,,,1.012,,85,
11,Z1005788,Z1005788-SE-2-ABD._W_O,,,1.025,,41,
14,Z1009243,Z1009243-SE-2-Axial_Std_5.0_CE,,,1.013,,76,
15,Z1009248,Z1009248-SE-4-Axial_Std_5.0_CE,,,1.021,,96,
16,Z1009393,Z1009393-SE-2-Axial_Std_5.0,,,1.017,,60,
18,Z1010426,Z1010426-SE-2-Standard,,,1.02,,50,


In [20]:
# print corresponding df2 pairs
df_2_dfnosag1 = df_incomp_2[df_incomp_2['ID'].isin(df_ax_nosag_1['ID'].values)]
display(df_2_dfnosag1.head(10))

Unnamed: 0,ID,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices,AxThick,SagThick
0,Z1000800,Z1000800-SE-4-Body_5.0_CE,,,1.0,,124,,5.0,
1,Z1005788,Z1005788-SE-6-ABD_WITH,,,1.0,,81,,5.0,
2,Z1015067,Z1015067-SE-3-ABD_PELVIS_WITH,,,1.0,,80,,5.0,
3,Z1010426,Z1010426-SE-2-Standard,,,1.0,,50,,5.0,
4,Z1009393,Z1009393-SE-2-Axial_Std_5.0,,,1.0,,60,,5.0,
5,Z1013953,Z1013953-SE-4-Axial_3.0_Axial,,,1.0,,86,,3.0,
6,Z1015174,Z1015174-SE-2-Axial_Stnd,,,1.0,,58,,5.0,
7,Z1002072,Z1002072-SE-4-Standard_Axial,,,1.0,,66,,5.0,
8,Z1017745,Z1017745-SE-2-Body_5.0_CE,,,1.0,,59,,5.0,
9,Z1013996,Z1013996-SE-4-Std_Pediatric_5.0_CE,,,1.0,,71,,5.0,


In [21]:
# Subjects that had an axial but no sagittal in df 1: adopt the df 2 pair only when it
# has an overlap above 0.7. `i` counts the adopted pairs.
copied_columns = ('Axial', 'Sagittal', 'Overlap', 'MissingScore',
                  'PairValidity', 'AxSlices', 'SagSlices')
i = 0
for ind, row in df_2_dfnosag1.iterrows():
    if row['Overlap'] and row['Overlap'] > 0.7:
        print('Found Sagittal in df 2, missing in df1: ', i)
        i += 1
        # Rows of df_valid_lq that belong to this subject.
        same_subject = df_valid_lq['ID'] == row['ID']
        for column in copied_columns:
            df_valid_lq.loc[same_subject, column] = row[column]

display(df_valid_lq[df_valid_lq['ID'].isin(df_ax_nosag_1['ID'].values)].head(10))

Found Sagittal in df 2, missing in df1:  0
Found Sagittal in df 2, missing in df1:  1
Found Sagittal in df 2, missing in df1:  2
Found Sagittal in df 2, missing in df1:  3
Found Sagittal in df 2, missing in df1:  4
Found Sagittal in df 2, missing in df1:  5
Found Sagittal in df 2, missing in df1:  6
Found Sagittal in df 2, missing in df1:  7
Found Sagittal in df 2, missing in df1:  8
Found Sagittal in df 2, missing in df1:  9
Found Sagittal in df 2, missing in df1:  10
Found Sagittal in df 2, missing in df1:  11
Found Sagittal in df 2, missing in df1:  12
Found Sagittal in df 2, missing in df1:  13
Found Sagittal in df 2, missing in df1:  14
Found Sagittal in df 2, missing in df1:  15
Found Sagittal in df 2, missing in df1:  16
Found Sagittal in df 2, missing in df1:  17
Found Sagittal in df 2, missing in df1:  18
Found Sagittal in df 2, missing in df1:  19
Found Sagittal in df 2, missing in df1:  20
Found Sagittal in df 2, missing in df1:  21
Found Sagittal in df 2, missing in df1:  2

Unnamed: 0,ID,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices
0,Z1000273,Z1000273-SE-2-Axial_Std_5.0,,,1.013,,78,
1,Z1000800,Z1000800-SE-4-Body_5.0_CE,,,2.345,,124,
5,Z1002072,Z1002072-SE-4-Standard_Axial,,,1.015,,66,
6,Z1003148,Z1003148-SE-4-Axial_Body_5.0,,,1.011,,92,
10,Z1004562,Z1004562-SE-4-Axial_Body_5.0,,,1.012,,85,
11,Z1005788,Z1005788-SE-2-ABD._W_O,,,1.025,,41,
14,Z1009243,Z1009243-SE-2-Axial_Std_5.0_CE,,,1.013,,76,
15,Z1009248,Z1009248-SE-4-Axial_Std_5.0_CE,,,1.021,,96,
16,Z1009393,Z1009393-SE-2-Axial_Std_5.0,,,1.017,,60,
18,Z1010426,Z1010426-SE-2-Standard,,,1.02,,50,


In [23]:
# Number of rows currently in df_valid_lq.
print(df_valid_lq.shape[0])

903


In [25]:
# Take a look at newly added sagittals:
# NOTE: `&` binds tighter than `>` in Python, so the previous unparenthesised
# `mask & df['Overlap'] > 0.7` was evaluated as `(mask & df['Overlap']) > 0.7`
# and never applied the 0.7 threshold to Overlap itself. Each condition is
# now built as its own boolean mask and the two are combined afterwards.
in_nosag_1 = df_valid_lq['ID'].isin(df_ax_nosag_1['ID'].values)
overlap_ok = df_valid_lq['Overlap'] > 0.7  # NaN overlaps compare False and are excluded
df_nsags = df_valid_lq[in_nosag_1 & overlap_ok]
display(df_nsags.sort_values(by=['AxSlices'],ascending=[False]).head(5))

Unnamed: 0,ID,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices
1538,Z689656,Z689656-SE-4-Vol._Body,Z689656-SE-8-Sagittal_Bone,1.0,1.0,2.0,1501,86
525,Z1368200,Z1368200-SE-6-Vol_Body_Vol._0.5,Z1368200-SE-9-Sagittal_Bone_Sagittal_3.0,0.987,1.0,1.987,1501,45
333,Z1231814,Z1231814-SE-6-Vol_Body_Vol._0.5,Z1231814-SE-13-Sagittal_Bone_Sagittal_3.0,1.0,1.0,2.0,1500,48
870,Z387639,Z387639-SE-5-Vol_Std_0.5_CE,Z387639-SE-9-Sagittal_Bone_3.0_CE,0.99,1.0,1.99,1317,45
1321,Z592592,Z592592-SE-5-Stnd_Pediatric_Vol_Stnd,Z592592-SE-8-Bone_Sagittal_Pediatric_Sagittal_...,1.0,0.899,1.899,1198,77


In [43]:
# Inspect the candidate axial/sagittal pairs for a single subject ('Z1368200').
# NOTE(review): the pair table in the saved output appears to be displayed by
# filter_finalpairs itself (this cell only assigns); the contents of `ret`
# are not visible from here -- confirm against the helper's definition.
ret = filter_finalpairs('Z1368200',df_i_axials,df_i_sags,subjects)

Unnamed: 0,Axial,Sagittal,Overlap,MissingScore,PairValidity,AxSlices,SagSlices,AxThick,SagThick
0,Z1368200-SE-6-Vol_Body_Vol._0.5,Z1368200-SE-9-Sagittal_Bone_Sagittal_3.0,0.987,1,1.987,1501,45,0.5,3
1,Z1368200-SE-5-Axial_Body_5.0,Z1368200-SE-9-Sagittal_Bone_Sagittal_3.0,0.877,1,1.877,81,45,5.0,3


In [45]:
# Fetch the sagittal series and the two axial candidates of subject 'Z1368200'
# and print each series id next to its z_range_pair for a side-by-side check.
series_ids = (
    'Z1368200-SE-9-Sagittal_Bone_Sagittal_3.0',
    'Z1368200-SE-6-Vol_Body_Vol._0.5',
    'Z1368200-SE-5-Axial_Body_5.0',
)
s = [get_subject_series('Z1368200', series_id, subjects) for series_id in series_ids]
s0, s1, s2 = s  # keep the individual handles available to later cells

for sd in s:
    print(sd.id_, ' ', sd.z_range_pair)

Z1368200-SE-9-Sagittal_Bone_Sagittal_3.0   (1802.82056, 1347.42056)
Z1368200-SE-6-Vol_Body_Vol._0.5   (1800.5, 1350.5)
Z1368200-SE-5-Axial_Body_5.0   (1800.5, 1400.5)


In [50]:
# How many rows have more than 500 axial slices?
many_ax_slices = df_valid_lq['AxSlices'] > 500
len(df_valid_lq.loc[many_ax_slices])

9

In [62]:
# Keep only the series objects whose id is still referenced by df_valid_lq.
# The id columns are materialised as sets once, so every membership test is a
# hash lookup rather than a scan of the whole column array per series.
# (Missing values in the columns never match a series id.)
valid_axial_ids = set(df_valid_lq['Axial'].values)
valid_sagittal_ids = set(df_valid_lq['Sagittal'].values)
axial_series = [series for series in axial_series if series.id_ in valid_axial_ids]
sagittal_series = [series for series in sagittal_series if series.id_ in valid_sagittal_ids]

In [66]:
# Keep only the subjects that have a row in df_valid_lq. The ID column is
# turned into a set once so each lookup is O(1) instead of scanning the
# column array for every subject.
valid_subject_ids = set(df_valid_lq['ID'].values)
subjects = [subject for subject in subjects if subject.id_ in valid_subject_ids]

In [67]:
# Sanity check: number of subjects left after filtering on df_valid_lq['ID'].
# In the saved run this equals the number of rows printed for df_valid_lq (903).
len(subjects)

903

In [68]:
# Persist the lq artefacts to the output directory, in this order:
# the valid-pairs DataFrame, the axial series, the sagittal series, the subjects.
lq_artifacts = (
    ('df_valid_lq.pkl', df_valid_lq),
    ('axials_lq_series.pkl', axial_series),
    ('sags_lq_series.pkl', sagittal_series),
    ('subjects_lq.pkl', subjects),
)
for pkl_name, artifact in lq_artifacts:
    # save_object overwrites any existing file with the same name.
    save_object(artifact, os.path.join(output, pkl_name))

## Combine lq and hq valid df

In [61]:
# Load dfs needed and tag every row with the cohort it came from ('lq' / 'hq').
df_lq = load_object(os.path.join(output,'df_valid_lq.pkl'))
df_lq['QC'] = 'lq'
df_hq = load_object(os.path.join(output,'df_validpairs.pkl'))
df_hq['QC'] = 'hq'
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported equivalent. As with append, the original row
# indices of both frames are kept (hq rows first, then lq rows).
df_final = pd.concat([df_hq, df_lq])
print('Final no of patients: ', len(df_final))

Final no of patients:  2052


In [69]:
# Load lists needed (hq first, then lq, for subjects, axials and sagittals):
subjects_hq, subjects_lq = (
    load_object(os.path.join(output, pkl_name))
    for pkl_name in ('valid_subjects.pkl', 'subjects_lq.pkl')
)
axials_hq, axials_lq = (
    load_object(os.path.join(output, pkl_name))
    for pkl_name in ('valid_axials.pkl', 'axials_lq_series.pkl')
)
sagittals_hq, sagittals_lq = (
    load_object(os.path.join(output, pkl_name))
    for pkl_name in ('valid_sags.pkl', 'sags_lq_series.pkl')
)

# Concatenate (hq entries come first, mirroring the row order of df_final)
subjects_final = subjects_hq + subjects_lq
axials_final = axials_hq + axials_lq
sagittals_final = sagittals_hq + sagittals_lq

# Check length
# NOTE(review): in the saved run sagittals_final is shorter than the other
# collections; presumably because some lq rows have no sagittal -- confirm.
length_report = (
    ('Length of df_final ', df_final),
    ('Length of subjects_final ', subjects_final),
    ('Length of axials_final ', axials_final),
    ('Length of sagittals_final ', sagittals_final),
)
for message, collection in length_report:
    print(message, len(collection))

Length of df_final  2052
Length of subjects_final  2052
Length of axials_final  2052
Length of sagittals_final  1256


In [70]:
# Persist the combined (hq + lq) DataFrame and lists to the output directory.
final_artifacts = (
    ('df_final.pkl', df_final),
    ('subjects_final.pkl', subjects_final),
    ('axials_final.pkl', axials_final),
    ('sagittals_final.pkl', sagittals_final),
)
for pkl_name, artifact in final_artifacts:
    # save_object overwrites any existing file with the same name.
    save_object(artifact, os.path.join(output, pkl_name))