Subseries (#61)
* Subseries implementation, step 1
* Fixes for issues #57 and #53
* Updated README
* Fixed pytest error
skim2257 committed Oct 14, 2022
1 parent 4adf148 commit f8f3606
Showing 7 changed files with 73 additions and 48 deletions.
README.md (4 additions, 3 deletions)
@@ -6,9 +6,10 @@
 ![GitHub forks](https://img.shields.io/github/forks/bhklab/med-imagetools?style=social)
 [![Documentation Status](https://readthedocs.org/projects/med-imagetools/badge/?version=documentation)](https://med-imagetools.readthedocs.io/en/documentation/?badge=documentation)
 
-### Latest Updates (v1.0.2) - Oct 12th, 2022
-Documentation is now available at: https://med-imagetools.readthedocs.io
+## Latest Updates (v1.0.3) - Oct 13th, 2022
+* Documentation is now available at: https://med-imagetools.readthedocs.io
+* Fixed relative path handling (issue #53) and the extra patient folder bug (issue #57)
+* Subseries crawl feature added, but not yet integrated into AutoPipeline; user data will be collected with prototypes first.
 
 #### Med-ImageTools core features
 * AutoPipeline CLI
imgtools/autopipeline.py (29 additions, 22 deletions)
@@ -61,7 +61,8 @@ def __init__(self,
                  is_nnunet_inference=False,
                  dataset_json_path="",
                  continue_processing=False,
-                 dry_run=False):
+                 dry_run=False,
+                 verbose=False):
"""Initialize the pipeline.
        Parameters
@@ -114,6 +115,7 @@ def __init__(self,

         self.continue_processing = continue_processing
         self.dry_run = dry_run
+        self.v = verbose
 
         if dry_run:
             is_nnunet = False
@@ -127,13 +129,20 @@
         # pipeline configuration
         if not os.path.isabs(input_directory):
             input_directory = pathlib.Path(os.getcwd(), input_directory).as_posix()
+        else:
+            input_directory = pathlib.Path(input_directory).as_posix()  # consistent parsing; ensures the last child directory doesn't end with a slash
 
         if not os.path.isabs(output_directory):
             output_directory = pathlib.Path(os.getcwd(), output_directory).as_posix()
+        else:
+            output_directory = pathlib.Path(output_directory).as_posix()  # consistent parsing; ensures the last child directory doesn't end with a slash
 
         if not os.path.exists(output_directory):
             # raise FileNotFoundError(f"Output directory {output_directory} does not exist")
             os.makedirs(output_directory)
+        if not os.path.exists(input_directory):
+            raise FileNotFoundError(f"Input directory {input_directory} does not exist")
 
         self.input_directory = pathlib.Path(input_directory).as_posix()
         self.output_directory = pathlib.Path(output_directory).as_posix()
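The two new `else` branches route even absolute paths through `pathlib`, so a trailing separator never leaks into the stored directory name. A minimal sketch of the `as_posix()` behavior being relied on (paths are made-up placeholders):

```python
from pathlib import Path, PureWindowsPath

# A trailing slash is dropped, so the last directory name survives a
# later os.path.split() instead of coming back as an empty string.
assert Path("/data/dicoms/").as_posix() == "/data/dicoms"

# Backslash separators are normalized to forward slashes as well.
assert PureWindowsPath(r"C:\data\dicoms").as_posix() == "C:/data/dicoms"
```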

Expand Down Expand Up @@ -399,7 +408,8 @@ def process_one_subject(self, subject_id):
             mult_conn = colname.split("_")[-1].isnumeric()
             num = colname.split("_")[-1]
 
-            # print(output_stream) #could include for verbose
+            if self.v:
+                print("output_stream:", output_stream)

             if read_results[i] is None:
                 print("The subject id: {} has no {}".format(subject_id, colname))
@@ -413,7 +423,8 @@
                 extractor.SetIndex([0, 0, 0, 0])
 
                 image = extractor.Execute(image)
-                # print(image.GetSize()) #could include with verbose
+                if self.v:
+                    print("image.GetSize():", image.GetSize())
                 try:
                     image = self.resample(image)
                 except Exception as e:
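For context, the code above collapses an accidental 4D volume down to 3D before resampling. A standalone sketch of that SimpleITK pattern, using a synthetic placeholder image:

```python
import SimpleITK as sitk

# Synthetic single-frame 4D image standing in for a real DICOM series.
img4d = sitk.Image([64, 64, 32, 1], sitk.sitkInt16)

size = list(img4d.GetSize())
size[3] = 0  # a zero extent tells ExtractImageFilter to drop that axis
extractor = sitk.ExtractImageFilter()
extractor.SetSize(size)
extractor.SetIndex([0, 0, 0, 0])  # start at the first (and only) frame

img3d = extractor.Execute(img4d)
assert img3d.GetDimension() == 3
```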
@@ -464,10 +475,7 @@ def process_one_subject(self, subject_id):
                 doses = read_results[i]
 
                 # save output
-                if not mult_conn:
-                    self.output(subject_id, doses, output_stream)
-                else:
-                    self.output(f"{subject_id}_{num}", doses, output_stream)
+                self.output(subject_id, doses, output_stream)
                 metadata[f"size_{output_stream}"] = str(doses.GetSize())
                 metadata[f"metadata_{colname}"] = [read_results[i].get_metadata()]

@@ -515,10 +523,10 @@ def process_one_subject(self, subject_id):
                     if name not in self.existing_roi_names.keys():
                         self.existing_roi_names[name] = len(self.existing_roi_names)
                 mask.existing_roi_names = self.existing_roi_names
-                # print(self.existing_roi_names,"alskdfj")
 
                 # save output
-                # print(mask.GetSize()) #could include with verbose
+                if self.v:
+                    print("mask.GetSize():", mask.GetSize())
                 mask_arr = np.transpose(sitk.GetArrayFromImage(mask))
 
                 if self.is_nnunet:
@@ -534,18 +542,18 @@ def process_one_subject(self, subject_id):
                 if len(mask_arr.shape) == 3:
                     mask_arr = mask_arr.reshape(1, mask_arr.shape[0], mask_arr.shape[1], mask_arr.shape[2])
 
-                # print(mask_arr.shape) #could include with verbose
+                if self.v:
+                    print(mask_arr.shape)
 
                 roi_names_list = list(mask.roi_names.keys())
                 for i in range(mask_arr.shape[0]):
                     new_mask = sitk.GetImageFromArray(np.transpose(mask_arr[i]))
                     new_mask.CopyInformation(mask)
                     new_mask = Segmentation(new_mask)
                     mask_to_process = new_mask
-                    if not mult_conn:
-                        # self.output(roi_names_list[i], mask_to_process, output_stream)
-                        self.output(subject_id, mask_to_process, output_stream, True, roi_names_list[i])
-                    else:
-                        self.output(f"{subject_id}_{num}", mask_to_process, output_stream, True, roi_names_list[i])
 
+                    # output
+                    self.output(subject_id, mask_to_process, output_stream, True, roi_names_list[i])

                 if hasattr(structure_set, "metadata") and structure_set.metadata is not None:
                     metadata.update(structure_set.metadata)
@@ -561,10 +569,8 @@ def process_one_subject(self, subject_id):
                     Warning("No CT image present. Returning PT/PET image without resampling.")
                     pet = read_results[i]
 
-                if not mult_conn:
-                    self.output(subject_id, pet, output_stream)
-                else:
-                    self.output(f"{subject_id}_{num}", pet, output_stream)
+                # output
+                self.output(subject_id, pet, output_stream)
                 metadata[f"size_{output_stream}"] = str(pet.GetSize())
                 metadata[f"metadata_{colname}"] = [read_results[i].get_metadata()]

@@ -665,7 +671,7 @@ def run(self):
"""Execute the pipeline, possibly in parallel.
"""
         # Joblib prints progress to stdout if verbose > 50
-        verbose = 51 if self.show_progress else 0
+        verbose = 51 if self.v or self.show_progress else 0

         subject_ids = self._get_loader_subject_ids()
         patient_ids = []
@@ -733,7 +739,8 @@ def main():
                             is_nnunet_inference=args.is_nnunet_inference,
                             dataset_json_path=args.dataset_json_path,
                             continue_processing=args.continue_processing,
-                            dry_run=args.dry_run)
+                            dry_run=args.dry_run,
+                            verbose=args.verbose)
     if not args.dry_run:
         print(f'starting AutoPipeline...')
         pipeline.run()
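End to end, the new flag threads from `--verbose` on the command line into the size/stream prints above. A hedged usage sketch (the paths are placeholders; the positional arguments and `spacing` mirror the test suite below):

```python
from imgtools.autopipeline import AutoPipeline

pipeline = AutoPipeline("data/dicoms", "data/processed", "CT,RTSTRUCT",
                        spacing=(5, 5, 5),
                        verbose=True)  # print output streams and image sizes
pipeline.run()
```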
imgtools/modules/datagraph.py (0 additions, 4 deletions)
@@ -337,7 +337,6 @@ def parser(self, query_string: str) -> pd.DataFrame:
         else:
             raise ValueError("Please enter the correct query")
 
-        print(self.mods, final_df)
         final_df.reset_index(drop=True, inplace=True)
         final_df["index_chng"] = final_df.index.astype(str) + "_" + final_df["patient_ID"].astype(str)
         final_df.set_index("index_chng", inplace=True)
@@ -398,8 +397,6 @@ def graph_query(self,
         col_ids = [cols for cols in list(final_df.columns)[1:] if bad != cols.split("_")[1]]
         final_df = final_df[[*list(final_df.columns)[:1], *col_ids]]
 
-        final_df.to_csv("final_df.csv")
 
         if return_components:
             return self.final_dict
         else:
@@ -529,7 +526,6 @@ def _get_df(self,
         for j in range(len(CT_series)):
             # Check that a component has at least as many nodes as the query; if not, remove it
             mods_present = set([items.split("_")[1] for items in save_folder_comp[j].keys() if items.split("_")[0] == "folder"])
-            print('\nmods_present', mods_present, mods_wanted)
             # Check whether all the queried modalities are present in the component
             if mods_wanted.issubset(mods_present) == True:
                 remove_index.append(j)
imgtools/utils/args.py (3 additions, 0 deletions)
@@ -64,6 +64,9 @@ def parser():
     parser.add_argument("--dry_run", default=False, action="store_true",
                         help="Make a dry run of the pipeline, only producing the edge table and dataset.csv.")
 
+    parser.add_argument("--verbose", default=False, action="store_true",
+                        help="Verbose output flag.")
 
     # parser.add_argument("--custom_train_test_split_path", type=str,
     #                     help="Path to the YAML file defining the custom train-test-split.")

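A self-contained sketch of the new flag's semantics, using plain argparse rather than the project's full parser:

```python
import argparse

p = argparse.ArgumentParser()
p.add_argument("--verbose", default=False, action="store_true",
               help="Verbose output flag.")

assert p.parse_args([]).verbose is False            # omitted -> False
assert p.parse_args(["--verbose"]).verbose is True  # present -> True
```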
imgtools/utils/crawl.py (35 additions, 17 deletions)
@@ -59,23 +59,33 @@ def crawl_one(folder):
             except:
                 series_description = ""
 
+            try:
+                subseries = str(meta.AcquisitionNumber)
+            except:
+                subseries = "default"
 
             if patient not in database:
                 database[patient] = {}
             if study not in database[patient]:
                 database[patient][study] = {'description': study_description}
             if series not in database[patient][study]:
                 parent, _ = os.path.split(folder)
                 rel_path = pathlib.Path(os.path.split(parent)[1], os.path.relpath(path, parent)).as_posix()
-                database[patient][study][series] = {'instances': [],
-                                                    'instance_uid': instance,
-                                                    'modality': meta.Modality,
-                                                    'description': series_description,
-                                                    'reference_ct': reference_ct,
-                                                    'reference_rs': reference_rs,
-                                                    'reference_pl': reference_pl,
-                                                    'reference_frame': reference_frame,
-                                                    'folder': rel_path}
-            database[patient][study][series]['instances'].append(instance)
+                database[patient][study][series] = {'description': series_description}
+            if subseries not in database[patient][study][series]:
+                database[patient][study][series][subseries] = {'instances': [],
+                                                               'instance_uid': instance,
+                                                               'modality': meta.Modality,
+                                                               'reference_ct': reference_ct,
+                                                               'reference_rs': reference_rs,
+                                                               'reference_pl': reference_pl,
+                                                               'reference_frame': reference_frame,
+                                                               'folder': rel_path}
+            database[patient][study][series][subseries]['instances'].append(instance)
         except:
             pass
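The new `subseries` level is keyed on `str(AcquisitionNumber)`, falling back to `"default"` when the tag is absent. A sketch of the nested structure `crawl_one` now builds, with made-up UIDs and paths:

```python
database = {
    "PatientA": {
        "study-uid-1": {
            "description": "HEAD CT",        # study_description
            "series-uid-1": {
                "description": "Axial 2mm",  # series_description
                "1": {                       # subseries = str(meta.AcquisitionNumber)
                    "instances": ["sop-uid-1", "sop-uid-2"],
                    "instance_uid": "sop-uid-1",
                    "modality": "CT",
                    "reference_ct": "",
                    "reference_rs": "",
                    "reference_pl": "",
                    "reference_frame": "frame-uid-1",
                    "folder": "dicoms/PatientA/CT",
                },
            },
        },
    },
}
```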

@@ -86,13 +96,21 @@ def to_df(database_dict):
     for pat in database_dict:
         for study in database_dict[pat]:
             for series in database_dict[pat][study]:
-                if series != 'description':
-                    columns = ['patient_ID', 'study', 'study_description', 'series', 'series_description', 'modality', 'instances', 'instance_uid', 'reference_ct', 'reference_rs', 'reference_pl', 'reference_frame', 'folder']
-                    values = [pat, study, database_dict[pat][study]['description'], series, database_dict[pat][study][series]['description'], database_dict[pat][study][series]['modality'], len(database_dict[pat][study][series]['instances']),
-                              database_dict[pat][study][series]['instance_uid'], database_dict[pat][study][series]['reference_ct'], database_dict[pat][study][series]['reference_rs'], database_dict[pat][study][series]['reference_pl'],
-                              database_dict[pat][study][series]['reference_frame'], database_dict[pat][study][series]['folder']]
-                    df_add = pd.DataFrame([values], columns=columns)
-                    df = pd.concat([df, df_add], ignore_index=True)
+                if series != 'description':  # skip the description key
+                    for subseries in database_dict[pat][study][series]:
+                        if subseries != 'description':  # skip the description key
+                            columns = ['patient_ID', 'study', 'study_description',
+                                       'series', 'series_description', 'subseries', 'modality',
+                                       'instances', 'instance_uid',
+                                       'reference_ct', 'reference_rs', 'reference_pl', 'reference_frame', 'folder']
+                            values = [pat, study, database_dict[pat][study]['description'],
+                                      series, database_dict[pat][study][series]['description'],
+                                      subseries, database_dict[pat][study][series][subseries]['modality'],
+                                      len(database_dict[pat][study][series][subseries]['instances']), database_dict[pat][study][series][subseries]['instance_uid'],
+                                      database_dict[pat][study][series][subseries]['reference_ct'], database_dict[pat][study][series][subseries]['reference_rs'],
+                                      database_dict[pat][study][series][subseries]['reference_pl'], database_dict[pat][study][series][subseries]['reference_frame'], database_dict[pat][study][series][subseries]['folder']]
+                            df_add = pd.DataFrame([values], columns=columns)
+                            df = pd.concat([df, df_add], ignore_index=True)
     return df

 def crawl(top,
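A hedged sketch of producing the flattened index from a single crawl, one row per subseries (the input path is a placeholder):

```python
from imgtools.utils.crawl import crawl_one, to_df

db = crawl_one("data/dicoms/PatientA")  # nested patient/study/series/subseries dict
df = to_df(db)
print(df[["patient_ID", "series", "subseries", "modality", "instances"]])
```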
setup.py (1 addition, 1 deletion)
@@ -8,7 +8,7 @@

 setup(
     name="med-imagetools",
-    version="1.0.2",
+    version="1.0.3",
     author="Sejin Kim, Michal Kazmierski, Kevin Qu, Vishwesh Ramanathan, Benjamin Haibe-Kains",
     author_email="benjamin.haibe.kains@utoronto.ca",
     description="Transparent and reproducible image processing pipelines in Python.",
tests/test_components.py (1 addition, 1 deletion)
@@ -122,7 +122,7 @@ def test_pipeline(self, modalities):
         n_jobs = 2
         output_path_mod = pathlib.Path(self.output_path, str("temp_folder_" + ("_").join(modalities.split(",")))).as_posix()
         # Initialize pipeline for the current setting
-        pipeline = AutoPipeline(self.input_path, output_path_mod, modalities, n_jobs=n_jobs, spacing=(5,5,5))
+        pipeline = AutoPipeline(self.input_path, output_path_mod, modalities, n_jobs=n_jobs, spacing=(5,5,5), overwrite=True)
         # Run for different modalities
         comp_path = pathlib.Path(output_path_mod, "dataset.csv").as_posix()
         pipeline.run()
