-
Notifications
You must be signed in to change notification settings - Fork 3
/
AHItoDICOM.py
352 lines (307 loc) · 17.1 KB
/
AHItoDICOM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
"""
AHItoDICOM Module : This class contains the logic to fetch AHI ImageSet metadata and image frames and to rebuild them as DICOM objects.
SPDX-License-Identifier: Apache-2.0
"""
from .AHIDataDICOMizer import *
from .AHIFrameFetcher import *
from .AHIClientFactory import *
import json
import logging
import collections
from threading import Thread
from time import sleep
from PIL import Image
import gzip
import tempfile
import os
import shutil
import multiprocessing as mp
class AHItoDICOM:
    """
    Helper class that exports AHI ImageSets as lists of pydicom DICOM objects.

    The ImageSet metadata is read through the client built by AHIClientFactory,
    the image frames are fetched by a pool of AHIFrameFetcher workers and the
    DICOM datasets are built by a pool of AHIDataDICOMizer workers. A background
    thread (AssignDICOMizeJob) moves frames from the fetchers to the DICOMizers.
    """

    AHIclient = None
    # Worker pools. The actual lists are created per instance in __init__ so that
    # two AHItoDICOM objects never share (and clear) each other's workers.
    frameFetcherThreadList = None
    frameDICOMizerThreadList = None
    fetcherProcessCount = None
    DICOMizerProcessCount = None
    # Work queues (collections.deque), re-created for every DICOMizeImageSet call.
    ImageFrames = None
    frameToDICOMize = None
    FrameDICOMizerPoolManager = None
    DICOMizedFrames = None
    CountToDICOMize = 0
    still_processing = False
    aws_access_key = None
    aws_secret_key = None
    AHI_endpoint = None
    logger = None

    def __init__(self, aws_access_key: str = None, aws_secret_key: str = None, AHI_endpoint: str = None,
                 fetcher_process_count: int = None, dicomizer_process_count: int = None) -> None:
        """
        Helper class constructor.

        :param aws_access_key: Optional IAM user access key.
        :param aws_secret_key: Optional IAM user secret key.
        :param AHI_endpoint: Optional AHI endpoint URL. Only useful to AWS employees.
        :param fetcher_process_count: Optional number of processes to use for fetching frames. Will default to CPU count x 8.
        :param dicomizer_process_count: Optional number of processes to use for DICOMizing frames. Will default to CPU count.
        """
        self.logger = logging.getLogger(__name__)
        self.ImageFrames = collections.deque()
        self.frameToDICOMize = collections.deque()
        self.DICOMizedFrames = collections.deque()
        self.frameFetcherThreadList = []
        self.frameDICOMizerThreadList = []
        self.aws_access_key = aws_access_key
        self.aws_secret_key = aws_secret_key
        self.AHI_endpoint = AHI_endpoint
        # os.cpu_count() may return None when the count cannot be determined.
        cpu_count = os.cpu_count() or 1
        if fetcher_process_count is None:
            self.fetcherProcessCount = cpu_count * 8
        else:
            self.fetcherProcessCount = fetcher_process_count
        if dicomizer_process_count is None:
            self.DICOMizerProcessCount = cpu_count
        else:
            self.DICOMizerProcessCount = dicomizer_process_count
        self.logger.debug(f"[{__name__}] - Fetcher process count : {self.fetcherProcessCount} , DICOMizer process count : {self.DICOMizerProcessCount}")
        #mp.set_start_method('fork')

    def _searchImageSetIds(self, datastore_id: str, study_instance_uid: str) -> list:
        """
        Return the ImageSetIds of every ImageSet matching the StudyInstanceUID.

        Follows the nextToken returned by search_image_sets until all the result
        pages have been read.
        """
        search_criteria = {
            'filters': [
                {
                    'values': [
                        {
                            'DICOMStudyInstanceUID': study_instance_uid
                        }
                    ],
                    'operator': 'EQUAL'
                }
            ]
        }
        client = AHIClientFactory(self.aws_access_key, self.aws_secret_key, self.AHI_endpoint)
        image_set_ids = []
        next_token = None
        while True:
            request = {"datastoreId": datastore_id, "searchCriteria": search_criteria}
            # nextToken must only be sent when a previous page returned one.
            if next_token:
                request["nextToken"] = next_token
            search_result = client.search_image_sets(**request)
            for imageset in search_result["imageSetsMetadataSummaries"]:
                image_set_ids.append(imageset["imageSetId"])
            next_token = search_result.get("nextToken")
            if not next_token:
                return image_set_ids

    def DICOMizeByStudyInstanceUID(self, datastore_id: str = None, study_instance_uid: str = None, header_only: bool = False):
        """
        DICOMizeByStudyInstanceUID(datastore_id : str = None , study_instance_uid : str = None).

        :param datastore_id: The datastoreId containing the DICOM Study.
        :param study_instance_uid: The StudyInstanceUID (0020,000d) of the Study to be DICOMized from AHI.
        :param header_only: When True the image frames are not fetched; instances are DICOMized with PixelData left to None.
        :return: A list of pydicom DICOM objects.
        """
        instances = []
        for current_imageset in self._searchImageSetIds(datastore_id, study_instance_uid):
            self.logger.debug(f"[{__name__}] - Exporting {current_imageset} instances in memory.")
            exported = self.DICOMizeImageSet(datastore_id=datastore_id, image_set_id=current_imageset, header_only=header_only)
            # DICOMizeImageSet returns None when the ImageSet could not be read; skip it
            # instead of failing the whole study export.
            if exported is not None:
                instances += exported
        return instances

    def DICOMizeImageSet(self, datastore_id: str = None, imageset_id: str = None, image_set_id: str = None, header_only=False):
        """
        DICOMizeImageSet(datastore_id : str = None , imageset_id : str = None).

        :param datastore_id: The datastoreId containing the DICOM Study.
        :param imageset_id: The ImageSetID of the data to be DICOMized from AHI.
        :param image_set_id: Alias of imageset_id, only used when imageset_id is None.
        :param header_only: When True the image frames are not fetched; instances are DICOMized with PixelData left to None.
        :return: A list of pydicom DICOM objects sorted by InstanceNumber, or None if the ImageSet metadata could not be read.
        """
        #this is to prevent breaking changes in imageset_id parameter name.
        if image_set_id is not None and imageset_id is None:
            imageset_id = image_set_id
        self.ImageFrames = collections.deque()
        self.frameToDICOMize = collections.deque()
        self.DICOMizedFrames = collections.deque()
        client = AHIClientFactory(self.aws_access_key, self.aws_secret_key, self.AHI_endpoint)
        AHI_metadata = self.getMetadata(datastore_id, imageset_id, client)
        if AHI_metadata is None:
            self.logger.error(f"[{__name__}] - No metadata found for datastore_id : {datastore_id} , imageset_id : {imageset_id}")
            return None
        # Resolve the series before spawning any worker so that a failure here
        # does not leave worker processes behind.
        series_list = self.getSeriesList(AHI_metadata, imageset_id)
        if not series_list:
            self.logger.error(f"[{__name__}] - No series found for datastore_id : {datastore_id} , imageset_id : {imageset_id}")
            return None
        series = series_list[0]
        self.still_processing = True
        self.FrameDICOMizerPoolManager = Thread(target=self.AssignDICOMizeJob)
        #threads init for Frame fetching and DICOM encapsulation
        self._initFetchAndDICOMizeProcesses(AHI_metadata=AHI_metadata)
        # NOTE: extendleft() reverses the sorted order; this only affects the
        # processing order, the result is re-sorted before being returned.
        self.ImageFrames.extendleft(self.getImageFrames(datastore_id, imageset_id, AHI_metadata, series["SeriesInstanceUID"]))
        instanceCount = len(self.ImageFrames)
        self.logger.debug(f"[{__name__}] - Importing {instanceCount} instances in memory.")
        self.CountToDICOMize = instanceCount
        self.FrameDICOMizerPoolManager.start()
        fetchers = self.frameFetcherThreadList
        if not header_only:
            #Assigning jobs to the Frame fetching thread pool, round robin.
            threadId = 0
            while len(self.ImageFrames) > 0:
                fetchers[threadId].AddFetchJob(self.ImageFrames.popleft())
                threadId = (threadId + 1) % len(fetchers)
            # NOTE(review): this polls until every instance came back from a fetcher;
            # it never ends if a fetcher drops a job - confirm AHIFrameFetcher always answers.
            instanceFetchedCount = 0
            while instanceFetchedCount < instanceCount:
                self.logger.debug(f"Done {instanceFetchedCount}/{instanceCount}")
                for fetcher in fetchers:
                    entry = fetcher.getFramesFetched()
                    if entry is not None:
                        instanceFetchedCount += 1
                        self.frameToDICOMize.append(entry)
                sleep(0.01)
            self.logger.debug("All frames Fetched and submitted to the DICOMizer queue")
        else:
            # Header only: hand the instances over without fetching any pixel data.
            while len(self.ImageFrames) > 0:
                self.frameToDICOMize.append(self.ImageFrames.popleft())
        for x, fetcher in enumerate(fetchers):
            self.logger.debug(f"[{__name__}] - Disposing frame fetcher thread # {x}")
            fetcher.Dispose()
            self.logger.debug(f"[{__name__}] - frame fetcher thread # {x} disposed.")
        # Wait on the assigner thread itself rather than on the flag alone, so an
        # unexpected failure in that thread cannot block this call forever.
        while self.FrameDICOMizerPoolManager.is_alive():
            self.logger.debug(f"[{__name__}] - Still processing DICOMizing...")
            self.FrameDICOMizerPoolManager.join(timeout=0.1)
        self.still_processing = False
        returnlist = list(self.DICOMizedFrames)
        returnlist.sort(key=self.getInstanceNumberInDICOM)
        return returnlist

    def AssignDICOMizeJob(self):
        """
        Dispatch the frames waiting in frameToDICOMize to the DICOMizer workers.

        Frames are assigned round robin across all the DICOMizer workers and the
        completed datasets are collected into DICOMizedFrames, until CountToDICOMize
        datasets have been collected. The workers are then disposed and
        still_processing is set to False.
        """
        self.logger.debug(f"[AssignDICOMizeJob] - DICOMizer Thread Assigner started.")
        workers = self.frameDICOMizerThreadList
        # Kept outside of the polling loop so that the rotation carries on from one
        # polling pass to the next instead of restarting at worker 0.
        next_worker = 0
        try:
            while True:
                while len(self.frameToDICOMize) > 0:
                    workers[next_worker].AddDICOMizeJob(self.frameToDICOMize.popleft())
                    next_worker = (next_worker + 1) % len(workers)
                for worker in workers:
                    while not worker.DICOMizeJobsCompleted.empty():
                        self.DICOMizedFrames.append(worker.getFramesDICOMized())
                dc = len(self.DICOMizedFrames)
                if dc >= self.CountToDICOMize:
                    self.logger.debug(f"[{__name__}] - DICOMized count : {dc}")
                    break
                sleep(0.05)
        finally:
            # Always release the workers and the flag, even if dispatching failed.
            for x, worker in enumerate(workers):
                self.logger.debug(f"[{__name__}] - Disposing DICOMizer thread # {x}")
                worker.Dispose()
                self.logger.debug(f"[{__name__}] - DICOMizer thread # {x} Disposed.")
            self.still_processing = False
        self.logger.debug(f"[AssignDICOMizeJob] - DICOMizer Thread Assigner finished.")

    def getImageFrames(self, datastoreId, imagesetId, AHI_metadata, seriesUid) -> collections.deque:
        """
        Build the list of fetch jobs for every instance of a series.

        :param datastoreId: The datastoreId containing the ImageSet.
        :param imagesetId: The ImageSetID the instances belong to.
        :param AHI_metadata: The ImageSet metadata, as returned by getMetadata().
        :param seriesUid: The SeriesInstanceUID key of the series in the metadata.
        :return: A deque of job descriptors sorted by InstanceNumber. Instances without ImageFrames, or with missing frame ID / InstanceNumber, are skipped.
        """
        instancesList = []
        series_instances = AHI_metadata["Study"]["Series"][seriesUid]["Instances"]
        for sop_instance_uid, instance in series_instances.items():
            image_frames = instance.get("ImageFrames") or []
            if len(image_frames) < 1:
                self.logger.info("Skipping the following instance because it do not contain ImageFrames: " + sop_instance_uid)
                continue
            try:
                frameIds = [imageFrame["ID"] for imageFrame in image_frames]
                InstanceNumber = instance["DICOM"]["InstanceNumber"]
            except (KeyError, TypeError) as AHIErr:
                self.logger.error(f"[{__name__}] - {AHIErr}")
                continue
            instancesList.append({"datastoreId": datastoreId, "imagesetId": imagesetId, "frameIds": frameIds, "SeriesUID": seriesUid, "SOPInstanceUID": sop_instance_uid, "InstanceNumber": InstanceNumber, "PixelData": None})
        instancesList.sort(key=self.getInstanceNumber)
        return collections.deque(instancesList)

    def getSeriesList(self, AHI_metadata, image_set_id: str):
        """
        Describe every series found in the ImageSet metadata.

        :param AHI_metadata: The ImageSet metadata, as returned by getMetadata().
        :param image_set_id: The ImageSetID, copied into each descriptor.
        :return: A list of dicts with the keys ImageSetId, SeriesNumber, Modality, SeriesDescription, SeriesInstanceUID and InstanceCount.
        """
        ###07/25/2023 - awsjpleger : this function is from a time when there could be multiple series within a single ImageSetId. Still works with new AHI metadata, but should be refactored.
        seriesList = []
        for series, series_metadata in AHI_metadata["Study"]["Series"].items():
            series_dicom = series_metadata["DICOM"]
            # SeriesDescription is a non-mandatory tag.
            SeriesDescription = series_dicom.get("SeriesDescription", "")
            try:
                instanceCount = len(series_metadata["Instances"])
            except (KeyError, TypeError):
                instanceCount = 0
            seriesList.append({"ImageSetId": image_set_id, "SeriesNumber": series_dicom["SeriesNumber"], "Modality": series_dicom["Modality"], "SeriesDescription": SeriesDescription, "SeriesInstanceUID": series, "InstanceCount": instanceCount})
        return seriesList

    def getMetadata(self, datastore_id, imageset_id, client=None):
        """
        getMetadata(datastore_id : str = None , image_set_id : str , client : str = None).

        :param datastore_id: The datastoreId containing the DICOM Study.
        :param imageset_id: The ImageSetID of the data to be DICOMized from AHI.
        :param client: Optional boto3 medical-imaging client. The function creates its own client by default.
        :return: a JSON structure corresponding to the ImageSet Metadata, or None if it could not be fetched or decoded (the error is logged).
        """
        try:
            if client is None:
                client = AHIClientFactory(self.aws_access_key, self.aws_secret_key, self.AHI_endpoint)
            AHI_study_metadata = client.get_image_set_metadata(datastoreId=datastore_id, imageSetId=imageset_id)
            # The metadata blob is a gzip-compressed JSON document.
            json_study_metadata = gzip.decompress(AHI_study_metadata["imageSetMetadataBlob"].read())
            json_study_metadata = json.loads(json_study_metadata)
            return json_study_metadata
        except Exception as AHIErr:
            self.logger.error(f"[{__name__}] - {AHIErr}")
            return None

    def getImageSetToSeriesUIDMap(self, datastore_id: str, study_instance_uid: str):
        """
        getImageSetToSeriesUIDMap(datastore_id : str = None , study_instance_uid : str).

        :param datastore_id: The datastoreId containing the DICOM Study.
        :param study_instance_uid: The StudyInstanceUID (0020,000d) of the Study to be DICOMized from AHI.
        :return: An array of Series descriptors associated to their ImageSetIDs for all the ImageSets related to the DICOM Study. ImageSets whose metadata cannot be read, or which hold no series, are skipped.
        """
        series_map = []
        for current_imageset in self._searchImageSetIds(datastore_id, study_instance_uid):
            AHI_metadata = self.getMetadata(datastore_id, current_imageset)
            if AHI_metadata is None:
                self.logger.error(f"[{__name__}] - No metadata found for datastore_id : {datastore_id} , imageset_id : {current_imageset}")
                continue
            series_list = self.getSeriesList(AHI_metadata, current_imageset)
            if series_list:
                series_map.append(series_list[0])
        return series_map

    def getInstanceNumber(self, elem):
        """Sort key: the InstanceNumber of a job descriptor built by getImageFrames(), as an int."""
        return int(elem["InstanceNumber"])

    def getInstanceNumberInDICOM(self, elem):
        """Sort key: the InstanceNumber element value of a DICOM dataset, as an int."""
        return int(elem["InstanceNumber"].value)

    def saveAsPngPIL(self, ds: "Dataset", destination: str):
        """
        saveAsPngPIL(ds : pydicom.Dataset , destination : str).
        Saves a PNG representation of the DICOM object to the specified destination.

        :param ds: The pydicom Dataset representing the DICOM object.
        :param destination: the file path where the file needs to be dumped to. the file path must include the file name and extension.
        :return: True if the file was written, False otherwise (the error is logged).
        """
        try:
            folder_path = os.path.dirname(destination)
            # dirname is '' for a bare file name and os.makedirs('') raises.
            if folder_path:
                os.makedirs(folder_path, exist_ok=True)
            import numpy as np
            image_2d = ds.pixel_array.astype(float)
            peak = image_2d.max()
            if peak > 0:
                # Negative values are clamped to 0, then the range is stretched to 0-255.
                image_2d_scaled = (np.maximum(image_2d, 0) / peak) * 255.0
            else:
                # Nothing above zero: avoid dividing by zero, the picture is all black.
                image_2d_scaled = np.zeros_like(image_2d)
            image_2d_scaled = np.uint8(image_2d_scaled)
            # MONOCHROME1 stores the minimum value as white: invert for display.
            if 'PhotometricInterpretation' in ds and ds.PhotometricInterpretation == "MONOCHROME1":
                image_2d_scaled = np.max(image_2d_scaled) - image_2d_scaled
            img = Image.fromarray(image_2d_scaled)
            img.save(destination, 'png')
        except Exception as err:
            self.logger.error(f"[{__name__}][saveAsPngPIL] - {err}")
            return False
        return True

    # def getSeries(self, datastore_id : str = None , image_set_id : str = None):
    #     AHI_metadata = self.getMetadata(datastore_id, image_set_id, self.AHIclient)
    #     seriesList = self.getSeriesList(AHI_metadata=AHI_metadata)
    #     return seriesList

    def _initFetchAndDICOMizeProcesses(self, AHI_metadata):
        """
        Spawn fresh frame fetcher and DICOMizer workers for one ImageSet export.

        :param AHI_metadata: The ImageSet metadata handed to every AHIDataDICOMizer.
        """
        # New per-instance lists: never reuse (or clear) a list another object may hold.
        self.frameFetcherThreadList = []
        self.frameDICOMizerThreadList = []
        for x in range(self.fetcherProcessCount):
            self.logger.debug("[DICOMize] - Spawning AHIFrameFetcher thread # "+str(x))
            # NOTE(review): assumes AHIFrameFetcher takes (name, access key, secret key, endpoint),
            # mirroring AHIClientFactory - confirm against AHIFrameFetcher.
            self.frameFetcherThreadList.append(AHIFrameFetcher(str(x), self.aws_access_key, self.aws_secret_key, self.AHI_endpoint))
        for x in range(self.DICOMizerProcessCount):
            self.logger.debug("[DICOMize] - Spawning AHIDICOMizer thread # "+str(x))
            self.frameDICOMizerThreadList.append(AHIDataDICOMizer(str(x), AHI_metadata))

    def saveAsDICOM(self, ds: "pydicom.Dataset", destination: str = './out') -> bool:
        """
        saveAsDICOM(ds : pydicom.Dataset , destination : str).
        Saves a DICOM Part10 file for the DICOM object to the specified destination.

        :param ds: The pydicom Dataset representing the DICOM object.
        :param destination: the folder path where to save the DICOM file to. The file name will be the SOPInstanceUID of the DICOM object suffixed by '.dcm'.
        :return: True if the file was written, False otherwise (the error is logged).
        """
        try:
            os.makedirs(destination, exist_ok=True)
            filename = os.path.join(destination, ds["SOPInstanceUID"].value)
            ds.save_as(f"{filename}.dcm", write_like_original=False)
        except Exception as err:
            self.logger.error(f"[{__name__}][saveAsDICOM] - {err}")
            return False
        return True