diff --git a/DicomAnnotUtils.py b/DicomAnnotUtils.py new file mode 100644 index 0000000..6cd337f --- /dev/null +++ b/DicomAnnotUtils.py @@ -0,0 +1,506 @@ +import json +import pydicom +from pydicom.dataset import Dataset, FileMetaDataset +from pydicom import Sequence +from pydicom.uid import ExplicitVRLittleEndian, generate_uid +import numpy as np +from copy import deepcopy +import random +from datetime import datetime + +import argparse + +featureTemplate = { + "type": "Feature", + "properties": { + "style": { + "color":"#00fc2e", + "lineJoin":"round", + "lineCap":"round", + "isFill":True + } + }, + "geometry" : { + "type":"Polygon", + + "coordinates" : [] + }, + "bound":{ + "type":"Polygon", + "coordinates" : [] + } +} + +annotTemplate = { + "creator": "dicomImport", + "provenance": { + "image": { + "slide": "ADD_CAMIC_SLIDE_ID", + "dicom-ReferencedSOPClassUID": "", + "dicom-ReferencedSOPInstanceUID": "", + "dicom-study": "", + "dicom-series": "", + "dicom-instance": "", + }, + "analysis": { + "source": "human", # may not be true! + "execution_id": "dicom-?", + "name": "imported dicom annotation" + } + }, + "properties": { + "annotations":{ + "name":"imported dicom annotation", + "notes":"" + } + }, + "geometries": { + "type" : "FeatureCollection", + "features" : [] + } +} + +def _generate_random_string(n): + letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + return ''.join(random.choice(letters) for _ in range(n)) + +def get_dicom_info(image_path): + # Read the DICOM file + ds = pydicom.dcmread(image_path) + + # Extract DICOM attributes + patient_id = ds.get('PatientID', 'Unknown') + study_instance_uid = ds.get('StudyInstanceUID', 'Unknown') + series_instance_uid = ds.get('SeriesInstanceUID', 'Unknown') + image_width = int(ds.get('TotalPixelMatrixColumns', 0)) + image_height = int(ds.get('TotalPixelMatrixRows', 0)) + + return { + 'PatientID': patient_id, + 'StudyInstanceUID': study_instance_uid, + 'SeriesInstanceUID': series_instance_uid, + 'ImageWidth': image_width, + 'ImageHeight': image_height + } + +def create_annotation_dicom(annot_arrays, slide_file, geojson): + # the slide/ref dataset + reference_ds = pydicom.dcmread(slide_file) + # the annot dataset + ds = Dataset() + # Copy file meta information + + attrs_to_copy = [ + 'StudyID', 'PatientID', 'PatientName', 'PatientBirthDate', 'StudyInstanceUID', 'StudyDate', 'StudyTime', 'Modality', 'SeriesNumber', + 'PatientSex', 'ReferringPhysicianName', 'AccessionNumber', 'Manufacturer', 'ManufacturerModelName', 'DeviceSerialNumber', 'SoftwareVersions', 'InstanceNumber' + ] + for attr in attrs_to_copy: + setattr(ds, attr, getattr(reference_ds, attr)) + + # annotation fields + # ds.MediaStorageSOPClassUID = '1.2.840.10008.5.1.4.1.1.91.1' + ds.SOPClassUID = '1.2.840.10008.5.1.4.1.1.91.1' + ds.SeriesInstanceUID = generate_uid() + ds.SOPInstanceUID = generate_uid() + ds.ContentLabel = str(geojson['properties']['annotations']['name']).upper() + ds.ContentDescription = geojson['properties']['annotations']['notes'] + ds.AnnotationCoordinateType = "2D" + ds.PixelOriginInterpretation = 'VOLUME' + ds.Laterality = 'L' + ds.Modality = "ANN" + ds.StudyID = "S1" + ds.SeriesNumber = "0" + + current_date = datetime.now().strftime('%Y%m%d') + current_time = datetime.now().strftime('%H%M%S') + # Set ContentDate and ContentTime attributes + ds.ContentDate = current_date + ds.ContentTime = current_time + + + referenced_series_sequence = Dataset() + referenced_instance_sequence = Dataset() + referenced_instance_sequence.ReferencedSOPInstanceUID = reference_ds.SOPInstanceUID + referenced_instance_sequence.ReferencedSOPClassUID = reference_ds.SOPClassUID + referenced_series_sequence.ReferencedInstanceSequence = [referenced_instance_sequence] + referenced_series_sequence.SeriesInstanceUID = reference_ds.SeriesInstanceUID + ds.ReferencedSeriesSequence = [referenced_series_sequence] + + referenced_image_sequence = Dataset() + referenced_image_sequence.ReferencedSOPClassUID = reference_ds.SOPClassUID + referenced_image_sequence.ReferencedSOPInstanceUID = reference_ds.SOPInstanceUID + ds.ReferencedImageSequence = [referenced_image_sequence] + + + # add the annotation data + ds.AnnotationGroupSequence = [] + i = 0 + idx = 1 + point_indices = [] + # make the array first? + for points_array in annot_arrays: + point_indices.append(idx) + idx+=len(points_array) + + # now do the rest? + for points_array in annot_arrays: + + # Create a new AnnotationGroupSequence item + annotation_group_item = Dataset() + annotation_group_item.PointCoordinatesData = points_array.tobytes() # Convert numpy array to bytes + annotation_group_item.LongPrimitivePointIndexList = np.array([1], dtype=np.int32).tobytes() + + #annotation_group_item.LongPrimitivePointIndexList = bytes(points_array) + annotation_group_item.GraphicType = "POLYGON" + annotation_property_seq = Sequence() + item_dataset = Dataset() + item_dataset.CodeValue = "91723000" + item_dataset.CodingSchemeDesignator = "DCM" + item_dataset.CodeMeaning = "Anatomical Stucture" + annotation_property_seq.append(item_dataset) + annotation_group_item.AnnotationPropertyTypeCodeSequence = annotation_property_seq + annotation_group_item.AnnotationGroupNumber = i + i+=1 + annotation_group_item.NumberOfAnnotations = 1 + annotation_group_item.AnnotationAppliesToAllOpticalPaths = "YES" + annotation_group_item.AnnotationGroupUID = generate_uid() + annotation_group_item.AnnotationGroupLabel = "Annotation Group Label" + annotation_group_item.AnnotationGroupGenerationType = "MANUAL" + annotation_group_item.AnnotationPropertyCategoryCodeSequence = annotation_property_seq + annotation_group_item.AnnotationPropertyCategoryCodeSequence = annotation_property_seq + ds.AnnotationGroupSequence.append(annotation_group_item) + # Create a DICOM File Meta Information header + file_meta = FileMetaDataset() + file_meta.MediaStorageSOPClassUID = ds.SOPClassUID + file_meta.MediaStorageSOPInstanceUID = generate_uid() + file_meta.TransferSyntaxUID = ExplicitVRLittleEndian + file_meta.ImplementationVersionName = 'camic@0.0.1' + # Set the File Meta Information header + ds.file_meta = file_meta + return ds + +# camic to dicom +def camicToDicom(annot_file, slide_file): + dicom_data = get_dicom_info(slide_file) + width = int(dicom_data['ImageWidth']) + height = int(dicom_data['ImageHeight']) + with open(annot_file, 'r') as f: + annot_geo = json.load(f) + annot_arrays = [] + for x in annot_geo: + coordinates = x['geometries']['features'][0]['geometry']['coordinates'][0] + flattened_coords = [[pair[0] * width, pair[1] * height] for pair in coordinates] + converted_coords = np.array(flattened_coords, dtype=np.float32) + annot_arrays.append(converted_coords) + annot_ds = create_annotation_dicom(annot_arrays, slide_file, x) + return annot_ds + + +def _makeBound(coords_mat): + try: + min_x = float(np.min(coords_mat[:, 0])) + max_x = float(np.max(coords_mat[:, 0])) + min_y = float(np.min(coords_mat[:, 1])) + max_y = float(np.max(coords_mat[:, 1])) + bound_coords = [[min_x, min_y], [min_x, max_y], [max_x, max_y], [max_x, min_y],[min_x, min_y]] + return bound_coords + except BaseException as e: + print("coords mat") + print(coords_mat) + raise e + +def _polygon_area(vertices): + n = len(vertices) + area = 0 + for i in range(n): + j = (i + 1) % n + area += vertices[i][0] * vertices[j][1] + area -= vertices[j][0] * vertices[i][1] + return abs(area) / 2 + +def _polygon_perimeter(vertices): + perimeter = 0 + n = len(vertices) + for i in range(n): + j = (i + 1) % n + perimeter += np.linalg.norm(np.array(vertices[i]) - np.array(vertices[j])) + return perimeter + +def getPointCoordinatesDataArray(x): + if 'PointCoordinatesData' in x: + n = x.PointCoordinatesData + return np.frombuffer(n, dtype=np.float32) + elif 'DoublePointCoordinatesData' in x: + n = x.DoublePointCoordinatesData + return np.frombuffer(n, dtype=np.float64) + else: + raise ValueError("No coordinates found in array item") + + +def convert_ellipse(x1_major,y1_major,x2_major,y2_major,x1_minor,y1_minor,x2_minor,y2_minor): + center_x = (x1_major + x2_major) / 2 + center_y = (y1_major + y2_major) / 2 + major_axis_length = np.sqrt((x2_major - x1_major)**2 + (y2_major - y1_major)**2) / 2 + minor_axis_length = np.sqrt((x2_minor - x1_minor)**2 + (y2_minor - y1_minor)**2) / 2 + rotation = np.arctan2(y2_major - y1_major, x2_major - x1_major) + return center_x, center_y, major_axis_length, minor_axis_length, rotation + +def dicomToCamic(annot_path, image_dimensions, output_file, source_url=None, slide_id=None, file_mode=False): + # image_dimensions is either dimensions or a ds + annot_ds = pydicom.dcmread(annot_path) + slide_width = image_dimensions['TotalPixelMatrixColumns'] + slide_height = image_dimensions['TotalPixelMatrixRows'] + # get physical resolution + imaged_volume_width = image_dimensions['ImagedVolumeWidth'] + imaged_volume_height = image_dimensions['ImagedVolumeHeight'] + pixel_size_x = imaged_volume_width / slide_width + pixel_size_y = imaged_volume_height / slide_height + # millimeters to microns + mpp_x = pixel_size_x * 1000 + #mpp_y = pixel_size_y * 1000 + # TODO this is just for POLYGON. generalize later. + res = [] + isSegment = False + for x in annot_ds.AnnotationGroupSequence: + exported_annot = deepcopy(annotTemplate) + exported_annot['properties']['annotations']['name'] = annot_ds.ContentLabel + exported_annot['provenance']['analysis']['name'] = annot_ds.ContentLabel + exported_annot['properties']['annotations']['notes'] = annot_ds.ContentDescription + exported_annot['provenance']['analysis']['execution_id'] = "_DICOM_" + _generate_random_string(10) + if x.AnnotationGroupGenerationType == "AUTOMATIC": + exported_annot['provenance']['analysis']['source'] = 'computer' + exported_annot['provenance']['analysis']['computation'] = 'segmentation' + isSegment = True + if slide_id: + exported_annot['provenance']['image']['slide'] = slide_id + exported_annot['provenance']['image']['dicom-ReferencedSOPClassUID'] = annot_ds.ReferencedImageSequence[0].ReferencedSOPClassUID + exported_annot['provenance']['image']['dicom-ReferencedSOPInstanceUID'] = annot_ds.ReferencedImageSequence[0].ReferencedSOPInstanceUID + exported_annot['provenance']['image']["dicom-source-url"] = source_url + exported_annot['provenance']['image']["dicom-study"] = annot_ds.StudyInstanceUID + exported_annot['provenance']['image']["dicom-series"] = annot_ds.SeriesInstanceUID + exported_annot['provenance']['image']["dicom-instance"] = annot_ds.SOPInstanceUID + + # handle other cases first + if x.GraphicType == "ELLIPSE": + m = getPointCoordinatesDataArray(x) + # sets of 4 points, 8 numbers + for i in range(8, len(m), 8): + ellipse_points = m[i-8: i] + x1_major = ellipse_points[0]/slide_width + y1_major = ellipse_points[1]/slide_height + x2_major = ellipse_points[2]/slide_width + y2_major = ellipse_points[3]/slide_height + x1_minor = ellipse_points[4]/slide_width + y1_minor = ellipse_points[5]/slide_height + x2_minor = ellipse_points[6]/slide_width + y2_minor = ellipse_points[7]/slide_height + center_x, center_y, major_axis_length, minor_axis_length, rotation = convert_ellipse(x1_major,y1_major,x2_major,y2_major,x1_minor,y1_minor,x2_minor,y2_minor) + newFeature = deepcopy(featureTemplate) + newFeature['geometry']['type'] = "Ellipse" + newFeature['geometry']['coordinates'] = [center_x, center_y] + newFeature['geometry']["radius"] = [major_axis_length,minor_axis_length], + newFeature['geometry']["rotation"] = rotation + newFeature['bound']['type'] = "Point" + newFeature['bound']['coordinates'] = [center_x, center_y] + if isSegment: + exported_annot["footprint"] = 4* major_axis_length*minor_axis_length*slide_height*slide_width + exported_annot["bbox"]= [center_x - major_axis_length, center_y - minor_axis_length, center_x + major_axis_length, center_y + minor_axis_length] + exported_annot["x"] = center_x + exported_annot["y"] = center_y + exported_annot['geometries']['features'] = [newFeature] + res.append(deepcopy(exported_annot)) + elif x.GraphicType == "POINT": + # sets of 1 point, 2 numbers + m = getPointCoordinatesDataArray(x) + # sets of 4 points, 8 numbers + for i in range(2, len(m), 2): + point = m[i-2: i] + center_x = point[0]/slide_width + center_y = point[1]/slide_height + newFeature = deepcopy(featureTemplate) + newFeature['geometry']['type'] = "Point" + newFeature['geometry']['coordinates'] = [center_x, center_y] + newFeature['bound']['type'] = "Point" + newFeature['bound']['coordinates'] = [center_x, center_y] + if isSegment: + exported_annot["footprint"] = slide_height*slide_width # always show points for now, big thing + exported_annot["bbox"]= [center_x, center_y, slide_width, slide_height] + exported_annot["x"] = center_x + exported_annot["y"] = center_y + exported_annot['geometries']['features'] = [newFeature] + res.append(deepcopy(exported_annot)) + elif x.GraphicType == "RECTANGLE": + m = getPointCoordinatesDataArray(x) + # sets of 4 points, 8 numbers + for i in range(8, len(m), 8): + rect_points = m[i-8: i] + x1 = rect_points[0]/slide_width + y1 = rect_points[1]/slide_height + x2 = rect_points[2]/slide_width + y2 = rect_points[3]/slide_height + x3 = rect_points[4]/slide_width + y3 = rect_points[5]/slide_height + x4 = rect_points[6]/slide_width + y4 = rect_points[7]/slide_height + newFeature = deepcopy(featureTemplate) + newFeature['geometry']['type'] = "Polygon" + newFeature['geometry']['coordinates'] = [[x1,y1], [x2,y2], [x3,y3], [x4,y4], [x1,y1]] + newFeature['bound']['type'] = "Polygon" + newFeature['bound']['coordinates'] = [[x1,y1], [x2,y2], [x3,y3], [x4,y4], [x1,y1]] + if isSegment: + exported_annot["footprint"] = abs(x1-x3)*abs(y2-y4)*slide_height*slide_width + exported_annot["bbox"]= [(x1+x3)/2, (y2+y4)/2, abs(x1-x3), abs(y2-y4)] + exported_annot["x"] = (x1+x3)/2 + exported_annot["y"] = (y2+y4)/2 + exported_annot['geometries']['features'] = [newFeature] + res.append(deepcopy(exported_annot)) + elif x.GraphicType == "POLYGON" or x.GraphicType == "POLYLINE": + m = getPointCoordinatesDataArray(x) + norm_width = slide_width + norm_height = slide_height + offset_x = 0 + offset_y = 0 + # normalize coordinates for camicroscope + coordinates_array = (m.reshape(-1, 2)) + coordinates_array = (coordinates_array - [offset_x, offset_y]) / [norm_width, norm_height] + + + # split into different geometry objects Index List + indexList = np.frombuffer(x.LongPrimitivePointIndexList, dtype=np.int32) + #print("IndexList", indexList) + #print("len Index List", len(indexList)) + #print("coordinates_array shape", coordinates_array.shape) + if len(indexList) > 1: + # split + prevIndex = 0 + for idx in indexList[1:]: + end_idx = int((idx-1)/2) + #print("prev", prevIndex, "idx", idx) + # make a thing + points = coordinates_array[prevIndex:end_idx, :] + points = np.concatenate((points, [points[0]])) + #print('len(points)', len(points)) + if len(points) > 0: + newFeature = deepcopy(featureTemplate) + newFeature['geometry']['coordinates'].append(points.tolist()) + bounding_box = _makeBound(points) + # [[min_x, min_y], [min_x, max_y], [max_x, max_y], [max_x, min_y],[min_x, min_y]] + newFeature['bound']['coordinates'].append(bounding_box) + if isSegment: + x1 = bounding_box[0][0] + x3 = bounding_box[2][0] + y2 = bounding_box[1][1] + y4 = bounding_box[3][1] + exported_annot["footprint"] = abs(x1-x3)*abs(y2-y4)*slide_width*slide_height + exported_annot["bbox"]= [(x1+x3)/2, (y2+y4)/2, abs(x1-x3), abs(y2-y4)] + exported_annot["x"] = (x1+x3)/2 + exported_annot["y"] = (y2+y4)/2 + exported_annot['geometries']['features'] = [newFeature] + res.append(deepcopy(exported_annot)) + prevIndex = end_idx + # and the bound + # then add the last one + points = coordinates_array[prevIndex:, :] + points = np.concatenate((points, [points[0]])) + if len(points) > 0: + newFeature = deepcopy(featureTemplate) + newFeature['geometry']['coordinates'].append(points.tolist()) + bounding_box = _makeBound(points) + # [[min_x, min_y], [min_x, max_y], [max_x, max_y], [max_x, min_y],[min_x, min_y]] + newFeature['bound']['coordinates'].append(bounding_box) + if isSegment: + x1 = bounding_box[0][0] + x3 = bounding_box[2][0] + y2 = bounding_box[1][1] + y4 = bounding_box[3][1] + exported_annot["footprint"] = abs(x1-x3)*abs(y2-y4)*slide_height*slide_width + exported_annot["bbox"]= [(x1+x3)/2, (y2+y4)/2, abs(x1-x3), abs(y2-y4)] + exported_annot["x"] = (x1+x3)/2 + exported_annot["y"] = (y2+y4)/2 + exported_annot['geometries']['features'] = [newFeature] + res.append(deepcopy(exported_annot)) + else: + # whole thing at once. Only do area and circumference here. + points = coordinates_array + points = np.concatenate((points, [points[0]])) + newFeature = deepcopy(featureTemplate) + newFeature['geometry']['coordinates'].append(points.tolist()) + bounding_box = _makeBound(points) + # [[min_x, min_y], [min_x, max_y], [max_x, max_y], [max_x, min_y],[min_x, min_y]] + newFeature['bound']['coordinates'].append(bounding_box) + if isSegment: + x1 = bounding_box[0][0] + x3 = bounding_box[2][0] + y2 = bounding_box[1][1] + y4 = bounding_box[3][1] + exported_annot["footprint"] = abs(x1-x3)*abs(y2-y4)*slide_height*slide_width + exported_annot["bbox"]= [(x1+x3)/2, (y2+y4)/2, abs(x1-x3), abs(y2-y4)] + exported_annot["x"] = (x1+x3)/2 + exported_annot["y"] = (y2+y4)/2 + exported_annot['geometries']['features'] = [newFeature] + area = _polygon_perimeter(points.tolist()) * mpp_x * slide_width + perimeter = _polygon_area(points.tolist()) * (mpp_x * slide_width)**2 + exported_annot['properties']['annotations']['circumference'] = str(area)+ " μm" + exported_annot['properties']['annotations']['area'] = str(perimeter) + " μm²" + res.append(deepcopy(exported_annot)) + + if file_mode: + n = 0 + for x in res: + n += 1 + output_fn = output_file + "_" + str(n) + ".json" + with open(output_fn, 'w') as f: + json.dump(x, f) + print("saved to", output_fn) + else: + return res + +# demonstration example +def demo(): + annot_file = './annots_camic.json' + slide_file = '/Users/rbirmin/Documents/distro/images/5d8b1b52d0/e25e1997-b86a-42de-aa8c-83ca18444bf2.dcm' + annot_ds = camicToDicom(annot_file, slide_file) + annot_ds.save_as("test_out.dcm", write_like_original=False) + print("now working backwards for test") + annot_output = "./test_out.dcm" + image_dimensions = {} + ds = pydicom.dcmread(slide_file) + image_dimensions['TotalPixelMatrixColumns'] = ds.TotalPixelMatrixColumns + image_dimensions['TotalPixelMatrixRows'] = ds.TotalPixelMatrixRows + image_dimensions['ImagedVolumeWidth'] = ds.ImagedVolumeWidth + image_dimensions['ImagedVolumeHeight'] = ds.ImagedVolumeHeight + dicomToCamic(annot_output, image_dimensions, "test_out2", slide_id='65fc65851200600012eb9222', file_mode=True) + exit(0) + +if __name__ == "__main__": + # Create argument parser + parser = argparse.ArgumentParser(description='Convert annotations between CAMIC and DICOM') + + # Add arguments + parser.add_argument('operation', choices=['import', 'export'], help='Operation to perform (camic_to_dicom or dicom_to_camic)') + parser.add_argument('annot_file', help='Path to the annotation file (json or dcm)') + parser.add_argument('slide_file', help='Path to the slide file (dcm)') + parser.add_argument('output_file', help='Path to the output file (dcm or json)') + parser.add_argument('--slide_id', help='Slide ID (optional)') + + + # Parse arguments + args = parser.parse_args() + slide_id = "ADD_CAMIC_SLIDE_ID_HERE" # note: not literally here, but if you don't give a slide id, you should add it eventually to the output + if args.slide_id: + slide_id = args.slide_id + + # Perform the selected operation + if args.operation == 'export': + annot_ds = camicToDicom(args.annot_file, args.slide_file) + annot_ds.save_as(args.output_file, write_like_original=False) + elif args.operation == 'import': + ds = pydicom.dcmread(args.slide_file) + dimensions = {} + dimensions['TotalPixelMatrixColumns'] = ds.TotalPixelMatrixColumns + dimensions['TotalPixelMatrixRows'] = ds.TotalPixelMatrixRows + dimensions['ImagedVolumeWidth'] = ds.ImagedVolumeWidth + dimensions['ImagedVolumeHeight'] = ds.ImagedVolumeHeight + dicomToCamic(args.annot_file, dimensions, args.output_file, slide_id=slide_id, file_mode=True) + else: + print("Invalid operation. Choose 'export' (camic_to_dicom) or 'import' (dicom_to_camic)") diff --git a/SlideServer.py b/SlideServer.py index a6906c1..c321a5a 100644 --- a/SlideServer.py +++ b/SlideServer.py @@ -2,9 +2,11 @@ import json import os import random +import traceback import shutil import string import sys +from datetime import datetime import pyvips import os from spritemaker import createSpritesheet @@ -12,6 +14,7 @@ import urllib import flask import flask_cors +import threading from flask import request from image_reader import construct_reader, suggest_folder_name from werkzeug.utils import secure_filename @@ -25,6 +28,18 @@ from threading import Thread from file_extensions import ALLOWED_EXTENSIONS from time import sleep +import hashlib +from urllib.parse import urlparse +from dicomweb_client.api import DICOMwebClient + +from DicomAnnotUtils import dicomToCamic +import pydicom + +# mongo stuff +import pymongo +from bson.objectid import ObjectId + + try: from io import BytesIO @@ -37,10 +52,6 @@ # dataset storage location for the workbench tasks app.config['DATASET_FOLDER'] = "/images/dataset/" -#creating a uploading folder if it doesn't exist -if not os.path.exists('/images/uploading/'): - os.mkdir('/images/uploading') - # where to put and get slides app.config['UPLOAD_FOLDER'] = "/images/" app.config['TEMP_FOLDER'] = "/images/uploading/" @@ -48,6 +59,11 @@ app.config['SECRET_KEY'] = os.urandom(24) app.config['ROI_FOLDER'] = "/images/roiDownload" +#creating a uploading folder if it doesn't exist +if not os.path.exists(app.config['TEMP_FOLDER']): + os.mkdir(app.config['TEMP_FOLDER']) + + # should be used instead of secure_filename to create new files whose extensions are important. # use secure_filename to access previous files. # secure_filename ensures security but may result in invalid filenames. @@ -94,8 +110,9 @@ def getThumbnail(filename, size=50): return {"error": "No such file"} try: slide = construct_reader(filepath) - except BaseException as e: - # here, e has attribute "error" + except Exception as e: + app.logger.error(f"An error occurred: {type(e).__name__}: {e}") + app.logger.debug(traceback.format_exc()) return e try: @@ -696,3 +713,317 @@ def guiLocation(): success = "port" in res and "ui_port" in res return flask.Response(json.dumps(res), status=200 if success else 500, mimetype='text/json') + +# things needed for just dicom routes +app.config['MONGO_URI'] = os.environ.get('MONGO_URI', 'mongodb://ca-mongo:27017/') +app.config['MONGO_DB'] = os.environ.get('MONGO_DB', 'camic') + +mongo_client = pymongo.MongoClient(app.config['MONGO_URI']) +mongo_db = mongo_client[app.config['MONGO_DB']] + +# find if a slide already exists for this +def _findMatchingSlide(source_url, study, series): + query = {"dicom-source-url": source_url, "study": study, "series": series} + return mongo_db['slide'].find_one(query) + +def _getSlide(slide_id): + object_id = ObjectId(slide_id) + result = mongo_db['slide'].find_one({"_id": object_id}) + return result + +def _setSlideStatus(slide_id, status): + object_id = ObjectId(slide_id) + result = mongo_db['slide'].update_one({"_id": object_id}, {"$set": {"status": status}}) + return result + +def _add_slide(document): + insert_result = mongo_db['slide'].insert_one(document) + return str(insert_result.inserted_id) + +def _add_mark(document): + insert_result = mongo_db['mark'].insert_one(document) + return str(insert_result.inserted_id) + + +def _get_hash_prefix(input_string, length=8): + algorithm='sha256' + input_bytes = input_string.encode('utf-8') + hash_obj = hashlib.new(algorithm) + hash_obj.update(input_bytes) + full_hash = hash_obj.hexdigest() + hash_prefix = full_hash[:length] + return hash_prefix + +def find_referenced_image_by_files(source_url, study, ds): + instance_uid = ds.ReferencedImageSequence[0].ReferencedSOPInstanceUID + study_directory = app.config['UPLOAD_FOLDER'] + _get_hash_prefix(source_url + "~" + study, length=10) + "_dicomweb" + # Traverse through the study directory + for root, dirs, files in os.walk(study_directory): + for file_name in files: + if file_name.endswith('.dcm'): + file_path = os.path.join(root, file_name) + ds = pydicom.dcmread(file_path) + if ds.SOPInstanceUID == instance_uid: + # Found the instance, return its SeriesInstanceUID + dimensions = {} + dimensions['TotalPixelMatrixColumns'] = ds.TotalPixelMatrixColumns + dimensions['TotalPixelMatrixRows'] = ds.TotalPixelMatrixRows + dimensions['ImagedVolumeWidth'] = ds.ImagedVolumeWidth + dimensions['ImagedVolumeHeight'] = ds.ImagedVolumeHeight + return ds.SeriesInstanceUID, dimensions + + # If the instance is not found in the study + return None, None + +def downloadRawDicom(source_url, study_uid, series_uid, instance_uid, output_fn): + instance_url = source_url + f"/studies/{study_uid}/series/{series_uid}/instances/{instance_uid}" + response = requests.get(instance_url, stream=True) + if response.status_code == 200: + with open(output_fn, "wb") as file: + content_type_header = response.headers.get('Content-Type') + boundary = content_type_header.split('boundary=')[1].split(';')[0].strip() + app.logger.info("Working on file: " + output_fn) + dicom_started = False + for chunk in response.iter_content(chunk_size=8192): + if chunk: + if not dicom_started: + # Check if the chunk contains "\r\n\r\n"" + if b"\r\n\r\n" in chunk: + # Find the position of "\r\n\r\n"" in the chunk + start_idx = chunk.find(b"\r\n\r\n") + file.write(chunk[start_idx + 4:]) + # Set dicom_started to True + dicom_started = True + else: + # Write entire chunk to the file + file.write(chunk) + # remove the trailing multipart stuff + file.seek(-len(boundary) - 6, 2) # Move the file pointer to before the boundary + file.truncate() # Truncate the file at the current position + + print("DICOM instance saved successfully.") + else: + print(f"Failed to retrieve DICOM instance. Status code: {response.status_code}") + +# dicom web based routes + +def doDicomSlideDownloads(source_url, study, series, instance_list, camic_slide_id): + for instance in instance_list: + app.logger.info("Working on slide instance: " + instance) + try: + dest_directory = app.config['UPLOAD_FOLDER'] + _get_hash_prefix(source_url + "~" + study, length=10) + "_dicomweb" + if not os.path.exists(dest_directory): + os.makedirs(dest_directory) + output_fn = dest_directory + "/" + instance + ".dcm" + downloadRawDicom(source_url, study, series, instance, output_fn) + except Exception as e: + app.logger.error(f"An error occurred: {type(e).__name__}: {e}") + app.logger.debug(traceback.format_exc()) + # we're done, update the camic slide instance to done + _setSlideStatus(camic_slide_id, "done") + camic_slide_id + + +def levenshtein_distance(s1, s2): + """Calculate the Levenshtein distance between two strings.""" + if len(s1) < len(s2): + return levenshtein_distance(s2, s1) + if len(s2) == 0: + return len(s1) + previous_row = range(len(s2) + 1) + for i, c1 in enumerate(s1): + current_row = [i + 1] + for j, c2 in enumerate(s2): + insertions = previous_row[j + 1] + 1 + deletions = current_row[j] + 1 + substitutions = previous_row[j] + (c1 != c2) + current_row.append(min(insertions, deletions, substitutions)) + previous_row = current_row + return previous_row[-1] + +def find_closest_sm_instance(source_url, study_uid, target_series_uid, target_instance_uid): + client = DICOMwebClient(source_url) + # Retrieve all instances within the specified study + instances = client.search_for_instances(study_instance_uid=study_uid) + sm_instances = [instance for instance in instances if instance.get('00080060', {}).get('Value', [None])[0] == 'SM'] + # Initialize variables to store the closest match + closest_series_uid = None + closest_instance_uid = None + min_distance = float('inf') # Initialize with a large value + + # Iterate over all instances and find the closest match + for instance in sm_instances: + series_uid = instance['0020000E']['Value'][0] # Series Instance UID + instance_uid = instance['00080018']['Value'][0] # SOP Instance UID + + # Calculate the Levenshtein distance between the target and current series and instance UIDs + series_distance = levenshtein_distance(target_series_uid, series_uid) + instance_distance = levenshtein_distance(target_instance_uid, instance_uid) + total_distance = series_distance + instance_distance + + # Check if the current instance is closer than the previous closest match + if total_distance < min_distance: + min_distance = total_distance + closest_series_uid = series_uid + closest_instance_uid = instance_uid + + return closest_series_uid, closest_instance_uid + + +def get_reference_image_dimensions(source_url, ann_study, ann_series, ann_instance): + try: + client = DICOMwebClient(source_url) + ann_metadata = client.retrieve_instance_metadata( + study_instance_uid=ann_study, + series_instance_uid=ann_series, + sop_instance_uid=ann_instance + ) + # use the metadata for the annotation to get the identifiers then metadata for the slide + # study is the same + image_study = ann_study + image_instance = ann_metadata['00081140']['Value'][0]['00081155']['Value'][0] + image_series = ann_metadata['00081115']['Value'][0]['0020000E']['Value'][0] + slide_metadata = None + try: + slide_metadata = client.retrieve_instance_metadata( + study_instance_uid=image_study, + series_instance_uid=image_series, + sop_instance_uid=image_instance + ) + except BaseException as e: + app.logger.exception(e) + # for some reason, exact matches don't seem to be present. find "closest" one + close_series, close_instance = find_closest_sm_instance(source_url,image_study,image_series,image_instance) + slide_metadata = client.retrieve_instance_metadata( + study_instance_uid=image_study, + series_instance_uid=close_series, + sop_instance_uid=close_instance + ) + app.logger.info("Couldn't find : " + image_study + " " + image_series + " " + image_instance) + app.logger.info("Instead using : " + image_study + " " + close_series + " " + close_instance) + image_series = close_series + response = {} + response['ImagedVolumeWidth'] = slide_metadata['00480001']['Value'][0] + response['ImagedVolumeHeight'] = slide_metadata['00480001']['Value'][0] + response['TotalPixelMatrixColumns'] = slide_metadata['00480006']['Value'][0] + response['TotalPixelMatrixRows'] = slide_metadata['00480007']['Value'][0] + return image_series, response + + # use the slide metadata to get the dimensions, return them + except BaseException as e: + app.logger.exception(e) + # get the series id and ds another way, through a file search. + ann_ds = client.retrieve_instance( + study_instance_uid=ann_study, + series_instance_uid=ann_series, + sop_instance_uid=ann_instance + ) + return find_referenced_image_by_files(source_url, ann_study, ann_ds) + +def doDicomAnnDownloads(source_url, study, series, instance_list): + client = DICOMwebClient(source_url) + for instance in instance_list: + app.logger.info("Working on ann instance: " + instance) + try: + instance_resp = client.retrieve_instance( + study_instance_uid=study, + series_instance_uid=series, + sop_instance_uid=instance + ) + dest_directory = app.config['UPLOAD_FOLDER'] + _get_hash_prefix(source_url + "~" + study, length=10) + "_dicomweb" + annot_path = dest_directory + "/" + instance + ".dcm" + downloadRawDicom(source_url, study, series, instance, annot_path) + ref_series, dimensions = get_reference_image_dimensions(source_url, study, series, instance) + if ref_series: + slide_res = _findMatchingSlide(source_url, study, ref_series) + if slide_res: + slide_id = str(slide_res['_id']) + res = dicomToCamic(annot_path, dimensions, None, source_url, slide_id, file_mode=False) + # load into camic + for doc in res: + _add_mark(doc) + else: + # get the slide then try this again + getDicomSmThenAnn(source_url, study, ref_series, study, series) + else: + e = {"err": "err", "msg": "couldn't find the reference"} + app.logger.info("err: " + str(e)) + return e + except Exception as e: + app.logger.error(f"An error occurred: {type(e).__name__}: {e}") + app.logger.debug(traceback.format_exc()) + + + +def getDicomSeriesANN(source_url, study, series): + client = DICOMwebClient(source_url) + instances = client.search_for_instances( + study_instance_uid=study, + series_instance_uid=series + ) + instance_list = [x['00080018']['Value'][0] for x in instances] + load_thread = threading.Thread(target=doDicomAnnDownloads, args=(source_url, study, series, instance_list)) + load_thread.start() + +def getDicomSeriesSM(source_url, study, series, blocking=False): + # check if the slide object matching already exists, returning it if so. + matching_slide = _findMatchingSlide(source_url, study, series) + if matching_slide: + return {'status': 'already exists', 'slide_id': str(matching_slide["_id"])} + client = DICOMwebClient(source_url) + instances = client.search_for_instances( + study_instance_uid=study, + series_instance_uid=series + ) + instance_list = [x['00080018']['Value'][0] for x in instances] + + # do the download + dest_directory = app.config['UPLOAD_FOLDER'] + _get_hash_prefix(source_url + "~" + study, length=10) + "_dicomweb" + if not os.path.exists(dest_directory): + os.makedirs(dest_directory) + first_file = dest_directory + "/" + instance_list[0] + ".dcm" + # if the slide object does not exist, make it using the first instance file name with in progress status + current_time = datetime.now() + formatted_time = current_time.strftime("%m/%d/%Y, %I:%M:%S %p") + new_slide = { + 'location': first_file, + 'filepath': instance_list[0] + ".dcm", + 'upload_date': formatted_time , + 'creator': "DicomWebImport", + 'dicom-source-url': source_url, + 'study': study, + 'series': series, + 'specimen': '', + 'status': 'loading', + 'name': _get_hash_prefix(study + "~" + series, length=12) + } + slide_id = _add_slide(new_slide) + if blocking: + doDicomSlideDownloads(source_url, study, series, instance_list, slide_id) + else: + load_thread = threading.Thread(target=doDicomSlideDownloads, args=(source_url, study, series, instance_list, slide_id)) + load_thread.start() + return slide_id + +def getDicomSmThenAnn(source_url, slide_study, slide_series, annot_study, annot_series): + slide_id = getDicomSeriesSM(source_url, slide_study, slide_series, blocking=True) + # slide is loaded, so just eventually load the annotations + getDicomSeriesANN(source_url, annot_study, annot_series) + +@app.route('/dicomWeb/importSeries', methods=['POST']) +def dicom_import(): + try: + data = flask.request.get_json() + source_url = data['source_url'] + study = data['study'] + series = data['series'] + modality = data['modality'] + if modality == "ANN": + slide_id = getDicomSeriesANN(source_url, study, series) + return({"modality": modality, "status": "starting"}) + else: + slide_id = getDicomSeriesSM(source_url, study, series, blocking=False) + return({"modality": modality, 'slide_id': slide_id}) + except Exception as e: + app.logger.error(f"An error occurred: {type(e).__name__}: {e}") + app.logger.debug(traceback.format_exc()) diff --git a/requirements.txt b/requirements.txt index c9064f3..f05d286 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,5 @@ google-auth-httplib2 google-auth-oauthlib pycurl pydicom +dicomweb-client +pymongo \ No newline at end of file