In [1]:
## Pull all of the annotations for the easy study.
## Widen the notebook container so wide outputs are easier to read.
## `display` and `HTML` are imported from the public IPython.display module;
## importing them from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import getpass
import io
import json
import os
import pprint as pprint

import dateutil
import girder_client
import numpy as np
import tqdm


##  Connect to ISIC in order to pull ISIC annotation data, and to Dermannotator
##  where the processed metadata is stored.
##  Credentials are never stored in the notebook: they are read from environment
##  variables, with an interactive prompt as a fallback when a variable is unset.
def _readCredential(envVar, prompt, secret=False):
    """Return the value of ``envVar``; prompt the user when it is unset or empty.

    When ``secret`` is true the prompt does not echo what is typed.
    """
    value = os.environ.get(envVar)
    if value:
        return value
    return getpass.getpass(prompt) if secret else input(prompt)


BASE_URL = 'https://isic-archive.com/api/v1'
gc = girder_client.GirderClient(apiUrl=BASE_URL)
c = gc.authenticate(
    username=_readCredential('ISIC_USERNAME', 'ISIC username: '),
    password=_readCredential('ISIC_PASSWORD', 'ISIC password: ', secret=True),
)

# NOTE(review): this endpoint is plain http, so the credentials travel unencrypted;
# switch to https if the server supports it.
dermGC = girder_client.GirderClient(apiUrl='http://dermannotator.org:8080/api/v1')
dc = dermGC.authenticate(
    username=_readCredential('DERMANNOTATOR_USERNAME', 'Dermannotator username: '),
    password=_readCredential('DERMANNOTATOR_PASSWORD', 'Dermannotator password: ', secret=True),
)

In [3]:
def getSVGJsonData(gc, itemId):
    """Download and parse the JSON file attached to an item.

    Only the first file listed for the item is read; the item is assumed to
    hold a single file.

    Args:
        gc: Client exposing ``get(path)`` and ``downloadFile(fileId, stream)``.
        itemId: ``_id`` of the item whose file should be read.

    Returns:
        The parsed JSON content of the first file, or ``None`` when the item
        has no files.
    """
    for fileInfo in gc.get("/item/%s/files" % itemId):
        # The listing entry already carries the file _id, so there is no need
        # for an extra getFile round trip before downloading.
        with io.BytesIO() as buffer:
            gc.downloadFile(fileInfo['_id'], buffer)
            buffer.seek(0)
            return json.load(buffer)
    return None

def checkFolderForMultiRaterData(gc, folderData):
    """Collect the multi-rater metadata skeleton for one image folder.

    The folder is scanned for three kinds of items:
      * the main image, whose name without extension equals the folder name;
      * any ``.svg.json`` item containing the folder name;
      * the ``_superpixels_v3.0.svg.json`` item containing the folder name.

    Args:
        gc: Client exposing ``listItem(folderId)`` and ``get(path)``.
        folderData: Folder document; its ``_id`` and ``name`` are read.

    Returns:
        A dict with ``imageName``, ``featuresObservedForImage``, ``markupData``
        and ``superpixels_in_mask`` always present, plus ``mainImage``,
        ``baseImageWidth``, ``baseImageHeight``, ``svgJson`` and ``spxImage``
        for whichever of those items were found. ``None`` is returned when the
        tile information of the main image cannot be read.
    """
    folderName = folderData['name']

    multiRaterMetaData = { "imageName": folderName, "featuresObservedForImage": [], "markupData": {}, "superpixels_in_mask": [] }
    for item in gc.listItem(folderData['_id']):
        itemName = item['name']
        base = os.path.splitext(itemName)[0]

        ### Main image: the item named exactly like the folder
        if base == folderName:
            multiRaterMetaData['mainImage'] = item
            ### The base image width/height are needed to scale things appropriately
            try:
                largeImageInfo = gc.get(f"/item/{item['_id']}/tiles")
                multiRaterMetaData['baseImageWidth']  = largeImageInfo['sizeX']
                multiRaterMetaData['baseImageHeight'] = largeImageInfo['sizeY']
            except Exception:
                # Catch ordinary errors only, so Ctrl-C / SystemExit still stop the run.
                print(f"No large image info found for {folderName}")
                return None

        belongsToFolder = folderName in itemName

        ## SVG JSON file: only a placeholder is stored, the content itself is too big
        if belongsToFolder and itemName.endswith('.svg.json'):
            multiRaterMetaData['svgJson'] = {}

        ## Superpixel SVG JSON item: keep the item document for later download
        if belongsToFolder and itemName.endswith('_superpixels_v3.0.svg.json'):
            multiRaterMetaData['spxImage'] = item
    return multiRaterMetaData

class NpEncoder(json.JSONEncoder):
    """JSON encoder that also understands numpy scalars, numpy arrays and sets.

    Conversions performed by ``default``:
      * ``np.bool_``    -> ``bool``
      * ``np.integer``  -> ``int``
      * ``np.floating`` -> ``float``
      * ``np.ndarray``  -> nested ``list``
      * ``set``         -> ``list`` (element order follows set iteration order)

    Any other unsupported object is passed to the base class, which raises
    ``TypeError``.
    """

    def default(self, obj):
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, set):
            return list(obj)
        return super().default(obj)

In [6]:
def AnalyzeImageAnnotations(gc, curImageAnnotations):
    """Gather the completed annotations of one image, grouped by rater.

    An image in a study can have one or more annotations. Each annotation whose
    ``state`` is ``'complete'`` is fetched and reduced to the rater's name, the
    time spent on the markup, and, per marked-up feature, the indices of the
    superpixel entries equal to 1.

    Args:
        gc: Client exposing ``get(path)``, pointed at the server that holds the
            annotations.
        curImageAnnotations: Iterable of dicts, each carrying the ``_id`` of one
            annotation.

    Returns:
        ``{"raters": {raterName: {"annotations": {feature: [indices]},
        "markupTime": str, "startTime": ISO-8601 str}}}``. A later annotation by
        the same rater replaces an earlier one. Features whose superpixel values
        sum to zero, or whose lookup fails, are left out.
    """
    # Importing the package alone does not guarantee the parser submodule is loaded.
    import dateutil.parser

    raterData = {}

    for cia in curImageAnnotations:
        cuad = gc.get('annotation/%s' % cia['_id'] ) ## Current User Annotation Data

        if cuad['state'] != 'complete':
            continue

        ## 'name' is used rather than the fuller 'login'
        cuadUserName = cuad['user']['name']

        start = dateutil.parser.parse(cuad['startTime'])
        stop = dateutil.parser.parse(cuad['stopTime'])

        annotations = {}
        raterData[cuadUserName] = {
            'annotations': annotations,
            'markupTime': "%s" % (stop - start),
            'startTime': start.isoformat(),
        }

        for ftr in cuad.get('markups', {}):
            try:
                spxForFtr = gc.get(f'annotation/{cia["_id"]}/markup/{ftr}/superpixels')
                if sum(spxForFtr) > 0:
                    annotations[ftr] = list(np.where(np.array(spxForFtr) == 1)[0])
            except Exception:
                # Best effort: a feature whose superpixels cannot be fetched is
                # skipped, while Ctrl-C / SystemExit are allowed to propagate.
                continue
    return {"raters": raterData }

In [7]:
## Names of the studies whose annotations are copied onto the Dermannotator folders.
WhiteListedStudySet = ['EASY_STUDY_1','EASY_STUDY_2','EASY_STUDY_3','EASY_STUDY_4']
studyList = gc.get('study')

for s in studyList:
    studyName = s['name']

    if studyName not in WhiteListedStudySet: ## Pull data only for the relevant studies
        continue

    studyDetails = gc.get('study/%s' % s['_id'])  # Includes the list of images for that study
    studyLastUpdated = studyDetails['updated']
    featureSet = studyDetails['features']  ## note camel case vs non camelcase
    featureSetName = "EASY_STUDY" ##featureSet['name']
    featureSetID = "NA" ### featureSet['_id']
    studyImageList = studyDetails['images']

    curStudyRaterData = {}

    for si in tqdm.tqdm(studyImageList):
        ## Check and see if the current image has been added to the MultiRaterStudyFolder
        try:
            studyImgFolder = dermGC.get("/resource/lookup?path=/collection/MultiRater_Viewer_Data/NewDataSet/%s" % si['name'])
        except Exception:
            # The lookup failed: skip this image instead of falling through and
            # re-processing the folder left over from the previous iteration.
            continue

        # Only folders that exist and already carry metadata are processed.
        if not studyImgFolder or not studyImgFolder['meta']:
            continue

        multiRaterMetadata = checkFolderForMultiRaterData(dermGC,studyImgFolder)
        if multiRaterMetadata is None:
            # The main image has no readable tile info (already reported by the
            # helper); nothing sensible can be written for this folder.
            continue

        ### Each image can have many annotations
        curImageAnnotations = gc.get('annotation?studyId=%s&imageId=%s' % ( s['_id'], si['_id']))
        curImageStudyData = AnalyzeImageAnnotations(gc,curImageAnnotations)
        ratersForImage = curImageStudyData['raters']

        ## Some of this goes to the parent folder, and some goes into a special item in the folder
        EasyStudyMetadataItem = dermGC.createItem(studyImgFolder['_id'],'EasyStudyMetadata',reuseExisting=True,metadata= multiRaterMetadata)

        ## Feature data regrouped as feature -> rater -> superpixel indices.
        ## The per-rater lists are already indices, so they are stored as-is.
        featuresInImage = set()
        raterMarkupData = {}
        for rtr, rtrData in ratersForImage.items():
            for ftr, spxIndices in rtrData['annotations'].items():
                featuresInImage.add(ftr)
                raterMarkupData.setdefault(ftr, {})[rtr] = list(spxIndices)

        studyImgFolderMetadata = {
            'FullMetaDataItem': EasyStudyMetadataItem['_id'],
            'ratersForCurrentImage': list(ratersForImage.keys()),
            'featuresObservedForImage': list(featuresInImage),
            'baseImageWidth': multiRaterMetadata['baseImageWidth'],
            'baseImageHeight': multiRaterMetadata['baseImageHeight'],
            'imageName': multiRaterMetadata['imageName'],
            'spxImage': multiRaterMetadata['spxImage'],
            'mainImage': multiRaterMetadata['mainImage'],
            'superpixels_in_mask': [],
            # NOTE(review): this is a set literal, so it is stored as the list ["tbd"].
            'svgJson': {'tbd'},
            'markupData': raterMarkupData,
        }

        # Round-trip through NpEncoder so numpy scalars and sets become plain JSON types.
        dermGC.addMetadataToFolder(studyImgFolder['_id'],json.loads(json.dumps(studyImgFolderMetadata,cls=NpEncoder)))

100%|██████████| 62/62 [05:47<00:00,  5.61s/it]
100%|██████████| 62/62 [05:44<00:00,  5.55s/it]
100%|██████████| 62/62 [05:57<00:00,  5.76s/it]
100%|██████████| 62/62 [09:16<00:00,  8.97s/it]


In [8]:
## Probably to make this more efficient I'll delete the data from the folders till it works
## then reload?
import tqdm
## For every sub-folder of folder 5fac5180887370bb7c8b8a11: make sure the
## 'superpixels_in_mask' key exists, reset the folder-level 'svgJson' to {},
## and fill the 'svgJson' of the linked metadata item when it is still empty.
for sf in tqdm.tqdm(dermGC.listFolder('5fac5180887370bb7c8b8a11')):
    sfMeta = sf['meta']  # metadata as returned by the listing, before the updates below

    if 'superpixels_in_mask' not in sfMeta:
        dermGC.addMetadataToFolder(sf['_id'], {'superpixels_in_mask': []})

    # Written unconditionally (the "only when missing" check is intentionally off).
    dermGC.addMetadataToFolder(sf['_id'], {'svgJson': {}})

    ## Folders without a linked metadata item need nothing more
    if 'FullMetaDataItem' not in sfMeta:
        continue

    studyResultMetadata = dermGC.getItem(sfMeta['FullMetaDataItem'])
    itemMeta = studyResultMetadata['meta']
    if not (itemMeta['svgJson'] == {}):
        continue

    svgJsonData = getSVGJsonData(dermGC, itemMeta['spxImage']['_id'])
    studyResultMetadata = dermGC.addMetadataToItem(studyResultMetadata['_id'], {'svgJson': svgJsonData})


248it [06:04,  1.47s/it]


In [None]:
# Scratch cell: shows whatever `studyImgFolder` was last assigned by the study
# loop in an earlier cell; the name is undefined until that cell has run.
studyImgFolder

In [None]:
## Remember the feature data needs to be grouped by raters...

                    
#                 sys.exit()
#             #print(si['name'])
            
#             #pprint.pprint(raterData)
# #         AllStudyRaterData[studyName] = { "FeatureSetId": studyDetails["featureset"]["_id"],
# #                                         "MarkupData": curStudyRaterData, "fullFeatureSet" : curFeatureSetData
# #                                         }

    

In [None]:
# Scratch cell: shows the folder metadata dict built for the last image that the
# study loop in an earlier cell fully processed.
# curImageAnnotations = gc.get('annotation?studyId=%s&imageId=%s' % ( s['_id'], si['_id']))
# curImageAnnotations
studyImgFolderMetadata

In [None]:
### need to look into the modelType as well

# Relies on `si` and `curImageAnnotations` left over from the study loop in an
# earlier cell, so only a single entry is stored per run of this cell.
# NOTE(review): `si` is the last image iterated while `curImageAnnotations` is from
# the last image that was actually processed — confirm they refer to the same image.
curStudyRaterData[si['name']] =  AnalyzeImageAnnotations(gc,curImageAnnotations) 

In [None]:
# curStudyRaterData['ISIC_0016081']['raters']['marghooa']['meta']['annotations']['Dots : Irregular']

In [None]:
## Post the data to the newDataSet Folder on Dermannotator
#         multiRaterMetadata['ratersForCurrentImage'] = localRaterData[fldr['name']]['ratersForCurrentImage']
#         multiRaterMetadata['featuresObservedForImage'] =localRaterData[fldr['name']]['featuresObservedForImage']
#         multiRaterMetadata['markupData'] =localRaterData[fldr['name']]['markupData']
#         multiRaterMetadata['markupData']['superpixels_in_mask'] = localRaterData[fldr['name']]['superpixels_in_mask']
#         gc.addMetadataToFolder(fldr['_id'],multiRaterMetadata)
## this is the small data set I am using for testing
## For each folder of the test data set that has rater data: store the full data
## in an 'EasyStudyMetadata' item inside the folder, and put a reduced copy
## (without the bulky 'raters' entry) plus a link to that item on the folder itself.
for fldr in dermGC.listFolder('5fad6a17887370bb7c8b93c9'):
    fldrName = fldr['name']
    if fldrName not in curStudyRaterData:
        print(fldrName,'does not have any rater data yet')
        continue

    print("Updating %s" % fldrName)

    # Round-trip through NpEncoder so numpy scalars become plain JSON types before
    # the data is handed to the client (same treatment as the study loop uses).
    fullRaterData = json.loads(json.dumps(curStudyRaterData[fldrName], cls=NpEncoder))
    EasyStudyMetadataItem = dermGC.createItem(fldr['_id'],'EasyStudyMetadata',reuseExisting=True,metadata= fullRaterData)

    ## The full data is too big for the folder: build a reduced copy instead of
    ## deleting from curStudyRaterData, so this cell can be re-run safely.
    parentFolderMeta = {key: value for key, value in fullRaterData.items() if key != 'raters'}
    parentFolderMeta['FullMetaDataItem'] = EasyStudyMetadataItem['_id']

    dermGC.addMetadataToFolder(fldr['_id'],parentFolderMeta)

In [None]:
## DAMN IT g.aregiano  

def print_dict(d):
    """Return a copy of ``d`` in which every '.' in a key is replaced by '-'.

    Nested dict values are converted recursively; all other values are kept
    as they are. Despite its name the function prints nothing. The input dict
    is not modified.

    Args:
        d: Dict with string keys.

    Returns:
        A new dict with the renamed keys.
    """
    new = {}
    # dict.iteritems() only exists on Python 2; items() is the Python 3 spelling.
    for k, v in d.items():
        if isinstance(v, dict):
            v = print_dict(v)
        new[k.replace('.', '-')] = v
    return new


