In [1]:
## Given a full path in the DSA it will copy files so they can be run through the deIdentification pipeline
import girder_client
%load_ext autoreload
%autoreload 2
import deidHelpers as hlprs
import dsaSecrets as ds

gc = girder_client.GirderClient(apiUrl=ds.dsaApiUrl)
_ = gc.authenticate(apiKey=ds.dsaApiToken)

In [2]:
## For testing this will delete images from previous testing
hlprs.cleanupFoldersByPath(gc,hlprs.wsiDeidFolderPathsForCleanup)

Deleted 50 items and 0 folders in folder Unfiled
Deleted 0 items and 0 folders in folder Redacted
Deleted 0 items and 0 folders in folder Import Job Reports
Deleted 0 items and 0 folders in folder Redacted
Deleted 0 items and 0 folders in folder Approved


In [4]:
import pandas as pd
metadataFile = "DeId_exampleData_052423.csv"
targetFolder = '646276df3bf3da34495c1268'
sampleImageFolder = '6477c00e309a9ffde6689635'


def createSyntheticDataSetFromCSV( gc, sampleImageFolder, targetFolder, metadataFile):
    """ This will grab a random image from the sample Image Folder, and copy it to the targetFolder
    and will also rename it during this process so it matches whatever was in the metadataFile"""

    sampleImageSet = [x for x in gc.listItem(sampleImageFolder) if "(" not in x['name']]

    
    df = pd.read_csv(metadataFile)
    for k,r in enumerate(df.to_dict(orient='records')):
        ## Copy item and rename it into new target folder
        itemCopy = gc.post(f'item/{sampleImageSet[k]["_id"]}/copy?folderId={targetFolder}&name={r["CASE"]}')
        gc.addMetadataToItem(itemCopy["_id"],{"deidUpload":r})

In [5]:
createSyntheticDataSetFromCSV(gc, sampleImageFolder, targetFolder, metadataFile)

In [7]:
df = pd.read_csv(metadataFile)
df.head()

requiredColumns =  ['InputFileName','SampleID','REPOSITORY','STUDY','PROJECT','CASE','BLOCK','ASSAY','INDEX','ImageID',
 'OutputFileName']

# CAse = Patient more or less
# Sample = uniqueish slide #

## Clean Initial Input File

df['InputFileName'] = df.CASE
df['OutputFileName'] = df.CASE +'.deid.svs'

df['SampleID'] = df.CASE.str.extract(r'(TI\d+)')
df['ImageID'] = df.CASE.str.extract(r'00(\d+)_')




# df['CASE'].apply(lambda x: re.sub(r'[\n\r]*','', str(x)))
df = df[requiredColumns]

df.head()

Unnamed: 0,InputFileName,SampleID,REPOSITORY,STUDY,PROJECT,CASE,BLOCK,ASSAY,INDEX,ImageID,OutputFileName
0,copySlide-0033444_TI02289 4003.svs,TI02289,DCEG,MR-0700,HP0700-012,copySlide-0033444_TI02289 4003.svs,BR6011,H&E,1,33444,copySlide-0033444_TI02289 4003.svs.deid.svs
1,copySlide-0033445_TI02263 4002.svs,TI02263,DCEG,MR-0700,HP0700-012,copySlide-0033445_TI02263 4002.svs,BR6011a,H&E,1,33445,copySlide-0033445_TI02263 4002.svs.deid.svs
2,copySlide-0033446_TI01978 4003.svs,TI01978,DCEG,MR-0700,HP0700-012,copySlide-0033446_TI01978 4003.svs,BR6013,H&E,1,33446,copySlide-0033446_TI01978 4003.svs.deid.svs
3,copySlide-0033447_TI02678 4002.svs,TI02678,DCEG,MR-0700,HP0700-012,copySlide-0033447_TI02678 4002.svs,BR6014,H&E,1,33447,copySlide-0033447_TI02678 4002.svs.deid.svs
4,copySlide-0033448_TI02740 4002.svs,TI02740,DCEG,MR-0700,HP0700-012,copySlide-0033448_TI02740 4002.svs,BR6015,H&E,1,33448,copySlide-0033448_TI02740 4002.svs.deid.svs


In [None]:
# df.to_csv("sampleDeIdFile.csv",index=False
# df.to_dict(orient='records')

In [8]:
dsaPathToDeid = "/testHALO_import/DCEG/n100 AI"   ## Folder I want to copy over for deidentification

importFolderName = "/WSI DeID/Unfiled"  ## This is for internal bookkeeping, Will be hidden

folderToImport = gc.get(f'resource/lookup?path=/collection{dsaPathToDeid}')

In [10]:
unfiledImageFolder = gc.get(f'resource/lookup?path=/collection/WSI DeID/Unfiled')
for i in gc.listItem(folderToImport['_id']):

    #### AAH SO THE UNFILED OPERATION IS A MOVE EVENT, NOT A COPY EVENT... SO I NEED TO FIRST COPY THEM TO UNFILED
    ### AND THEN RUN THIS COMMAND TO AVOID MOVING THE ORIGINAL ITEMS...
    ## Using a helper function to generate some random synthetic data
    #imageMeta = hlprs.generate_random_data(hlprs.syntheticData)

    imageMeta = df.loc[df.InputFileName==i['name']].to_dict(orient='record')[0]

    itemCopyToUnfiled = gc.post(f'item/{i["_id"]}/copy?folderId={unfiledImageFolder["_id"]}')
    gc.addMetadataToItem(itemCopyToUnfiled['_id'],{"deidUpload": imageMeta})

    #Will use the refill API endpoint to copy these images to the target directory, and also rename
#     newImageName = f'{imageMeta["PatientID"]}_{imageMeta["BLOCK"]}.{imageMeta["INDEX"]}_WSIDEID'
    newImageName = imageMeta['OutputFileName']

    newImagePath = f'/WSI DeID/AvailableToProcess/{imageMeta["SampleID"]}/{newImageName}.svs'
    ## TO DO: ADD File Path Extension programatiicaly
    ## See if an image with this name already exists in the AvailableToProcess folder.. this throws an error

    try:
        print("Trying to process item",newImageName,"which I think should go to",newImagePath)
        itemCopyOutput = gc.put(f'/wsi_deid/item/{itemCopyToUnfiled["_id"]}/action/refile?imageId={newImageName}&tokenId={imageMeta["SampleID"]}')

        deidMeta = {**itemCopyOutput["meta"]["deidUpload"], **imageMeta}
    #     print(deidMeta)
        gc.addMetadataToItem(itemCopyOutput["_id"],{"deidUpload":deidMeta})
        print("Adding a new item for",imageMeta['PatientID'])
    except:
        print("Item already exists...")


  imageMeta = df.loc[df.InputFileName==i['name']].to_dict(orient='record')[0]


Trying to process item copySlide-0033444_TI02289 4003.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI02289/copySlide-0033444_TI02289 4003.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0033445_TI02263 4002.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI02263/copySlide-0033445_TI02263 4002.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0033446_TI01978 4003.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI01978/copySlide-0033446_TI01978 4003.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0033447_TI02678 4002.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI02678/copySlide-0033447_TI02678 4002.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0033448_TI02740 4002.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI02740/copySlide-0033448_TI02740 4002.svs.deid.svs.svs
Item already 

Item already exists...
Trying to process item copySlide-0035120_TI06106 4002.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI06106/copySlide-0035120_TI06106 4002.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0035121_TI06107 4000.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI06107/copySlide-0035121_TI06107 4000.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0035148_TI06122 4001.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI06122/copySlide-0035148_TI06122 4001.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0035149_TI00487 4003.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI00487/copySlide-0035149_TI00487 4003.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0035150_TI01214 4002.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI01214/copySlide-0035150_TI01214 4002.svs.dei

Item already exists...
Trying to process item copySlide-0043791_TI06040 4001.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI06040/copySlide-0043791_TI06040 4001.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0043917_TI06371 4002.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI06371/copySlide-0043917_TI06371 4002.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0043918_TI06371 4003.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI06371/copySlide-0043918_TI06371 4003.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0043919_TI06372 4001.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI06372/copySlide-0043919_TI06372 4001.svs.deid.svs.svs
Item already exists...
Trying to process item copySlide-0043920_TI06372 4002.svs.deid.svs which I think should go to /WSI DeID/AvailableToProcess/TI06372/copySlide-0043920_TI06372 4002.svs.dei

In [None]:
# for i in gc.listItem(folderToImport['_id']):
#     print(i['name'])
i['name']

In [None]:
# get(f'resource/lookup?path=/collection/WSI DeID/AvailableToProcess/Patient3/Patient3_5.1_WSIDEID.svs')
df.loc[df.InputFileName=='copySlide-0033444_TI02289 4003.svs'].to_dict(orient='record')[0]