# 1. Read the initial parquet file you downloaded from the ProstateNet platform

In [1]:
import pandas as pd
import os 

# Metadata table exported from the platform (a parquet file, despite the variable name).
csv = "/home/nst/Desktop/Procancer/Datasets/PROSTATENET-LESIONS/Datasetv0.3/newparq.parquet"
# Folder with one sub-folder per patient (patient -> studies -> series -> DICOM files).
patients_dir = "/home/nst/Desktop/Procancer/Datasets/PROSTATENET-LESIONS/Datasetv0.3/Patients"

df = pd.read_parquet(csv)
ls_pats = os.listdir(patients_dir)

# Keep only the metadata rows whose patient_id has a matching folder on disk.
is_local_patient = df["patient_id"].isin(ls_pats)
df_filt = df[is_local_patient]


# 2. Keep the features required by Jose's Docker image

In [2]:
# Metadata columns to keep, listed in the order they are selected from the table.
features = [
    "patient_id",
    "study_uid",
    "series_uid",
    "number_of_images",
    "diffusion_bvalue",
    "diffusion_directionality",
    "echo_time",
    "echo_train_length",
    "repetition_time",
    "flip_angle",
    "in_plane_phase_encoding_direction",
    "mr_acquisition_type",
    "acquisition_matrix",
    "patient_position",
    "reconstruction_matrix",
    "magnetic_field_strength",
    "manufacturer",
    "manufacturer_model_name",
    "number_of_phase_encoding_steps",
    "percent_phase_field_of_view",
    "pixel_bandwidth",
    "receive_coil_name",
    "transmit_coil_name",
    "sar",
    "scanning_sequence",
    "sequence_variant",
    "slice_thickness",
    "software_versions",
    "temporal_resolution",
    "image_orientation_patient",
    "image_type",
    "scan_options",
    "photometric_interpretation",
    "spectrally_selected_suppression",
    "inversion_time",
    "pixel_spacing",
    "number_of_echos",
    "number_of_temporal_positions",
    "modality",
    "series_description",
]

# 3. Filter the dataframe and replace blank (empty-string) values with "-" as follows

In [3]:
# Keep only the selected columns, then turn empty-string cells into "-".
# NOTE(review): only exact "" values are replaced; NaN/None cells are left as-is
# — confirm that is what the classifier expects.
df_filt = (
    df_filt[features]
    .replace("", "-")
)

# 3. Produce the .tsv file (Jose's Docker image takes TSV files as input)
#### P.S. The TSV file must be named tsv_file.tsv, otherwise the Docker container will not work

In [4]:
# Write the filtered metadata as tab-separated text. The DataFrame index is
# written as well, as an unnamed first column (pandas default).
tsv_path = "/home/nst/Desktop/Dimitris_Deep_Learning/Lesion_Radiomics/tsv_file.tsv"
df_filt.to_csv(tsv_path, sep="\t")

# 4. Pull the Docker image from Harbor. This requires authentication

In [None]:
# Fetch the metadata-classification image, pinned to an exact sha256 digest.
! docker pull pcr.procancer-i.eu/metadata-classification/metadata-classification@sha256:d044e1739387aa5ec61c16144244577bb341ef5b09a0dbcfccde1fb72809d7a8

# 5. Run the Docker image. The volume (`-v`) is the folder that contains tsv_file.tsv, mounted as `/data`. End the command with `/data/tsv_file.tsv >results.csv` so the output is saved to a CSV file

In [7]:
! docker run  -v "/home/nst/Desktop/Dimitris_Deep_Learning/Lesion_Radiomics":/data -it --rm 
'pcr.procancer-i.eu/metadata-classification/metadata-classification:tsv' /data/tsv_file.tsv >results.csv

# 6. Read the CSV file produced in step 5 and set the column names

In [8]:
# Column names are supplied here, so pandas treats every line of the file as data.
result_columns = [
    "patient_id",
    "study_uid",
    "series_uid",
    "series_type",
    "series_type_heuristics",
]
df = pd.read_csv(
    "/home/nst/Desktop/Dimitris_Deep_Learning/Lesion_Radiomics/results.csv",
    names=result_columns,
)
df

Unnamed: 0,patient_id,study_uid,series_uid,series_type,series_type_heuristics
0,PCa-224460163592177244880977586344168516060,1.3.6.1.4.1.58108.1.24125651050895909277174943...,1.3.6.1.4.1.58108.1.86528952413866730661368191...,DCE,DCE
1,PCa-322811708477387603513790092385211486514,1.3.6.1.4.1.58108.1.10574491948317486064417202...,1.3.6.1.4.1.58108.1.15196080904999324926032028...,OTHERS,OTHERS
2,PCa-128343693582576654198302883533313562452,1.3.6.1.4.1.58108.1.26462270923081528847454060...,1.3.6.1.4.1.58108.1.26844072371653350274060206...,OTHERS,OTHERS
3,PCa-34148494147237367619235184665025094132,1.3.6.1.4.1.58108.1.12582474876076936654284528...,1.3.6.1.4.1.58108.1.26114360012838052189904638...,OTHERS,OTHERS
4,PCa-43385374415423801322600884921397774294,1.3.6.1.4.1.58108.1.75509484004312114934226657...,1.3.6.1.4.1.58108.1.33297051103260127487400644...,DCE,DCE
...,...,...,...,...,...
981,PCa-193310775782366100721133747246861570224,1.3.6.1.4.1.58108.1.14776109988601235322337612...,1.3.6.1.4.1.58108.1.31154290060023308187736723...,DWI,DWI
982,PCa-196042475411156616056677980795637271534,1.3.6.1.4.1.58108.1.13121765398268351365499676...,1.3.6.1.4.1.58108.1.18321618761500318797447376...,ADC,ADC
983,PCa-44337912652508229499239407761343064689,1.3.6.1.4.1.58108.1.30020474920122346862583334...,1.3.6.1.4.1.58108.1.33548256302927534982775208...,ADC,ADC
984,PCa-326531208611654252353671329600896024008,1.3.6.1.4.1.58108.1.14224404972847540307144471...,1.3.6.1.4.1.58108.1.20687434097832528988778610...,DCE,DCE
