# 1. Read the initial parquet file that you downloaded from the ProstateNet platform

In [59]:
import pandas as pd
import os 

# Metadata table exported from the ProstateNet platform, stored as parquet.
csv = "/home/nst/Desktop/Procancer/Datasets/PROSTATENET-LESIONS/Datasetv0.3/newparq.parquet"
df = pd.read_parquet(csv)

# Names of the entries in the Patients folder; they are matched against the
# `patient_id` column below (per the original note, each patient folder
# contains studies -> series -> DICOM files).
ls_pats = os.listdir(
    "/home/nst/Desktop/Procancer/Datasets/PROSTATENET-LESIONS/Datasetv0.3/Patients"
)

# Keep only the metadata rows whose patient is present in that folder.
df_filt = df.loc[df["patient_id"].isin(ls_pats)]


# 2. Keep the features required by Jose's Docker image

In [None]:
# Columns of the metadata table that are kept for the classifier input.
# The order of this list is the column order of the filtered DataFrame and
# therefore of the exported TSV file.
features = [
    "patient_id",
    "study_uid",
    "series_uid",
    "number_of_images",
    "diffusion_bvalue",
    "diffusion_directionality",
    "echo_time",
    "echo_train_length",
    "repetition_time",
    "flip_angle",
    "in_plane_phase_encoding_direction",
    "mr_acquisition_type",
    "acquisition_matrix",
    "patient_position",
    "reconstruction_matrix",
    "magnetic_field_strength",
    "manufacturer",
    "manufacturer_model_name",
    "number_of_phase_encoding_steps",
    "percent_phase_field_of_view",
    "pixel_bandwidth",
    "receive_coil_name",
    "transmit_coil_name",
    "sar",
    "scanning_sequence",
    "sequence_variant",
    "slice_thickness",
    "software_versions",
    "temporal_resolution",
    "image_orientation_patient",
    "image_type",
    "scan_options",
    "photometric_interpretation",
    "spectrally_selected_suppression",
    "inversion_time",
    "pixel_spacing",
    "number_of_echos",
    "number_of_temporal_positions",
    "modality",
    "series_description",
]

# 3. Filter the dataframe and replace blank values (empty strings) with "-" as follows

In [29]:
# Keep only the columns listed in `features` (in that order), then write a "-"
# placeholder into every cell that is exactly the empty string. Missing values
# (NaN/None) are not matched by this replacement and stay as they are.
df_filt = df_filt[features].replace("", "-")

# 3. Produce the .tsv file (Jose's Docker image takes a TSV file as input)
#### P.S. The TSV file must be named tsv_file.tsv, otherwise the Docker image will not work

In [30]:
# Export the filtered metadata as a tab-separated file for the Docker step.
# `index` is left at its default, so the DataFrame index is written as the
# first (unnamed) column.
df_filt.to_csv(
    "/home/nst/Desktop/Procancer/Datasets/PROSTATENET-LESIONS/Datasetv0.3/tsv_file.tsv",
    sep="\t",
)

# 4. Pull the Docker image from the Harbor registry. This requires authentication

In [None]:
# Pull the metadata-classification image from the pcr.procancer-i.eu registry,
# pinned by its sha256 digest.
# NOTE(review): the run step refers to the image by its `:tsv` tag rather than
# by this digest -- confirm that both point to the same image.
! docker pull pcr.procancer-i.eu/metadata-classification/metadata-classification@sha256:d044e1739387aa5ec61c16144244577bb341ef5b09a0dbcfccde1fb72809d7a8

# 5. Run the Docker image. The mounted volume (`-v <host_dir>:/data`) must be the host directory that contains `tsv_file.tsv`. Append `/data/tsv_file.tsv >results.csv` to the command to save the container's output as a CSV file

In [54]:
! docker run  -v "/home/nst/Desktop/Dimitris_Deep_Learning/Lesion_Radiomics":/data -it --rm 'pcr.procancer-i.eu/metadata-classification/metadata-classification:tsv' /data/tsv_file.tsv >results.csv

# 6. Read the CSV file produced in step 5 and set the column names

In [60]:
# Load the classifier output. The file is read without a header row (every
# line is treated as data) and the column names are supplied explicitly.
# Note: this rebinds `df`, which previously held the parquet metadata.
df = pd.read_csv(
    "/home/nst/Desktop/Dimitris_Deep_Learning/Lesion_Radiomics/results.csv",
    header=None,
    names=[
        "patient_id",
        "study_uid",
        "series_uid",
        "series_type",
        "series_type_heuristics",
    ],
)
df

Unnamed: 0,patient_id,study_uid,series_uid,series_type,series_type_heuristics
0,PCa-281800825508578515144435818229752815516,1.3.6.1.4.1.58108.1.16256001161290267998269556...,1.3.6.1.4.1.58108.1.29364075866931413995773288...,OTHERS,OTHERS
1,PCa-135969383536126677583519258861681720456,1.3.6.1.4.1.58108.1.22651757944016428908315693...,1.3.6.1.4.1.58108.1.25095428435818490090531741...,DCE,DCE
2,PCa-303901718985542921512314477627180280896,1.3.6.1.4.1.58108.1.10607839931048332629189779...,1.3.6.1.4.1.58108.1.16677794868759826810731269...,OTHERS,T2
3,PCa-34148494147237367619235184665025094132,1.3.6.1.4.1.58108.1.12582474876076936654284528...,1.3.6.1.4.1.58108.1.30220701077754774736721399...,OTHERS,OTHERS
4,PCa-322811708477387603513790092385211486514,1.3.6.1.4.1.58108.1.10574491948317486064417202...,1.3.6.1.4.1.58108.1.84359734161400723681095382...,DCE,DCE
...,...,...,...,...,...
981,PCa-96799853096533800933501342060735272625,1.3.6.1.4.1.58108.1.23730826489225261695884119...,1.3.6.1.4.1.58108.1.51641186631464242703148643...,DWI,DWI
982,PCa-269375718944291990686518457660855358384,1.3.6.1.4.1.58108.1.75356085133065770878103654...,1.3.6.1.4.1.58108.1.24351937689592790156147560...,DWI,DWI
983,PCa-306053693953244553625602707659659138431,1.3.6.1.4.1.58108.1.25779762579249755756790059...,1.3.6.1.4.1.58108.1.30196138510031753827238274...,OTHERS,OTHERS
984,PCa-205409908134605906159119137085095268810,1.3.6.1.4.1.58108.1.93454360929944515045537574...,1.3.6.1.4.1.58108.1.10974175830493171774281547...,OTHERS,OTHERS
