In [1]:
from typing import Counter

import h5py
# import block
import h5py as h5
import numpy as np
import pandas as pd
import dask
import dask.array as da


In [2]:
# Previously used input file, kept for reference:
# f = h5.File('hdf5_TCGAFFPE_LUADLUSC_5x_60pc_he_complete_lungsubtype_survival.h5', 'r')
# Open the HDF5 file read-only. The handle `f` is deliberately left open:
# later cells read datasets from it.
f = h5py.File('TCGA_Lung.h5', 'r')
# Show the names of the top-level entries stored in the file.
list(f.keys())

['img_h_latent',
 'img_z_latent',
 'indexes',
 'labels',
 'luad',
 'original_set',
 'os_event_data',
 'os_event_ind',
 'patterns',
 'samples',
 'slides',
 'tiles']

In [3]:
f['img_z_latent'][0:5]

array([[ 0.60399085,  0.6154265 ,  0.51997286,  0.7983581 ,  0.8543468 ,
         1.6198976 , -0.08143081, -0.5076853 , -0.58085173, -0.04999867,
        -0.27874815, -1.3209368 ,  0.44735205, -0.32292035,  0.78014284,
        -0.4687674 ,  0.5048694 , -0.8251408 , -0.6559142 , -0.64013517,
        -0.17248419,  0.8352538 ,  0.1853057 ,  0.97889704, -1.1805491 ,
         0.2166574 ,  0.32337976, -0.36513782, -0.10457937,  1.0557078 ,
        -0.0422553 , -0.94248337,  0.656888  , -0.17325932, -0.8991051 ,
         0.5922995 , -0.355088  , -0.32703495,  0.5538323 , -0.5677034 ,
        -0.7032062 ,  0.6093278 , -0.10199608,  0.86537015,  0.46646714,
         0.94357806, -0.55080205,  0.46549016,  0.01579239, -0.21778792,
        -0.04497945,  1.0489277 , -0.4732653 , -0.2026035 ,  1.541997  ,
        -0.5442506 , -1.2489289 ,  0.14919099, -1.2776375 ,  0.3791314 ,
        -0.08529247,  1.0572771 , -0.6917053 ,  0.5638212 ,  0.4198172 ,
        -1.0549508 , -1.41478   , -0.98353535,  0.1

In [4]:
f['img_z_latent'][0].shape

(128,)

### Loading h5 file with Dask

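Datasets written with h5py cannot be read directly by pandas, so we use dask as an intermediate step: each HDF5 dataset is wrapped in a dask array and then computed into a NumPy array.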

In [5]:
# Load the z latent vectors: wrap the HDF5 dataset in a dask array,
# then compute it into an in-memory NumPy array.
z_data = f['img_z_latent']
z_dask_array = da.from_array(z_data)
z_np = z_dask_array.compute()
# Shape is (number of tiles, latent dimensions).
z_np.shape

(840979, 128)

In [6]:
# Load the slide name of every tile the same way (HDF5 dataset -> dask -> NumPy).
# The values come back as byte strings, as the preview below shows.
slides_data = f['slides']
slides_dask_array = da.from_array(slides_data)
slides_np = slides_dask_array.compute()
slides_np[:5]

array([b'TCGA-73-4677-01Z-00-DX1', b'TCGA-50-5045-01Z-00-DX1',
       b'TCGA-69-7765-01Z-00-DX1', b'TCGA-69-7765-01Z-00-DX1',
       b'TCGA-73-4677-01Z-00-DX1'], dtype='|S23')

In [7]:
# Load the tile file name of every tile (HDF5 dataset -> dask -> NumPy).
# The dask wrapper gets its own name so that `slides_dask_array` from the
# previous cell keeps pointing at the slides data instead of being rebound
# to the tiles.
tiles_data = f['tiles']
tiles_dask_array = da.from_array(tiles_data)
tiles_np = tiles_dask_array.compute()
tiles_np[:5]

array([b'41_27.jpeg', b'18_22.jpeg', b'33_15.jpeg', b'34_13.jpeg',
       b'23_14.jpeg'], dtype='|S16')

In [8]:
def _join_slide_and_tile(slide_bytes, tile_bytes):
    """Decode both byte strings as UTF-8 and join them with an underscore."""
    return slide_bytes.decode('utf-8') + '_' + tile_bytes.decode('utf-8')


# Element-wise version of the helper above; it builds the
# "<slide>_<tile>" identifier that later cells use as the merge key.
byte_string_concatenate = np.vectorize(_join_slide_and_tile)
full_names_np = byte_string_concatenate(slides_np, tiles_np)

In [9]:
# Metadata columns: the decoded "<slide>_<tile>" identifier plus the raw
# byte-string slide and tile names.
hdf5_df_dict = dict(full_names=full_names_np, slides=slides_np, tiles_np=tiles_np)
hdf5_df = pd.DataFrame(data=hdf5_df_dict)
# One column per latent dimension, labelled 0..N-1.
z_df = pd.DataFrame(data=z_np)

# Both frames carry the default positional index, so the join pairs rows
# one-to-one. keep=False then drops every tile whose full name occurs more
# than once (all copies, not just the repeats).
lung_dataframe = hdf5_df.join(z_df).drop_duplicates(subset='full_names', keep=False)
lung_dataframe.head(n=5)

Unnamed: 0,full_names,slides,tiles_np,0,1,2,3,4,5,6,...,118,119,120,121,122,123,124,125,126,127
0,TCGA-73-4677-01Z-00-DX1_41_27.jpeg,b'TCGA-73-4677-01Z-00-DX1',b'41_27.jpeg',0.603991,0.615426,0.519973,0.798358,0.854347,1.619898,-0.081431,...,-0.027439,0.659666,0.938265,-0.21967,0.57999,-0.868479,0.820414,-0.292379,0.304265,1.051329
1,TCGA-50-5045-01Z-00-DX1_18_22.jpeg,b'TCGA-50-5045-01Z-00-DX1',b'18_22.jpeg',0.007933,-0.653504,-0.325921,0.595971,0.656198,0.723254,0.769134,...,0.359725,0.889286,-0.304759,0.343273,0.571918,0.257411,-0.636936,-0.786946,-0.106007,-0.075794
2,TCGA-69-7765-01Z-00-DX1_33_15.jpeg,b'TCGA-69-7765-01Z-00-DX1',b'33_15.jpeg',-0.210125,-0.088107,1.24442,-0.183467,-0.112259,0.90779,-0.09075,...,0.037885,-0.278923,-0.135324,-0.113194,-0.093982,-0.876498,-1.21537,-0.637309,-1.313928,-0.418625
3,TCGA-69-7765-01Z-00-DX1_34_13.jpeg,b'TCGA-69-7765-01Z-00-DX1',b'34_13.jpeg',-0.692818,-0.289884,0.331762,-0.644696,0.088126,-0.24039,-0.326105,...,-1.279986,-1.310433,0.644736,-0.064769,-0.054209,0.133795,0.070039,0.72199,-0.971647,-0.151334
4,TCGA-73-4677-01Z-00-DX1_23_14.jpeg,b'TCGA-73-4677-01Z-00-DX1',b'23_14.jpeg',-1.449646,0.472102,-0.691184,0.487814,1.070542,0.808109,-0.907713,...,0.747186,-0.331451,-0.279906,0.643118,-0.964694,0.212125,-0.09931,-0.812816,-1.04329,0.128322


In [10]:
lung_dataframe.shape

(840743, 131)

In [11]:
# Should display an empty frame: no full name may remain duplicated after the de-duplication above.
lung_dataframe.loc[lung_dataframe['full_names'].duplicated(keep=False)]

Unnamed: 0,full_names,slides,tiles_np,0,1,2,3,4,5,6,...,118,119,120,121,122,123,124,125,126,127


### Getting Data from the folds
Load the per-fold CSV files, which hold the Leiden cluster assigned to each tile image.


In [12]:
# Earlier variant (unfiltered fold 4, with a separate validation split), kept for reference:
# train_df = pd.read_csv('TCGAFFPE_LUADLUSC_5x_60pc_he_complete_lungsubtype_survival_leiden_2p0__fold4.csv')
# test_df = pd.read_csv('TCGAFFPE_LUADLUSC_5x_60pc_he_complete_lungsubtype_survival_leiden_2p0__fold4_test.csv')
# validate_df = pd.read_csv('TCGAFFPE_LUADLUSC_5x_60pc_he_complete_lungsubtype_survival_leiden_2p0__fold4_valid.csv')

# Current input: the filtered fold-0 train and test CSVs (no validation file is read).
train_df = pd.read_csv('TCGAFFPE_LUADLUSC_5x_60pc_he_complete_lungsubtype_survival_filtered_leiden_2p0__fold0.csv')
test_df = pd.read_csv('TCGAFFPE_LUADLUSC_5x_60pc_he_complete_lungsubtype_survival_filtered_leiden_2p0__fold0_test.csv')

In [13]:
def _join_with_underscore(slide_name, tile_name):
    """Return "<slide_name>_<tile_name>" for two plain strings."""
    return slide_name + '_' + tile_name


# Stack the train and test rows into one frame with a fresh 0..n-1 index.
# The validation split is not part of this concat.
leiden_df = pd.concat([train_df, test_df], ignore_index=True, axis=0)

# Build the same "<slide>_<tile>" key that the latent-vector table uses.
string_concatenate = np.vectorize(_join_with_underscore)
leiden_full_name_np = string_concatenate(leiden_df['slides'], leiden_df['tiles'])
leiden_full_name_df = pd.DataFrame(data={'full_names': leiden_full_name_np})

# Both frames share the default positional index, so join lines rows up one-to-one.
leiden_results_df = leiden_df.join(leiden_full_name_df)
leiden_results_df.head(n=10)

Unnamed: 0,indexes,labels,luad,original_set,os_event_data,os_event_ind,patterns,slides,tiles,leiden_2.0,samples,full_names
0,1,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX1,18_22.jpeg,22,TCGA-50-5045,TCGA-50-5045-01Z-00-DX1_18_22.jpeg
1,2,0.0,1,train,5.424658,0.0,TCGA-LUAD_not_reported,TCGA-69-7765-01Z-00-DX1,33_15.jpeg,0,TCGA-69-7765,TCGA-69-7765-01Z-00-DX1_33_15.jpeg
2,3,0.0,1,train,5.424658,0.0,TCGA-LUAD_not_reported,TCGA-69-7765-01Z-00-DX1,34_13.jpeg,7,TCGA-69-7765,TCGA-69-7765-01Z-00-DX1_34_13.jpeg
3,7,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX2,51_26.jpeg,13,TCGA-50-5045,TCGA-50-5045-01Z-00-DX2_51_26.jpeg
4,12,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX1,35_13.jpeg,11,TCGA-50-5045,TCGA-50-5045-01Z-00-DX1_35_13.jpeg
5,18,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX2,10_18.jpeg,17,TCGA-50-5045,TCGA-50-5045-01Z-00-DX2_10_18.jpeg
6,19,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX2,48_26.jpeg,9,TCGA-50-5045,TCGA-50-5045-01Z-00-DX2_48_26.jpeg
7,20,0.0,1,train,5.424658,0.0,TCGA-LUAD_not_reported,TCGA-69-7765-01Z-00-DX1,33_29.jpeg,0,TCGA-69-7765,TCGA-69-7765-01Z-00-DX1_33_29.jpeg
8,22,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX2,33_22.jpeg,22,TCGA-50-5045,TCGA-50-5045-01Z-00-DX2_33_22.jpeg
9,23,0.0,1,train,5.424658,0.0,TCGA-LUAD_not_reported,TCGA-69-7765-01Z-00-DX1,22_35.jpeg,0,TCGA-69-7765,TCGA-69-7765-01Z-00-DX1_22_35.jpeg


In [14]:
leiden_results_df['luad'].unique().shape

(1,)

In [15]:
leiden_results_df.shape

(405948, 12)

### Append the textual description

In [16]:
consensus_df = pd.read_csv('consensus.csv')
consensus_df

Unnamed: 0,HPC,consensus
0,0,"Acinar pattern adenocarcinoma, with inflamed irregular acini and tissue destruction."
1,1,Inflamed compact stroma and sparse tumor. Sheets of inflammation with destruction.
2,2,Compressed normal lung and haemorrhaging smaller airways or vessel.
3,3,Coarse fibrillar stroma.
4,4,Open normal lung with interluminal debris or small vessels.
5,5,Solid pattern adenocarcinoma with stromal TILs. Inflamed stroma. Big pleomorphic nuclei.
6,6,Adenocarcinoma with solid and sieve-like complex cribriform appearance.
7,7,Stroma-rich solid.
8,8,"Acinar pattern adenocarcinoma, showing angulated columnar acini with multiple small branched lumina."
9,9,Diverse inflamed stroma with sparse malignant epithelium.


In [17]:
def append_textual(df, consensus):
    """Return a copy of ``df`` with a ``consensus`` description column appended.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'leiden_2.0'`` column holding the cluster id of each
        row. Values are converted with ``astype(int)``, so missing values
        raise instead of being silently mapped.
    consensus : indexable
        Lookup of cluster id -> description. Each row gets
        ``consensus[cluster_id]``, so a list or NumPy array is indexed by
        position.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows and index as ``df``. ``df`` itself is
        not modified.

    Raises
    ------
    KeyError
        If ``df`` has no ``'leiden_2.0'`` column.
    ValueError
        If a cluster id is missing (NaN) or cannot be converted to an integer.
    IndexError
        If a cluster id lies outside a positional ``consensus`` sequence.
    """
    cluster_ids = df['leiden_2.0'].astype(int).tolist()
    # A plain comprehension also works for an empty frame, where np.vectorize
    # without explicit otypes raises.
    descriptions = [consensus[cluster_id] for cluster_id in cluster_ids]
    # assign() attaches the values positionally and keeps df's own index.
    # Joining a freshly built frame would align on index labels instead and
    # yield NaN or mismatched text whenever df is not indexed 0..n-1.
    return df.assign(consensus=descriptions)

In [18]:
# Positional lookup table: append_textual uses entry i for Leiden cluster i
# (assumes the rows of consensus.csv are ordered by HPC id -- TODO confirm).
consensus_vec = consensus_df['consensus'].to_numpy()
# keep=False removes every tile whose full name occurs more than once.
dataframe_with_consensus = (
    append_textual(leiden_results_df, consensus_vec)
    .drop_duplicates(subset='full_names', keep=False)
)
dataframe_with_consensus.head(n=10)

Unnamed: 0,indexes,labels,luad,original_set,os_event_data,os_event_ind,patterns,slides,tiles,leiden_2.0,samples,full_names,consensus
0,1,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX1,18_22.jpeg,22,TCGA-50-5045,TCGA-50-5045-01Z-00-DX1_18_22.jpeg,Large vessel lumina.
1,2,0.0,1,train,5.424658,0.0,TCGA-LUAD_not_reported,TCGA-69-7765-01Z-00-DX1,33_15.jpeg,0,TCGA-69-7765,TCGA-69-7765-01Z-00-DX1_33_15.jpeg,"Acinar pattern adenocarcinoma, with inflamed irregular acini and tissue destruction."
2,3,0.0,1,train,5.424658,0.0,TCGA-LUAD_not_reported,TCGA-69-7765-01Z-00-DX1,34_13.jpeg,7,TCGA-69-7765,TCGA-69-7765-01Z-00-DX1_34_13.jpeg,Stroma-rich solid.
3,7,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX2,51_26.jpeg,13,TCGA-50-5045,TCGA-50-5045-01Z-00-DX2_51_26.jpeg,Normal open lung with mild interstitial thickening and inflammation.
4,12,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX1,35_13.jpeg,11,TCGA-50-5045,TCGA-50-5045-01Z-00-DX1_35_13.jpeg,"Adenocarcinoma, showing discohesive solid and compressed lumina or linear clefts."
5,18,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX2,10_18.jpeg,17,TCGA-50-5045,TCGA-50-5045-01Z-00-DX2_10_18.jpeg,Surface and margin artefacts.
6,19,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX2,48_26.jpeg,9,TCGA-50-5045,TCGA-50-5045-01Z-00-DX2_48_26.jpeg,Diverse inflamed stroma with sparse malignant epithelium.
7,20,0.0,1,train,5.424658,0.0,TCGA-LUAD_not_reported,TCGA-69-7765-01Z-00-DX1,33_29.jpeg,0,TCGA-69-7765,TCGA-69-7765-01Z-00-DX1_33_29.jpeg,"Acinar pattern adenocarcinoma, with inflamed irregular acini and tissue destruction."
8,22,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX2,33_22.jpeg,22,TCGA-50-5045,TCGA-50-5045-01Z-00-DX2_33_22.jpeg,Large vessel lumina.
9,23,0.0,1,train,5.424658,0.0,TCGA-LUAD_not_reported,TCGA-69-7765-01Z-00-DX1,22_35.jpeg,0,TCGA-69-7765,TCGA-69-7765-01Z-00-DX1_22_35.jpeg,"Acinar pattern adenocarcinoma, with inflamed irregular acini and tissue destruction."


In [19]:
dataframe_with_consensus.shape

(405712, 13)

In [20]:
# Should display an empty frame: no full name may remain duplicated after the de-duplication above.
dataframe_with_consensus.loc[dataframe_with_consensus['full_names'].duplicated(keep=False)]

Unnamed: 0,indexes,labels,luad,original_set,os_event_data,os_event_ind,patterns,slides,tiles,leiden_2.0,samples,full_names,consensus


### Combine Tables

Combine the table of 128-dimensional z latent vectors with the Leiden cluster and consensus results, matching rows on `full_names`.

In [21]:
def get_z_vector(z_df, full_name, n_meta_cols=3):
    """Return the latent vector of the tile called ``full_name``.

    Parameters
    ----------
    z_df : pandas.DataFrame
        Table with a ``'full_names'`` column. Its first ``n_meta_cols``
        columns are treated as metadata and all remaining columns as the
        components of the latent vector.
    full_name : str
        Identifier to look up in ``z_df['full_names']``.
    n_meta_cols : int, default 3
        Number of leading non-latent columns to skip. The default matches
        ``lung_dataframe`` (``full_names``, ``slides``, ``tiles_np``).

    Returns
    -------
    numpy.ndarray
        1-D ``float32`` array holding the latent components of the one
        matching row.

    Raises
    ------
    KeyError
        If no row is named ``full_name``.
    ValueError
        If more than one row is named ``full_name``.
    """
    z_match = z_df[z_df['full_names'] == full_name]
    # Fail loudly on a missing or ambiguous name. Squeezing and slicing the
    # raw array would otherwise return an empty 2-D array, or slice rows
    # instead of columns.
    if len(z_match) == 0:
        raise KeyError(f'no tile named {full_name!r} in z_df')
    if len(z_match) > 1:
        raise ValueError(f'{len(z_match)} tiles named {full_name!r} in z_df; expected exactly one')
    return z_match.iloc[0, n_meta_cols:].to_numpy(dtype=np.float32)

In [22]:
get_z_vector(lung_dataframe, 'TCGA-50-5045-01Z-00-DX1_18_22.jpeg')

array([ 0.00793278, -0.6535041 , -0.32592073,  0.5959711 ,  0.6561984 ,
        0.72325367,  0.7691342 , -1.0305343 , -0.2527991 , -0.71954805,
        0.18113293, -0.52599835, -0.20067441, -0.29770222,  0.52636486,
       -0.25883275,  0.3601961 , -0.20289223, -0.18432501,  0.02314237,
       -0.50373554,  0.12701714, -0.36622202, -0.45415965,  0.31850484,
        0.3967717 ,  0.7098042 ,  0.05477478,  0.01881803,  0.17783704,
       -0.16318966, -0.29783416,  0.23743758, -0.0657612 , -0.30754843,
        0.07342377,  0.1687861 ,  0.34562567,  0.4063647 , -0.3809925 ,
        0.28917754, -0.593021  , -0.11398333,  0.05506039, -0.71482295,
        0.09188692,  0.35405225, -0.6085819 ,  0.30212772, -0.6940665 ,
        0.20554326,  0.24928072, -0.24627034,  0.94119084,  0.0520241 ,
       -0.93536055,  0.6756928 , -0.22274317, -0.44869697,  0.51905966,
       -1.083598  ,  0.70078456, -0.17058548, -0.02530888,  0.19538344,
       -0.18474415,  0.13062754, -0.21208975,  0.2038778 ,  0.58

In [23]:
# Left merge: keep every clustered tile and attach its latent vector where one exists.
# Both inputs have a 'slides' column, so the merge suffixes them with _x/_y.
# The byte-string copy from the latent table (slides_y) is dropped and the
# left-hand column gets its plain name back.
result = (
    dataframe_with_consensus
    .merge(lung_dataframe, how='left', on='full_names')
    .drop(columns='slides_y')
    .rename(columns={'slides_x': 'slides'})
)
result.shape

(405712, 142)

In [24]:
result.head(5)

Unnamed: 0,indexes,labels,luad,original_set,os_event_data,os_event_ind,patterns,slides,tiles,leiden_2.0,...,118,119,120,121,122,123,124,125,126,127
0,1,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX1,18_22.jpeg,22,...,0.359725,0.889286,-0.304759,0.343273,0.571918,0.257411,-0.636936,-0.786946,-0.106007,-0.075794
1,2,0.0,1,train,5.424658,0.0,TCGA-LUAD_not_reported,TCGA-69-7765-01Z-00-DX1,33_15.jpeg,0,...,0.037885,-0.278923,-0.135324,-0.113194,-0.093982,-0.876498,-1.21537,-0.637309,-1.313928,-0.418625
2,3,0.0,1,train,5.424658,0.0,TCGA-LUAD_not_reported,TCGA-69-7765-01Z-00-DX1,34_13.jpeg,7,...,-1.279986,-1.310433,0.644736,-0.064769,-0.054209,0.133795,0.070039,0.72199,-0.971647,-0.151334
3,7,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX2,51_26.jpeg,13,...,1.103752,0.310443,-0.534691,0.383871,0.505146,-0.469839,-0.259364,0.455547,-0.976671,-1.056874
4,12,0.0,1,train,71.473973,1.0,TCGA-LUAD_not_reported,TCGA-50-5045-01Z-00-DX1,35_13.jpeg,11,...,-1.54697,-0.023987,-0.670502,0.267346,-1.057527,0.889611,0.627989,0.333832,-1.350921,-1.661967


In [25]:
# this will save to file
result.to_csv('TCGA_Lung_consensus.csv', index=False)

## Sanity Check
This is a list of sanity checks to confirm that the data was processed properly.

In [26]:
# TCGA-80-5608-01Z-00-DX1
# mostly leiden 19

result[result['slides'] == 'TCGA-80-5608-01Z-00-DX1']

Unnamed: 0,indexes,labels,luad,original_set,os_event_data,os_event_ind,patterns,slides,tiles,leiden_2.0,...,118,119,120,121,122,123,124,125,126,127
2964,5254,1.0,1,train,93.106849,0.0,TCGA-LUAD_stage_i,TCGA-80-5608-01Z-00-DX1,16_7.jpeg,19,...,1.281739,0.000694,-0.201676,-1.481620,0.837781,1.734455,0.929274,-0.322783,-0.059566,-0.633908
3020,5336,1.0,1,train,93.106849,0.0,TCGA-LUAD_stage_i,TCGA-80-5608-01Z-00-DX1,8_14.jpeg,19,...,1.398777,-0.627009,-0.448036,-0.860154,1.123078,1.630756,0.845251,-1.908251,-1.405845,0.052567
3435,5921,1.0,1,train,93.106849,0.0,TCGA-LUAD_stage_i,TCGA-80-5608-01Z-00-DX1,6_8.jpeg,13,...,0.219224,-0.602812,-0.461645,0.069590,-0.835767,0.470968,0.148806,-0.371321,-0.023745,-0.421051
3505,6019,1.0,1,train,93.106849,0.0,TCGA-LUAD_stage_i,TCGA-80-5608-01Z-00-DX1,8_9.jpeg,19,...,0.581324,-0.410427,-0.001796,-1.094775,0.790222,1.543434,0.834864,-0.722758,-0.573386,-0.225494
3614,6169,1.0,1,train,93.106849,0.0,TCGA-LUAD_stage_i,TCGA-80-5608-01Z-00-DX1,20_10.jpeg,7,...,-0.782848,-0.555409,-1.018653,0.212370,0.236269,-0.500635,0.225169,0.487915,-0.949484,0.149573
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87217,122164,1.0,1,train,93.106849,0.0,TCGA-LUAD_stage_i,TCGA-80-5608-01Z-00-DX1,15_13.jpeg,40,...,0.130470,0.667965,0.165579,-0.845013,0.070928,0.809765,0.776071,-0.213079,-1.147931,-0.027619
87297,122269,1.0,1,train,93.106849,0.0,TCGA-LUAD_stage_i,TCGA-80-5608-01Z-00-DX1,11_14.jpeg,19,...,1.031519,-0.491388,-0.239145,-0.349908,0.679604,1.405829,0.721511,-1.874033,-1.799751,0.382094
87704,122828,1.0,1,train,93.106849,0.0,TCGA-LUAD_stage_i,TCGA-80-5608-01Z-00-DX1,18_9.jpeg,19,...,1.363414,-0.398824,-0.329428,-1.283460,0.874684,1.702178,0.827241,-0.876173,-0.647394,-0.499443
89311,125029,1.0,1,train,93.106849,0.0,TCGA-LUAD_stage_i,TCGA-80-5608-01Z-00-DX1,20_7.jpeg,19,...,1.431592,0.478648,0.047792,-1.390995,1.480816,0.511291,0.850132,-1.028597,0.797229,-0.855126


In [27]:
# TCGA-38-4625-01Z-00-DX1
# mostly leiden 5, some leiden 6 and 25
result[result['slides'] == 'TCGA-38-4625-01Z-00-DX1']

Unnamed: 0,indexes,labels,luad,original_set,os_event_data,os_event_ind,patterns,slides,tiles,leiden_2.0,...,118,119,120,121,122,123,124,125,126,127
1893,3784,1.0,1,train,97.742466,0.0,TCGA-LUAD_stage_i,TCGA-38-4625-01Z-00-DX1,2_4.jpeg,13,...,0.628150,-0.314795,-0.263594,-0.356741,-0.920274,0.729529,-0.142330,-0.492465,-0.224959,-0.513619
2012,3947,1.0,1,train,97.742466,0.0,TCGA-LUAD_stage_i,TCGA-38-4625-01Z-00-DX1,15_8.jpeg,5,...,-1.564883,0.593221,-1.271260,1.631086,0.684606,-0.491828,0.051472,-0.165495,1.282973,-0.797713
2140,4131,1.0,1,train,97.742466,0.0,TCGA-LUAD_stage_i,TCGA-38-4625-01Z-00-DX1,37_15.jpeg,25,...,-0.525587,-0.932824,-0.775166,-0.010336,0.493963,0.969696,1.069161,-1.411157,0.758884,-1.086590
2270,4307,1.0,1,train,97.742466,0.0,TCGA-LUAD_stage_i,TCGA-38-4625-01Z-00-DX1,35_5.jpeg,6,...,0.912954,0.130123,-1.036927,0.108388,0.170619,0.606051,-0.044601,0.732815,0.242777,-0.342200
2317,4366,1.0,1,train,97.742466,0.0,TCGA-LUAD_stage_i,TCGA-38-4625-01Z-00-DX1,6_6.jpeg,5,...,-1.587105,1.618329,-0.929874,2.014560,1.347194,-0.792479,-0.724644,0.307020,1.744031,-0.764506
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88911,124502,1.0,1,train,97.742466,0.0,TCGA-LUAD_stage_i,TCGA-38-4625-01Z-00-DX1,47_12.jpeg,6,...,-0.573727,0.891030,-0.093931,0.582266,1.243723,-0.372285,-1.108236,-0.738437,-0.045818,0.072452
88959,124563,1.0,1,train,97.742466,0.0,TCGA-LUAD_stage_i,TCGA-38-4625-01Z-00-DX1,42_6.jpeg,5,...,-1.354000,0.465781,-1.168528,1.051054,0.359999,0.012532,0.345578,-0.719549,0.887861,-0.738973
89000,124615,1.0,1,train,97.742466,0.0,TCGA-LUAD_stage_i,TCGA-38-4625-01Z-00-DX1,8_7.jpeg,5,...,-1.409219,1.349723,-0.721880,1.724379,0.931630,-0.446902,-0.245434,0.026176,1.348269,-0.734245
89489,125276,1.0,1,train,97.742466,0.0,TCGA-LUAD_stage_i,TCGA-38-4625-01Z-00-DX1,36_17.jpeg,25,...,-1.302337,-0.767388,-1.514202,-0.042128,0.544000,0.921041,0.846493,-1.437672,0.676015,-1.095347
