In [1]:
import pandas as pd
from pathlib import Path
import tensorflow as tf
import os

# Root of the local MIMIC-CXR-JPG 2.0.0 download.
mimic_cxr_path = Path('/scratch/physionet.org/files/mimic-cxr-jpg/2.0.0')

# Per-image DICOM metadata.
metadata = pd.read_csv(mimic_cxr_path / 'mimic-cxr-2.0.0-metadata.csv.gz')

# Split assignment per image. subject_id is removed before the merge below,
# which joins on study_id only (the CheXpert table carries its own subject_id).
dfsplit = pd.read_csv(mimic_cxr_path / 'mimic-cxr-2.0.0-split.csv.gz')
dfsplit = dfsplit.drop(columns="subject_id")

# CheXpert labels, attached to every image of the corresponding study.
dfchex = pd.read_csv(mimic_cxr_path / 'mimic-cxr-2.0.0-chexpert.csv.gz')
df = dfsplit.merge(dfchex, on='study_id', how='inner')


In [2]:
# Join all metadata columns onto the split/label table by image id.
# Columns present in both tables (study_id, subject_id) come back with
# _x / _y suffixes because only dicom_id is used as the join key.
dfmetadata = df.merge(metadata, on='dicom_id', how='inner')
dfmetadata

Unnamed: 0,dicom_id,study_id_x,split,subject_id_x,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,...,study_id_y,PerformedProcedureStepDescription,ViewPosition,Rows,Columns,StudyDate,StudyTime,ProcedureCodeSequence_CodeMeaning,ViewCodeSequence_CodeMeaning,PatientOrientationCodeSequence_CodeMeaning
0,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014,50414267,train,10000032,,,,,,,...,50414267,CHEST (PA AND LAT),PA,3056,2544,21800506,213014.531,CHEST (PA AND LAT),postero-anterior,Erect
1,174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962,50414267,train,10000032,,,,,,,...,50414267,CHEST (PA AND LAT),LATERAL,3056,2544,21800506,213014.531,CHEST (PA AND LAT),lateral,Erect
2,2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab,53189527,train,10000032,,,,,,,...,53189527,CHEST (PA AND LAT),PA,3056,2544,21800626,165500.312,CHEST (PA AND LAT),postero-anterior,Erect
3,e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c,53189527,train,10000032,,,,,,,...,53189527,CHEST (PA AND LAT),LATERAL,3056,2544,21800626,165500.312,CHEST (PA AND LAT),lateral,Erect
4,68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714,53911762,train,10000032,,,,,,,...,53911762,CHEST (PORTABLE AP),AP,2705,2539,21800723,80556.875,CHEST (PORTABLE AP),antero-posterior,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377090,428e2c18-5721d8f3-35a05001-36f3d080-9053b83c,57132437,train,19999733,,,,,,,...,57132437,CHEST (PA AND LAT),PA,3056,2544,21520708,224550.171,CHEST (PA AND LAT),postero-anterior,Erect
377091,58c403aa-35ff8bd9-73e39f54-8dc9cc5d-e0ec3fa9,57132437,train,19999733,,,,,,,...,57132437,CHEST (PA AND LAT),LATERAL,3056,2544,21520708,224550.171,CHEST (PA AND LAT),lateral,Erect
377092,58766883-376a15ce-3b323a28-6af950a0-16b793bd,55368167,train,19999987,1.0,-1.0,,,,,...,55368167,CHEST (PORTABLE AP),AP,2544,3056,21451104,51448.218,CHEST (PORTABLE AP),antero-posterior,Erect
377093,7ba273af-3d290f8d-e28d0ab4-484b7a86-7fc12b08,58621812,train,19999987,1.0,,,,,,...,58621812,CHEST (PORTABLE AP),AP,3056,2544,21451102,202809.234,CHEST (PORTABLE AP),antero-posterior,Erect


In [3]:

# Coarse view label ('frontal' / 'lateral' / 'other') for each ViewPosition value.
VIEW_MAP = {
    'AP': 'frontal',
    'PA': 'frontal',
    'LATERAL': 'lateral',
    'LL': 'lateral',
    'LPO': 'other',
    'RAO': 'other',
    'RPO': 'other',
    'LAO': 'other',
    # the entries below are overwritten in some instances by manual review
    'AP AXIAL': 'other',
    'XTABLE LATERAL': 'other',
    'AP LLD': 'other',
    'PA LLD': 'other',
    'L5 S1': 'other',
    'SWIMMERS': 'other',
    'AP RLD': 'other',
    'PA RLD': 'other',
}

# ViewPosition values that are missing or absent from VIEW_MAP become NaN.
metadata['view'] = metadata['ViewPosition'].map(VIEW_MAP)

# ----------------------------- work in progress -----------------------------
# Many images in the 'other' category simply have an unknown view. This table
# is meant to refine them from the acquisition device processing description;
# it is defined here but NOT applied yet (see the TODO below).
ADPD_MAP = {
    'CHEST, LATERAL': 'lateral',
    'CHEST, PA': 'frontal',
    # manually checked 100 records, below is always frontal
    'CHEST, PORTABLE': 'frontal',
    'CHEST, PA X-WISE': 'frontal',
    'CHEST, AP (GRID)': 'frontal',
    'CHEST LAT': 'lateral',
    'CHEST PA': 'frontal',
    'CHEST, AP NON-GRID': 'frontal',
    'CHEST AP NON GRID': 'frontal',
    'CHEST PA X-WISE': 'frontal',
    'CHEST AP GRID': 'frontal',
    'CHEST, PORTABLE X-WISE': 'other',
    # below have < 25 samples each
    'CHEST PORT': 'frontal',
    'CHEST PORT X-WISE': 'frontal',
    # manually classified below
    'SHOULDER': 'other',
    'CHEST, PEDI (4-10 YRS)': 'other',
    'LOWER RIBS': 'other',
    'CHEST, DECUB.': 'other',
    'ABDOMEN, PORTABLE': 'other',
    'UPPER RIBS': 'frontal',
    'STERNUM, LATERAL': 'lateral',
    'KNEE, AP/OBL': 'other',
    'STERNUM, PA/OBL.': 'other',
    'CLAVICLE/ AC JOINTS': 'other',
    'ABDOMEN,GENERAL': 'other',
    'LOWER RIB': 'other',
    'SCOLIOSIS AP': 'frontal'
}
# TODO: for rows that still need a view and have a non-null
# 'AcquisitionDeviceProcessingDescription', set
#     view = <that column>.map(ADPD_MAP)
# The selection mask for "rows that still need a view" has not been defined yet.
# -----------------------------------------------------------------------------

# Attach the view label to the split/label table by image id.
dfmetadata = df.merge(metadata[['dicom_id', 'view']], on='dicom_id', how='inner')

dfmetadata

Unnamed: 0,dicom_id,study_id,split,subject_id,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices,view
0,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014,50414267,train,10000032,,,,,,,,,1.0,,,,,,frontal
1,174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962,50414267,train,10000032,,,,,,,,,1.0,,,,,,lateral
2,2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab,53189527,train,10000032,,,,,,,,,1.0,,,,,,frontal
3,e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c,53189527,train,10000032,,,,,,,,,1.0,,,,,,lateral
4,68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714,53911762,train,10000032,,,,,,,,,1.0,,,,,,frontal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377090,428e2c18-5721d8f3-35a05001-36f3d080-9053b83c,57132437,train,19999733,,,,,,,,,1.0,,,,,,frontal
377091,58c403aa-35ff8bd9-73e39f54-8dc9cc5d-e0ec3fa9,57132437,train,19999733,,,,,,,,,1.0,,,,,,lateral
377092,58766883-376a15ce-3b323a28-6af950a0-16b793bd,55368167,train,19999987,1.0,-1.0,,,,,0.0,,,0.0,,,0.0,,frontal
377093,7ba273af-3d290f8d-e28d0ab4-484b7a86-7fc12b08,58621812,train,19999987,1.0,,,,,,,,,,,,,1.0,frontal


In [4]:
# Number of non-null entries in every column for each (view, split) pair.
# The dicom_id column therefore gives the image count per group, while the
# label columns give how many images have that finding mentioned at all.
dfmetadata.groupby(['view','split']).count()


Unnamed: 0_level_0,Unnamed: 1_level_0,dicom_id,study_id,subject_id,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
view,split,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
frontal,test,3403,3403,3403,979,1193,411,1331,437,96,151,1232,594,1567,78,972,877,1250
frontal,train,237962,237962,237962,59862,68666,23946,67643,22481,6221,8425,60153,79886,89241,2900,59539,56053,75292
frontal,validate,1959,1959,1959,510,601,198,592,209,27,92,460,637,760,22,455,467,676
lateral,test,1431,1431,1431,282,363,193,461,112,62,83,436,338,528,44,489,187,185
lateral,train,115642,115642,115642,18465,21394,9254,21874,6408,3604,4843,21224,55580,29777,1715,29797,13205,9666
lateral,validate,908,908,908,154,181,67,170,66,22,35,135,457,248,9,204,121,82
other,train,21,21,21,3,4,1,1,2,0,4,10,3,3,0,2,7,7


In [5]:
# Partition the metadata rows by their coarse view label.
frontal_views = metadata.loc[metadata['view'] == 'frontal']
lateral_views = metadata.loc[metadata['view'] == 'lateral']

# Split/label table restricted to lateral images (the inner join keeps only
# dicom_ids present in lateral_views).
dfmetadata = df.merge(lateral_views[['dicom_id', 'view']], on='dicom_id', how='inner')
dfmetadata

Unnamed: 0,dicom_id,study_id,split,subject_id,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices,view
0,174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962,50414267,train,10000032,,,,,,,,,1.0,,,,,,lateral
1,e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c,53189527,train,10000032,,,,,,,,,1.0,,,,,,lateral
2,b79e55c3-735ce5ac-64412506-cdc9ea79-f1af521f,57375967,train,10000764,,,1.0,,,,,,,,,-1.0,,,lateral
3,dcfeeac4-1597e318-d0e6736a-8b2c2238-47ac3f1b,57375967,train,10000764,,,1.0,,,,,,,,,-1.0,,,lateral
4,0c4eb1e1-b801903c-bcebe8a4-3da9cd3c-3b94a27c,50771383,train,10000898,,,,,,,,,1.0,,,,,,lateral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117976,0e0fb868-b64dabb9-1856dfb0-fc116903-be2629df,52434977,train,19999068,,,,,,,,,1.0,,,,,,lateral
117977,3aa928af-77a1238e-7cb9b3cd-604ee1dd-f6fa9788,52434977,train,19999068,,,,,,,,,1.0,,,,,,lateral
117978,bd4eb73d-09c65a7e-797c197f-ae864491-8d258918,50847545,train,19999156,,,,,,,,,1.0,,,,,,lateral
117979,9d1b4abe-52d55ff8-25dd0af8-bae63de6-0f2e36e5,53282218,train,19999287,,0.0,,,,,-1.0,1.0,,0.0,,,,,lateral


In [6]:
# Index every .jpeg under the image tree, attach the split/CheXpert labels,
# recode the labels to non-negative classes, and cache the result as CSV.
dicom_id = []
file_path = []

img_dir = '/scratch/physionet.org/files/mimic-cxr-jpg/2.0.0/files/'
for root, dirs, files in os.walk(img_dir):
    for name in files:
        if name.endswith(".jpeg"):
            image_name = os.path.join(root, name)
            file_path.append(image_name)
            # The dicom_id is the file name without its extension.
            dicom_id.append(name.rsplit('.', 1)[0])
path_and_id = list(zip(file_path, dicom_id))
df_file_path = pd.DataFrame(path_and_id, columns=['file_path', 'dicom_id'])
df_final = pd.merge(df_file_path, df, on='dicom_id', how='inner')

# Recode the labels:
#    1 (positive)      -> 3
#   -1 (uncertain)     -> 2
#    0 (negative)      -> 1
#   NaN (not mentioned) -> 0
# Only the label columns are touched, so a path or identifier value can never
# be rewritten by the recoding. The mapping is ordered so that applying it
# pair by pair would give the same result as applying it all at once.
non_label_columns = ['file_path', 'dicom_id', 'study_id', 'split', 'subject_id']
label_columns = [col for col in df_final.columns if col not in non_label_columns]
df_final[label_columns] = (
    df_final[label_columns]
    .replace({1: 3, -1: 2, 0: 1})
    .fillna(0)
)

all_files_csv = "/home/jyarnal1/all_file_path.csv"
df_final.to_csv(all_files_csv, index=False)

# Reload from disk so later cells work with exactly what was persisted.
all_files = pd.read_csv(all_files_csv)

In [7]:
# Display the recoded path/label table held in `all_files`.
all_files


Unnamed: 0,file_path,dicom_id,study_id,split,subject_id,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
0,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,90cbb6b8-e3d03d1a-722061ba-421d4138-03339ccf,53947845,train,10356999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
1,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,f6d68322-28d091af-e0211a88-4b5d6daa-e452549d,53947845,train,10356999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
2,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,f7b3994d-94ebe91a-dc976413-7fba3f0e-9536d3ee,59616608,train,10747632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,ef237bc6-aa1017e8-dd6380c1-58e9b904-f3a4ea03,59616608,train,10747632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,df0a8003-31a86246-bf980394-d7b6247e-4d54d644,59616608,train,10747632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377090,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,756d4809-f7127a40-b97dd958-91a226c5-4722c35c,54113008,train,19979738,3.0,0.0,2.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,3.0,0.0,0.0
377091,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,761bc93c-e4364574-b4cc019f-8bec92d6-00bb83d4,52911757,train,19979738,0.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,3.0,0.0,3.0
377092,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,fa7d1c1d-5d01e16f-0a8d5b3d-3d962ab7-b6ef979b,56079377,train,19979738,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0
377093,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,aca64aeb-fd390621-d271db22-c001998b-09111ea8,52468311,train,19979738,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0


In [8]:

# Keep only frontal-view images, strip the identifier and split columns
# (the view column stays), and write the result to frontal_all.csv.
frontal_only = all_files.merge(frontal_views[['dicom_id', 'view']], on='dicom_id', how='inner')
all_files_frontal = frontal_only.drop(columns=["dicom_id", "study_id", "split", "subject_id"])
all_files_frontal.to_csv("/home/jyarnal1/frontal_all.csv", index=False)

In [9]:
# Display the current frontal-view table.
all_files_frontal


Unnamed: 0,file_path,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices,view
0,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,frontal
1,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,frontal
2,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,frontal
3,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,frontal
4,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,frontal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243319,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,3.0,0.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,1.0,3.0,frontal
243320,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,3.0,0.0,2.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,3.0,0.0,0.0,frontal
243321,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,0.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,3.0,0.0,3.0,frontal
243322,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0,frontal


In [10]:

# Lateral-view images only. The split column is kept (identifier and view
# columns are dropped) so the table can be filtered by split.
lateral_only = all_files.merge(lateral_views[['dicom_id', 'view']], on='dicom_id', how='inner')
all_files_lateral = lateral_only.drop(columns=["dicom_id", "study_id", "subject_id", "view"])
# Export of the combined lateral table is currently disabled:
# all_files_lateral.to_csv("/home/jyarnal1/lateral_all.csv", index=None)
all_files_lateral

Unnamed: 0,file_path,split,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
0,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
1,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
4,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117976,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
117977,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
117978,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,3.0,0.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,1.0,3.0
117979,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,3.0,0.0,2.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,3.0,0.0,0.0


In [16]:
# Number of non-null entries per column for each split of the lateral table.
all_files_lateral.groupby('split').count()


Unnamed: 0_level_0,file_path,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
split,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
test,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431,1431
train,115642,115642,115642,115642,115642,115642,115642,115642,115642,115642,115642,115642,115642,115642,115642
validate,908,908,908,908,908,908,908,908,908,908,908,908,908,908,908


In [17]:
# Training portion of the lateral-view table.
lateral_train = all_files_lateral.loc[all_files_lateral['split'] == 'train']
lateral_train

Unnamed: 0,file_path,split,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
0,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
1,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
4,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117976,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
117977,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
117978,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,3.0,0.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,1.0,3.0
117979,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,3.0,0.0,2.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,3.0,0.0,0.0


In [18]:
# Write the lateral training subset to CSV without the index column.
lateral_train.to_csv("/home/jyarnal1/lateral_train.csv", index=False)

In [19]:
# Test portion of the lateral-view table.
lateral_test = all_files_lateral.loc[all_files_lateral['split'] == 'test']
lateral_test

Unnamed: 0,file_path,split,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
809,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
810,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0
811,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0
812,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0
813,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117628,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
117629,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
117630,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
117631,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,2.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0


In [23]:
# Write the lateral test subset to CSV without the index column.
lateral_test.to_csv("/home/jyarnal1/lateral_test.csv", index=False)

In [21]:
# Validation portion of the lateral-view table.
lateral_validate = all_files_lateral.loc[all_files_lateral['split'] == 'validate']
lateral_validate

Unnamed: 0,file_path,split,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
187,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
460,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
461,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
462,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,2.0,2.0,0.0,0.0,0.0
463,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0,0.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116504,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
116505,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
116506,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
116746,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [24]:
# Write the lateral validation subset to CSV without the index column.
lateral_validate.to_csv("/home/jyarnal1/lateral_val.csv", index=False)

In [25]:

# Frontal-view images only. The split column is kept (identifier and view
# columns are dropped) so the table can be filtered by split.
frontal_only = all_files.merge(frontal_views[['dicom_id', 'view']], on='dicom_id', how='inner')
all_files_frontal = frontal_only.drop(columns=["dicom_id", "study_id", "subject_id", "view"])
# Export of the combined frontal table is currently disabled:
# all_files_frontal.to_csv("/home/jyarnal1/frontal_all.csv", index=None)
all_files_frontal

Unnamed: 0,file_path,split,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
0,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
1,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
3,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
4,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243319,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,3.0,0.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,1.0,3.0
243320,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,3.0,0.0,2.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,3.0,0.0,0.0
243321,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,3.0,0.0,3.0
243322,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0


In [26]:
# Number of non-null entries per column for each split of the frontal table.
all_files_frontal.groupby('split').count()


Unnamed: 0_level_0,file_path,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
split,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
test,3403,3403,3403,3403,3403,3403,3403,3403,3403,3403,3403,3403,3403,3403,3403
train,237962,237962,237962,237962,237962,237962,237962,237962,237962,237962,237962,237962,237962,237962,237962
validate,1959,1959,1959,1959,1959,1959,1959,1959,1959,1959,1959,1959,1959,1959,1959


In [28]:
# Training portion of the frontal-view table.
frontal_train = all_files_frontal.loc[all_files_frontal['split'] == 'train']
frontal_train

Unnamed: 0,file_path,split,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
0,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
1,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
3,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
4,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243319,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,3.0,0.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,1.0,3.0
243320,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,3.0,0.0,2.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,3.0,0.0,0.0
243321,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,3.0,0.0,3.0
243322,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,train,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0


In [29]:
# Write the frontal training subset to CSV without the index column.
frontal_train.to_csv("/home/jyarnal1/frontal_train.csv", index=False)

In [30]:
# Test portion of the frontal-view table.
frontal_test = all_files_frontal.loc[all_files_frontal['split'] == 'test']
frontal_test

Unnamed: 0,file_path,split,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
506,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,0.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,3.0
507,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,2.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0
508,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,2.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0
509,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,3.0,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0,0.0,3.0
510,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,3.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242650,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
242651,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,2.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0
242652,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
242653,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,test,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,3.0


In [31]:
# Write the frontal test subset to CSV without the index column.
frontal_test.to_csv("/home/jyarnal1/frontal_test.csv", index=False)

In [32]:
# Validation portion of the frontal-view table.
frontal_validate = all_files_frontal.loc[all_files_frontal['split'] == 'validate']
frontal_validate

Unnamed: 0,file_path,split,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
380,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
970,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
971,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,3.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0,1.0,3.0
972,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,1.0,0.0
973,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240319,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,3.0,3.0,0.0,2.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,3.0
240735,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
240736,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
240896,/scratch/physionet.org/files/mimic-cxr-jpg/2.0...,validate,3.0,0.0,0.0,1.0,3.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,1.0,0.0


In [33]:
# Write the frontal validation subset to CSV without the index column.
frontal_validate.to_csv("/home/jyarnal1/frontal_val.csv", index=False)