In [1]:
import pandas as pd

# Base location of the raw dataset (a gs:// URI, kept with its trailing slash).
raw_data_path = 'gs://dados_brutos_chexphoto/'

# One folder per dataset release underneath the base location.
v1_1_path = f'{raw_data_path}CheXphoto-v1.1/'
v1_0_path = f'{raw_data_path}CheXphoto-v1.0/'

In [5]:
def read_data(filename, split, to_keep='PA'):
    """Load a label CSV and keep only the 'natural' images of one projection.

    Parameters
    ----------
    filename : str or file-like
        Anything accepted by ``pd.read_csv``.
    split : str
        Value written to the ``split`` column of every returned row.
    to_keep : str, default 'PA'
        Rows are kept only when their ``AP/PA`` column equals this value.

    Returns
    -------
    pandas.DataFrame
        Columns ``Path``, ``source_path`` (a copy of ``Path``) and ``split``,
        with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If the CSV has no ``Path`` or ``AP/PA`` column.
    """
    data = pd.read_csv(filename)
    data['split'] = split
    data['source_path'] = data['Path']

    # Only the three columns selected below survive, so there is no need to
    # drop the label columns first; doing so in place on a filtered slice
    # also made the function fail on CSVs that lack any of those labels.
    # na=False treats a missing path as "not natural" instead of putting NaN
    # into the boolean mask.
    is_natural = data['source_path'].str.contains('natural', na=False)
    is_wanted_view = data['AP/PA'] == to_keep

    kept = data.loc[is_natural & is_wanted_view, ['Path', 'source_path', 'split']]
    return kept.reset_index(drop=True)

In [6]:
# Validation rows come from both releases (v1.1 first, then v1.0).
valid_parts = [
    read_data(release_path + 'valid.csv', 'valid')
    for release_path in (v1_1_path, v1_0_path)
]
metadata_valid = pd.concat(valid_parts)

# Training rows are read from the v1.0 release only.
metadata_train = read_data(v1_0_path + 'train.csv', 'train')

# Full table: training rows followed by validation rows.
metadata = pd.concat([metadata_train, metadata_valid])

In [5]:
# Transformation names expanded for every image, in addition to the
# untouched 'identity' copy; each name becomes a folder in destination_path.
transformations = [
    'blur',
    'brightness_down',
    'brightness_up',
    'contrast_down',
    'contrast_up',
    'exposure',
    'glare_glossy',
    'glare_matte',
    'moire',
    'rotation',
    'tilt',
    'translation',
]


def generate_transformation_table(data, transf=transformations):
    """Expand an image table into one row per (image, transformation) pair.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``Path`` and ``split``. It is not modified.
    transf : iterable of str, default ``transformations``
        Transformation names to add on top of ``'identity'``.

    Returns
    -------
    pandas.DataFrame
        A new frame without ``Path`` and with three extra columns:
        ``transformation``, ``id`` (the row position) and
        ``destination_path``
        (``Data/processed_data/<split>/<transformation>/<id>.jpg``).
        Rows are ordered as all ``'identity'`` rows first, then one full copy
        of the input per name in ``transf``, in that order.

    Raises
    ------
    KeyError
        If ``data`` has no ``Path`` column.
    """
    # Work on a derived frame so the caller's DataFrame is left untouched.
    base = data.drop(columns='Path')

    # Build every per-transformation copy first and concatenate once,
    # instead of growing the result with a concat on every loop iteration.
    frames = [base.assign(transformation=name) for name in ['identity', *transf]]
    table = pd.concat(frames, ignore_index=True)

    # The id is the row position in the expanded table.
    table['id'] = table.index
    table['destination_path'] = (
        'Data/processed_data/' + table['split']
        + '/' + table['transformation']
        + '/' + table['id'].astype(str) + '.jpg'
    )
    return table

In [6]:
# Build the table from the per-split frames rather than from `metadata`
# itself: re-running this cell on its own output would raise a KeyError,
# because the expanded table no longer has the 'Path' column that
# generate_transformation_table removes.
metadata = generate_transformation_table(pd.concat([metadata_train, metadata_valid]))

In [7]:
# Inspect the expanded table: one row per image and transformation.
metadata

Unnamed: 0,source_path,split,transformation,id,destination_path
0,CheXphoto-v1.0/train/natural/iphone/patient002...,train,identity,0,Data/processed_data/train/identity/0.jpg
1,CheXphoto-v1.0/train/natural/iphone/patient002...,train,identity,1,Data/processed_data/train/identity/1.jpg
2,CheXphoto-v1.0/train/natural/iphone/patient005...,train,identity,2,Data/processed_data/train/identity/2.jpg
3,CheXphoto-v1.0/train/natural/iphone/patient005...,train,identity,3,Data/processed_data/train/identity/3.jpg
4,CheXphoto-v1.0/train/natural/iphone/patient007...,train,identity,4,Data/processed_data/train/identity/4.jpg
...,...,...,...,...,...
19300,CheXphoto-v1.0/valid/natural/oneplus/patient64...,valid,translation,19300,Data/processed_data/valid/translation/19300.jpg
19301,CheXphoto-v1.0/valid/natural/oneplus/patient64...,valid,translation,19301,Data/processed_data/valid/translation/19301.jpg
19302,CheXphoto-v1.0/valid/natural/oneplus/patient64...,valid,translation,19302,Data/processed_data/valid/translation/19302.jpg
19303,CheXphoto-v1.0/valid/natural/oneplus/patient64...,valid,translation,19303,Data/processed_data/valid/translation/19303.jpg


In [8]:
# Write the expanded table to disk in Parquet format.
metadata.to_parquet(path='Data/metadata.parquet')

In [7]:
# NOTE(review): the output saved under this cell lists the raw label columns
# (Sex, Age, AP/PA, ...) and synthetic paths, which does not match the table
# built above. It looks like stale output from an earlier run; re-run or
# remove this cell to confirm.
metadata

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,CheXphoto-v1.0/train/synthetic/digital/patient...,Female,20.0,Frontal,PA,1.0,0.0,,,,,0.0,,,,0.0,,,
1,CheXphoto-v1.0/train/synthetic/digital/patient...,Female,20.0,Lateral,,1.0,0.0,,,,,0.0,,,,0.0,,,
2,CheXphoto-v1.0/train/synthetic/digital/patient...,Female,46.0,Frontal,PA,,,,,1.0,,,,,0.0,,,,
3,CheXphoto-v1.0/train/synthetic/digital/patient...,Female,46.0,Lateral,,,,,,1.0,,,,,0.0,,,,
4,CheXphoto-v1.0/train/synthetic/digital/patient...,Female,50.0,Frontal,AP,,,1.0,1.0,1.0,,,,1.0,1.0,1.0,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
697,CheXphoto-v1.0/valid/natural/oneplus/patient64...,Female,57.0,Frontal,AP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
698,CheXphoto-v1.0/valid/natural/oneplus/patient64...,Male,65.0,Frontal,AP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
699,CheXphoto-v1.0/valid/natural/oneplus/patient64...,Male,71.0,Frontal,AP,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
700,CheXphoto-v1.0/valid/natural/oneplus/patient64...,Female,45.0,Frontal,AP,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
