In [33]:
import pandas as pd
import json
from tqdm import tqdm
import os
import numpy as np

In [2]:
def get_previous(data_line, meta_w_report):
    """Return the most recent earlier metadata row for the record's subject.

    Args:
        data_line: Mapping providing at least 'subject_id' and 'StudyDate'.
        meta_w_report: DataFrame with at least 'subject_id' and 'StudyDate'
            columns; the returned row carries whatever other columns it has.

    Returns:
        The row (as a pandas Series) with the greatest 'StudyDate' among rows
        of the same subject whose 'StudyDate' is strictly less than the
        record's, or None when no such row exists.
    """
    subject_id = data_line['subject_id']
    study_date = data_line['StudyDate']

    same_subject = meta_w_report['subject_id'] == subject_id
    # Strict comparison: rows sharing the record's own StudyDate are excluded.
    earlier = meta_w_report['StudyDate'] < study_date

    subject_prev_df = meta_w_report[same_subject & earlier].sort_values(by='StudyDate', ascending=False)
    if len(subject_prev_df) == 0:
        return None
    # NOTE(review): when several rows share the latest earlier StudyDate, which
    # one ends up first is decided by the sort's tie order -- confirm that any
    # of them is acceptable for downstream use.
    return subject_prev_df.iloc[0]

In [28]:
# NOTE(review): leftover interactive check. In the cells visible here `data` is
# first assigned further down, so on a fresh kernel (Restart & Run All) this
# line raises NameError -- consider deleting this cell.
isinstance(data, list)

True

In [31]:
def make_data_w_prev(data_path, output_path, meta_w_report_path='/home/data/mimic-cxr-jpg/2.0.0/rred/frontal.csv'):
    """Annotate each record with its subject's previous study and write JSONL.

    For every record, `get_previous` looks up the latest row in the metadata
    CSV for the same subject with an earlier 'StudyDate'. Four keys are then
    set on the record: 'prev_study_id', 'prev_dicom_id', 'prev_Findings' and
    'prev_Impression' (all None when no earlier row exists).

    Args:
        data_path: Either a path (str) to a JSON-lines file, or an already
            loaded list of record dicts. A list is annotated IN PLACE: the
            caller's dicts gain the four 'prev_*' keys.
        output_path: Destination JSON-lines file; overwritten if it exists.
        meta_w_report_path: CSV read with pandas; must provide 'subject_id',
            'StudyDate', 'study_id', 'dicom_id', 'Findings' and 'Impression'.

    Raises:
        TypeError: If `data_path` is neither a str nor a list.
    """
    meta_w_report = pd.read_csv(meta_w_report_path)
    if isinstance(data_path, str):
        # Context manager closes the input file; blank lines (for example a
        # trailing newline at end of file) are skipped instead of crashing
        # json.loads.
        with open(data_path, encoding="utf-8") as source_file:
            data = [json.loads(line) for line in source_file if line.strip()]
    elif isinstance(data_path, list):
        data = data_path
    else:
        # Raising a bare string is itself a TypeError that hides the message;
        # raise the same exception type explicitly so the message is shown.
        raise TypeError("error: data_path must be a string or list")

    for record in tqdm(data):
        prev_row = get_previous(record, meta_w_report)
        if prev_row is not None:
            # int() converts the pandas/NumPy scalar so json.dumps accepts it.
            record['prev_study_id'] = int(prev_row['study_id'])
            record['prev_dicom_id'] = prev_row['dicom_id']
            # NOTE(review): if the CSV cell is empty, pandas presumably yields
            # NaN here and json.dumps writes the non-standard token `NaN`
            # rather than `null` -- confirm downstream readers handle that.
            record['prev_Findings'] = prev_row['Findings']
            record['prev_Impression'] = prev_row['Impression']
        else:
            record['prev_study_id'] = None
            record['prev_dicom_id'] = None
            record['prev_Findings'] = None
            record['prev_Impression'] = None

    with open(output_path, encoding="utf-8", mode="w") as file:
        for record in data:
            file.write(json.dumps(record) + "\n")

    print('done')

In [20]:
# Add previous-study fields to the base frontal splits, in the order val, train, test.
base_dir = '/home/data/mimic-cxr-jpg/2.0.0/rred'
for split_name in ('val', 'train', 'test'):
    make_data_w_prev(
        data_path=f'{base_dir}/frontal_{split_name}.jsonl',
        output_path=f'{base_dir}/frontal_{split_name}_w_prev.jsonl',
    )

100%|██████████| 1002/1002 [00:01<00:00, 818.53it/s]


done


100%|██████████| 126439/126439 [02:36<00:00, 809.04it/s]


done


100%|██████████| 1494/1494 [00:01<00:00, 764.12it/s]

done





In [40]:
root_path ='/home/data/mimic-cxr-jpg/2.0.0/rred/error_baseline_Mixed_FPI_v0.4/reference_dist/'
split = 'train'

# Shard versions are read in exactly this order (v3 comes after v5), because
# the order determines the record order of the merged output file.
shard_versions = [1, 2, 4, 5, 3, 6, 7, 8, 9, 10]

# `data` ends up as a list of per-shard record lists (one inner list per file).
data = []
for shard_version in shard_versions:
    shard_path = os.path.join(root_path, f'frontal_{split}_error_reference_dist_v{shard_version}.jsonl')
    # Context manager closes each shard instead of leaving ten open handles.
    with open(shard_path, encoding="utf-8") as shard_file:
        data.append([json.loads(l) for l in shard_file])

In [41]:
# Flatten the per-shard lists into one flat list of records, then report its size.
# Rebinds `data`, so this cell is not re-runnable without re-running the loading cell.
data = [*np.concatenate(data)]
print(len(data))

300000


In [42]:
# Write the merged v1..v10 records for the current `split` with previous-study fields attached.
merged_output_path = os.path.join(root_path, f'frontal_{split}_error_reference_dist_v1_to_v10_w_prev.jsonl')
make_data_w_prev(data_path=data, output_path=merged_output_path)

100%|██████████| 300000/300000 [06:02<00:00, 826.51it/s]


done


In [43]:
root_path ='/home/data/mimic-cxr-jpg/2.0.0/rred/error_baseline_Mixed_FPI_v0.4/reference_dist/'
split = 'val'

# Shard versions are read in exactly this order (v3 comes after v5), because
# the order determines the record order of the merged output file.
shard_versions = [1, 2, 4, 5, 3, 6, 7, 8, 9, 10]

# `data` ends up as a list of per-shard record lists (one inner list per file).
data = []
for shard_version in shard_versions:
    shard_path = os.path.join(root_path, f'frontal_{split}_error_reference_dist_v{shard_version}.jsonl')
    # Context manager closes each shard instead of leaving ten open handles.
    with open(shard_path, encoding="utf-8") as shard_file:
        data.append([json.loads(l) for l in shard_file])

In [44]:
# Flatten the per-shard lists into one flat list of records, then report its size.
# Rebinds `data`, so this cell is not re-runnable without re-running the loading cell.
data = [*np.concatenate(data)]
print(len(data))

2990


In [45]:
# Write the merged v1..v10 records for the current `split` with previous-study fields attached.
merged_output_path = os.path.join(root_path, f'frontal_{split}_error_reference_dist_v1_to_v10_w_prev.jsonl')
make_data_w_prev(data_path=data, output_path=merged_output_path)

100%|██████████| 2990/2990 [00:03<00:00, 825.35it/s]


done


In [21]:
# Add previous-study fields to the v0.3 error-baseline files, in the order train, val, test.
error_dir = '/home/data/mimic-cxr-jpg/2.0.0/rred/error_baseline_Mixed_FPI_v0.3'
for split_name in ('train', 'val', 'test'):
    make_data_w_prev(
        data_path=f'{error_dir}/frontal_{split_name}_error_v1_to_v10.jsonl',
        output_path=f'{error_dir}/frontal_{split_name}_error_v1_to_v10_w_prev.jsonl',
    )

100%|██████████| 1200000/1200000 [24:35<00:00, 813.45it/s]


done


100%|██████████| 9990/9990 [00:12<00:00, 778.74it/s]


done


100%|██████████| 14990/14990 [00:20<00:00, 749.47it/s]


done
