Simple Notebook to create a modified version of the original CheXpert CSV

In [13]:
# Importing necessary packages
import pandas as pd 
import numpy as np
import os
import torch
from sklearn.model_selection import train_test_split

In [2]:
# Read the original CheXpert training CSV into a dataframe
csv_path = 'train.csv'
org_df = pd.read_csv(csv_path)

# Strip the last two components from every image path; the remaining directory
# prefix is used as the patient identifier
# (presumably the layout is <patient dir>/<study dir>/<image file> -- verify against the CSV).
file_heads = [os.path.dirname(os.path.dirname(image_path)) for image_path in org_df['Path']]

# Store the identifier as the first column of the dataframe
org_df.insert(0, 'Patient_ID_path', file_heads)

In [3]:
# Summary counts for the untouched dataset: images, distinct patients,
# and how many images belong to each view (frontal / lateral).
view_column = org_df['Frontal/Lateral']
print('Number of images = ', org_df.shape[0])
print('Number of patients = ', org_df['Patient_ID_path'].nunique())
print('Number of frontal images = ', (view_column == 'Frontal').sum())
print('Number of lateral images = ', (view_column == 'Lateral').sum())

Number of images =  223414
Number of patients =  64540
Number of frontal images =  191027
Number of lateral images =  32387


In [4]:
# Work on a deep copy so org_df stays untouched, then rewrite values in every column:
#   -1 and NaN -> 0, 'Male' -> 1, 'Female' -> 0
new_df = (
    org_df.copy(deep=True)
    .replace(-1, 0)
    .replace(np.nan, 0)
    .replace('Male', 1)
    .replace('Female', 0)
)

# Rows whose sex is 'Unknown' cannot be encoded as 0/1, so they are removed
# before the column is cast to an unsigned 8-bit integer.
unknown_sex_rows = new_df.loc[new_df['Sex'] == 'Unknown'].index
new_df = new_df.drop(index=unknown_sex_rows)
new_df['Sex'] = new_df['Sex'].astype('uint8')

# Keep frontal images only: drop the lateral rows, then the columns that are no longer needed
lateral_rows = new_df.loc[new_df['Frontal/Lateral'] == 'Lateral'].index
new_df = new_df.drop(index=lateral_rows)
new_df = new_df.drop(columns=['Age', 'Frontal/Lateral', 'AP/PA'])

# Report what is left after filtering
remaining_patients = new_df['Patient_ID_path'].unique()
print('Remaining number of images = ', new_df.shape[0])
print('Remaining number of patients = ', len(remaining_patients))

Remaining number of images =  191026
Remaining number of patients =  64533


In [None]:
# Split the cleaned dataframe into two disjoint parts, stratified on 'Sex' so both
# parts keep the same male/female ratio:
#   d1 (80% of the rows) -> training CSV for the main classifier
#   d2 (20% of the rows) -> training CSV for the sex classifier
# A fixed seed makes the split reproducible across kernel restarts.
# NOTE(review): the split is done per image row, not per patient, so images of the
# same patient can end up in both files -- confirm this is acceptable downstream.
SPLIT_SEED = 42
d1, d2 = train_test_split(
    new_df,
    test_size=0.2,
    stratify=new_df['Sex'],
    random_state=SPLIT_SEED,
)

# Write each part to its own CSV. The original cell wrote d1 to both files,
# which left d2 unused and made the two CSVs identical.
d1.to_csv('model_train.csv', index=False)
d2.to_csv('sex_model_train.csv', index=False)