In [15]:
import os
from glob import glob
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from PIL import Image
from torchvision import transforms

In [3]:
def process_image(image_path):
    """Index the ``.jpeg`` files found directly inside one class directory.

    The class label is taken from the directory's own name and the split from
    the name of its parent directory (i.e. a ``<split>/<class>`` layout).

    Parameters
    ----------
    image_path : str or os.PathLike
        Directory that holds the images of a single class.

    Returns
    -------
    pandas.DataFrame
        One row per matching file with the columns ``image_path``, ``class``
        and ``split``. Rows are ordered by path; the frame is empty when the
        directory contains no ``.jpeg`` file.
    """
    directory = os.fspath(image_path)
    # glob() returns entries in arbitrary, filesystem-dependent order. Sorting
    # makes the row order (and any positional sampling done on the frame
    # afterwards) reproducible from one machine or run to the next.
    image_files = sorted(glob(os.path.join(directory, "*.jpeg")))
    location = Path(directory)
    return pd.DataFrame(
        {
            "image_path": image_files,
            "class": location.name,
            "split": location.parent.name,
        }
    )

In [4]:
# Root of the chest X-ray dataset. Every split/class directory below is derived
# from this single value, so relocating the data only requires editing one line.
CHEST_XRAY_DIR=r'C:\Users\ihhim\OneDrive\Desktop\project1\data\chest_xray'

# One metadata frame per (split, class) directory.
train_normal_df=process_image(os.path.join(CHEST_XRAY_DIR,'train','NORMAL'))
train_pneumonia_df=process_image(os.path.join(CHEST_XRAY_DIR,'train','PNEUMONIA'))
test_normal_df=process_image(os.path.join(CHEST_XRAY_DIR,'test','NORMAL'))
test_pneumonia_df=process_image(os.path.join(CHEST_XRAY_DIR,'test','PNEUMONIA'))
val_normal_df=process_image(os.path.join(CHEST_XRAY_DIR,'val','NORMAL'))
val_pneumonia_df=process_image(os.path.join(CHEST_XRAY_DIR,'val','PNEUMONIA'))

In [9]:
# Row counts of the two training classes, NORMAL first, one value per line.
print(len(train_normal_df),len(train_pneumonia_df),sep="\n")

1341
3875


In [12]:
# Number of extra NORMAL images needed to reach the PNEUMONIA count.
# Clamped at zero rather than using abs(): the augmentation loop only ever adds
# NORMAL images, so if NORMAL is already the larger (or equal) class there is
# nothing to generate and adding more would widen the imbalance.
count=max(len(train_pneumonia_df)-len(train_normal_df),0)
count

2534

In [16]:
# Augmentation pipeline: a horizontal flip applied with probability 0.5,
# followed by a random rotation configured with a 20 degree limit.
augmentation=transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(20),
    ]
)

# Collects one metadata record per generated image.
augmented_rows=list()

In [17]:
# The torchvision transforms draw from torch's global RNG; seed it so the
# generated images are the same every time this cell runs.
torch.manual_seed(42)

# Start from an empty list on every run so that re-running this cell does not
# append a second copy of the records.
augmented_rows=[]

# Sample source images from the originals only. After a re-run train_normal_df
# already contains the generated files, and those must not be augmented again.
is_augmented=train_normal_df['image_path'].map(
    lambda p: os.path.basename(p).startswith('augmented_')
).astype(bool)
original_normal_df=train_normal_df[~is_augmented]

for idx in range(count):
    # random_state=idx makes the choice of source image reproducible per index.
    row=original_normal_df.sample(1,random_state=idx).iloc[0]
    # Write next to the source image instead of repeating an absolute path.
    new_path=os.path.join(os.path.dirname(row['image_path']),f"augmented_{idx}.jpeg")
    # The context manager releases the source file handle once the augmented
    # copy has been written.
    with Image.open(row['image_path']) as img:
        img_augmented=augmentation(img)
        img_augmented.save(new_path)
    augmented_rows.append({
        "image_path":new_path,
        "class":row['class'],
        "split":row['split']
        })
augmented_df=pd.DataFrame(augmented_rows)
# drop_duplicates keeps the cell idempotent: on a re-run the regenerated paths
# are already present in train_normal_df and must not be counted twice.
train_normal_df=(
    pd.concat([train_normal_df,augmented_df])
    .drop_duplicates(subset='image_path')
    .sample(frac=1,random_state=42)
)

In [19]:
# Row counts of the two training classes after augmentation, NORMAL first.
print(len(train_normal_df),len(train_pneumonia_df),sep="\n")

3875
3875


In [20]:
# Merge both training classes into one frame and shuffle it with a fixed seed.
train_df=(
    pd.concat([train_normal_df,train_pneumonia_df])
    .sample(frac=1,random_state=42)
)

In [21]:
# Stack the training rows with the validation and test rows, then persist the
# combined table (without its index column) as the dataset's metadata file.
dataframe=pd.concat(
    [train_df,val_normal_df,val_pneumonia_df,test_normal_df,test_pneumonia_df]
)
dataframe.to_csv(
    r'C:\Users\ihhim\OneDrive\Desktop\project1\data\chest_xray\metadata.csv',
    index=False,
)

In [22]:
# Rows per class label across every split collected in `dataframe`.
print(dataframe['class'].value_counts())

class
PNEUMONIA    4273
NORMAL       4117
Name: count, dtype: int64


In [23]:
# Preview the first rows of the combined metadata table.
dataframe.head()

Unnamed: 0,image_path,class,split
1999,C:\Users\ihhim\OneDrive\Desktop\project1\data\...,NORMAL,train
1959,C:\Users\ihhim\OneDrive\Desktop\project1\data\...,PNEUMONIA,train
1047,C:\Users\ihhim\OneDrive\Desktop\project1\data\...,PNEUMONIA,train
1214,C:\Users\ihhim\OneDrive\Desktop\project1\data\...,NORMAL,train
1294,C:\Users\ihhim\OneDrive\Desktop\project1\data\...,NORMAL,train


In [24]:
# Target size, in pixels, that every listed image is scaled to.
WIDTH=128
HEIGHT=128
target_size=(WIDTH,HEIGHT)

# NOTE: images are overwritten in place, so the full-resolution originals are
# not kept.
# Only the path column is needed; unique() avoids handling a file twice.
for image_path in dataframe['image_path'].unique():
    # The context manager guarantees the source file handle is released.
    with Image.open(image_path) as img:
        # Skip files that already have the target size so that re-running this
        # cell does not re-encode the JPEGs and add compression loss each time.
        if img.size==target_size:
            continue
        resized=img.resize(target_size)
    # Save only after the source handle is closed, since the destination is
    # the same file.
    resized.save(image_path)