<a href="https://colab.research.google.com/github/hyesukim1/chest_X_ray_images_binary_classification/blob/main/Chest_X_ray__images_binary_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Kaggle API로 연결하여 데이터 로드

In [None]:
# Install the kaggle package, then upload kaggle.json through google.colab's file upload
# (presumably the Kaggle API credentials file — confirm)
!pip install kaggle
from google.colab import files
files.upload()
# Optional manual check that the upload landed in the working directory:
# ls -1ha kaggle.json

# Copy the json file into ~/.kaggle (created if it does not exist yet)
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Restrict the file to owner read/write to avoid the permission warning
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the paultimothymooney/chest-xray-pneumonia dataset with the kaggle CLI
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

In [None]:
# List the working directory to check what the download produced
!ls

In [None]:
# zip 파일 풀기
!unzip -qq "/content/chest-xray-pneumonia.zip"

---

# 프로젝트 설명

In [None]:
# Root directory of the dataset
data_path = '/content/chest_xray/'

# Train, validation and test directories under the dataset root
train_path = f'{data_path}train/'
valid_path = f'{data_path}val/'
test_path = f'{data_path}test/'

In [None]:
from glob import glob # 파일들의 리스트를 뽑을 때 사용

# Report how many entries each split holds; the "*/*" pattern matches everything
# sitting one level below the sub-folders of a split directory.
for split_name, split_path in (('train', train_path),
                               ('val', valid_path),
                               ('test', test_path)):
  num_files = len(glob(split_path + "*/*"))
  print(f'num of {split_name} data: {num_files}')

In [None]:
# Gather image paths per class across all three splits (train / val / test)
all_normal_imgs, all_pneumonia_imgs = [], []

for split_dir in ('train/', 'val/', 'test/'):
  split_root = data_path + split_dir
  # Append this split's paths to the running per-class lists
  all_normal_imgs += glob(split_root + 'NORMAL/*')
  all_pneumonia_imgs += glob(split_root + 'PNEUMONIA/*')

# Print the total count for each class
print(f'정상 흉부 이미지 개수 : {len(all_normal_imgs)}')
print(f'폐렴 흉부 이미지 개수 : {len(all_pneumonia_imgs)}')

# 데이터 시각화

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

# Wedge labels and the matching per-class image counts
label = ['Normal', 'Pneumonia']
class_counts = [len(all_normal_imgs), len(all_pneumonia_imgs)]

# Larger font and a square 7x7 figure for the pie chart
mpl.rc('font', size=15)
plt.figure(figsize=(7,7))

# Each wedge is annotated with its share as a whole-number percentage
plt.pie(class_counts, labels=label, autopct='%.f%%')

In [None]:
import matplotlib.gridspec as gridspec
import cv2

def show_image(img_paths, rows=2, cols=3):
  """Display the images at `img_paths` on a rows x cols grid.

  Args:
    img_paths: sequence of image file paths; at most rows*cols entries.
    rows: number of grid rows.
    cols: number of grid columns.

  Raises:
    AssertionError: if more paths are given than the grid has cells.
    FileNotFoundError: if an image cannot be read from disk.
  """
  assert len(img_paths) <= rows*cols, 'more images than grid cells'

  mpl.rc('font', size=8)
  plt.figure(figsize=(15, 8))
  grid = gridspec.GridSpec(rows, cols)

  for idx, img_path in enumerate(img_paths):
    image = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising
    if image is None:
      raise FileNotFoundError(f'Could not read image: {img_path}')
    # OpenCV loads channels as BGR; matplotlib's imshow expects RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    ax = plt.subplot(grid[idx])
    ax.imshow(image)

In [None]:
# Number of sample images to preview per class
num_of_imgs = 6
# Take the last num_of_imgs paths from the collected normal-image list
normal_img_paths = all_normal_imgs[-num_of_imgs:]

# Render the selected images with show_image (default 2x3 grid)
show_image(normal_img_paths)

In [None]:
# Same preview for the pneumonia class: last num_of_imgs collected paths
pneumonia_img_paths = all_pneumonia_imgs[-num_of_imgs:]
show_image(pneumonia_img_paths)

# 베이스라인 모델

In [None]:
import torch
import random
import numpy as np
import os

# Fix the seed value used for every random number source below
seed = 50
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed Python's random module, NumPy and PyTorch (CPU, current GPU, all GPUs)
for seed_fn in (random.seed,
                np.random.seed,
                torch.manual_seed,
                torch.cuda.manual_seed,
                torch.cuda.manual_seed_all):
  seed_fn(seed)

# cuDNN flags: cuDNN itself is switched off, autotuning (benchmark) is disabled
# and the deterministic flag is turned on
torch.backends.cudnn.enabled = False
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Root directory of the dataset (same values as the path cell in the data-loading section)
data_path = '/content/chest_xray/'

# Train, validation and test directories under the dataset root
train_path = f'{data_path}train/'
valid_path = f'{data_path}val/'
test_path = f'{data_path}test/'

In [None]:
from torchvision import transforms

# Channel-wise normalization statistics shared by both pipelines
# NOTE(review): these look like the usual ImageNet mean/std — confirm they match the pretrained backbone
norm_mean = (0.485, 0.456, 0.406)
norm_std = (0.229, 0.224, 0.225)

# Image transform for the training data: resize/crop followed by random augmentation
train_steps = [
    transforms.Resize((250, 250)),         # resize the image to 250x250
    transforms.CenterCrop(180),            # keep the central 180x180 region
    transforms.RandomHorizontalFlip(0.5),  # left-right flip
    transforms.RandomVerticalFlip(0.2),    # up-down flip
    transforms.RandomRotation(20),         # image rotation
    transforms.ToTensor(),                 # convert to a tensor object
    transforms.Normalize(norm_mean, norm_std),
]
transform_train = transforms.Compose(train_steps)

# Image transform for the test data: same resize/crop/normalize, no random steps
test_steps = [
    transforms.Resize((250, 250)),
    transforms.CenterCrop(180),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
transform_test = transforms.Compose(test_steps)

In [None]:
from torchvision.datasets import ImageFolder

# Training dataset (with the training transform) and validation dataset
# (with the test transform), both read through ImageFolder from their split directories
datasets_train = ImageFolder(root=train_path, transform=transform_train)
datasets_valid = ImageFolder(root=valid_path, transform=transform_test)

In [None]:
def seed_worker(worker_id):
  """Seed Python's `random` and NumPy from torch's initial seed.

  Passed to the DataLoaders as `worker_init_fn`. `worker_id` is accepted
  but not used.
  """
  # Reduce torch's initial seed to 32 bits before handing it to the other libraries
  derived_seed = torch.initial_seed() % (1 << 32)
  random.seed(derived_seed)
  np.random.seed(derived_seed)

# Fix the seed of the generator that is handed to the DataLoaders
g = torch.Generator()
g.manual_seed(0)

In [None]:
from torch.utils.data import DataLoader

batch_size = 8

# Settings common to both loaders: batch size, seeded worker init,
# the seeded generator and two worker processes
shared_loader_kwargs = dict(batch_size=batch_size,
                            worker_init_fn=seed_worker,
                            generator=g,
                            num_workers=2)

# Training batches are shuffled; validation batches keep dataset order
loader_train = DataLoader(dataset=datasets_train, shuffle=True,
                          **shared_loader_kwargs)
loader_valid = DataLoader(dataset=datasets_valid, shuffle=False,
                          **shared_loader_kwargs)

In [None]:
# Install the efficientnet-pytorch package, pinned to version 0.7.1
!pip install efficientnet-pytorch==0.7.1

In [None]:
# Create the model
from efficientnet_pytorch import EfficientNet

# Pretrained 'efficientnet-b0' with num_classes=2, moved to the selected device
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=2).to(device)

In [None]:
# Total number of elements summed over every parameter tensor of the model
num_params = sum(p.numel() for p in model.parameters())
print('모델 파라미터 수 : ', num_params)

In [None]:
import torch.nn as nn

# Loss function: cross-entropy
criterion = nn.CrossEntropyLoss()
# Optimizer: Adam over all model parameters with learning rate 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)