<a href="https://colab.research.google.com/github/hyesukim1/chest_X_ray_images_binary_classification/blob/main/GoogLeNet_chest_X_ray_image_binary_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Kaggle API로 연결하여 데이터 로드


In [None]:
# Install the Kaggle CLI, then open Colab's upload prompt; the commands below
# expect the uploaded file to be named kaggle.json.
!pip install kaggle
from google.colab import files
files.upload()

# Copy the uploaded kaggle.json into ~/.kaggle.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Restrict the file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the paultimothymooney/chest-xray-pneumonia dataset with the Kaggle CLI.
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

In [None]:
# List the working directory (presumably to confirm the archive was downloaded).
!ls

In [None]:
# Extract the downloaded archive; -qq suppresses unzip's output.
!unzip -qq "/content/chest-xray-pneumonia.zip"

---

In [None]:
# Root of the extracted dataset; the trailing slash is relied on when the
# split names are appended below.
data_path = '/content/chest_xray/'

# One sub-directory per split.
train_path, valid_path, test_path = (
    f'{data_path}{split}' for split in ('train', 'val', 'test')
)

In [None]:
from glob import glob

def _count_files(split_dir):
  """Return the number of paths matching ``<split_dir>/*/*``."""
  return len(glob(split_dir + "/*/*"))

print(f'number of train data: {_count_files(train_path)}')
print(f'number of validation data: {_count_files(valid_path)}')
print(f'number of test data: {_count_files(test_path)}')

In [None]:
from tqdm import tqdm
import pandas as pd
import numpy as np
import os
import random

# Single seed shared by Python's `random`, NumPy and the later
# `random_state=seed` arguments.
seed = 73

os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)

# Accumulates one {'path', 'label'} record per image; filled by prepareData.
img_map = []

def prepareData(Dir, start):
  """Append one record per image found under ``Dir`` to the global ``img_map``.

  ``Dir`` must contain a ``NORMAL`` and a ``PNEUMONIA`` sub-directory. Every
  entry of those sub-directories is recorded as
  ``{'path': <full path>, 'label': <sub-directory name>}``.

  Args:
    Dir: Directory of one dataset split.
    start: Name of the split. Unused; accepted so existing calls keep working.

  Returns:
    None. The records are appended to ``img_map`` in place.
  """
  categories = ["NORMAL", "PNEUMONIA"]
  for category in categories:
    path = os.path.join(Dir, category)

    # os.listdir returns entries in arbitrary order; sorting keeps the record
    # order (and therefore the seeded shuffle applied afterwards) reproducible.
    for img in tqdm(sorted(os.listdir(path))):
      img_map.append({'path': os.path.join(path, img), 'label': category})

# Collect the image records of all three original split folders into img_map.
for split_dir, split_name in ((train_path, 'train'),
                              (valid_path, 'val'),
                              (test_path, 'test')):
  prepareData(split_dir, split_name)

# Turn the records into a DataFrame and shuffle its rows with the fixed seed.
img_map = pd.DataFrame(img_map).sample(frac=1, random_state=seed)

In [None]:
# Display (rows, columns) of the combined image table.
img_map.shape

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# Image paths are the samples; class names are the stratification target.
features, labels = (img_map[column].to_numpy() for column in ('path', 'label'))

# Stratified shuffle splitter holding out 30% of the samples; it generates
# two splits with a fixed random state.
stratified_sample = StratifiedShuffleSplit(
    n_splits=2,
    test_size=0.3,
    random_state=73,
)


In [None]:
# The splitter yields one (train, test) index pair per split; every iteration
# overwrites the previous one, so the arrays of the last split are kept.
for train_index, test_index in stratified_sample.split(features, labels):
  X_train, test_X = features[train_index], features[test_index]
  y_train, test_y = labels[train_index], labels[test_index]

# Cut the held-out part in half: first half -> test set, second half -> validation set.
# Plain integer floor division is used because the np.int alias no longer
# exists in NumPy >= 1.24; the resulting value is the same.
half_size = len(test_X) // 2
X_test, y_test = test_X[:half_size], test_y[:half_size]
X_val, y_val = test_X[half_size:], test_y[half_size:]

In [None]:
# One two-column (path, label) table per split.
train_map = pd.DataFrame({'path': X_train, 'label': y_train})
test_map = pd.DataFrame({'path': X_test, 'label': y_test})
val_map = pd.DataFrame({'path': X_val, 'label': y_val})

In [None]:
# data summary
for template, split_paths in (
    ('> {} train size', X_train),
    ('> {} test size', X_test),
    ('> {} val size', X_val),
):
  print(template.format(split_paths.shape[0]))

In [None]:
import cv2
import time
import imageio

# Input geometry: square images with three colour channels.
ColorCh = 3
IMG_SIZE = 224
input_shape = (IMG_SIZE, IMG_SIZE, ColorCh)

# Class names, spelled to match the NORMAL / PNEUMONIA folder names that are
# used as labels.
classes = ("NORMAL", "PNEUMONIA")
CATEGORIES = sorted(classes)

print('classes:', CATEGORIES)

In [None]:
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

# Image pipeline options: multiply pixel values by 1/255 and apply random
# horizontal flips, brightness changes and rotations of up to 15 degrees.
augmentation = dict(
    rescale=1./255,
    horizontal_flip=True,
    brightness_range=[1.0, 1.3],
    rotation_range=15,
)
datagen = ImageDataGenerator(**augmentation)

In [None]:
batch_size = 64

def get_generator(frame_):
  """Build a batch generator over the images listed in ``frame_``.

  Args:
    frame_: DataFrame with a ``path`` column (image files) and a ``label``
      column (class names).

  Returns:
    The iterator returned by ``datagen.flow_from_dataframe``: unshuffled
    batches of ``batch_size`` RGB images resized to IMG_SIZE x IMG_SIZE,
    with sparse class labels.
  """
  flow_options = dict(
      x_col='path',
      y_col='label',
      batch_size=batch_size,
      seed=seed,
      shuffle=False,
      class_mode='sparse',
      color_mode='rgb',
      save_format='jpeg',
      target_size=(IMG_SIZE, IMG_SIZE),
  )
  return datagen.flow_from_dataframe(dataframe=frame_, **flow_options)

In [None]:
def getLabelCount(frame):
    """Count how many rows of ``frame`` carry each label.

    Args:
        frame: DataFrame with a ``label`` column.

    Returns:
        pandas Series indexed by the distinct labels, holding the number of
        occurrences of each, as produced by ``Series.value_counts``.
    """
    return pd.Series(frame['label'].values.ravel()).value_counts()

In [None]:
# Reshuffle the training table with the fixed seed and build its batch generator.
train_df = train_map.sample(frac=1, random_state=seed)
train_generator = get_generator(train_df)

# Prints the heading "label counts of the training set" (in Korean); the
# count table itself is displayed as the cell's output.
print('훈련 셋의 라벨 갯수')
getLabelCount(train_df)

In [None]:
# Reshuffle the validation table with the fixed seed and build its batch generator.
val_df = val_map.sample(frac=1, random_state=seed)
# Kept under its own name: binding it to test_generator would lose it as soon
# as the test-set generator is created.
val_generator = get_generator(val_df)

# Prints the heading "label counts of the validation set" (in Korean); the
# count table itself is displayed as the cell's output.
print('검증 셋의 라벨 갯수')
getLabelCount(val_df)

In [None]:
# Reshuffle the test table with the fixed seed and build its batch generator.
test_df = test_map.sample(frac=1, random_state=seed)
test_generator = get_generator(test_df)

# Prints the heading "label counts of the test set" (in Korean); the count
# table itself is displayed as the cell's output.
print('테스트 셋의 라벨 갯수')
getLabelCount(test_df)

---

# Building Models

In [None]:
from keras.backend import separable_conv2d
import keras
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Add, add
from tensorflow.keras.layers import InputLayer, Input, Conv2D, MaxPooling2D, AveragePooling2D, GlobalAveragePooling1D
from tensorflow.keras.layers import Activation, MaxPool2D, ZeroPadding2D, SeparableConv2D
from keras.layers.normalization import batch_normalization
from tensorflow.keras.models import Model, Sequential
from keras import regularizers

# L2 regularizer with factor 1e-4.
kernel_regularizer = regularizers.l2(0.0001)

# Output activation and loss name; the sparse loss corresponds to the
# generators' class_mode='sparse' labels.
final_activation = 'softmax'
entropy = 'sparse_categorical_crossentropy'
# Number of class names; used as the width of the classification layer.
n_classes = len(CATEGORIES)
print(n_classes)

In [None]:
def FCLayers(baseModel):
  """Attach a dropout + dense classification head to ``baseModel``.

  Args:
    baseModel: Keras model whose ``output`` feeds the head. It is marked
      trainable as a side effect.

  Returns:
    A ``Model`` mapping ``baseModel.input`` to an ``n_classes``-unit dense
    output that uses ``final_activation``.
  """
  baseModel.trainable = True
  headModel = baseModel.output
  headModel = Dropout(0.5, seed=73)(headModel)
  headModel = Dense(n_classes, activation=final_activation)(headModel)
  model = Model(inputs = baseModel.input, outputs = headModel)
  return model


## GoogLeNet 모델

In [None]:
# Inception Block 만들기

from keras.layers.merge import concatenate

def Inception_block(input_layer, f1, f2, f3, f4):
    """Build one Inception block: four parallel branches joined on the last axis.

    Args:
        input_layer: Tensor fed to every branch.
        f1: Filter count of the 1x1 convolution branch.
        f2: Pair ``(filters of the 1x1 convolution, filters of the 3x3 convolution)``.
        f3: Pair ``(filters of the 1x1 convolution, filters of the 5x5 convolution)``.
        f4: Filter count of the 1x1 convolution applied after the 3x3 max pooling.

    Returns:
        The concatenation of the four branch outputs along axis -1.
    """
    def relu_conv(n_filters, size, tensor):
        # 'same'-padded square convolution followed by ReLU.
        return Conv2D(filters=n_filters, kernel_size=(size, size),
                      padding='same', activation='relu')(tensor)

    # Layers are created in the order: 1x1 | 1x1 -> 3x3 | 1x1 -> 5x5 | pool -> 1x1.
    branch_1x1 = relu_conv(f1, 1, input_layer)
    branch_3x3 = relu_conv(f2[1], 3, relu_conv(f2[0], 1, input_layer))
    branch_5x5 = relu_conv(f3[1], 5, relu_conv(f3[0], 1, input_layer))

    pooled = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input_layer)
    branch_pool = relu_conv(f4, 1, pooled)

    return concatenate([branch_1x1, branch_3x3, branch_5x5, branch_pool], axis=-1)