In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import os
import cv2

In [3]:
import tensorflow as tf
import torch
import numpy as np

In [4]:
adenocarcinoma_test_file_path = '/content/drive/MyDrive/Data/test/adenocarcinoma'
adenocarcinoma_train_file_path = '/content/drive/MyDrive/Data/train/adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib'
adenocarcinoma_valid_file_path = '/content/drive/MyDrive/Data/valid/adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib'

largecell_test_file_path = '/content/drive/MyDrive/Data/test/large.cell.carcinoma'
largecell_train_file_path = '/content/drive/MyDrive/Data/train/large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa'
largecell_valid_file_path = '/content/drive/MyDrive/Data/valid/large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa'

normal_test_file_path = '/content/drive/MyDrive/Data/test/normal'
normal_train_file_path = '/content/drive/MyDrive/Data/train/normal'
normal_valid_file_path = '/content/drive/MyDrive/Data/valid/normal'

squamous_test_file_path = '/content/drive/MyDrive/Data/test/squamous.cell.carcinoma'
squamous_train_file_path = '/content/drive/MyDrive/Data/train/squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa'
squamous_valid_file_path = '/content/drive/MyDrive/Data/valid/squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa'

In [6]:
def image_return(file_path):
  """
  이미지 파일들의 경로를 받아 파일 내 이미지 리스트 반환

  Args:
    file_path: 이미지 파일들의 경로

  Returns:
    이미지 리스트
  """
  image_list = os.listdir(file_path)
  image_path_list = []
  for i in image_list:
    image_path_list.append(file_path + '/' + i)
  
  image_list = []
  for i in image_path_list:
    image_list.append(cv2.imread(i, cv2.IMREAD_COLOR))
  
  return image_list

In [7]:
def height_width_find(image_list):
  """
  이미지 리스트 내 가장 큰 너비, 높이 값 탐색

  Args:
    image_list: 이미지 리스트

  Returns:
    최대 높이, 너비
  """
  height = int(image_list[0].shape[0])
  width = int(image_list[0].shape[1])

  for i in image_list:
    image_height = int(i.shape[0])
    image_width = int(i.shape[1])
    if height < image_height:
      height = image_height
    if width < image_width:
      width = image_width

  return height, width

In [8]:
def image_transpose(height, width, image_list):
  """
  이미지 크기 변환

  Args:
    height, width : 변환할 이미지의 높이, 너비
    image_list : 변환할 이미지 리스트

  Returns:
    변환된 이미지 리스트
  """
  image_data = []

  for i in image_list:
    image_data.append(tf.image.resize_with_crop_or_pad(i,height, width))

  return image_data

In [9]:
def target_data_make(target_data, normal_data):
  """
  타겟 리스트 생성

  Args:
    target_data : 분류할 데이터
    normal_data : 일반 데이터

  Return:
    라벨 데이터
  """
  return np.concatenate((np.ones(len(target_data)),np.zeros(len(normal_data))))

정상 클래스와 비교하기 위한 리스트 결합

In [10]:
adenocarcinoma_image_list = image_return(adenocarcinoma_test_file_path) + image_return(adenocarcinoma_train_file_path) + image_return(adenocarcinoma_valid_file_path)

In [11]:
normal_image_list = image_return(normal_test_file_path) + image_return(normal_train_file_path) + image_return(normal_valid_file_path)

In [12]:
print(len(adenocarcinoma_image_list), len(normal_image_list))

338 215


이미지 크기 변환을 위한 너비 높이 측정

In [13]:
image_list = adenocarcinoma_image_list + normal_image_list

height, width = height_width_find(image_list)
print(height, width)

874 1200


이미지 변환

In [14]:
adenocarcinoma_image_list = image_transpose(height, width, adenocarcinoma_image_list)
normal_image_list = image_transpose(height, width, normal_image_list)

타겟 데이터 생성

In [15]:
input_data = adenocarcinoma_image_list + normal_image_list
target_data = target_data_make(adenocarcinoma_image_list, normal_image_list)

훈련, 테스트 세트 분할

In [16]:
from tensorflow import keras
from sklearn.model_selection import train_test_split

train_input, test_input, train_target, test_target = train_test_split(input_data, target_data, stratify=target_data)

데이터 정규화

In [17]:
train_scaled = np.array(train_input).reshape(-1, height, width, 3)
test_scaled = np.array(test_input).reshape(-1, height, width, 3)

모델 생성

In [18]:
model2 = keras.Sequential()
model2.add(keras.layers.Conv2D(8, kernel_size=3, activation='relu', padding='same', input_shape = (height, width,3)))
model2.add(keras.layers.MaxPooling2D(2))
model2.add(keras.layers.Conv2D(16, kernel_size=3, activation='relu', padding='same'))
model2.add(keras.layers.MaxPooling2D(2))
model2.add(keras.layers.Flatten())
model2.add(keras.layers.Dense(20, activation='relu'))
model2.add(keras.layers.Dropout(0.4))
model2.add(keras.layers.Dense(1))

In [19]:
model2.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 874, 1200, 8)      224       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 437, 600, 8)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 437, 600, 16)      1168      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 218, 300, 16)     0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 1046400)           0         
                                                                 
 dense (Dense)               (None, 20)                2

In [20]:
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics='accuracy')

In [21]:
history = model2.fit(train_scaled, train_target, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [22]:
model2.evaluate(test_scaled, test_target)



[nan, 0.3884892165660858]