In [1]:
import tensorflow as tf
from keras.applications.resnet_v2 import ResNet50V2

In [2]:
import cv2
import numpy as np
import scipy.ndimage as ndi
import pandas as pd
from sklearn.model_selection import train_test_split


In [4]:
# Load the sample image used by the exploratory cells below
image_path = './open/train_imgs/BC_01_0062.png'  # path of the image file to split into tiles
image = cv2.imread(image_path)
# cv2.imread reports a missing/unreadable file by returning None instead of
# raising, so fail here rather than with an obscure error in a later cell.
if image is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')

In [5]:
def _get_masked_image(image):
    """Whiten every pixel outside the region selected on the LAB 'a' channel.

    Args:
        image: image array that is passed to ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2LAB`` (e.g. the result of ``cv2.imread``).

    Returns:
        An array with the same shape and dtype as ``image``. Selected pixels
        keep their original values; all remaining pixels are set to 255.
    """
    a_channel = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))[1]

    # Element [1] of cv2.threshold's result is the thresholded image, not the
    # scalar Otsu level, so the comparison below is element-wise against that
    # inverted-binary image.
    otsu_flags = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    binarized = cv2.threshold(a_channel, 127, 255, otsu_flags)[1]

    # Select pixels whose channel value is >= the binarized value, then fill
    # the holes enclosed by that selection.
    selected = ndi.binary_fill_holes(a_channel >= binarized)

    masked_image = image.copy()
    masked_image[~selected] = 255
    return masked_image
def _crop_image(image):
    for w_pos in reversed(range(image.shape[1])):
        if (image[:, w_pos] == [255, 255, 255]).all():
            image = np.delete(image, w_pos, 1)
    for h_pos in reversed(range(image.shape[0])):
        if (image[h_pos, :] == [255, 255, 255]).all():
            image = np.delete(image, h_pos, 0)

    return image

In [6]:
def tile_image(image, tile_size, n_tiles):
    """Cut an image into square tiles and return the ``n_tiles`` with the lowest pixel sum.

    The image is first padded with 255 (split as evenly as possible between
    both sides of each axis) so that height and width become multiples of
    ``tile_size``. If the grid yields fewer than ``n_tiles`` tiles, all-255
    tiles are appended to make up the difference.

    Args:
        image: array of shape (H, W, C).
        tile_size: edge length of each square tile, in pixels.
        n_tiles: number of tiles to return.

    Returns:
        Array of shape (n_tiles, tile_size, tile_size, C), ordered by
        ascending sum of pixel values.
    """
    height, width, channels = image.shape

    # Amount needed to reach the next multiple of tile_size (0 if already aligned).
    extra_h = -height % tile_size
    extra_w = -width % tile_size
    top, left = extra_h // 2, extra_w // 2
    padded = np.pad(
        image,
        ((top, extra_h - top), (left, extra_w - left), (0, 0)),
        mode='constant',
        constant_values=255,
    )

    # View the padded image as a (rows, cols) grid of tiles, then flatten the grid.
    n_rows = padded.shape[0] // tile_size
    n_cols = padded.shape[1] // tile_size
    grid = padded.reshape(n_rows, tile_size, n_cols, tile_size, channels)
    tiles = grid.swapaxes(1, 2).reshape(-1, tile_size, tile_size, channels)

    shortfall = n_tiles - len(tiles)
    if shortfall > 0:
        tiles = np.pad(
            tiles,
            ((0, shortfall), (0, 0), (0, 0), (0, 0)),
            mode='constant',
            constant_values=255,
        )

    # Lowest pixel sum first; keep the first n_tiles.
    pixel_sums = tiles.reshape(tiles.shape[0], -1).sum(-1)
    selected = np.argsort(pixel_sums)[:n_tiles]
    return tiles[selected]


## 테스트

In [7]:
# Mask the background of the sample image and write the result to disk for inspection.
image1 = _get_masked_image(image)

# The output file name dates from an earlier experiment that down-scaled the
# image first; the image is saved at full resolution.
# No OpenCV window is opened in this cell, so the former cv2.waitKey(0) /
# cv2.destroyAllWindows() calls served no purpose and were dropped.
# cv2.imwrite returns False instead of raising when the write fails.
if not cv2.imwrite('Resized Image1.png', image1):
    raise OSError('Failed to write Resized Image1.png')

In [8]:
# Mask the background, strip the all-white rows/columns, and save the result.
image2 = _get_masked_image(image)
crop_2 = _crop_image(image2)

# The output file name dates from an earlier experiment that down-scaled the
# image first; the image is saved at full resolution.
# No OpenCV window is opened in this cell, so the former cv2.waitKey(0) /
# cv2.destroyAllWindows() calls served no purpose and were dropped.
# cv2.imwrite returns False instead of raising when the write fails.
if not cv2.imwrite('Resized Image2.png', crop_2):
    raise OSError('Failed to write Resized Image2.png')

In [15]:
# Unpack (rows, columns, channels) of the cropped image
height, width, _channels = crop_2.shape
# Print the width, then the height
print("이미지 너비:", width)
print("이미지 높이:", height)

이미지 너비: 5981
이미지 높이: 2309


In [17]:
# Cropped image height (2309 px, printed above) divided by the 224 px tile size
# used in the tiling call below, i.e. how many tile rows the image spans.
2309 / 224

10.308035714285714

In [26]:
# Cut the cropped image into 224x224 tiles and keep 16 of them
# (tile_size=224, n_tiles=16).
tiles = tile_image(crop_2, 224, 16)

In [27]:
import os

save_dir = './test_tiles'
file_prefix = 'test_tile'

# cv2.imwrite does not create missing directories; it just returns False and
# writes nothing, so make sure the target directory exists first.
os.makedirs(save_dir, exist_ok=True)

# Save every tile as <file_prefix><index>.png
for i, tile in enumerate(tiles):
    file_name = f'{file_prefix}{i}.png'
    file_path = os.path.join(save_dir, file_name)
    if not cv2.imwrite(file_path, tile):
        raise OSError(f'Failed to write tile to {file_path}')


In [None]:
#https://github.com/lim-hyo-jeong/DACON-Breast-Cancer/blob/master/image_preprocessing.py
#참고해서 나누고 학습 진행 후 앙상블까지 끝내보기

# 데이터 전처리

In [35]:
# NOTE(review): this function is also defined in an earlier cell of this notebook;
# consider keeping a single definition.
def _get_masked_image(image):
    """Whiten every pixel outside the region selected on the LAB 'a' channel.

    Args:
        image: image array that is passed to ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2LAB`` (e.g. the result of ``cv2.imread``).

    Returns:
        An array with the same shape and dtype as ``image``. Selected pixels
        keep their original values; all remaining pixels are set to 255.
    """
    a_channel = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))[1]

    # Element [1] of cv2.threshold's result is the thresholded image, not the
    # scalar Otsu level, so the comparison below is element-wise against that
    # inverted-binary image.
    otsu_flags = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    binarized = cv2.threshold(a_channel, 127, 255, otsu_flags)[1]

    # Select pixels whose channel value is >= the binarized value, then fill
    # the holes enclosed by that selection.
    selected = ndi.binary_fill_holes(a_channel >= binarized)

    masked_image = image.copy()
    masked_image[~selected] = 255
    return masked_image

def _crop_image(image):
    for w_pos in reversed(range(image.shape[1])):
        if (image[:, w_pos] == [255, 255, 255]).all():
            image = np.delete(image, w_pos, 1)
    for h_pos in reversed(range(image.shape[0])):
        if (image[h_pos, :] == [255, 255, 255]).all():
            image = np.delete(image, h_pos, 0)

    return image

# NOTE(review): this function is also defined in an earlier cell of this notebook;
# consider keeping a single definition.
def tile_image(image, tile_size, n_tiles):
    """Cut an image into square tiles and return the ``n_tiles`` with the lowest pixel sum.

    The image is first padded with 255 (split as evenly as possible between
    both sides of each axis) so that height and width become multiples of
    ``tile_size``. If the grid yields fewer than ``n_tiles`` tiles, all-255
    tiles are appended to make up the difference.

    Args:
        image: array of shape (H, W, C).
        tile_size: edge length of each square tile, in pixels.
        n_tiles: number of tiles to return.

    Returns:
        Array of shape (n_tiles, tile_size, tile_size, C), ordered by
        ascending sum of pixel values.
    """
    height, width, channels = image.shape

    # Amount needed to reach the next multiple of tile_size (0 if already aligned).
    extra_h = -height % tile_size
    extra_w = -width % tile_size
    top, left = extra_h // 2, extra_w // 2
    padded = np.pad(
        image,
        ((top, extra_h - top), (left, extra_w - left), (0, 0)),
        mode='constant',
        constant_values=255,
    )

    # View the padded image as a (rows, cols) grid of tiles, then flatten the grid.
    n_rows = padded.shape[0] // tile_size
    n_cols = padded.shape[1] // tile_size
    grid = padded.reshape(n_rows, tile_size, n_cols, tile_size, channels)
    tiles = grid.swapaxes(1, 2).reshape(-1, tile_size, tile_size, channels)

    shortfall = n_tiles - len(tiles)
    if shortfall > 0:
        tiles = np.pad(
            tiles,
            ((0, shortfall), (0, 0), (0, 0), (0, 0)),
            mode='constant',
            constant_values=255,
        )

    # Lowest pixel sum first; keep the first n_tiles.
    pixel_sums = tiles.reshape(tiles.shape[0], -1).sum(-1)
    selected = np.argsort(pixel_sums)[:n_tiles]
    return tiles[selected]


In [52]:
# # Remove the white margins from the data (cell disabled; code kept for reference)
# NOTE(review): the disabled tiling cell further down reads from './open/lab_train/',
# which is the save_path written here — presumably this loop produced that folder; confirm
# before deleting this cell.

# image_path = './open/train_imgs/'  # directory of the image files to process
# save_path = './open/lab_train/'
# file_list = os.listdir(image_path)

# # Process every file in the list
# for file_name in file_list:
#     image = cv2.imread(image_path+file_name)
#     image2 = _get_masked_image(image)
#     crop_2 = _crop_image(image2)
#     file_path = os.path.join(save_path, file_name)
#     cv2.imwrite(file_path, crop_2)


In [3]:
# Load the tabular training data first
df = pd.read_csv("./open/train.csv")
# Keep only the sample ID and its N_category label
answer = df.loc[:, ["ID", "N_category"]]
answer

Unnamed: 0,ID,N_category
0,BC_01_0001,0
1,BC_01_0002,1
2,BC_01_0003,0
3,BC_01_0004,0
4,BC_01_0005,0
...,...,...
995,BC_01_3464,1
996,BC_01_3482,0
997,BC_01_3485,1
998,BC_01_3502,0


In [57]:
# N_category value of the row with index label 0
answer.loc[0, "N_category"]

0

In [61]:
# Disabled cell: tiles every image in './open/lab_train/' into 16 tiles of 224x224 and
# writes them to the 'normal' or 'cancer' folder depending on N_category.
# NOTE(review): the label is looked up as answer["N_category"][i], i.e. by the position
# of the file in os.listdir(). os.listdir() returns entries in arbitrary order, so the
# i-th file is not guaranteed to be the i-th CSV row — look the label up by ID instead.
# NOTE(review): the inner loops reuse `i`, shadowing the outer loop index.
# NOTE(review): file_name comes straight from os.listdir(), so it presumably still
# carries its extension and outputs are named like '<name>.png_normal0.png' — confirm.
# # tiles = tile_image(crop_2, 224, 16)
# image_path = './open/lab_train/'  # directory of the image files to split into tiles
# file_list = os.listdir(image_path)
# cancer_dir = './open/data/cancer'
# cancer_prefix = 'cancer'
# normal_dir = './open/data/normal'
# normal_prefix = 'normal'
# for i, file_name in enumerate(file_list):
#     image = cv2.imread(image_path+file_name)
#     if(answer["N_category"][i] == 0):
#         tiles = tile_image(image, 224, 16)
#         for i, tile in enumerate(tiles):
#             file_path = os.path.join(normal_dir, file_name+f'_{normal_prefix}{i}.png')
#             cv2.imwrite(file_path, tile)
#     else:
#         tiles = tile_image(image, 224, 16)
#         for i, tile in enumerate(tiles):
#             file_path = os.path.join(cancer_dir, file_name+f'_{cancer_prefix}{i}.png')
#             cv2.imwrite(file_path, tile)


In [62]:
import splitfolders

# Split the class folders under ./open/data into train / val / test.
# `ratio` takes the desired (train, validation, test) fractions, e.g. (0.8, 0.1, 0.1).
# NOTE(review): the folders hold 16 tiles per source image; presumably the split is done
# per file, so tiles of one image can land in different subsets — confirm whether a
# per-image split is needed to avoid leakage.
splitfolders.ratio(
    "./open/data",
    output="output",
    seed=42,
    ratio=(0.7, 0.2, 0.1),
)

Copying files: 16000 files [01:58, 135.44 files/s]


## 데이터 학습

In [3]:
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

training_dir = './output/train/'
validation_dir = './output/val/'

# The training cell uses tf.keras.applications.ResNet50 with ImageNet weights.
# Those weights expect inputs transformed by the matching preprocess_input,
# so apply it to every image instead of feeding raw 0-255 pixels.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Labels come from the sub-folder names; 'binary' yields one 0/1 label per image.
train_generator = train_datagen.flow_from_directory(
    training_dir,
    batch_size=16,
    target_size=(224, 224),
    class_mode='binary'
)

# Validation images must go through exactly the same preprocessing as training images.
validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    batch_size=16,
    target_size=(224, 224),
    class_mode='binary'
)


Found 11199 images belonging to 2 classes.
Found 3199 images belonging to 2 classes.


In [5]:
import tensorflow as tf
from tensorflow.keras import layers

# Data: reuse the directory iterators built in the previous cell
train_dataset = train_generator
valid_dataset = validation_generator

# Build the ResNet backbone (ImageNet weights, no classification head)
base_model = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3)
)

# Assemble the model: backbone -> global average pooling -> 2-way softmax
model = tf.keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(2, activation='softmax')
])

# Compile; the sparse loss takes integer class ids (0/1) with the 2-unit softmax output
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Up to 100 epochs are requested; stop once validation loss stops improving and
# keep the best weights, so the run does not have to be interrupted by hand.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

# Train; keep the History object so the curves can be inspected afterwards
history = model.fit(train_dataset,
                    validation_data=valid_dataset,
                    epochs=100, verbose=2,
                    callbacks=[early_stopping])

# Evaluate on the held-out test split.
# The previous placeholder (`test_dataset = ...`) passed Ellipsis to model.evaluate.
# Build the iterator from validation_datagen so test images get the same preprocessing
# as the validation images; './output/test/' is the third folder produced by the
# (train, val, test) split above.
test_dataset = validation_datagen.flow_from_directory(
    './output/test/',
    batch_size=16,
    target_size=(224, 224),
    class_mode='binary',
    shuffle=False
)
loss, accuracy = model.evaluate(test_dataset)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)




Epoch 1/10


KeyboardInterrupt: 

In [None]:
# # Assume we have image data and label data indicating whether cancer is present
# X = <image data>
# y = <cancer label data>

# # Split into images with cancer and images without
# X_cancer = X[y == 1]  # images with cancer
# X_normal = X[y == 0]  # images without cancer

# # Split the cancer and non-cancer images separately into a training set and a test set
# X_cancer_train, X_cancer_test = train_test_split(X_cancer, test_size=0.2, random_state=42)
# X_normal_train, X_normal_test = train_test_split(X_normal, test_size=0.2, random_state=42)

# # Combine so that the training set contains both cancer and non-cancer images
# X_train = np.concatenate([X_cancer_train, X_normal_train], axis=0)

# # Combine so that the test set contains both cancer and non-cancer images
# X_test = np.concatenate([X_cancer_test, X_normal_test], axis=0)

# # Create the label for each image
# y_train = np.concatenate([np.ones(X_cancer_train.shape[0]), np.zeros(X_normal_train.shape[0])], axis=0)
# y_test = np.concatenate([np.ones(X_cancer_test.shape[0]), np.zeros(X_normal_test.shape[0])], axis=0)

# # Check the data sets
# print("Train set shape:", X_train.shape, y_train.shape)
# print("Test set shape:", X_test.shape, y_test.shape)
