## 0. Import Libraries

In [3]:
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split

## 1. Load Data

In [4]:
# Directory that holds the .npy image and label arrays
data_dir = "./dataset"

# Image views / preprocessing variants available in the dataset
image_types = [
    'coronal',
    'sagittal',
    'transverse',
    'transverse_stripped',
    'transverse_masked',
]


In [5]:
# Containers for the image arrays and label arrays, keyed by image type
data, labels = {}, {}

# Read every image type's arrays from disk into the dictionaries
for image_type in image_types:
    data[image_type] = np.load(f"{data_dir}/data_{image_type}.npy")
    labels[image_type] = np.load(f"{data_dir}/labels_{image_type}.npy")

# Report the shape of each loaded array
for image_type in image_types:
    print(f"data {image_type.capitalize()} Shape:", data[image_type].shape)
    print(f"labels {image_type.capitalize()} Shape:", labels[image_type].shape)
    print("")


data Coronal Shape: (212, 224, 224, 1)
labels Coronal Shape: (212,)

data Sagittal Shape: (212, 224, 224, 1)
labels Sagittal Shape: (212,)

data Transverse Shape: (213, 224, 224, 1)
labels Transverse Shape: (213,)

data Transverse_stripped Shape: (213, 224, 224, 1)
labels Transverse_stripped Shape: (213,)

data Transverse_masked Shape: (213, 224, 224, 1)
labels Transverse_masked Shape: (213,)



In [6]:
def balance_and_split_data(data, labels, test_size=0.4, val_size=0.5, random_state=42):
    """Balance the two classes by downsampling, then split into train/validation/test.

    A sample is treated as dementia when its CDR label is >= 0.5 and as
    non-dementia when its CDR label is exactly 0. Labels that match neither
    rule (for example NaN) are left out of the balanced set and out of the
    printed totals.

    Args:
        data: Array of samples, indexed along axis 0 in step with ``labels``.
        labels: Sequence of CDR scores, one per sample.
        test_size: Fraction of the balanced set held out from training
            (validation and test together).
        val_size: Passed as ``test_size`` to the second split, i.e. the share
            of the held-out part that ends up in the *test* set; the rest
            becomes the validation set.
        random_state: Seed used for the class downsampling and both splits.

    Returns:
        Tuple ``(train_data, val_data, test_data, train_labels, val_labels,
        test_labels)``. Labels are binary: 1 for dementia, 0 for non-dementia.

    Raises:
        ValueError: If either class has no samples.
    """
    cdr_scores = np.asarray(labels)
    dementia_indices = np.flatnonzero(cdr_scores >= 0.5)
    non_dementia_indices = np.flatnonzero(cdr_scores == 0)
    print(f"Total Dementia: {len(dementia_indices)}, Total Non-dementia: {len(non_dementia_indices)}")

    if len(dementia_indices) == 0 or len(non_dementia_indices) == 0:
        raise ValueError(
            "Both dementia (CDR >= 0.5) and non-dementia (CDR == 0) samples are required."
        )

    # Downsample to the smaller class so the sampling below can never ask for
    # more items than the pool holds.
    per_class = min(len(dementia_indices), len(non_dementia_indices))

    # A local RandomState yields the same draw as np.random.seed(random_state)
    # followed by np.random.choice, without reseeding NumPy's global generator.
    rng = np.random.RandomState(random_state)
    non_dementia_balanced_indices = rng.choice(non_dementia_indices, size=per_class, replace=False)
    if len(dementia_indices) > per_class:
        dementia_indices = rng.choice(dementia_indices, size=per_class, replace=False)

    # Dementia samples first, then the sampled non-dementia samples
    balanced_indices = np.concatenate([dementia_indices, non_dementia_balanced_indices])
    balanced_data = np.asarray(data)[balanced_indices]
    balanced_labels = [1] * per_class + [0] * per_class

    # First split: training set vs. held-out remainder
    train_data, temp_data, train_labels, temp_labels = train_test_split(
        balanced_data, balanced_labels, test_size=test_size, stratify=balanced_labels, random_state=random_state
    )

    # Second split: held-out remainder into validation and test
    val_data, test_data, val_labels, test_labels = train_test_split(
        temp_data, temp_labels, test_size=val_size, stratify=temp_labels, random_state=random_state
    )

    return train_data, val_data, test_data, train_labels, val_labels, test_labels


In [7]:
# Split every image type into train/validation/test subsets.
# The key order matches the order of values returned by balance_and_split_data.
split_keys = ("train_data", "val_data", "test_data", "train_labels", "val_labels", "test_labels")

split_data = {}
for image_type in image_types:
    print(f"Processing {image_type.capitalize()} Data:")
    subsets = balance_and_split_data(data[image_type], labels[image_type])

    # Store the six subsets under their names
    split_data[image_type] = dict(zip(split_keys, subsets))

    print("")

Processing Coronal Data:
Total Dementia: 92, Total Non-dementia: 120

Processing Sagittal Data:
Total Dementia: 92, Total Non-dementia: 120

Processing Transverse Data:
Total Dementia: 92, Total Non-dementia: 121

Processing Transverse_stripped Data:
Total Dementia: 92, Total Non-dementia: 121

Processing Transverse_masked Data:
Total Dementia: 92, Total Non-dementia: 121



In [8]:
def load_data_for_image_type(image_type, split_data):
    """Return the training and validation subsets stored for one image type.

    Args:
        image_type: Key into ``split_data`` selecting the image type.
        split_data: Mapping from image type to a dict holding at least the
            keys ``train_data``, ``train_labels``, ``val_data`` and
            ``val_labels``.

    Returns:
        Tuple ``(train_data, train_labels, val_data, val_labels)``.
    """
    subsets = split_data[image_type]
    wanted = ('train_data', 'train_labels', 'val_data', 'val_labels')
    return tuple(subsets[key] for key in wanted)

## 2. Modeling

In [9]:
# Data augmentation settings
augmentation_options = {
    "rotation_range": 20,             # rotation
    "width_shift_range": 0.1,         # horizontal shift
    "height_shift_range": 0.1,        # vertical shift
    "shear_range": 0.1,               # shear transform
    "zoom_range": 0.1,                # zoom in/out
    "horizontal_flip": True,          # horizontal flip
    "brightness_range": [0.9, 1.1],   # brightness adjustment
}
datagen = ImageDataGenerator(**augmentation_options)

In [10]:
def create_compile_model(input_shape):
    """Build and compile the CNN used for binary classification.

    The network is three identical convolution stages (Conv2D, batch
    normalization, 2x2 max pooling, dropout 0.25) with 32, 64 and 128 filters,
    followed by a 128-unit dense layer (dropout 0.5) and a single sigmoid
    output unit. Convolution and dense kernels carry L2 regularization (0.01).

    Args:
        input_shape: Shape of one input sample, without the batch dimension.

    Returns:
        The model, compiled with the Adam optimizer, binary cross-entropy loss
        and an accuracy metric.
    """
    model = models.Sequential()

    # Declare the input as its own layer; Keras 3 emits a warning when
    # `input_shape` is passed to the first layer of a Sequential model.
    model.add(layers.Input(shape=input_shape))

    # Convolution stages: Conv -> BatchNorm -> MaxPool -> Dropout
    for filters in (32, 64, 128):
        model.add(layers.Conv2D(filters, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.01)))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Dropout(0.25))

    # Flatten the feature maps for the dense head
    model.add(layers.Flatten())

    # Fully connected head
    model.add(layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(1, activation='sigmoid'))  # output layer for binary classification

    # Compile
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

In [11]:
def create_and_fit_model(train_data, train_labels, val_data, val_labels, epochs=100, batch_size=32):
    """Create, compile and train a model, from construction through fitting.

    Args:
        train_data: Training samples; the first axis is the sample axis.
        train_labels: Binary training labels (list or array).
        val_data: Validation samples.
        val_labels: Binary validation labels (list or array).
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.

    Returns:
        The history object returned by ``model.fit``. The trained model is not
        returned separately (presumably reachable as ``history.model``;
        confirm for the installed Keras version).
    """
    # Accept plain lists (as produced by the split step) as well as arrays
    train_data, train_labels = np.asarray(train_data), np.asarray(train_labels)
    val_data, val_labels = np.asarray(val_data), np.asarray(val_labels)

    # Multiply the learning rate by 0.1 when val_loss has not improved for
    # 5 epochs, never going below 1e-6
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-6)

    # Build the model for the shape of one sample instead of a hard-coded shape
    model = create_compile_model(train_data.shape[1:])

    # Train
    history = model.fit(
        train_data, train_labels,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(val_data, val_labels),
        callbacks=[reduce_lr]
    )
    return history

### 2-0. augmentation 적용 버전

In [13]:
# Fetch the train/validation subsets for the transverse (masked) image type
train_data, train_labels, val_data, val_labels = load_data_for_image_type("transverse_masked", split_data)

# Convert the labels to NumPy arrays
train_labels, val_labels = np.array(train_labels), np.array(val_labels)

# Build and compile the model, and set up the learning-rate callback
model = create_compile_model((224, 224, 1))
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-6)

# Train on augmented batches; the validation arrays are passed in as-is
augmented_batches = datagen.flow(train_data, train_labels, batch_size=32)
history = model.fit(
    augmented_batches,
    validation_data=(val_data, val_labels),
    epochs=100,
    callbacks=[reduce_lr],
)

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 681ms/step - accuracy: 0.5063 - loss: 3.9296 - val_accuracy: 0.4595 - val_loss: 2.1901 - learning_rate: 0.0010
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 757ms/step - accuracy: 0.5005 - loss: 1.9837 - val_accuracy: 0.5676 - val_loss: 1.6984 - learning_rate: 0.0010
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 577ms/step - accuracy: 0.5146 - loss: 1.6901 - val_accuracy: 0.5946 - val_loss: 1.6555 - learning_rate: 0.0010
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 585ms/step - accuracy: 0.4552 - loss: 1.6288 - val_accuracy: 0.4054 - val_loss: 1.4784 - learning_rate: 0.0010
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 771ms/step - accuracy: 0.4696 - loss: 1.4132 - val_accuracy: 0.4054 - val_loss: 1.2474 - learning_rate: 0.0010
Epoch 6/100


KeyboardInterrupt: 

In [None]:
# Training vs. validation accuracy per epoch for the augmented transverse (masked) run.
# The title names the run so the figure can be told apart from the other accuracy plots.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Train Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_title('Transverse (masked, augmented): accuracy per epoch')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

### 2-1. Coronal section

In [91]:
# Fetch the train/validation subsets for the coronal image type
train_data, train_labels, val_data, val_labels = load_data_for_image_type("coronal", split_data)

# Convert the labels to NumPy arrays
train_labels, val_labels = np.array(train_labels), np.array(val_labels)

# Train a fresh model on the loaded subsets
history = create_and_fit_model(train_data, train_labels, val_data, val_labels)

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 640ms/step - accuracy: 0.4677 - loss: 27.9000 - val_accuracy: 0.4865 - val_loss: 5.0139 - learning_rate: 0.0010
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 582ms/step - accuracy: 0.4599 - loss: 28.2703 - val_accuracy: 0.4865 - val_loss: 7.0424 - learning_rate: 0.0010
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 586ms/step - accuracy: 0.4922 - loss: 14.8445 - val_accuracy: 0.5405 - val_loss: 5.6327 - learning_rate: 0.0010
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 567ms/step - accuracy: 0.5875 - loss: 10.0633 - val_accuracy: 0.5676 - val_loss: 6.6317 - learning_rate: 0.0010
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 565ms/step - accuracy: 0.5947 - loss: 7.5343 - val_accuracy: 0.5135 - val_loss: 6.7815 - learning_rate: 0.0010
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[

In [1]:
# Training vs. validation accuracy per epoch for the coronal run.
# The title names the run so the figure can be told apart from the other accuracy plots.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Train Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_title('Coronal: accuracy per epoch')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

NameError: name 'plt' is not defined

### 2-2. Sagittal section

In [92]:
# Fetch the train/validation subsets for the sagittal image type
train_data, train_labels, val_data, val_labels = load_data_for_image_type("sagittal", split_data)

# Convert the labels to NumPy arrays
train_labels, val_labels = np.array(train_labels), np.array(val_labels)

# Train a fresh model on the loaded subsets
history = create_and_fit_model(train_data, train_labels, val_data, val_labels)

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 630ms/step - accuracy: 0.5296 - loss: 15.2823 - val_accuracy: 0.5946 - val_loss: 4.9218 - learning_rate: 0.0010
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 580ms/step - accuracy: 0.4958 - loss: 19.4734 - val_accuracy: 0.4324 - val_loss: 5.5041 - learning_rate: 0.0010
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 570ms/step - accuracy: 0.6327 - loss: 8.9930 - val_accuracy: 0.4324 - val_loss: 5.8249 - learning_rate: 0.0010
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 567ms/step - accuracy: 0.5905 - loss: 10.2987 - val_accuracy: 0.4865 - val_loss: 6.6635 - learning_rate: 0.0010
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 573ms/step - accuracy: 0.7576 - loss: 6.9834 - val_accuracy: 0.4865 - val_loss: 7.7147 - learning_rate: 0.0010
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0

In [None]:
# Training vs. validation accuracy per epoch for the sagittal run.
# The title names the run so the figure can be told apart from the other accuracy plots.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Train Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_title('Sagittal: accuracy per epoch')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

### 2-3. Transverse section

#### (1) without skull stripping

In [93]:
# Fetch the train/validation subsets for the transverse image type (no skull stripping)
train_data, train_labels, val_data, val_labels = load_data_for_image_type("transverse", split_data)

# Convert the labels to NumPy arrays
train_labels, val_labels = np.array(train_labels), np.array(val_labels)

# Train a fresh model on the loaded subsets
history = create_and_fit_model(train_data, train_labels, val_data, val_labels)

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 713ms/step - accuracy: 0.5114 - loss: 45.5673 - val_accuracy: 0.4865 - val_loss: 5.8263 - learning_rate: 0.0010
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 586ms/step - accuracy: 0.6192 - loss: 13.9538 - val_accuracy: 0.4865 - val_loss: 17.5099 - learning_rate: 0.0010
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 579ms/step - accuracy: 0.5598 - loss: 9.3775 - val_accuracy: 0.4865 - val_loss: 32.6428 - learning_rate: 0.0010
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 577ms/step - accuracy: 0.6478 - loss: 6.0510 - val_accuracy: 0.4865 - val_loss: 48.8681 - learning_rate: 0.0010
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 591ms/step - accuracy: 0.5744 - loss: 6.6502 - val_accuracy: 0.4865 - val_loss: 63.1641 - learning_rate: 0.0010
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s

In [None]:
# Training vs. validation accuracy per epoch for the transverse run (no skull stripping).
# The title names the run so the figure can be told apart from the other accuracy plots.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Train Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_title('Transverse (no skull stripping): accuracy per epoch')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

#### (2) with skull stripping

In [94]:
# Fetch the train/validation subsets for the skull-stripped transverse image type
train_data, train_labels, val_data, val_labels = load_data_for_image_type("transverse_stripped", split_data)

# Convert the labels to NumPy arrays
train_labels, val_labels = np.array(train_labels), np.array(val_labels)

# Train a fresh model on the loaded subsets
history = create_and_fit_model(train_data, train_labels, val_data, val_labels)

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 649ms/step - accuracy: 0.5146 - loss: 35.6012 - val_accuracy: 0.6216 - val_loss: 4.8650 - learning_rate: 0.0010
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 590ms/step - accuracy: 0.5874 - loss: 16.1335 - val_accuracy: 0.4865 - val_loss: 12.7247 - learning_rate: 0.0010
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 582ms/step - accuracy: 0.5177 - loss: 17.0575 - val_accuracy: 0.4865 - val_loss: 27.9037 - learning_rate: 0.0010
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 610ms/step - accuracy: 0.5265 - loss: 11.6918 - val_accuracy: 0.4865 - val_loss: 45.6204 - learning_rate: 0.0010
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 586ms/step - accuracy: 0.6514 - loss: 7.1352 - val_accuracy: 0.4865 - val_loss: 61.4349 - learning_rate: 0.0010
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [None]:
# Training vs. validation accuracy per epoch for the skull-stripped transverse run.
# The title names the run so the figure can be told apart from the other accuracy plots.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Train Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_title('Transverse (skull stripped): accuracy per epoch')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

#### (3) with masked data

In [95]:
# Fetch the train/validation subsets for the masked transverse image type
train_data, train_labels, val_data, val_labels = load_data_for_image_type("transverse_masked", split_data)

# Convert the labels to NumPy arrays
train_labels, val_labels = np.array(train_labels), np.array(val_labels)

# Train a fresh model on the loaded subsets
history = create_and_fit_model(train_data, train_labels, val_data, val_labels)

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 679ms/step - accuracy: 0.4995 - loss: 40.6061 - val_accuracy: 0.4865 - val_loss: 5.4101 - learning_rate: 0.0010
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 627ms/step - accuracy: 0.5073 - loss: 40.6735 - val_accuracy: 0.4865 - val_loss: 8.1379 - learning_rate: 0.0010
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 627ms/step - accuracy: 0.5192 - loss: 23.3567 - val_accuracy: 0.4865 - val_loss: 10.8190 - learning_rate: 0.0010
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 612ms/step - accuracy: 0.5786 - loss: 13.6165 - val_accuracy: 0.4865 - val_loss: 13.6909 - learning_rate: 0.0010
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 616ms/step - accuracy: 0.6295 - loss: 7.9739 - val_accuracy: 0.4865 - val_loss: 16.4534 - learning_rate: 0.0010
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3

In [None]:
# Training vs. validation accuracy per epoch for the masked transverse run.
# The title names the run so the figure can be told apart from the other accuracy plots.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Train Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_title('Transverse (masked): accuracy per epoch')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()