In [None]:
import os
import glob
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np

# DATA 준비

DATA Load

In [None]:
# Load the pre-processed image array from the .npy file.
images = np.load('/content/drive/MyDrive/pharyngitis/data/images_to_numpy.npy')
# Samples 1..147 get label 1 and samples 148..362 get label 0.
labels = [1 if i <= 147 else 0 for i in range(1, 363)]
# Fail early if the array on disk does not match the hard-coded label count.
assert len(images) == len(labels), (
    f'expected {len(labels)} images, found {len(images)}')

In [None]:
# Class weights used to compensate for class imbalance.
# NOTE(review): the label list built at load time contains 215 zeros and
# 147 ones, so num_0 / num_1 are the sample counts of class 0 / class 1.
num_0 = 215  # samples in class 0 (no pharyngitis)
num_1 = 147  # samples in class 1 (pharyngitis)
num_tot = num_0 + num_1

# Common scale factor; each class is then weighted by the *other* class's
# share of the data, so the rarer class receives the larger weight.
k = (num_tot ** 2) / (2 * num_0 * num_1)
weight_for_0 = (num_1 / num_tot) * k
weight_for_1 = (num_0 / num_tot) * k

# Mapping from class index to weight.
class_weight = dict([(0, weight_for_0), (1, weight_for_1)])

In [None]:
# Display the computed per-class weights as the cell output.
class_weight

TF Dataset 으로 만들기

train, val split

In [None]:
# Build a tf.data pipeline of (image, label) pairs.
dataset1 = tf.data.Dataset.from_tensor_slices((images, labels))
# Shuffle ONCE with a fixed order. By default `shuffle` reshuffles on every
# iteration, which would make `take`/`skip` below pick different samples each
# time the datasets are iterated and leak samples between the training and
# validation splits. The seed keeps the split identical across kernel restarts.
shuffled_dataset = dataset1.cache().shuffle(
    10000, seed=42, reshuffle_each_iteration=False)
# Dataset size
dataset_size = len(shuffled_dataset)

# Split ratios
train_ratio = 0.8
val_ratio = 0.2

# Split sizes. The validation split is "everything after the training split",
# so its size is the remainder rather than a separately rounded product.
train_size = int(dataset_size * train_ratio)
val_size = dataset_size - train_size

# Split the dataset: first `train_size` samples for training, the rest for validation.
train_dataset = shuffled_dataset.take(train_size)
val_dataset = shuffled_dataset.skip(train_size)

In [None]:
# Report how many samples each split holds; the validation count is the
# remainder left after the training samples are taken.
for caption, count in (('train_size:', train_size),
                       ('val_size:', dataset_size - train_size)):
    print(caption, count)

In [None]:
# Augmentation pipeline: random contrast, small random rotation, small random zoom.
augmentation_layers = [
    tf.keras.layers.RandomContrast(factor=(0.1, 0.1)),
    tf.keras.layers.RandomRotation(0.05),
    tf.keras.layers.RandomZoom(0.05),
]
data_augmentation = tf.keras.models.Sequential(augmentation_layers)


def augment_images(image, label):
  """Run `image` through the augmentation pipeline; `label` is passed through unchanged.

  Returns:
    The `(augmented_image, label)` pair.
  """
  return data_augmentation(image), label


# Augmented view of the training split.
aug_train_dataset = train_dataset.map(augment_images)

In [None]:
AUTOTUNE = tf.data.AUTOTUNE
# NOTE: this cell rebinds train_dataset / val_dataset to their batched
# versions, so it must be run exactly once after the split cell.
raw_train_dataset = train_dataset

# Plain training pipeline: cache raw samples, reshuffle every epoch, batch.
train_dataset = (
    raw_train_dataset
    .cache()
    .shuffle(10000)
    .batch(16)
    .prefetch(buffer_size=AUTOTUNE)
)

# Augmented training pipeline. The cache sits BEFORE the augmentation map:
# caching after it would store the first epoch's augmented images and replay
# them unchanged, so no fresh random augmentation would ever be applied.
aug_train_dataset = (
    raw_train_dataset
    .cache()
    .shuffle(10000)
    .map(augment_images)
    .batch(16)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation pipeline: no shuffling or augmentation.
val_dataset = val_dataset.cache().batch(16).prefetch(buffer_size=AUTOTUNE)

In [None]:
# Inspect the label make-up of the validation set (e.g. to make sure it is
# not made up entirely of zeros).
val_labels = [
    label_value
    for _, label_batch in val_dataset
    for label_value in np.array(label_batch)
]

# Tally each class.
num_zeros, num_ones = (val_labels.count(target) for target in (0, 1))

print("Number of zeros:", num_zeros)
print("Number of ones:", num_ones)

# Model 만들기

Model 2 만들기

In [None]:
def get_model_2_p2(input_shape=(224, 224, 3)):
    """Build the ResNet50-based binary classifier (without input preprocessing).

    An ImageNet-pretrained ResNet50 without its top layer is followed by
    global average pooling, dropout, a 16-unit dense layer and a single
    sigmoid output unit.

    Args:
        input_shape: Shape of one input image as (height, width, channels).

    Returns:
        An uncompiled `tf.keras.Model` mapping images to a value in [0, 1].
    """
    # Honour the caller's input_shape instead of a hard-coded (224, 224, 3).
    resnet50_without_top_layer = tf.keras.applications.ResNet50(
        weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze the first 160 backbone layers; the remaining backbone layers and
    # the new head stay trainable.
    for layer in resnet50_without_top_layer.layers[:160]:
        layer.trainable = False

    x = tf.keras.layers.GlobalAveragePooling2D()(resnet50_without_top_layer.output)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(16, activation='sigmoid')(x)
    output = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inputs=resnet50_without_top_layer.input, outputs=output)
    return model

def get_model_2(input_shape=(224, 224, 3)):
    """Build the full model: ResNet50 preprocessing followed by the classifier.

    Args:
        input_shape: Shape of one raw input image as (height, width, channels).

    Returns:
        An uncompiled `tf.keras.Model` that takes raw images, applies
        `tf.keras.applications.resnet.preprocess_input`, and returns the
        classifier's sigmoid output.
    """
    # Forward the requested shape so the inner model matches the outer Input.
    model_2_p2 = get_model_2_p2(input_shape=input_shape)

    inputs = tf.keras.Input(shape=input_shape)
    # Convert the image into the form ResNet50 expects.
    h = tf.keras.applications.resnet.preprocess_input(inputs)
    outputs = model_2_p2(h)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    return model

In [None]:
# Build a throwaway instance of the model just to print its layer summary.
get_model_2().summary()

# Train

In [None]:
model = get_model_2()
model.compile(
    optimizer=tf.keras.optimizers.legacy.SGD(
        learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop on the VALIDATION loss: validation data is supplied to fit(), and with
# restore_best_weights=True monitoring the training loss would restore the
# weights that fit the training set best rather than those that generalise best.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# class_weight compensates for the class imbalance.
# NOTE(review): this fits on train_dataset (no augmentation); pass
# aug_train_dataset instead if the augmented pipeline is intended.
history = model.fit(
    train_dataset,
    epochs=60,
    callbacks=[callback],  # fit() documents a list of callbacks
    validation_data=val_dataset,
    class_weight=class_weight,
)

In [None]:
# Loss and accuracy curves, one panel per metric.
fig, axs = plt.subplots(1, 2, figsize=(12, 6))

for ax, metric_key, metric_name in zip(axs, ('loss', 'accuracy'), ('Loss', 'Accuracy')):
    # Training curve and its validation counterpart on the same axes.
    ax.plot(history.history[metric_key], label='Train ' + metric_name)
    ax.plot(history.history['val_' + metric_key], label='Validation ' + metric_name)
    ax.set_title('Training and Validation ' + metric_name)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_name)
    ax.legend()

plt.show()


모델 prediction 확인

In [None]:
# Show 10 randomly chosen images with the model's prediction and the true label.
random_indices = np.random.choice(len(images), size=10, replace=False)
fig, axes = plt.subplots(2, 5, figsize=(12, 6))
axes = axes.ravel()

# One batched forward pass for all sampled images instead of one call per
# image; ravel() turns the (10, 1) output into a flat vector so each entry
# converts cleanly to a Python float.
predictions = model.predict(images[random_indices], verbose=0).ravel()

for i, idx in enumerate(random_indices):
    image = images[idx]
    prediction = float(predictions[i])
    axes[i].imshow(image)
    axes[i].axis('off')
    img_title = 'Image #{}\nPharyngitis Prediction: {:.4f}\nAnswer: {}'.format(idx+1, prediction, labels[idx])
    axes[i].set_title(img_title, fontsize=10, pad=5)  # Title shows the 1-based image index

plt.tight_layout()
plt.show()


모델이 틀리게 예측한 이미지들 확인

In [None]:
# 잘못 예측한 이미지 확인
misclassified_images = []
for batch_images, batch_labels in val_dataset:
    batch_size = batch_images.shape[0]

    for i in range(batch_size):
        image = tf.expand_dims(batch_images[i], axis=0)
        label = batch_labels[i]

        predicted_probabilities = float(model.predict(image, verbose=0))
        if predicted_probabilities>=0.5:
            predicted_label = 1
        else:
            predicted_label = 0

        if predicted_label != label.numpy():
            misclassified_images.append((image[0], label.numpy(), predicted_probabilities))
#틀린 개수
print('# wrong:', len(misclassified_images))

# 잘못 예측한 이미지 시각화
for i in range(len(misclassified_images)):
    image, true_label, predicted_label = misclassified_images[i]
    plt.imshow(image, cmap='gray')  # 이미지는 흑백으로 가정
    plt.title(f'True Label: {true_label}, Prediction Value: {predicted_label}')
    plt.show()


# Save model

모델 .h5으로 저장

In [None]:
# 모델 저장
model_dir =  '/content/drive/MyDrive/pharyngitis/models'
model_name = 'some_resnet50_included_onlylastlayer_WithAugmentation_0723_1453.h5'
model_path = os.path.join(model_dir, model_name)

model.save(model_path)

# Load model and test

In [None]:
# Load a previously saved model from model_dir (defined in the save cell).
# Note that this file name differs from the one written by the save cell,
# so this loads a different saved run, not the model trained above.
model_name = 'some_resnet50_included_onlylastlayer_noAugmentation_0723_1445.h5'
model_load_path = os.path.join(model_dir, model_name)

loaded_model = tf.keras.models.load_model(model_load_path)

In [None]:
# Print the layer summary of the model loaded from disk.
loaded_model.summary()

In [None]:
# Evaluate the loaded model on the validation pipeline; the first two
# entries of the result are reported as loss and accuracy.
results = loaded_model.evaluate(val_dataset)

for caption, value in (("Val Loss:", results[0]), ("Val Accuracy:", results[1])):
    print(caption, value)

loaded_model 결과 확인

In [None]:
# Show 10 randomly chosen images with the LOADED model's prediction and the true label.
random_indices = np.random.choice(len(images), size=10, replace=False)
fig, axes = plt.subplots(2, 5, figsize=(12, 6))
axes = axes.ravel()

# This section checks loaded_model, so predict with it (not the in-memory
# `model`). One batched pass; ravel() flattens the (10, 1) output.
predictions = loaded_model.predict(images[random_indices], verbose=0).ravel()

for i, idx in enumerate(random_indices):
    image = images[idx]
    prediction = float(predictions[i])
    axes[i].imshow(image)
    axes[i].axis('off')
    img_title = 'Image #{}\nPharyngitis Prediction: {:.4f}\nAnswer: {}'.format(idx+1, prediction, labels[idx])
    axes[i].set_title(img_title, fontsize=10, pad=5)  # Title shows the 1-based image index

plt.tight_layout()
plt.show()


In [None]:
# 잘못 예측한 이미지 확인
misclassified_images = []
for batch_images, batch_labels in val_dataset:
    batch_size = batch_images.shape[0]

    for i in range(batch_size):
        image = tf.expand_dims(batch_images[i], axis=0)
        label = batch_labels[i]

        predicted_probabilities = float(loaded_model.predict(image, verbose=0))
        if predicted_probabilities>=0.5:
            predicted_label = 1
        else:
            predicted_label = 0

        if predicted_label != label.numpy():
            misclassified_images.append((image[0], label.numpy(), predicted_probabilities))
#틀린 개수
print('# wrong:', len(misclassified_images))

# 잘못 예측한 이미지 시각화
for i in range(len(misclassified_images)):
    image, true_label, predicted_label = misclassified_images[i]
    plt.imshow(image, cmap='gray')  # 이미지는 흑백으로 가정
    plt.title(f'True Label: {true_label}, Prediction Value: {predicted_label}')
    plt.show()
