In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

### 파라미터

In [2]:
IMAGE_SIZE = (64, 64)  # 이미지 크기
BATCH_SIZE = 32
EPOCHS = 50

### 1-1. 데이터 준비

In [3]:
labels = pd.read_csv('labels.csv')
images = []
y = []

for _, row in labels.iterrows():
    img_path = os.path.join('data', row['filename'])
    img = load_img(img_path, target_size=IMAGE_SIZE)
    img = img_to_array(img) / 255.0
    images.append(img)
    y.append(row['label'])

X = np.array(images) # 이미지 데이터
y = np.array(y) # 종류

### 1-2. 라벨 인코딩 및 train/test 분리

In [4]:
encoder   = LabelEncoder()
y_encoded = encoder.fit_transform(y)
num_classes = len(np.unique(y_encoded))
y_onehot    = to_categorical(y_encoded, num_classes)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_onehot, test_size=0.2, stratify=y_encoded, random_state=42
)

### 0. 데이터 증강

In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rotation_range=30,       # 0~360° 완전 랜덤 회전
    width_shift_range=0.1,    # 좌우 최대 30% 이동
    height_shift_range=0.1,   # 상하 최대 30% 이동
    horizontal_flip=False,
    vertical_flip=False,
)

test_datagen = ImageDataGenerator()

In [6]:
train_generator = train_datagen.flow(
    X_train, y_train,
    batch_size=BATCH_SIZE,
    shuffle=True
)

validation_generator = test_datagen.flow(
    X_test, y_test,
    batch_size=BATCH_SIZE,
    shuffle=False
)

### 2. 모델 구성

In [7]:
def build_cnn(input_shape=(64, 64, 3), classes=num_classes):
    my_model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),

        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),

        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(classes, activation='softmax')
    ])
    return my_model

model = build_cnn()
model.summary()

2025-06-19 11:45:20.962334: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-06-19 11:45:20.962373: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-06-19 11:45:20.962379: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2025-06-19 11:45:20.962415: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-06-19 11:45:20.962432: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 64, 64, 32)        896       
                                                                 
 conv2d_1 (Conv2D)           (None, 64, 64, 32)        9248      
                                                                 
 max_pooling2d (MaxPooling2  (None, 32, 32, 32)        0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 32, 32, 32)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 32, 32, 64)        18496     
                                                                 
 conv2d_3 (Conv2D)           (None, 32, 32, 64)        36928     
                                                        

### 2-1. 모델 구조 출력 및 저장

In [8]:
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [9]:
os.makedirs('output', exist_ok=True)
early_cb = EarlyStopping(monitor='val_loss', patience=5,
                         restore_best_weights=True, verbose=1)
ckpt_cb  = ModelCheckpoint('output/leesanghyun.h5', monitor='val_loss',
                           save_best_only=True, verbose=1)

### 3. 학습

In [10]:
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator,
    callbacks=[early_cb, ckpt_cb],
    verbose=2
)

Epoch 1/50


2025-06-19 11:45:21.649487: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-06-19 11:45:21.668635: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
2025-06-19 11:45:21.676885: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
2025-06-19 11:45:25.567670: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-06-19 11:45:25.576364: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
2025-06-19 11:45:25.580143: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deseria


Epoch 1: val_loss improved from inf to 3.98915, saving model to model/best.h5
184/184 - 5s - loss: 4.3762 - accuracy: 0.0420 - val_loss: 3.9891 - val_accuracy: 0.0758 - 5s/epoch - 27ms/step
Epoch 2/50


  saving_api.save_model(



Epoch 2: val_loss improved from 3.98915 to 3.61771, saving model to model/best.h5
184/184 - 4s - loss: 3.9263 - accuracy: 0.0833 - val_loss: 3.6177 - val_accuracy: 0.1140 - 4s/epoch - 23ms/step
Epoch 3/50

Epoch 3: val_loss improved from 3.61771 to 3.27734, saving model to model/best.h5
184/184 - 4s - loss: 3.5475 - accuracy: 0.1366 - val_loss: 3.2773 - val_accuracy: 0.1966 - 4s/epoch - 23ms/step
Epoch 4/50

Epoch 4: val_loss improved from 3.27734 to 3.12321, saving model to model/best.h5
184/184 - 4s - loss: 3.2882 - accuracy: 0.1849 - val_loss: 3.1232 - val_accuracy: 0.2423 - 4s/epoch - 23ms/step
Epoch 5/50

Epoch 5: val_loss improved from 3.12321 to 2.78440, saving model to model/best.h5
184/184 - 4s - loss: 3.0786 - accuracy: 0.2170 - val_loss: 2.7844 - val_accuracy: 0.2894 - 4s/epoch - 21ms/step
Epoch 6/50

Epoch 6: val_loss improved from 2.78440 to 2.62217, saving model to model/best.h5
184/184 - 4s - loss: 2.8923 - accuracy: 0.2547 - val_loss: 2.6222 - val_accuracy: 0.3386 - 4s

### 3-1. Loss/Accuracy 그래프

In [None]:
test_loss, test_acc = model.evaluate(validation_generator, verbose=0)
print(f"\n✅  Test accuracy = {test_acc:.4f} | loss = {test_loss:.4f}")

### 4. 평가 및 오분류 샘플 분석

In [12]:
predictions = model.predict(validation_generator)



2025-06-19 11:57:11.247402: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-06-19 11:57:11.251567: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
2025-06-19 11:57:11.253548: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.




In [24]:
result = np.argmax(predictions, axis=1)
# 결과값(pred)과 원래값(y_test)을 비교하는 코드
cnt = 0
for i in range(len(result)):
    original_label = np.argmax(y_test[i])  # 원래값
    predicted_label = result[i]           # 예측값
    print(f"샘플 {i}: 원래값 = {original_label}, 예측값 = {predicted_label}")
    if original_label != predicted_label:
        cnt += 1

print(f"\n❗️ 오분류된 샘플 수: {cnt}개")
print(f"정확도: {1 - cnt / len(result):.4f}")

샘플 0: 원래값 = 87, 예측값 = 87
샘플 1: 원래값 = 53, 예측값 = 89
샘플 2: 원래값 = 45, 예측값 = 45
샘플 3: 원래값 = 76, 예측값 = 76
샘플 4: 원래값 = 74, 예측값 = 74
샘플 5: 원래값 = 40, 예측값 = 79
샘플 6: 원래값 = 45, 예측값 = 45
샘플 7: 원래값 = 47, 예측값 = 46
샘플 8: 원래값 = 45, 예측값 = 45
샘플 9: 원래값 = 20, 예측값 = 7
샘플 10: 원래값 = 55, 예측값 = 55
샘플 11: 원래값 = 97, 예측값 = 18
샘플 12: 원래값 = 93, 예측값 = 37
샘플 13: 원래값 = 52, 예측값 = 52
샘플 14: 원래값 = 71, 예측값 = 40
샘플 15: 원래값 = 18, 예측값 = 87
샘플 16: 원래값 = 87, 예측값 = 87
샘플 17: 원래값 = 21, 예측값 = 82
샘플 18: 원래값 = 81, 예측값 = 39
샘플 19: 원래값 = 89, 예측값 = 41
샘플 20: 원래값 = 88, 예측값 = 89
샘플 21: 원래값 = 45, 예측값 = 45
샘플 22: 원래값 = 9, 예측값 = 28
샘플 23: 원래값 = 57, 예측값 = 57
샘플 24: 원래값 = 7, 예측값 = 7
샘플 25: 원래값 = 76, 예측값 = 76
샘플 26: 원래값 = 16, 예측값 = 16
샘플 27: 원래값 = 50, 예측값 = 50
샘플 28: 원래값 = 36, 예측값 = 36
샘플 29: 원래값 = 78, 예측값 = 76
샘플 30: 원래값 = 94, 예측값 = 72
샘플 31: 원래값 = 48, 예측값 = 48
샘플 32: 원래값 = 70, 예측값 = 70
샘플 33: 원래값 = 80, 예측값 = 80
샘플 34: 원래값 = 21, 예측값 = 9
샘플 35: 원래값 = 50, 예측값 = 50
샘플 36: 원래값 = 45, 예측값 = 88
샘플 37: 원래값 = 72, 예측값 = 72
샘플 38: 원래값 = 43, 예측값 = 43
샘

### 5. 모델 저장 (.h5)

### 6. 결과 요약(마크다운, 노트북에 작성)