In [None]:
# Mount Google Drive at /content/drive so that later cells can read the
# dataset from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.layers import BatchNormalization, Dropout

In [2]:
def load_and_preprocess_images(folders, image_size=(128, 128)):
    """Load all readable images from ``folders`` and one-hot encode their classes.

    Each folder is treated as one class; the class index of a folder is its
    position in ``folders``.

    Args:
        folders: Sequence of directory paths, one per class.
        image_size: Target size handed unchanged to ``cv2.resize`` (which
            interprets it as ``(width, height)``).

    Returns:
        A tuple ``(images, labels)``:
            images: ``np.ndarray`` of float32 pixel values scaled to [0, 1],
                one entry per successfully decoded file, in OpenCV's default
                BGR channel order.
            labels: One-hot matrix produced by ``to_categorical`` with
                ``len(folders)`` columns, aligned with ``images``.

    Notes:
        Directory entries that ``cv2.imread`` cannot decode (it returns
        ``None`` for them) are skipped silently.
    """
    images = []
    labels = []
    label_map = {folder: idx for idx, folder in enumerate(folders)}

    for folder in folders:
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and therefore any seeded split downstream) reproducible.
        for filename in sorted(os.listdir(folder)):
            img = cv2.imread(os.path.join(folder, filename))
            if img is None:
                # Not a decodable image (e.g. a sub-directory or stray file).
                continue
            img = cv2.resize(img, image_size)
            # Cast before scaling so the result stays float32 instead of being
            # promoted to float64, which would double the memory footprint.
            images.append(img.astype(np.float32) / 255.0)
            labels.append(label_map[folder])

    images = np.array(images)
    labels = to_categorical(labels, num_classes=len(folders))

    return images, labels

In [3]:
# Root directory of the Caltech-101 category folders on the mounted Drive.
base = "/content/drive/MyDrive/2023_2/Caltech_101/101_ObjectCategories/"

# One directory per class; the order of this list fixes the order of the classes.
folders = [
    base + category
    for category in (
        'tick',
        'trilobite',
        'umbrella',
        'watch',
        'water_lilly',
        'wheelchair',
        'wild_cat',
        'windsor_chair',
        'wrench',
        'yin_yang',
    )
]


In [None]:
# Size every image is resized to by the loader.
image_size = (128, 128)
images, labels = load_and_preprocess_images(folders=folders, image_size=image_size)

In [5]:
def build_cnn(input_shape, num_classes):
    """Assemble a small single-convolution CNN classifier.

    The stack is: 3x3 convolution (32 filters, ReLU) -> 2x2 max pooling ->
    flatten -> 128-unit ReLU dense layer -> batch normalization ->
    dropout (rate 0.2) -> softmax output with ``num_classes`` units.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    stack = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ]

    network = Sequential()
    for layer in stack:
        network.add(layer)
    return network

# Create the CNN model: one output unit per class folder, with the input
# shape being the image size plus a trailing channel dimension of 3.
model = build_cnn(
    input_shape=image_size + (3,),
    num_classes=len(folders),
)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 63, 63, 32)        0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 127008)            0         
                                                                 
 dense (Dense)               (None, 128)               16257152  
                                                                 
 batch_normalization (Batch  (None, 128)               512       
 Normalization)                                                  
                                                                 
 dropout (Dropout)           (None, 128)               0

In [7]:
# Split the dataset: hold out 20% of the samples as the test set.
# Stratify on the class index (argmax of the one-hot labels) so that every
# class keeps the same proportion in the train and test sets; the chosen
# categories differ a lot in size, so an unstratified split can leave small
# classes under-represented in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels.argmax(axis=1),
)

# Train the model; Keras sets aside 20% of the training data for validation.
history = model.fit(X_train, y_train, epochs=20, validation_split=0.2)

# Evaluate performance on the held-out test set.
test_loss, test_acc = model.evaluate(X_test, y_test)
print("Test Accuracy:", test_acc)


NameError: ignored

In [6]:
import matplotlib.pyplot as plt

# Draw one figure per recorded metric (loss first, then accuracy), each
# showing the training curve together with its validation counterpart.
for metric_key, metric_name in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    plt.plot(history.history[metric_key], label='Training ' + metric_name)
    plt.plot(history.history['val_' + metric_key], label='Validation ' + metric_name)
    plt.title('Training and Validation ' + metric_name)
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()
    plt.show()


NameError: ignored

In [None]:
"""
다양한 네트워크 아키텍처: 모델이 복잡할수록 성능이 좋아지지만, 레이어가 너무 많아지면 성능이 감소함
배치 정규화 기법: 배치 정규화를 사용하면 모델 학습이 안정적으로 되며 성능이 향상됨
드롭아웃 및 정규화: 성능이 향상되며, 오버피팅을 방지할 수 있음
손실 함수 비교: 손실 함수에 따라 성능이 달라지며, 이를 잘 확인할 수 있음
신경망 압축: 모델을 압축하면 성능은 떨어지지만, 그만큼 용량이 줄어들어 학습과 추론 시간이 빨라짐
학습률과 배치 크기: 배치 크기가 클수록 모델을 빠르게 학습할 수 있지만, 그만큼 학습이 불안정해짐
가중치 초기화: 사전 훈련된 CNN을 사용하면 loss를 더 빠르게 수렴시킬 수 있음
"""