In [None]:
!pip install --upgrade tfds-nightly
!pip install --upgrade tensorflow tensorflow-datasets

Collecting tfds-nightly
  Downloading tfds_nightly-4.9.5.dev202406040044-py3-none-any.whl (5.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
Collecting immutabledict (from tfds-nightly)
  Downloading immutabledict-4.2.0-py3-none-any.whl (4.7 kB)
Collecting simple-parsing (from tfds-nightly)
  Downloading simple_parsing-0.1.5-py3-none-any.whl (113 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m113.6/113.6 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: simple-parsing, immutabledict, tfds-nightly
Successfully installed immutabledict-4.2.0 simple-parsing-0.1.5 tfds-nightly-4.9.5.dev202406040044
Collecting tensorflow
  Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m589.8/589.8 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow-datas

In [None]:
import time
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam

# Load the EMNIST "balanced" configuration: build/download it once, then open
# it as a data source that is indexed by split name further down.
ds_builder = tfds.builder("emnist/balanced")
ds_builder.download_and_prepare()
dataset = ds_builder.as_data_source()
ds_info = ds_builder.info

# Class index -> character table (47 entries): ten digits, 26 upper-case
# letters, then the 11 lower-case letters listed here.
label_mapping = list(
    '0123456789'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abdefghnqrt'
)

# Image resizing / normalisation and label one-hot encoding
def preprocess(image, label):
    """Turn one raw (image, label) pair into model-ready tensors.

    This function is applied through `tf.data.Dataset.map`, which traces it
    with symbolic tensors, so it must use TensorFlow ops only. The label is
    therefore encoded with `tf.one_hot` rather than the NumPy-oriented
    `tf.keras.utils.to_categorical`.

    Args:
        image: Single-channel image tensor (it is passed to
            `grayscale_to_rgb`). Assumes pixel values in 0-255 - TODO confirm.
        label: Integer class index, expected in [0, 47).

    Returns:
        Tuple `(image, label)`: the image resized to 224x224, expanded to
        three channels and divided by 255; the label as a length-47 one-hot
        vector.
    """
    image = tf.image.resize(image, (224, 224))  # resize to 224x224
    image = tf.image.grayscale_to_rgb(image)    # replicate the grey channel to RGB
    image = image / 255.0                       # scale pixel values
    label = tf.one_hot(label, depth=47)         # graph-safe one-hot encoding
    return image, label

# Materialise the train and test splits as NumPy arrays
def _split_to_arrays(split_name):
    """Read every example of `dataset[split_name]` into (images, labels) arrays."""
    images, labels = [], []
    for record in dataset[split_name]:
        images.append(record['image'])
        labels.append(record['label'])
    return np.array(images), np.array(labels)


X, y = _split_to_arrays('train')
X_test, y_test = _split_to_arrays('test')

# Split the training data (train:validation = 5:1); the test set is the one
# shipped with the EMNIST dataset.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=1/6, random_state=42)

# Build the batched input pipelines.
# - Shuffle BEFORE map so the 1000-element shuffle buffer holds raw samples
#   rather than resized 224x224x3 float images.
# - Run preprocess in parallel and prefetch batches so input preparation
#   overlaps with training/evaluation steps.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(1000)
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
valid_dataset = (
    tf.data.Dataset.from_tensor_slices((X_valid, y_valid))
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

Downloading and preparing dataset 535.73 MiB (download: 535.73 MiB, generated: 56.63 MiB, total: 592.36 MiB) to /root/tensorflow_datasets/emnist/balanced/3.1.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/2 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/112800 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/emnist/balanced/incomplete.YZLD2N_3.1.0/emnist-train.array_record*...:   0…

Generating test examples...:   0%|          | 0/18800 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/emnist/balanced/incomplete.YZLD2N_3.1.0/emnist-test.array_record*...:   0%…

Dataset emnist downloaded and prepared to /root/tensorflow_datasets/emnist/balanced/3.1.0. Subsequent calls will reuse this data.


In [None]:
# Chollet, F. (2017). Xception: Deep Learning with Depthwise Separable Convolutions. arXiv preprint arXiv:1610.02357.
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

# Xception model definition
def create_Xception_model(input_shape, num_classes):
    """Build an Xception-based classifier.

    An ImageNet-initialised Xception without its top layers is followed by
    global average pooling, a 1024-unit ReLU layer and a softmax output layer.

    Args:
        input_shape: Input shape passed to `Xception`.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras `Model` from the Xception input to the softmax output.
    """
    # NOTE(review): the backbone is left trainable here, and the notebook feeds
    # it images scaled to [0, 1] - confirm both match the intended use of the
    # ImageNet weights.
    backbone = Xception(include_top=False, weights='imagenet', input_shape=input_shape)

    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(1024, activation='relu')(pooled)
    class_probs = Dense(num_classes, activation='softmax')(hidden)

    return Model(inputs=backbone.input, outputs=class_probs)

# Instantiate the model: 224x224 RGB inputs, one output unit per entry in
# label_mapping, then print the layer summary.
input_shape = (224, 224, 3)
num_classes = len(label_mapping)
model = create_Xception_model(input_shape=input_shape, num_classes=num_classes)
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [None]:
# Compile the model (one-hot labels -> categorical cross-entropy)
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Persist progress while training: each epoch takes a long time, so an
# interrupted session would otherwise lose both the weights and the metrics.
# NOTE(review): on a hosted runtime, point these paths at persistent storage
# (e.g. a mounted drive) so the files outlive the session.
training_callbacks = [
    # Keep the model with the lowest validation loss seen so far.
    tf.keras.callbacks.ModelCheckpoint(
        'xception_emnist_best.keras', monitor='val_loss', save_best_only=True
    ),
    # Write per-epoch metrics to disk as they are produced.
    tf.keras.callbacks.CSVLogger('xception_emnist_history.csv'),
]

# Training start time
start_time = time.time()

# Train the model
history = model.fit(
    train_dataset,
    epochs=20,
    validation_data=valid_dataset,
    callbacks=training_callbacks,
)

# Elapsed training time in seconds
training_time = time.time() - start_time


Epoch 1/20
[1m2938/2938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1603s[0m 523ms/step - accuracy: 0.7489 - loss: 0.8229 - val_accuracy: 0.8620 - val_loss: 0.4038
Epoch 2/20
[1m2938/2938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1513s[0m 515ms/step - accuracy: 0.8719 - loss: 0.3583 - val_accuracy: 0.8787 - val_loss: 0.3476
Epoch 3/20
[1m2938/2938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1524s[0m 515ms/step - accuracy: 0.8842 - loss: 0.3132 - val_accuracy: 0.8852 - val_loss: 0.3164
Epoch 4/20
[1m2938/2938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1513s[0m 515ms/step - accuracy: 0.8928 - loss: 0.2878 - val_accuracy: 0.8790 - val_loss: 0.3360
Epoch 5/20
[1m2938/2938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1517s[0m 513ms/step - accuracy: 0.8991 - loss: 0.2642 - val_accuracy: 0.8956 - val_loss: 0.2919
Epoch 6/20
[1m2938/2938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1508s[0m 513ms/step - accuracy: 0.9050 - loss: 0.2469 - val_accuracy: 0.8939 - val



*   Colab GPU 사용량 제한으로 20 epoch을 모두 마치지 못하고 학습 중단 (epoch당 약 25분 소요)

*   검증 정확도가 약 0.89로 성능은 우수한 것으로 보이나, 학습 시간이 너무 오래 걸려 최종 모델 후보에서 제외



In [None]:
# Visualise the training curves: accuracy on the left panel, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

panels = (
    (acc_ax, 'accuracy', 'val_accuracy', 'train_accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'val_loss', 'train_loss', 'Loss'),
)
for ax, train_key, val_key, train_label, y_label in panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_key)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_label)
    ax.legend()

plt.show()

In [None]:
# Evaluate on the test set, timing the whole evaluation pass.
start_time = time.time()  # inference start
test_loss, test_accuracy = model.evaluate(test_dataset)
end_time = time.time()  # inference end

# Average per-sample time = total evaluation time / number of test samples.
num_test_samples = len(X_test)
total_inference_time = end_time - start_time
average_inference_time = total_inference_time / num_test_samples

# Report test metrics, training duration (minutes) and average inference time.
print(f"Test Accuracy: {test_accuracy:.4f}, Test Loss: {test_loss:.4f}")
print(f"훈련시간 : {training_time/60:.2f} minutes.")
print(f"평균 추론 시간: {average_inference_time:.4f} seconds.")