In [1]:
import numpy as np
import tensorflow as tf

from tensorflow.python.client import device_lib
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam

In [2]:
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 10595882542913383917
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 5365813238
locality {
  bus_id: 1
  links {
  }
}
incarnation: 4966935548514944597
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3060 Ti, pci bus id: 0000:01:00.0, compute capability: 8.6"
]


In [3]:
print("num gpus available ", len(tf.config.experimental.list_physical_devices('GPU')))
print(tf.test.is_built_with_cuda())
print(tf.version.VERSION)
print(tf.test.gpu_device_name())

num gpus available  1
True
2.6.0
/device:GPU:0


In [6]:

# 데이터 불러오기
# 넘파이 데이터를 텐서 데이터로 변환
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 28, 28, 1)
x_test = x_test.reshape(10000, 28, 28, 1)
x_train = x_train.astype(np.float32)/255.0
x_test = x_test.astype(np.float32)/255.0
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

n_input = 784
n_hidden1 = 1024
n_hidden2 = 512
n_hidden3 = 512
n_hidden4 = 512
n_output = 10


# 레이어 설계
cnn = Sequential()
cnn.add(Conv2D(6, (5, 5), padding='same', activation='relu', input_shape=(28, 28, 1)))
cnn.add(MaxPooling2D(pool_size=(2, 2)))
cnn.add(Conv2D(16, (5, 5), padding='same', activation='relu'))
cnn.add(MaxPooling2D(pool_size=(2, 2)))
cnn.add(Conv2D(120, (5, 5), padding='same', activation='relu'))
cnn.add(Flatten())
cnn.add(Dense(84, activation='relu'))
cnn.add(Dense(10, activation='softmax'))

# 모델 컴파일
cnn.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

print("CPU를 사용한 학습")
with tf.device("/device:CPU:0"):
    cnn.fit(x_train, y_train, batch_size=128, epochs=5, validation_data=(x_test, y_test), verbose=2)

print("GPU를 사용한 학습")
with tf.device("/device:GPU:0"):
    cnn.fit(x_train, y_train, batch_size=128, epochs=5, validation_data=(x_test, y_test), verbose=2)

CPU를 사용한 학습
Epoch 1/5
469/469 - 12s - loss: 0.1943 - accuracy: 0.9397 - val_loss: 0.0626 - val_accuracy: 0.9809
Epoch 2/5
469/469 - 12s - loss: 0.0556 - accuracy: 0.9830 - val_loss: 0.0410 - val_accuracy: 0.9873
Epoch 3/5
469/469 - 12s - loss: 0.0386 - accuracy: 0.9877 - val_loss: 0.0374 - val_accuracy: 0.9885
Epoch 4/5
469/469 - 12s - loss: 0.0294 - accuracy: 0.9908 - val_loss: 0.0330 - val_accuracy: 0.9894
Epoch 5/5
469/469 - 12s - loss: 0.0219 - accuracy: 0.9930 - val_loss: 0.0265 - val_accuracy: 0.9916
GPU를 사용한 학습
Epoch 1/5
469/469 - 12s - loss: 0.0186 - accuracy: 0.9941 - val_loss: 0.0261 - val_accuracy: 0.9920
Epoch 2/5
469/469 - 2s - loss: 0.0154 - accuracy: 0.9949 - val_loss: 0.0276 - val_accuracy: 0.9917
Epoch 3/5
469/469 - 2s - loss: 0.0130 - accuracy: 0.9955 - val_loss: 0.0322 - val_accuracy: 0.9922
Epoch 4/5
469/469 - 2s - loss: 0.0111 - accuracy: 0.9963 - val_loss: 0.0278 - val_accuracy: 0.9904
Epoch 5/5
469/469 - 2s - loss: 0.0091 - accuracy: 0.9972 - val_loss: 0.0278 - v