In [16]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [20]:
# 데이터 구성
mnist = keras.datasets.mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train , X_test = X_train/255.0, X_test/255.0

X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

print(len(X_train), len(X_test))

60000 10000


## (1) Sequential API 활용

In [3]:
# 모델 구성
model = keras.models.Sequential([
    keras.layers.Conv2D(32, 3, activation='relu'),
    keras.layers.Conv2D(64, 3, activation='relu'),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])

In [4]:
# 모델 학습 설정
model.compile(optimizer='adam',
             loss = 'sparse_categorical_crossentropy',
             metrics = ['accuracy'])

model.fit(X_train, y_train, epochs=5)
model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 2s - loss: 0.0538 - accuracy: 0.9887


[0.05379066616296768, 0.9886999726295471]

### CIFAR-100

In [23]:
cifar100 = keras.datasets.cifar100

(X_train, y_train), (X_test, y_test) = cifar100.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0
print(len(X_train), len(X_test))

50000 10000


In [24]:
model = keras.Sequential([
    keras.layers.Conv2D(16, 3, activation='relu'),
    keras.layers.MaxPool2D((2,2)),
    keras.layers.Conv2D(32, 3, activation='relu'),
    keras.layers.MaxPool2D((2,2)),
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(100, activation='softmax')
])

In [25]:
model.compile(optimizer='adam',
             loss = 'sparse_categorical_crossentropy',
             metrics = ['accuracy'])

model.fit(X_train, y_train, epochs=5)
model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 2.5743 - accuracy: 0.3623


[2.5743465423583984, 0.36230000853538513]

## Functional API 활용

In [7]:
# 모델 구성
inputs = keras.Input(shape=(28,28,1))

x = keras.layers.Conv2D(32, 3, activation='relu')(inputs)
x = keras.layers.Conv2D(64, 3, activation='relu')(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(128, activation='relu')(x)
predictions = keras.layers.Dense(10, activation='softmax')(x)

model = keras.Model(inputs = inputs, outputs = predictions)

In [8]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=5)

model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 0.0453 - accuracy: 0.9875


[0.04528305307030678, 0.987500011920929]

### CIFAR-100

In [26]:
cifar100 = keras.datasets.cifar100

(X_train, y_train), (X_test, y_test) = cifar100.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0
print(len(X_train), len(X_test))

50000 10000


In [27]:
inputs = keras.Input(shape=(32,32,3))

x = keras.layers.Conv2D(16, 3, activation='relu')(inputs)
x = keras.layers.MaxPool2D((2,2))(x)
x = keras.layers.Conv2D(32, 3, activation='relu')(x)
x = keras.layers.MaxPool2D((2,2))(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(256, activation='relu')(x)
predictions = keras.layers.Dense(100, activation='softmax')(x)

model = keras.Model(inputs = inputs, outputs = predictions)

In [28]:
model.compile(optimizer='adam',
             loss = 'sparse_categorical_crossentropy',
             metrics = ['accuracy'])

model.fit(X_train, y_train, epochs=5)
model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 2.5804 - accuracy: 0.3591


[2.5803825855255127, 0.35910001397132874]

## Subclassing 활용

In [9]:
class CustomModel(keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = keras.layers.Conv2D(32, 3, activation='relu')
        self.conv2 = keras.layers.Conv2D(64, 3, activation='relu')
        self.flatten = keras.layers.Flatten()
        self.fc1 = keras.layers.Dense(128, activation='relu')
        self.fc2 = keras.layers.Dense(10, activation='softmax')
        
    
    def call(self,x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        
        return x
    
model = CustomModel()

In [11]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=5)

model.evaluate(X_test,  y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 0.0406 - accuracy: 0.9897


[0.04060938581824303, 0.9897000193595886]

### CIFAR-100

In [29]:
cifar100 = keras.datasets.cifar100

(X_train, y_train), (X_test, y_test) = cifar100.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0
print(len(X_train), len(X_test))

50000 10000


In [30]:
class CustomModel(keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = keras.layers.Conv2D(16, 3, activation='relu')
        self.pool = keras.layers.MaxPool2D((2,2))
        self.conv2 = keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = keras.layers.Flatten()
        self.fc1 = keras.layers.Dense(256, activation='relu')
        self.fc2 = keras.layers.Dense(100, activation='softmax')
        
    
    def call(self,x):
        x = self.conv1(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        
        return x
    
model = CustomModel()

In [31]:
model.compile(optimizer='adam',
             loss = 'sparse_categorical_crossentropy',
             metrics = ['accuracy'])

model.fit(X_train, y_train, epochs=5)
model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 2.5902 - accuracy: 0.3647


[2.59017276763916, 0.36469998955726624]

# Gradient Tape

- Forward pass로 진행된 모든 연산의 중간 레이어값을 tape에 기록하고, 이를 이용해 gradient를 계산한 후 tape를 폐기하는 기능을 수행

In [33]:
import tensorflow as tf
from tensorflow import keras

#데이터 구성부분
cifar100 = keras.datasets.cifar100

(X_train, y_train), (X_test, y_test) = cifar100.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0

#모델 구성부분
class CustomModel(keras.Model):
    def __init__(self):
        
        super().__init__()
        self.conv1 = keras.layers.Conv2D(16, 3, activation='relu')
        self.pool = keras.layers.MaxPool2D((2,2))
        self.conv2 = keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = keras.layers.Flatten()
        self.fc1 = keras.layers.Dense(256, activation='relu')
        self.fc2 = keras.layers.Dense(100, activation='softmax')
        
        
        
    def call(self, x):
        
        x = self.conv1(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        
        return x
    
model = CustomModel()

In [34]:
model.compile(optimizer = 'adam',
             loss = 'sparse_categorical_crossentropy',
             metrics = ['accuracy'])

- 위와 같이 loss, optimizer를 지정해주면 내부적으로 매 스텝이 진행될 때마다 model.compile() 안에서 자동으로 학습이 진행된다.

In [35]:
loss_func = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

#tf.GradientTape() 를 활용한 train step
def train_step(features, labels):
    with tf.GradientTape() as tape:
        predictions = model(features)
        loss = loss_func(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

In [36]:
model.fit(X_train, y_train, epochs=5, batch_size=32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f06261f06d0>

In [38]:
import time
def train_model(batch_size=32):
    start = time.time()
    for epoch in range(5):
        X_batch = []
        y_batch = []
        for step, (x, y) in enumerate(zip(X_train, y_train)):
            if step % batch_size == batch_size-1:
                X_batch.append(x)
                y_batch.append(y)
                loss = train_step(np.array(X_batch, dtype=np.float32), np.array(y_batch, dtype=np.float32))
                X_batch = []
                y_batch = []
        print('Epoch %d: last batch loss = %.4f' % (epoch, float(loss)))
    print("It took {} seconds".format(time.time() - start))

train_model()

Epoch 0: last batch loss = 5.6303
Epoch 1: last batch loss = 4.3588
Epoch 2: last batch loss = 0.0431
Epoch 3: last batch loss = 0.1761
Epoch 4: last batch loss = 2.5353
It took 113.75798654556274 seconds


tf.GradientTape()를 활용하여 model.compile(), model.fit() 안에 있던 한 스텝의 학습 단계를 끄집어내서 자유롭게 재구성할 수 있다.

In [39]:
prediction = model.predict(X_test, batch_size=X_test.shape[0], verbose=1)
temp = sum(np.squeeze(y_test) == np.argmax(prediction, axis=1))
temp/len(y_test)  # Accuracy



0.1848