In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

# MINIST 데이터를 가지고 모델 만들기

In [2]:
mnist = keras.datasets.mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# 픽셀은 0~255값을 가지므로 정규화를 시켜준다.
X_train, X_test = X_train/255, X_test/255

X_train.shape, X_test.shape

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


((60000, 28, 28), (10000, 28, 28))

In [3]:
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

X_train.shape, X_test.shape

((60000, 28, 28, 1), (10000, 28, 28, 1))

#### np.newaxis
차원을 늘려준다.    

https://azanewta.tistory.com/3

### Sequential Model
sequential model을 구성해보자.

In [4]:
??keras.layer.Conv2D

Object `keras.layer.Conv2D` not found.


In [5]:
model = keras.models.Sequential([
    keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)), # 32 채널, 커널크기 3
    keras.layers.Conv2D(64, 3, activation='relu'),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
])

#### Conv2D
https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D
#### Flatten
https://www.tensorflow.org/api_docs/python/tf/keras/layers/Flatten

In [6]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5)
model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 2s - loss: 0.0730 - accuracy: 0.9827


[0.07301543653011322, 0.982699990272522]

In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
flatten (Flatten)            (None, 36864)             0         
_________________________________________________________________
dense (Dense)                (None, 128)               4718720   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________


https://www.tensorflow.org/guide/keras/sequential_model?hl=ko#sequential_%EB%AA%A8%EB%8D%B8%EC%9D%84_%EC%82%AC%EC%9A%A9%ED%95%98%EB%8A%94_%EA%B2%BD%EC%9A%B0     

sequential 모델의 입력 형상을 알고 있는 경우 input_shape를 지정해주는 것이 좋다.

### Functional

In [8]:
inputs = keras.Input(shape=(28,28,1))
x = keras.layers.Conv2D(32,3,activation='relu')(inputs)
x = keras.layers.Conv2D(64,3,activation='relu')(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(128, activation='relu')(x)
outputs = keras.layers.Dense(10, activation='relu')(x)
model = keras.Model(inputs=inputs, outputs=outputs)

In [9]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
flatten_1 (Flatten)          (None, 36864)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               4718720   
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
___________________________________________________

In [10]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5)
model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 2.3026 - accuracy: 0.0980


[2.30259108543396, 0.09799999743700027]

### Subclassing

In [11]:
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = keras.layers.Conv2D(32,3,activation='relu')
        self.conv2 = keras.layers.Conv2D(64,3,activation='relu')
        self.flatten = keras.layers.Flatten()
        self.dense1 = keras.layers.Dense(128, activation='relu')
        self.dense2 = keras.layers.Dense(10, activation='relu')
        
    def call(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        return x

In [12]:
model = MyModel()

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5)
model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 2.3026 - accuracy: 0.0980


[2.30259108543396, 0.09799999743700027]

# CIFAR-100 으로 모델 만들기

In [13]:
cifar100 = keras.datasets.cifar100

(X_train, y_train), (X_test, y_test) = cifar100.load_data()
X_train, X_test = X_train/255.0, X_test/255.0
X_train.shape, X_test.shape

((50000, 32, 32, 3), (10000, 32, 32, 3))

### Sequential Model
sequential model을 구성해보자.

In [14]:
model = keras.models.Sequential([
    keras.layers.Conv2D(16, 3, activation='relu'),
    keras.layers.MaxPool2D(pool_size=(2,2)),
    keras.layers.Conv2D(32, 3, activation='relu'),
    keras.layers.MaxPool2D(pool_size=(2,2)),
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(100, activation='softmax')
])

In [15]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5)
model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 2.5785 - accuracy: 0.3554


[2.5784811973571777, 0.3553999960422516]

### Functional

In [21]:
inputs = keras.Input(shape=(32,32,3))
x = keras.layers.Conv2D(16, 3, activation='relu')(inputs)
x = keras.layers.MaxPool2D(pool_size=(2,2))(x)
x = keras.layers.Conv2D(32, 3, activation='relu')(x)
x = keras.layers.MaxPool2D(pool_size=(2,2))(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(256, activation='relu')(x)
outputs = keras.layers.Dense(100, activation='softmax')(x)
model = keras.Model(inputs = inputs, outputs = outputs)

In [22]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5)
model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 2.5459 - accuracy: 0.3658


[2.545853614807129, 0.36579999327659607]

### Subclassing

In [23]:
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = keras.layers.Conv2D(16, 3, activation='relu')
        self.maxpool1 = keras.layers.MaxPool2D((2,2))
        self.conv2 = keras.layers.Conv2D(32, 3, activation='relu')
        self.maxpool2 = keras.layers.MaxPool2D((2,2))
        self.flatten = keras.layers.Flatten()
        self.dense1 = keras.layers.Dense(256, activation='relu')
        self.dense2 = keras.layers.Dense(100, activation='softmax')
    
    def call(self, x):
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.maxpool2(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        return x

In [24]:
model = MyModel()

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5)
model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 2.6542 - accuracy: 0.3418


[2.6542084217071533, 0.3418000042438507]

## GradientTape
자동 미분을 하는 기능    
동적으로 Gradient 값들을 확인해 볼 수 있다.    
https://teddylee777.github.io/tensorflow/gradient-tape

In [31]:
# loss function 정의
loss_func = tf.keras.losses.SparseCategoricalCrossentropy()
# optimizer 정의
optimizer = tf.keras.optimizers.Adam()

def train_step(features, labels):
    with tf.GradientTape() as tape:
        # 1. 예측
        predictions = model(features)
        # 2. loss 계산
        loss = loss_func(labels, predictions)
        # 3. gradient 계산
        gradients = tape.gradient(loss, model.trainable_variables)
    
    # 4. backpropagation = weight 업데이트
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss    

In [32]:
import time

def train_model(batch_size=32):
    start = time.time()
    for epoch in range(5):
        x_batch=[]
        y_batch=[]
        for step, (x,y) in enumerate(zip(X_train, y_train)):
            if step % batch_size == batch_size-1:
                x_batch.append(x)
                y_batch.append(y)
                loss = train_step(np.array(x_batch, dtype=np.float32), np.array(y_batch, dtype=np.float32))
                x_batch = []
                y_batch = []
        print(f'epoch:{epoch}, last batch loss:{loss:.4f}')
    print(f'end time:{time.time()-start}')

train_model()

epoch:0, last batch loss:4.5271
epoch:1, last batch loss:1.6532
epoch:2, last batch loss:0.0560
epoch:3, last batch loss:1.2766
epoch:4, last batch loss:2.3966
end time:89.20607662200928


In [34]:
predict = model.predict(X_test, batch_size=X_test.shape[0], verbose=1)
sum(np.squeeze(y_test) == np.argmax(predict, axis=1))/len(y_test)



0.1774

#### np.squeeze??
차원 축소하는 함수
np.squeeze(배열, 축) : 지정된 축의 차원을 축소하는데 축을 입력하지 않으면 1차원으로 축소됨.