# Tensorflow datasets 실습
### 모델
- Sequential API
- Functional API

### 학습
- train_on_batch
- GradientTape

### 사용할 데이터
- Cifar-10

## 데이터셋 만들기

In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

In [2]:
train_datasets = tfds.load('cifar10', split='train')
test_datasets = tfds.load('cifar10', split='test')

[1mDownloading and preparing dataset cifar10/3.0.2 (download: 162.17 MiB, generated: 132.40 MiB, total: 294.58 MiB) to /root/tensorflow_datasets/cifar10/3.0.2...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]






0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/cifar10/3.0.2.incompleteN7FGLV/cifar10-train.tfrecord


  0%|          | 0/50000 [00:00<?, ? examples/s]

0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/cifar10/3.0.2.incompleteN7FGLV/cifar10-test.tfrecord


  0%|          | 0/10000 [00:00<?, ? examples/s]

[1mDataset cifar10 downloaded and prepared to /root/tensorflow_datasets/cifar10/3.0.2. Subsequent calls will reuse this data.[0m


In [3]:
train_datasets

<PrefetchDataset shapes: {id: (), image: (32, 32, 3), label: ()}, types: {id: tf.string, image: tf.uint8, label: tf.int64}>

In [4]:
for data in train_datasets.take(5):
    print(data['id'])
    print(data['image'].shape)
    print(data['label'])

tf.Tensor(b'train_16399', shape=(), dtype=string)
(32, 32, 3)
tf.Tensor(7, shape=(), dtype=int64)
tf.Tensor(b'train_01680', shape=(), dtype=string)
(32, 32, 3)
tf.Tensor(8, shape=(), dtype=int64)
tf.Tensor(b'train_47917', shape=(), dtype=string)
(32, 32, 3)
tf.Tensor(4, shape=(), dtype=int64)
tf.Tensor(b'train_17307', shape=(), dtype=string)
(32, 32, 3)
tf.Tensor(4, shape=(), dtype=int64)
tf.Tensor(b'train_27051', shape=(), dtype=string)
(32, 32, 3)
tf.Tensor(6, shape=(), dtype=int64)


In [5]:
def preprocessing(data):
    image = tf.cast(data['image'], tf.float32) / 255. 
    label = data['label']
    return image, label

In [6]:
type(train_datasets)

tensorflow.python.data.ops.dataset_ops.PrefetchDataset

In [7]:
BATCH_SIZE = 128
train_data = train_datasets.map(preprocessing).shuffle(1000).batch(BATCH_SIZE)
test_data = test_datasets.map(preprocessing).batch(BATCH_SIZE)

In [8]:
train_data

<BatchDataset shapes: ((None, 32, 32, 3), (None,)), types: (tf.float32, tf.int64)>

In [9]:
for image, label in train_data.take(1):
    print(image.shape)
    print(label.shape)

(128, 32, 32, 3)
(128,)


## Sequential

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten

In [11]:
model = Sequential([
    Conv2D(32, 3, activation='relu', input_shape=(32,32,3)),
    MaxPooling2D(2, 2),
    Conv2D(64, 3, activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax')
])

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 2304)              0         
_________________________________________________________________
dense (Dense)                (None, 32)                73760     
_________________________________________________________________
dense_1 (Dense)              (None, 10)                3

In [13]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

In [14]:
model.fit(train_data, validation_data=test_data, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fef142234d0>

## Functional API

In [15]:
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model 

In [16]:
input_ = Input(shape=(32,32,3))

In [19]:
x = Conv2D(32, 3, activation='relu')(input_)
x = MaxPooling2D(2, 2)(x)
x = Conv2D(64, 3, activation='relu')(x)
x = MaxPooling2D(2, 2)(x)
x = Flatten()(x)
x = Dense(32, activation='relu')(x)
x = Dense(10, activation='softmax')(x)

In [20]:
model = Model(input_, x)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2304)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                73760 

In [21]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

In [22]:
model.fit(train_data, validation_data=test_data, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7feef47dc510>

## train_on_batch

In [23]:
model = Sequential([
    Conv2D(32, 3, activation='relu', input_shape=(32,32,3)),
    MaxPooling2D(2, 2),
    Conv2D(64, 3, activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax')
])

In [24]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

In [25]:
EPOCHS = 10

for epoch in range(EPOCHS):
    for batch, (image, label) in train_data.enumerate():
        history = model.train_on_batch(image, label)
        print(f'epoch: {epoch+1}, batch: {batch+1}, loss: {history[0]:.4f}, acc: {history[1]:.4f}')

epoch: 1, batch: 1, loss: 2.2868, acc: 0.0859
epoch: 1, batch: 2, loss: 2.3347, acc: 0.0625
epoch: 1, batch: 3, loss: 2.2812, acc: 0.1719
epoch: 1, batch: 4, loss: 2.3091, acc: 0.0781
epoch: 1, batch: 5, loss: 2.2827, acc: 0.1719
epoch: 1, batch: 6, loss: 2.2908, acc: 0.1406
epoch: 1, batch: 7, loss: 2.2877, acc: 0.1250
epoch: 1, batch: 8, loss: 2.3117, acc: 0.1016
epoch: 1, batch: 9, loss: 2.2871, acc: 0.0938
epoch: 1, batch: 10, loss: 2.2826, acc: 0.1328
epoch: 1, batch: 11, loss: 2.2839, acc: 0.1328
epoch: 1, batch: 12, loss: 2.2800, acc: 0.1094
epoch: 1, batch: 13, loss: 2.2665, acc: 0.1484
epoch: 1, batch: 14, loss: 2.2645, acc: 0.1250
epoch: 1, batch: 15, loss: 2.2603, acc: 0.2188
epoch: 1, batch: 16, loss: 2.2760, acc: 0.1406
epoch: 1, batch: 17, loss: 2.2783, acc: 0.1172
epoch: 1, batch: 18, loss: 2.2695, acc: 0.1484
epoch: 1, batch: 19, loss: 2.2497, acc: 0.2109
epoch: 1, batch: 20, loss: 2.2462, acc: 0.1562
epoch: 1, batch: 21, loss: 2.2244, acc: 0.1875
epoch: 1, batch: 22, l

## GradientTape

In [26]:
model = Sequential([
    Conv2D(32, 3, activation='relu', input_shape=(32,32,3)),
    MaxPooling2D(2, 2),
    Conv2D(64, 3, activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax')
])

In [27]:
loss_function = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

In [28]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_acc = tf.keras.metrics.SparseCategoricalAccuracy(name='train_acc')
valid_loss = tf.keras.metrics.Mean(name='valid_loss')
valid_acc = tf.keras.metrics.SparseCategoricalAccuracy(name='valid_acc')

In [29]:
@tf.function
def train_step(image, label):
    with tf.GradientTape() as tape:
        prediction = model(image, training=True)
        loss = loss_function(label, prediction)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_acc(label, prediction)

In [30]:
@tf.function
def valid_step(image, label):
    prediction = model(image, training=False)
    loss = loss_function(label, prediction)

    valid_loss(loss)
    valid_acc(label, prediction)

In [31]:
EPOCHS = 10

for epoch in range(EPOCHS):
    train_loss.reset_states()
    train_acc.reset_states()
    valid_loss.reset_states()
    valid_acc.reset_states()

    for image, label in train_data:
        train_step(image, label)
    for image, label in test_data:
        valid_step(image, label)

    print(f'epoch: {epoch+1}, loss: {train_loss.result():.4f}, acc: {train_acc.result():.4f} \
    val_loss: {valid_loss.result():.4f}, val_acc: {valid_acc.result():.4f}')


epoch: 1, loss: 1.6439, acc: 0.4044     val_loss: 1.3599, val_acc: 0.5143
epoch: 2, loss: 1.3005, acc: 0.5364     val_loss: 1.2301, val_acc: 0.5565
epoch: 3, loss: 1.1662, acc: 0.5890     val_loss: 1.1310, val_acc: 0.6038
epoch: 4, loss: 1.0792, acc: 0.6249     val_loss: 1.0725, val_acc: 0.6304
epoch: 5, loss: 1.0214, acc: 0.6460     val_loss: 1.0100, val_acc: 0.6531
epoch: 6, loss: 0.9739, acc: 0.6621     val_loss: 0.9882, val_acc: 0.6625
epoch: 7, loss: 0.9365, acc: 0.6751     val_loss: 0.9763, val_acc: 0.6625
epoch: 8, loss: 0.8985, acc: 0.6883     val_loss: 0.9419, val_acc: 0.6777
epoch: 9, loss: 0.8761, acc: 0.6971     val_loss: 0.9569, val_acc: 0.6693
epoch: 10, loss: 0.8483, acc: 0.7069     val_loss: 0.9325, val_acc: 0.6824
