<a href="https://colab.research.google.com/github/guscldns/TestProject/blob/main/0711/11_ensemble_basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50

In [None]:
from tensorflow.keras import datasets, layers, models

from tensorflow.keras.layers import Dense, Flatten, MaxPooling2D
from tensorflow.keras import Input
from tensorflow.keras.layers import Dropout, BatchNormalization

import matplotlib.pyplot as plt

In [None]:
# Download the CIFAR-10 dataset bundled with Keras and unpack it into
# separate image/label arrays for the train and test splits.
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Inspect the array shapes of images and labels for both splits.
train_images.shape, train_labels.shape, test_images.shape, test_labels.shape

((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))

In [None]:
# Human-readable label names.
# NOTE(review): assumed to be listed in CIFAR-10 class-id order
# (index 0 = airplane ... index 9 = truck) — confirm against the dataset docs.
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

## 전이학습 (transfer learning)

### resnet50_ver1

In [None]:
# First backbone: ResNet50 without its fully-connected classifier head
# (include_top=False), initialised with ImageNet weights and configured for
# 32x32 RGB inputs.
resnet_v1 = ResNet50(include_top=False, input_shape = (32,32 ,3), weights = 'imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Shape of the feature map the backbone emits for a 32x32x3 input.
resnet_v1.output_shape

(None, 1, 1, 2048)

1) include_top = False: 분류기(완전연결계층)를 제외하고 합성곱 기반(backbone)만 불러온다  
2) weights = 'imagenet': ImageNet으로 사전학습된 weight를 사용한다  
3) input_shape = (32, 32, 3): Input 사이즈를 CIFAR-10 이미지 크기에 맞게 변경한다

In [None]:
# Freeze the backbone so its pretrained weights are not updated during fit().
resnet_v1.trainable = False

In [None]:
# The models below use 'categorical_crossentropy', which expects one-hot
# targets; 'sparse_categorical_crossentropy' would take the integer labels
# as they are. Encode both splits into 10-column one-hot arrays.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, 10)
    for labels in (train_labels, test_labels)
)
(y_train.shape, y_test.shape)

((50000, 10), (10000, 10))

In [None]:
# One-hot encoded label of the first training sample.
y_train[0]

array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.], dtype=float32)

In [None]:
# Build the first classifier with the functional API (no models.Sequential()).
# NOTE(review): images reach the backbone as raw pixel values; the ImageNet
# ResNet50 weights are normally paired with
# tf.keras.applications.resnet50.preprocess_input — consider adding it.
inputs = Input(shape=(32, 32, 3))

# training=False runs the frozen backbone in inference mode.
backbone_features = resnet_v1(inputs, training=False)
flat_features = Flatten(input_shape=resnet_v1.output_shape[1:])(backbone_features)

# Small trainable head: 256-unit hidden layer, dropout, 10-way softmax.
hidden = Dense(256, activation='relu')(flat_features)
hidden = Dropout(0.5)(hidden)
outputs = Dense(10, activation='softmax')(hidden)

model_v1 = models.Model(inputs, outputs)

In [None]:
# Compile with categorical cross-entropy (targets are one-hot) and Adam.
model_v1.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Fit for one epoch; the test split doubles as validation data here.
model_v1.fit(
    x=train_images,
    y=y_train,
    batch_size=128,
    epochs=1,
    validation_data=(test_images, y_test),
)



<keras.callbacks.History at 0x7fb0ae328b20>

In [None]:
# Softmax class probabilities from model_v1 for every test image.
pred_v1 = model_v1.predict(test_images)



In [None]:
# One row per test image, one column per class.
pred_v1.shape

(10000, 10)

In [None]:
# Predicted class probabilities for the first test image.
pred_v1[0]

array([0.00859409, 0.00521369, 0.10049511, 0.61686987, 0.03776868,
       0.17377445, 0.04552881, 0.00857076, 0.00140559, 0.00177885],
      dtype=float32)

### resnet_v2

In [None]:
# Second backbone: a separate ResNet50 instance created with the same
# arguments as resnet_v1 (no classifier head, ImageNet weights, 32x32 RGB input).
resnet_v2 = ResNet50(include_top=False, input_shape = (32,32 ,3), weights = 'imagenet')

In [None]:
# Shape of the feature map the second backbone emits for a 32x32x3 input.
resnet_v2.output_shape

(None, 1, 1, 2048)

1) include_top = False: 분류기(완전연결계층)를 제외하고 합성곱 기반(backbone)만 불러온다  
2) weights = 'imagenet': ImageNet으로 사전학습된 weight를 사용한다  
3) input_shape = (32, 32, 3): Input 사이즈를 CIFAR-10 이미지 크기에 맞게 변경한다

In [None]:
# Freeze the second backbone so its pretrained weights are not updated during fit().
resnet_v2.trainable = False

In [None]:
# Rebuild the one-hot targets required by 'categorical_crossentropy'
# (with 'sparse_categorical_crossentropy' the integer labels could be used
# directly). Produces the same arrays as the earlier encoding cell.
y_train = tf.keras.utils.to_categorical(train_labels, num_classes=10)
y_test = tf.keras.utils.to_categorical(test_labels, num_classes=10)
(y_train.shape, y_test.shape)

((50000, 10), (10000, 10))

In [None]:
# One-hot encoded label of the first training sample.
y_train[0]

array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.], dtype=float32)

In [None]:
# Build the second classifier with the functional API (no models.Sequential()).
# NOTE(review): images reach the backbone as raw pixel values; the ImageNet
# ResNet50 weights are normally paired with
# tf.keras.applications.resnet50.preprocess_input — consider adding it.
inputs = tf.keras.Input(shape=(32, 32, 3))

# Fix: use this section's own backbone (resnet_v2). The cell previously
# called resnet_v1 here, so resnet_v2 was created and frozen but never used.
# training=False runs the frozen backbone in inference mode.
x = resnet_v2(inputs, training=False)
x = tf.keras.layers.Flatten(input_shape=resnet_v2.output_shape[1:])(x)
# Trainable head: 1024-unit hidden layer, dropout, 10-way softmax.
x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dropout(0.5)(x)
outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

model_v2 = tf.keras.Model(inputs, outputs)

In [None]:
# Compile with categorical cross-entropy (targets are one-hot) and Adam.
model_v2.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Fit for one epoch; the test split doubles as validation data here.
model_v2.fit(
    x=train_images,
    y=y_train,
    batch_size=128,
    epochs=1,
    validation_data=(test_images, y_test),
)



<keras.callbacks.History at 0x7fb0aeb94df0>

In [None]:
# Softmax class probabilities from model_v2 for every test image.
pred_v2 = model_v2.predict(test_images)



In [None]:
# Inspect model_v2's predicted probabilities (NumPy abbreviates the display).
pred_v2

array([[4.82589426e-03, 1.18165165e-02, 2.61181947e-02, ...,
        1.18371984e-02, 6.11204188e-03, 2.87322956e-03],
       [6.68133944e-02, 4.74359304e-01, 2.31746471e-05, ...,
        2.77411546e-05, 4.31680530e-01, 2.69427504e-02],
       [9.06165410e-03, 9.80799645e-03, 4.13161906e-05, ...,
        2.27158249e-04, 9.56489205e-01, 2.23382320e-02],
       ...,
       [7.47342361e-04, 5.42136899e-04, 2.79386248e-02, ...,
        2.25149896e-02, 8.95141638e-05, 4.29548323e-04],
       [2.49304444e-01, 1.80455655e-01, 9.79267135e-02, ...,
        1.04320630e-01, 2.91921236e-02, 6.34766966e-02],
       [7.55169021e-05, 1.55142698e-04, 1.80253817e-03, ...,
        9.42924917e-01, 1.08129439e-04, 2.82261812e-04]], dtype=float32)

In [None]:
# model_v1's probabilities for the first test image, for comparison with model_v2.
pred_v1[0]

array([0.00859409, 0.00521369, 0.10049511, 0.61686987, 0.03776868,
       0.17377445, 0.04552881, 0.00857076, 0.00140559, 0.00177885],
      dtype=float32)

In [None]:
# model_v2's probabilities for the same first test image.
pred_v2[0]

array([0.00482589, 0.01181652, 0.02611819, 0.6104309 , 0.02315335,
       0.27416113, 0.02867152, 0.0118372 , 0.00611204, 0.00287323],
      dtype=float32)

### 앙상블

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

In [None]:
# Soft-voting ensemble: stack both models' probability arrays and average
# them element-wise across the model axis.
pred_ensemble = np.stack((pred_v1, pred_v2)).mean(axis=0)
pred_ensemble

array([[5.14482241e-03, 1.28384186e-02, 5.39679006e-02, ...,
        1.25428420e-02, 5.52075636e-03, 2.18478404e-03],
       [5.32471575e-02, 5.49458742e-01, 4.16760595e-05, ...,
        3.11002441e-05, 3.31701517e-01, 6.51690662e-02],
       [2.27380320e-02, 1.18470499e-02, 8.82680179e-05, ...,
        2.46991927e-04, 9.20271397e-01, 4.31859568e-02],
       ...,
       [1.06554355e-04, 9.13288735e-04, 5.97864017e-02, ...,
        2.19920874e-02, 1.30776491e-04, 4.93414700e-04],
       [7.14453608e-02, 9.52197686e-02, 1.31609440e-01, ...,
        2.16381013e-01, 3.51665355e-02, 5.49287498e-02],
       [1.69369712e-04, 8.18765402e-05, 3.23846028e-03, ...,
        9.17732120e-01, 1.15710545e-05, 9.93263966e-05]], dtype=float32)

In [None]:
# Predicted class id per test image: argmax over the averaged probabilities.
# The average is recomputed from pred_v1/pred_v2 so this cell can be re-run
# safely; applying argmax(axis=1) to the already-reduced 1-D `pred_ensemble`
# (as the cell did before) fails on a second execution.
pred_ensemble = np.argmax(np.mean([pred_v1, pred_v2], axis=0), axis=1)

In [None]:
# Integer class ids for the test set, taken from the original (10000, 1)
# label array. This equals the argmax of the one-hot `y_test`, but is safe to
# re-run: `np.argmax(y_test, axis=1)` failed on a second execution because
# `y_test` had already been reduced to 1-D.
# NOTE: `y_test` no longer holds one-hot targets after this cell; re-run the
# one-hot encoding cell before calling fit() again.
y_test = test_labels.reshape(-1).astype(np.int64)

In [None]:
# Sanity check: true labels and ensemble predictions should have matching 1-D shapes.
y_test.shape, pred_ensemble.shape

((10000,), (10000,))

In [None]:
# Accuracy of the averaged (ensemble) prediction. Arguments follow
# scikit-learn's (y_true, y_pred) order; accuracy is symmetric, so the value
# is unchanged.
accuracy_score(y_test, pred_ensemble)

0.5997

In [None]:
# Compare against each single model. An ensemble usually scores at least as
# well as its members, although that is not guaranteed.
# Arguments follow scikit-learn's (y_true, y_pred) order.
accuracy_score(y_test, np.argmax(pred_v1, axis=1))

0.5663

In [None]:
# Accuracy of model_v2 alone, in scikit-learn's (y_true, y_pred) argument order.
accuracy_score(y_test, np.argmax(pred_v2, axis=1))

0.5942