# 전이학습 : 사전훈련된 신경망 모델을 가져와 해결하려는 task의 시작점으로 삼는 것
* 방대한 이미지 데이터로 사전 학습된 신경망 모델의 가중치 및 특성 맵을 가져와 문제에 적용한다.

In [14]:
# 1. Pre-trained VGG-16
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.preprocessing import image
from keras.applications import imagenet_utils
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input, decode_predictions
from keras.applications import mobilenet
from tensorflow.keras.optimizers import Adam, SGD
from keras.metrics import categorical_crossentropy
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import preprocessing
from keras.layers import Dense, Flatten, Dropout, BatchNormalization
from keras.models import Model

from sklearn.metrics import confusion_matrix
from sklearn.datasets import load_files
from keras.utils import np_utils
from tqdm import tqdm
import numpy as np
import itertools
import matplotlib.pyplot as plt

In [2]:
# 전이학습 구현
# 1) 가중치를 포함한 가져올 모델의 오픈소스 구현을 내려받아 base_model을 만든다
#    include_top=False --> 분류기 부분의 가중치는 내려받지 않는다 --> 직접 우리가 분류기 및 가중치 설정

# 2) model.summary()로 사전 훈련된 모델의 구조를 확인한다.

# 3) 사전 훈련된 층의 가중치는 고정시킨다(freeze)

# 4) 분류기 부분을 새로 구현해서 추가한다



# 사전 훈련된 VGG-16모델을 활용한 분류기 구현

# 1)
base_model = VGG16(weights='imagenet', include_top=False,
                   input_shape=(224,224,3)) # base_model에 imagenet데이터셋으로 학습된 가중치를 내려받음
                                            # include_top=False --> 분류기 부분의 가중치는 받지 않겠다!
                                            # input_shape : 우리 데이터셋의 shape 지정
# 2)                                          
print(base_model.summary())                                  

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)    

In [3]:
# 3) 
for layer in base_model.layers:
  layer.trainable = False
print(base_model.summary())

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [4]:
# 4) 
last_layer = base_model.get_layer('block5_pool')   # 신경망의 마지막 층(block5_pool)에 접근
last_output = last_layer.output   # 마지막 층의 출력을 변수로 가져옴

x = Flatten()(last_output)
x = Dense(2, activation='softmax', name='softmax')(x)  # class 2개를 분류하는, softmax 함수를 활성화함수로 갖는 층 추가

new_model = Model(inputs=base_model.input, outputs=x)
print(new_model.summary())

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

# 전이학습의 3가지 방식
## 1. 사전학습된 신경망을 분류기로 이용하기
## 2. 사전학습된 신경망을 특징 추출기로만 이용하기
## 3. 미세조정(fine-tuning)하기

In [5]:
# 1. 사전학습된 신경망을 분류기로 이용하기
# --> 사전 학습된 신경망의 가중치 freeze 및 추가학습이 필요하지 않음
# --> 비슷한 과업에 대해 사전학습된 신경망을 골라 직접 새로운 과업에 바로 투입(분류기 포함)
# --> 사전 학습 신경망에 쓰인 데이터셋과 새로운 데이터셋의 도메인이 매우 유사한 경우 사용

model_2 = VGG16(weights='imagenet', include_top=True, input_shape=(224,224,3)) # include_top=True

# 개(German_shepherd)를 사전학습된 VGG16 모델이 제대로 분류하는지 체크
image = load_img('/content/drive/MyDrive/com_vision_study/data/dog.jpg', target_size=(224,224))
image = img_to_array(image) # img_to_array : 이미지의 픽셀값을 numpy array 형태로 변환
print(image.shape)
image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
print(type(image))

image = preprocess_input(image)   # vgg16의 모듈 --> VGG모델 입력을 위해 numpy array데이터를 전처리

yhat = model_2.predict(image)
label = decode_predictions(yhat) # predict로 부터 예측 클래스 추출
label = label[0][0]  # 가장 확률이 높은 클래스값 추출

print(f'{label[1]} :  {label[2]}')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
(224, 224, 3)
<class 'numpy.ndarray'>
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
German_shepherd :  0.9971912503242493


In [6]:
# 2. 사전학습된 신경망을 특징 추출기로만 이용하기
# 실습 - VGG16을 활용해 개/고양이 분류


# 이미지를 가져와 데이터 전처리
train_path = '/content/drive/MyDrive/com_vision_study/data/dogs_vs_cats_project/data/train'
valid_path = '/content/drive/MyDrive/com_vision_study/data/dogs_vs_cats_project/data/valid'
test_path = '/content/drive/MyDrive/com_vision_study/data/dogs_vs_cats_project/data/test'

idg = ImageDataGenerator()

train_batches = idg.flow_from_directory(train_path, target_size=(224,224), batch_size=10)
valid_batches = idg.flow_from_directory(valid_path, target_size=(224,224), batch_size=10)
test_batches = idg.flow_from_directory(test_path, target_size=(224,224), batch_size=10, shuffle=False)

Found 202 images belonging to 2 classes.
Found 103 images belonging to 2 classes.
Found 451 images belonging to 2 classes.


In [7]:
kargs = {'dense_1' : 64, 'dropout_rate' : 0.5, 'dense_2' : 2}

def pretrained_vgg_16_cat_dog(**kargs):
  base_model = VGG16(weights='imagenet', include_top=False,
                     input_shape=(224,224,3))
  
  for layer in base_model.layers:
    layer.trainable = False    # 사전학습 층 고정
  
  last_layer = base_model.get_layer('block5_pool')
  last_output = last_layer.output

  x = Flatten()(last_output)  # last_output을 Dense layer에서 쓰이게끔 Flatten()
  x = Dense(kargs['dense_1'], activation='relu', name='FC_2')(x)
  x = BatchNormalization()(x)
  x = Dropout(kargs['dropout_rate'])(x)
  x = Dense(kargs['dense_2'], activation='softmax', name='softmax')(x)

  new_model = Model(inputs=base_model.input, outputs=x)
  print(new_model.summary())

  return new_model  

model_3 = pretrained_vgg_16_cat_dog(**kargs)
model_3.compile(Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['acc'])
model_3.fit_generator(train_batches, steps_per_epoch=4, validation_data=valid_batches, 
                      validation_steps=2, epochs=20, verbose=2)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

  super(Adam, self).__init__(name, **kwargs)


Epoch 1/20
4/4 - 52s - loss: 1.4145 - acc: 0.5000 - val_loss: 2.2202 - val_acc: 0.6000 - 52s/epoch - 13s/step
Epoch 2/20
4/4 - 43s - loss: 0.6692 - acc: 0.6750 - val_loss: 2.3883 - val_acc: 0.6500 - 43s/epoch - 11s/step
Epoch 3/20
4/4 - 27s - loss: 0.6179 - acc: 0.7750 - val_loss: 1.0251 - val_acc: 0.8000 - 27s/epoch - 7s/step
Epoch 4/20
4/4 - 25s - loss: 0.3398 - acc: 0.8500 - val_loss: 1.1068 - val_acc: 0.7500 - 25s/epoch - 6s/step
Epoch 5/20
4/4 - 16s - loss: 0.6212 - acc: 0.7500 - val_loss: 1.3291 - val_acc: 0.8000 - 16s/epoch - 4s/step
Epoch 6/20
4/4 - 10s - loss: 0.2466 - acc: 0.8500 - val_loss: 0.4516 - val_acc: 0.8500 - 10s/epoch - 3s/step
Epoch 7/20
4/4 - 11s - loss: 0.1773 - acc: 0.9500 - val_loss: 0.4897 - val_acc: 0.8500 - 11s/epoch - 3s/step
Epoch 8/20
4/4 - 5s - loss: 0.3404 - acc: 0.8750 - val_loss: 0.4112 - val_acc: 0.8000 - 5s/epoch - 1s/step
Epoch 9/20
4/4 - 7s - loss: 0.2564 - acc: 0.8750 - val_loss: 0.3418 - val_acc: 0.8000 - 7s/epoch - 2s/step
Epoch 10/20
4/4 - 4s 

<keras.callbacks.History at 0x7f8f66774790>

In [15]:
# 데이터셋을 가져와 이미지, 정답레이블로 분류하는 함수
def load_dataset(path):
  data = load_files(path)
  paths = np.array(data['filenames']) # 이미지 path
  targets = np_utils.to_categorical(np.array(data['target'])) # 정답값
  return paths, targets

# test_files(테스트 이미지 데이터)를 텐서 형태로 변환하는 함수
def path_to_tensor(img_path):
  img = preprocessing.image.load_img(img_path, target_size=(224,224))  # PIL.Image.Image타입으로 데이터 로드  (224,224,3)
  x = image.img_to_array(img)  # (224,224,3)
  return np.expand_dims(x, axis=0) # (1, 224,224,3)

def paths_to_tensor(img_paths):
  list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
  return np.vstack(list_of_tensors)

test_files, test_targets = load_dataset('/content/drive/MyDrive/com_vision_study/data/dogs_vs_cats_project/data/test')
test_tensors = preprocess_input(paths_to_tensor(test_files))


# test
model_3.evaluate(test_tensors, test_targets)



  0%|          | 0/451 [00:00<?, ?it/s][A[A

  7%|▋         | 30/451 [00:00<00:01, 293.24it/s][A[A

 13%|█▎        | 60/451 [00:00<00:01, 274.88it/s][A[A

 20%|█▉        | 88/451 [00:00<00:01, 240.29it/s][A[A

 25%|██▌       | 113/451 [00:00<00:01, 177.03it/s][A[A

 29%|██▉       | 133/451 [00:00<00:01, 171.79it/s][A[A

 35%|███▍      | 156/451 [00:00<00:01, 185.58it/s][A[A

 39%|███▉      | 177/451 [00:00<00:01, 191.25it/s][A[A

 45%|████▌     | 204/451 [00:00<00:01, 209.77it/s][A[A

 51%|█████     | 228/451 [00:01<00:01, 217.76it/s][A[A

 56%|█████▌    | 252/451 [00:01<00:00, 222.72it/s][A[A

 61%|██████    | 276/451 [00:01<00:00, 226.42it/s][A[A

 66%|██████▋   | 299/451 [00:01<00:00, 224.14it/s][A[A

 72%|███████▏  | 323/451 [00:01<00:00, 227.61it/s][A[A

 77%|███████▋  | 346/451 [00:01<00:00, 227.05it/s][A[A

 82%|████████▏ | 369/451 [00:01<00:00, 227.48it/s][A[A

 87%|████████▋ | 392/451 [00:01<00:00, 225.59it/s][A[A

 92%|█████████▏| 415/451 [



[0.13667601346969604, 0.9467849135398865]

In [16]:
# 3. 미세조정(fine-tuning)하기

# 실습2 - VGG16 & 수화데이터셋(정답10개 : 0~9)
# VGG16의 사전 훈련된 데이터셋과 수화데이터셋(task로 주어진 데이터셋)이 다르므로 fine_tuning 필요

train_path = '/content/drive/MyDrive/com_vision_study/data/sign_language_project/dataset/train'
valid_path = '/content/drive/MyDrive/com_vision_study/data/sign_language_project/dataset/valid'
test_path = '/content/drive/MyDrive/com_vision_study/data/sign_language_project/dataset/test'

idg_2 = ImageDataGenerator()

train_batches = idg_2.flow_from_directory(train_path, target_size=(224,224), batch_size=10)
valid_batches = idg_2.flow_from_directory(valid_path, target_size=(224,224), batch_size=30)
test_batches = idg_2.flow_from_directory(test_path, target_size=(224,224), batch_size=50, shuffle=False)

Found 1712 images belonging to 10 classes.
Found 300 images belonging to 10 classes.
Found 50 images belonging to 10 classes.


In [17]:
kargs_2 = {'dense' : 10, 'layer_name' : 'softmax'}

def fine_tuned_vgg_16(**kargs_2):
  base_model = VGG16(weights='imagenet', include_top=False,
                     input_shape=(224,224,3), pooling='avg')
  # pooling='avg' --> 마지막 합성곱층의 출력에 AveragePooling을 적용하라는 뜻

  for layer in base_model.layers[:-5]:     # layer[:-5] 전까지만 freeze 나머지는 분류기와 함께 fine_tuning
    layer.trainable=False
  
  last_output = base_model.output
  x = Dense(kargs_2['dense'], activation='softmax', name=kargs_2['layer_name'])(last_output) # 분류기
  new_model = Model(inputs=base_model.input, outputs=x)

  print(new_model.summary())
  return new_model

model_4 = fine_tuned_vgg_16(**kargs_2)
model_4.compile(Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['acc'])
es = EarlyStopping(monitor='val_acc', min_delta=0.0001, patience=4, verbose=0, restore_best_weights=True)
cp = ModelCheckpoint('pretrained_vgg16_handsign.h5', monitor='val_acc', save_best_only=True, save_weights_only=True)

model_4.fit_generator(train_batches, steps_per_epoch=18, validation_data=valid_batches,
                      validation_steps=3, epochs=20, verbose=1, callbacks=[es,cp])

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

  super(Adam, self).__init__(name, **kwargs)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20


<keras.callbacks.History at 0x7f900ea28090>

In [18]:
test_files, test_targets = load_dataset('/content/drive/MyDrive/com_vision_study/data/sign_language_project/dataset/test')
test_tensors = preprocess_input(paths_to_tensor(test_files))


model_4.load_weights('pretrained_vgg16_handsign.h5')
model_4.evaluate(test_tensors, test_targets)


  0%|          | 0/50 [00:00<?, ?it/s][A
100%|██████████| 50/50 [00:00<00:00, 318.80it/s]




[0.3023955821990967, 0.9399999976158142]