# Video Summarizer 모델 학습/평가 코드

In [8]:
import os
import math
import numpy as np

## 경로 설정

In [9]:
# Dataset root directory handed to DataLoader. The default is the original
# author's local path; set the YASUONET_DATASET_DIR environment variable to
# run the notebook on another machine without editing this cell.
dataset_dir = os.environ.get(
    'YASUONET_DATASET_DIR',
    r'E:\Work\YasuoNet\data\dataset14_sl3_vsr2_vw64_vh64_asr22050_mfcc',
)
# Checkpoint directory (relative path); it is passed to Trainer and joined
# with checkpoint file names when weights/models are saved or restored.
ckpt_dir = 'checkpoints'

## 데이터 로더 생성

In [10]:
from data_loader import DataLoader

# Build the loader that serves both the video and the audio modality.
# Variant for the basic model (no neighbouring segments):
# data_loader = DataLoader(dataset_dir, x_includes=['video', 'audio'])
# Variant for the sequence model (active):
data_loader = DataLoader(dataset_dir, x_includes=['video', 'audio'], x_expand=2) # include the 2 preceding and 2 following segments

# Pull the dataset configuration and the per-modality input shapes out of the
# loader's metadata; input_shape_dict is what the model builders consume.
data_config = data_loader.get_metadata()['config']
input_shape_dict = data_loader.get_metadata()['data_shape']

## 하이퍼파라미터 설정

In [11]:
# Training hyperparameters, read by the training and evaluation cells below.
learning_rate = 1e-4    # step size given to the Adam optimizer
epochs = 200            # number of epochs passed to trainer.train
batch_size = 256        # batch size used for both training and testing
# Per-class loss weights passed to trainer.train.
# NOTE(review): presumably ordered (class 0, class 1) — confirm against Trainer.
class_weights = (1, 1)

## 모델 생성

In [12]:
from tensorflow.keras.layers import Dense, Dropout, Conv3D, Conv2D, Input, MaxPool3D, MaxPool2D, Flatten, concatenate
from tensorflow.keras.layers import TimeDistributed, LSTM
from tensorflow.keras.backend import expand_dims
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model

def build_basic_model(input_shape_dict):
    """Build the basic two-branch (video + audio) binary classifier.

    The video input goes through one 3D convolution + max-pooling stage, the
    audio input through one 2D convolution + max-pooling stage. Both feature
    maps are flattened, concatenated and fed to a 16-unit dense layer followed
    by a single sigmoid output unit.

    Args:
        input_shape_dict: Mapping with 'video' and 'audio' entries holding the
            input shapes (without the batch axis) of the two branches.

    Returns:
        An uncompiled Keras ``Model`` taking ``[video_input, audio_input]``.
    """
    video_shape = input_shape_dict['video']
    audio_shape = input_shape_dict['audio']
    weight_decay = 0.005

    def trainable_opts(activation='relu'):
        # Keyword arguments shared by every layer that owns a kernel. A new
        # L2 regularizer object is created on each call, i.e. one per layer.
        return dict(
            activation=activation,
            kernel_initializer='he_uniform',
            kernel_regularizer=l2(weight_decay),
        )

    # --- Video branch: Conv3D -> MaxPool3D -> Flatten ---
    video_input = Input(video_shape)
    video_conv = Conv3D(8, (3, 3, 3), strides=(1, 1, 1), padding='same', **trainable_opts())(video_input)
    video_pooled = MaxPool3D((2, 2, 2), strides=(2, 2, 2), padding='same')(video_conv)
    video_output = Flatten()(video_pooled)

    # --- Audio branch: add channel axis -> Conv2D -> MaxPool2D -> Flatten ---
    audio_input = Input(audio_shape)
    audio_with_channel = expand_dims(audio_input)    # trailing channel dim for Conv2D
    audio_conv = Conv2D(4, (3, 3), strides=(1, 1), padding='same', **trainable_opts())(audio_with_channel)
    audio_pooled = MaxPool2D((2, 2), strides=(2, 2), padding='same')(audio_conv)
    audio_output = Flatten()(audio_pooled)

    # --- Classifier head on the merged features ---
    merged = concatenate([video_output, audio_output])
    hidden = Dense(16, **trainable_opts())(merged)
    # A Dropout(0.2) after the hidden layer is intentionally left disabled.
    fc_output = Dense(1, **trainable_opts(activation='sigmoid'))(hidden)

    return Model(inputs=[video_input, audio_input], outputs=fc_output)

def build_sequence_model(input_shape_dict):
    """Build the sequence model: per-segment CNN branches followed by an LSTM.

    Each sample is a sequence of segments whose length is left unspecified
    (leading ``None`` time axis). Every segment passes through the same
    3D-conv video branch and 2D-conv audio branch via ``TimeDistributed``;
    the flattened per-segment features are concatenated, reduced by a
    16-unit LSTM and mapped to a single sigmoid output.

    Args:
        input_shape_dict: Mapping with 'video' and 'audio' entries holding the
            per-segment input shapes (without batch or time axes). Lists and
            tuples are both accepted.

    Returns:
        An uncompiled Keras ``Model`` taking ``[video_input, audio_input]``.
    """
    # Prepend the variable-length time axis. Unpacking is used instead of
    # ``[None] + shape`` so that tuple shapes do not raise a TypeError.
    video_input_shape = [None, *input_shape_dict['video']]
    audio_input_shape = [None, *input_shape_dict['audio']]
    weight_decay = 0.005

    # Video 3D Conv layers (applied independently to every time step)
    video_input = Input(video_input_shape)
    x = video_input
    x = TimeDistributed(Conv3D(8, (3, 3, 3), strides=(1, 1, 1), padding='same', activation='relu', kernel_initializer='he_uniform', kernel_regularizer=l2(weight_decay)))(x)
    x = TimeDistributed(MaxPool3D((2, 2, 2), strides=(2, 2, 2), padding='same'))(x)
    video_output = TimeDistributed(Flatten())(x)
    # Shape trace: pooled video feature map and its flattened form.
    print(x.shape, video_output.shape)

    # Audio 2D Conv layers (applied independently to every time step)
    audio_input = Input(audio_input_shape)
    x = expand_dims(audio_input)    # add channel dim
    x = TimeDistributed(Conv2D(4, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='he_uniform', kernel_regularizer=l2(weight_decay)))(x)
    x = TimeDistributed(MaxPool2D((2, 2), strides=(2, 2), padding='same'))(x)
    audio_output = TimeDistributed(Flatten())(x)

    # LSTM layers: merge both modalities per time step, then collapse the
    # time axis (the LSTM returns only its final output).
    x = concatenate([video_output, audio_output])
    # Shape trace: merged per-segment features.
    print(x.shape)
    x = LSTM(16, activation='relu', kernel_initializer='he_uniform', kernel_regularizer=l2(weight_decay))(x)
    # Shape trace: LSTM output.
    print(x.shape)

    # Fully-connected layers
    x = Dense(16, activation='relu', kernel_initializer='he_uniform', kernel_regularizer=l2(weight_decay))(x)
    # A Dropout(0.2) after the hidden layer is intentionally left disabled.
    fc_output = Dense(1, activation='sigmoid', kernel_initializer='he_uniform', kernel_regularizer=l2(weight_decay))(x)

    model = Model(inputs=[video_input, audio_input], outputs=fc_output)

    return model

In [13]:
# Instantiate the network to train. Exactly one builder should be active and
# it must match the DataLoader variant chosen above.
# Basic model:
# model = build_basic_model(input_shape_dict)
# Sequence model (active):
model = build_sequence_model(input_shape_dict)
# Print the layer-by-layer overview of the built model.
model.summary()

(None, None, 3, 32, 32, 8) (None, None, 24576)
(None, None, 29776)
(None, 16)
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, None, 40, 13 0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, None, 6, 64, 0                                            
__________________________________________________________________________________________________
tf_op_layer_ExpandDims_1 (Tenso [(None, None, 40, 13 0           input_4[0][0]                    
__________________________________________________________________________________________________
time_distributed_6 (TimeDistrib (None, None, 6, 64,  656         input_3[0][0]                    
______________

## 모델 학습

In [None]:
from trainer import Trainer
from tensorflow.keras.optimizers import Adam

# Start training
# The Trainer is bound to the model, the data loader and the checkpoint
# directory; the same `trainer` object is reused by the test cells below.
trainer = Trainer(model, data_loader, ckpt_dir)
# Train with Adam using the hyperparameters defined in the configuration cell.
trainer.train(Adam(learning_rate), epochs, batch_size, class_weights)

Training started at 20200820-224243
optimizer: {'name': 'Adam', 'learning_rate': 0.0001, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}
epochs: 200
batch size: 256
class weights: (1, 1)
normalized class weights: [1. 1.]



HBox(children=(FloatProgress(value=0.0, description='Train 1/200', max=47.0, style=ProgressStyle(description_w…

0.7067966461181641


## 가중치 복원
모델이 선언되어 있을 때 저장된 가중치를 복원

In [19]:
# Restore previously saved weights into the already-built `model`.
# The weights file is '<ckpt_dir>/<checkpoint_name>.h5'.
# NOTE(review): the checkpoint must come from the same architecture as the
# currently built model — confirm before loading.
checkpoint_name = 'ckpt-20200819-221154-0056-0.4570'
model.load_weights(os.path.join(ckpt_dir, checkpoint_name + '.h5'))

## 모델 테스트

In [20]:
# Evaluate on the test data and report the five metrics returned by the trainer.
# Requires `trainer` from the training cell to exist in the kernel.
# NOTE(review): presumably `trainer` holds a reference to `model`, so the
# weights restored above are the ones being evaluated — confirm in Trainer.
loss, accuracy, precision, recall, f1score = trainer.test(batch_size)
print(f'loss: {loss:.4f}, accuracy: {accuracy:.4f}, precision: {precision:.4f}, recall: {recall:.4f}, f1score: {f1score:.4f}')

HBox(children=(FloatProgress(value=0.0, description='Test', max=15.0, style=ProgressStyle(description_width='i…


loss: 0.1226, accuracy: 0.9076, precision: 0.5662, recall: 0.6185, f1score: 0.5912


In [21]:
from sklearn.metrics import confusion_matrix, classification_report

# Collect ground-truth labels and model predictions for the test data.
# NOTE(review): both are assumed to be array-likes of 0/1 labels (they support
# .sum() and are accepted by the sklearn metrics below) — confirm in Trainer.
y_true, y_pred = trainer.test_prediction(batch_size)

# Sample count, then the number of positives in the labels vs. the predictions.
print(f'test data count: {len(y_true)}')
print(f'true_1, pred_1: {y_true.sum(), y_pred.sum()}')
print()
# Rows are true classes, columns are predicted classes.
print('Confusion Matrix:')
print(confusion_matrix(y_true, y_pred))
print()
# Per-class precision / recall / F1 and their averages.
print('Report:')
print(classification_report(y_true, y_pred))

test data count: 3711
true_1, pred_1: (401, 438)

Confusion Matrix:
[[3120  190]
 [ 153  248]]

Report:
              precision    recall  f1-score   support

           0       0.95      0.94      0.95      3310
           1       0.57      0.62      0.59       401

    accuracy                           0.91      3711
   macro avg       0.76      0.78      0.77      3711
weighted avg       0.91      0.91      0.91      3711



## 모델의 모든 정보를 온전하게 저장 / 복원
모델의 가중치뿐만 아니라 모든 레이어 구성 정보를 저장하여 추후 모델 선언부가 없어도 불러와서 사용 가능

### 모델 저장

In [None]:
# Derive the file path for the full-model export:
# '<ckpt_dir>/<checkpoint_name>_model.h5'.
# Note that this rebinds `checkpoint_name` to a different checkpoint id than
# the one used in the weight-restore cell.
checkpoint_name = 'ckpt-20200818-124333-0011-0.3412'
model_name = checkpoint_name + '_model'
model_path = os.path.join(ckpt_dir, model_name + '.h5')
print(model_path)

In [None]:
# Persist the whole model (not just its weights) to the '.h5' file at
# `model_path`, so it can later be reloaded without the builder functions.
model.save(model_path)

### 모델 복원

In [5]:
# Rebuild the path of the saved full-model file:
# '<ckpt_dir>/<checkpoint_name>_model.h5'.
# This repeats the path computation of the save cell so that the restore
# section can be run on its own; it needs `os` and `ckpt_dir` to be defined.
checkpoint_name = 'ckpt-20200818-124333-0011-0.3412'
model_name = checkpoint_name + '_model'
model_path = os.path.join(ckpt_dir, model_name + '.h5')
print(model_path)

checkpoints\ckpt-20200818-124333-0011-0.3412_model.h5


In [6]:
from tensorflow.keras.models import load_model

# Load architecture and weights from the saved file; no builder call is needed.
model_restored = load_model(model_path)

In [7]:
# Print the restored model's layers to check which architecture was loaded.
model_restored.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 40, 130)]         0         
_________________________________________________________________
tf_op_layer_ExpandDims (Tens (None, 40, 130, 1)        0         
_________________________________________________________________
conv2d (Conv2D)              (None, 40, 130, 4)        40        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 20, 65, 4)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 5200)              0         
_________________________________________________________________
dense (Dense)                (None, 16)                83216     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17  

In [10]:
from trainer import Trainer
# Wrap the restored model in a fresh Trainer so it can be evaluated with the
# same test routine. Requires `data_loader`, `ckpt_dir` and `batch_size` to be
# defined in the kernel already.
# NOTE(review): the recorded summary of the restored model shows a single
# (40, 130) input, while the DataLoader cell above is configured for
# video + audio with x_expand=2 — confirm the loader used in this session
# matched the restored model. Execution counts here are also out of order.
trainer = Trainer(model_restored, data_loader, ckpt_dir)

# Evaluate on the test data and report the five metrics returned by the trainer.
loss, accuracy, precision, recall, f1score = trainer.test(batch_size)
print(f'loss: {loss:.4f}, accuracy: {accuracy:.4f}, precision: {precision:.4f}, recall: {recall:.4f}, f1score: {f1score:.4f}')

HBox(children=(FloatProgress(value=0.0, description='Test', max=15.0, style=ProgressStyle(description_width='i…


loss: 0.2698, accuracy: 0.8416, precision: 0.3518, recall: 0.5536, f1score: 0.4302
