<a href="https://colab.research.google.com/github/arjasc5231/Lingometer/blob/speaker_verification/speaker_verificaiton/development.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only setup, deliberately disabled by wrapping it in a string literal:
# per the note inside, it must stay disabled whenever this notebook is imported.
# To run on Colab, remove the surrounding triple quotes. The disabled commands
# mount Google Drive, install import_ipynb, and cd into the code directory.
"""
# colab에서만 사용하는 코드. import될 때 주석처리 되어있어야 한다.

# drive mount. colab에 내 구글 드라이브 연결
from google.colab import drive
drive.mount('/content/drive')

# import_ipynb module 설치
!pip install import_ipynb

# import를 위한 경로이동
%cd /content/drive/MyDrive/team_malmungchi/colab/speaker_verification/code
!ls
"""

Mounted at /content/drive
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting import_ipynb
  Downloading import_ipynb-0.1.4-py3-none-any.whl (4.1 kB)
Collecting jedi>=0.10
  Downloading jedi-0.18.1-py2.py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 33.4 MB/s 
Installing collected packages: jedi, import-ipynb
Successfully installed import-ipynb-0.1.4 jedi-0.18.1
/content/drive/.shortcut-targets-by-id/1rEYIox5uYM9uP2spDttCYCqtt2GvSbn1/2022 기술 혁신 실험/colab/speaker_verification/code
batcher.ipynb	   eval_metrics.ipynb  network.ipynb	 utils.ipynb
constants.ipynb    fitter.ipynb        preprocess.ipynb
development.ipynb  loss.ipynb	       test_eer.ipynb


In [None]:
import os
import sys
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import SGD
import pickle

#sys.path.append("/content/drive/MyDrive/team_malmungchi/colab/speaker_verification/code")
import import_ipynb
from constants import CHECKPOINTS_DIR, NUM_FRAME, NUM_FBANK
import batcher
import network
import loss
import fitter
import utils
from test_eer import test_frame, test_utt

# Force eager execution: per the original note, test_step (defined outside this
# cell) computes the EER with numpy, which needs concrete tensor values.
tf.config.run_functions_eagerly(True)

importing Jupyter notebook from constants.ipynb
importing Jupyter notebook from batcher.ipynb
importing Jupyter notebook from network.ipynb
importing Jupyter notebook from eval_metrics.ipynb
importing Jupyter notebook from utils.ipynb
importing Jupyter notebook from loss.ipynb
importing Jupyter notebook from fitter.ipynb
importing Jupyter notebook from test_eer.ipynb


In [None]:
 def train(model_name, batcher_name, loss_name, train_data_path, initial_epoch=0, max_epoch=0, pre_checkpoint_dir=None, tag=None, val_data_path='../data/dataset/val_331_25_128_512.npy'):
    """Train a speaker-verification model, then evaluate its best checkpoint.

    Steps: build the batcher, load the validation set, build the model, resolve
    the checkpoint directory (optionally warm-starting from a pretrained run
    and/or resuming a previous run), fit with checkpoint / early-stopping /
    LR-reduction callbacks, reload the best checkpoint, and run the frame-level
    and utterance-level tests.

    Args:
        model_name: name passed to network.get_network.
        batcher_name: name passed to batcher.get_batcher.
        loss_name: name passed to loss.get_loss.
        train_data_path: training dataset path passed to batcher.get_batcher.
        initial_epoch: epoch to start from; overridden by the epoch encoded in
            an existing checkpoint when this run is being resumed.
        max_epoch: last epoch, forwarded to fitter.fit.
        pre_checkpoint_dir: directory of a pretrained run to transfer weights
            from, or None for no transfer learning.
        tag: optional suffix appended to the checkpoint directory name.
        val_data_path: pickled validation dataset to load.

    Raises:
        FileNotFoundError: if pre_checkpoint_dir is given but holds no checkpoint.
    """

    # Checkpoint files are named '{epoch:05d}-{val_eer:.4f}.hdf5' (see the
    # ModelCheckpoint filepath below); these helpers pull the two fields out.
    def _ckpt_epoch(ckpt_name):
        return int(ckpt_name.split('-')[0])

    def _ckpt_eer(ckpt_name):
        return ckpt_name.split('-')[-1][:-5]  # strip the '.hdf5' suffix


    # Load the batcher (which loads the training dataset).
    Batcher = batcher.get_batcher(batcher_name, train_data_path)


    # Load the validation dataset.
    # NOTE(review): pickle.load can execute arbitrary code; only use trusted files.
    with open(val_data_path, "rb") as f:
        val_X = pickle.load(f)
    print('==================================================')
    print('load validation dataset')
    print('shape of data :', val_X.shape)


    # Build the model.
    Model = network.get_network(model_name)
    Model.summary()
    print('==================================================')
    print(model_name+' is created')


    # Optimizer (the learning rate was originally 0.001).
    optimizer = tf.optimizers.Adam(learning_rate=0.0001)


    # Loss object.
    Loss = loss.get_loss(loss_name)


    # Resolve the checkpoint directory.
    print('==================================================')
    print('preparing checkpoint...')
    checkpoint_dir = f'../model/{model_name}-{batcher_name}-{loss_name}'
    if tag:
        checkpoint_dir += '-'+tag
    # Transfer learning: start from the best checkpoint of the pretrained run.
    if pre_checkpoint_dir:
        # normpath drops a trailing slash so the run name is never empty.
        pre_name = os.path.basename(os.path.normpath(pre_checkpoint_dir))
        checkpoint_dir += '--transferedFrom--'+pre_name
        pre_ckpt = utils.load_best_checkpoint(pre_checkpoint_dir)
        # presumably load_best_checkpoint returns a falsy value when nothing is
        # found (the resume path below relies on that) -- fail with a clear error.
        if not pre_ckpt:
            raise FileNotFoundError(f'no checkpoint found in pre_checkpoint_dir: {pre_checkpoint_dir}')
        Model.load_weights(os.path.join(pre_checkpoint_dir, pre_ckpt))
        # Report the epoch of the loaded checkpoint, not the initial_epoch argument.
        print('\nload pre-trainded checkpoint that model:'+pre_name+f', epoch:{_ckpt_epoch(pre_ckpt)}, EER:'+_ckpt_eer(pre_ckpt))
    os.makedirs(checkpoint_dir, exist_ok=True)
    # Resume: if this run already has a checkpoint, continue from it.
    resume_ckpt = utils.load_best_checkpoint(checkpoint_dir)
    if resume_ckpt:
        initial_epoch = _ckpt_epoch(resume_ckpt)
        Model.load_weights(checkpoint_dir+'/'+resume_ckpt)
        print('\nload exist checkpoint that model:'+checkpoint_dir.split('/')[-1]+f', epoch:{initial_epoch}, EER:'+_ckpt_eer(resume_ckpt))
    # Save a checkpoint only when val_eer improves.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_dir+'/{epoch:05d}-{val_eer:.4f}.hdf5', monitor='val_eer', mode='min', save_best_only=True)


    # Stop early when val_eer has not improved by at least 0.001 for 50 epochs.
    early_stopping = EarlyStopping(monitor='val_eer', min_delta=0.001, patience=50, mode='min', verbose=1)


    # Halve the learning rate when val_eer has not improved for 20 epochs (floor 1e-6).
    reduce_lr = ReduceLROnPlateau(monitor='val_eer', factor=0.5, patience=20, mode='min', min_lr=0.000001, verbose=1)


    # Train.
    callbacks = [checkpoint, early_stopping, reduce_lr]
    fitter.fit(Model, Batcher, val_X, Loss, initial_epoch, max_epoch, optimizer, callbacks)
    print('\n training is end')


    # Reload the best weights before testing.
    print('==================================================')
    print('preparing checkpoint...')
    best_ckpt = utils.load_best_checkpoint(checkpoint_dir)
    if best_ckpt:
        Model.load_weights(checkpoint_dir+'/'+best_ckpt)
        best_epoch = _ckpt_epoch(best_ckpt)
        print(f'load best checkpoint that epoch:{best_epoch}, EER:'+_ckpt_eer(best_ckpt))
    else:
        # Nothing was saved (e.g. no epoch ran); keep the weights already in memory
        # instead of crashing on a missing checkpoint name.
        print('no checkpoint found in '+checkpoint_dir+'; testing with the current weights')
    # Drop the datasets before testing; they are no longer needed here.
    del Batcher
    del val_X


    # Test.
    test_frame(Model)
    test_utt(Model)

In [None]:
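# NOTE: train() requires train_data_path as its 4th positional argument; the example
# calls below that omit it raise TypeError if uncommented as-is.
# naive train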
#train('ACRNN', 'naive_batcher', 'cross_entropy', max_epoch=10)

# simMat train
# train('CNN', 'simMat_batcher', 'simMat_loss', '../data/dataset/train_300_200_128_512.pickle', max_epoch=100)

# transfer
# train('naive_model', 'naive_batcher', 'cross_entropy', max_epoch=4)
#train('naive_model', 'simMat_batcher', 'simMat_loss', max_epoch=1, pre_checkpoint_dir='../model/naive_model-naive_batcher-cross_entropy')

[batcher:simMat] load train dataset
shape of data : (300, 200, 128, 128, 1)
load validation dataset
shape of data : (331, 25, 128, 128, 1)
Model: "custom_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 128, 1)]     0         
                                                                 
 conv2d (Conv2D)             (None, 126, 126, 32)      320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 63, 63, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 31, 31, 64)       0         
 2D)                                           

  "Even though the `tf.config.experimental_run_functions_eagerly` "



 training is end
preparing checkpoint...
load best checkpoint that epoch:1, EER:0.3553
start frame level test
EER: 0.3585294117647059
start utterence level test
EER: 0.35721393034825866
