<a href="https://colab.research.google.com/github/arjasc5231/Lingometer/blob/speaker_verification/speaker_verificaiton/network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras
from eval_metrics import calculate_eer
import numpy as np

import import_ipynb
from utils import normalize, cosine_similarity
from constants import NUM_FBANK, HOP_LENGTH, NUM_SPEC_PER_SPEAKER, NUM_FRAME, NUM_ENROLL_UTT, NUM_TRUE_UTT, NUM_FALSE_UTT

In [None]:
class CustomModel(keras.Model):
    """Keras model whose evaluation step reports an equal error rate (EER)."""

    def test_step(self, data):
        """Score test utterances against enrollment and return ``{"eer": eer}``.

        Per the original note, ``data[0]`` is laid out as
        [batch][NUM_ENROLL_UTT + NUM_TRUE_UTT + NUM_FALSE_UTT][128][128][1]
        (TODO confirm against the data pipeline). The second element of
        ``data`` is unpacked and ignored.
        """
        speaker_groups, _ = data

        true_score = []   # similarity scores of the "true" utterances
        false_score = []  # similarity scores of the "false" utterances

        # Index layout inside one group: enrollment, then true, then false.
        first_true = NUM_ENROLL_UTT
        first_false = NUM_ENROLL_UTT + NUM_TRUE_UTT

        for group in speaker_groups:
            # NOTE(review): .numpy() presumably requires the model to run
            # eagerly (e.g. compile(run_eagerly=True)) -- confirm.
            embeddings = normalize(self(group, training=False).numpy())

            # Average the enrollment embeddings into a single reference vector.
            enroll = np.mean(embeddings[:NUM_ENROLL_UTT], axis=0)

            # Similarity between the enrollment reference and every other utterance.
            true_score.extend(
                cosine_similarity(enroll, embeddings[idx])
                for idx in range(first_true, first_true + NUM_TRUE_UTT)
            )
            false_score.extend(
                cosine_similarity(enroll, embeddings[idx])
                for idx in range(first_false, first_false + NUM_FALSE_UTT)
            )

        # Arrange the arguments for calculate_eer: true trials are labelled 1.0,
        # false trials 0.0, in the same order as the concatenated scores.
        n_true = len(true_score)
        n_false = len(false_score)
        scores = np.array(true_score + false_score)
        labels = np.array([1.0] * n_true + [0.0] * n_false)

        # Compute the EER over the values 0.000, 0.001, ..., 0.999.
        thresholds = np.arange(0, 1.0, 0.001)
        eer = calculate_eer(thresholds, scores, labels)
        return {"eer": eer}

In [None]:
def CNN():
    """Build the plain CNN embedding network.

    Three (3x3 conv + ReLU, max-pool) stages with 32, 64 and 128 filters are
    followed by a flatten and two 200-unit dense layers without activation.
    Input is a (128, 128, 1) tensor; the result is wrapped in ``CustomModel``.
    """
    inputs = keras.Input(shape=(128, 128, 1))

    x = inputs
    # Layers are created in conv -> pool order so auto-generated names are stable.
    for n_filters in (32, 64, 128):
        x = keras.layers.Conv2D(filters=n_filters, kernel_size=[3, 3], activation=tf.nn.relu)(x)
        x = keras.layers.MaxPool2D(padding='SAME')(x)

    x = keras.layers.Flatten()(x)
    for _ in range(2):
        x = keras.layers.Dense(200)(x)

    return CustomModel(inputs=inputs, outputs=x)

In [None]:
def CRNN():
    """Build the convolutional-recurrent embedding network.

    The three conv/pool stages reduce the (128, 128, 1) input to a
    (15, 15, 128) feature map, which is reshaped to a 15-step sequence and
    summarised by a bidirectional LSTM before two 200-unit dense layers.
    """
    inputs = keras.Input(shape=(128, 128, 1))

    x = inputs
    for n_filters in (32, 64, 128):
        x = keras.layers.Conv2D(filters=n_filters, kernel_size=[3, 3], activation=tf.nn.relu)(x)
        x = keras.layers.MaxPool2D(padding='SAME')(x)

    # Merge the column axis (frequency, per the original note) with the channels.
    x = keras.layers.Reshape((-1, 15*128))(x)
    recurrent = keras.layers.LSTM(128, return_sequences=False)  # original note: dropout=0.5
    x = keras.layers.Bidirectional(recurrent)(x)

    x = keras.layers.Dense(200)(x)
    x = keras.layers.Dense(200)(x)
    return CustomModel(inputs=inputs, outputs=x)

In [None]:
def ACRNN():
    """Build the attention-based convolutional-recurrent embedding network.

    Three conv/pool stages reduce the (128, 128, 1) input to a
    (15, 15, 128) feature map. It is reshaped into a 15-step sequence, run
    through a bidirectional LSTM that returns every step, and pooled over
    time with learned attention weights before two 200-unit dense layers.

    Returns:
        CustomModel mapping a (128, 128, 1) input to a 200-dim embedding.
    """
    inputs = keras.Input(shape=(128, 128, 1))
    conv1 = keras.layers.Conv2D(filters=32, kernel_size=[3, 3], activation=tf.nn.relu)(inputs)
    pool1 = keras.layers.MaxPool2D(padding='SAME')(conv1)
    conv2 = keras.layers.Conv2D(filters=64, kernel_size=[3, 3], activation=tf.nn.relu)(pool1)
    pool2 = keras.layers.MaxPool2D(padding='SAME')(conv2)
    conv3 = keras.layers.Conv2D(filters=128, kernel_size=[3, 3], activation=tf.nn.relu)(pool2)
    pool3 = keras.layers.MaxPool2D(padding='SAME')(conv3)

    # Merge the column axis (frequency, per the original note) with the channels.
    reshape = keras.layers.Reshape((-1, 15*128))(pool3)
    # Output is (time=15, hidden=256). Original note: dropout=0.3.
    lstm = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(reshape)

    # lstm (time, hidden) x W (hidden, 1) -> one score per time step: (time, 1).
    attention_score1 = keras.layers.Dense(1, activation='tanh')(lstm)
    # Normalise across the time axis. The default axis=-1 would act on the
    # size-1 last axis and turn every weight into 1.0 (no attention at all).
    attention_score2 = keras.layers.Softmax(axis=1)(attention_score1)
    # Weighted sum over time: (time, hidden) . (time, 1) -> (hidden, 1).
    attention = keras.layers.Dot(axes=(1, 1))([lstm, attention_score2])
    flatten = keras.layers.Flatten()(attention)  # (hidden=256,)

    fc1 = keras.layers.Dense(200)(flatten)
    fc2 = keras.layers.Dense(200)(fc1)
    return CustomModel(inputs=inputs, outputs=fc2)

In [None]:
def modCRNN():
    """Build the modified CRNN: two pooling stages and three stacked LSTMs.

    With only two max-pools the (128, 128, 1) input becomes a
    (30, 30, 128) feature map, reshaped into a 30-step sequence for the LSTMs.
    Two 200-unit dense layers produce the final output.
    """
    inputs = keras.Input(shape=(128, 128, 1))

    # The first two convolutions are back to back; pooling starts after the second.
    x = keras.layers.Conv2D(filters=32, kernel_size=[3, 3], activation=tf.nn.relu)(inputs)
    x = keras.layers.Conv2D(filters=64, kernel_size=[3, 3], activation=tf.nn.relu)(x)
    x = keras.layers.MaxPool2D(padding='SAME')(x)
    x = keras.layers.Conv2D(filters=128, kernel_size=[3, 3], activation=tf.nn.relu)(x)
    x = keras.layers.MaxPool2D(padding='SAME')(x)

    # Merge the column axis (frequency, per the original note) with the channels.
    x = keras.layers.Reshape((-1, 30*128))(x)
    # Only the last LSTM collapses the sequence to a single vector.
    for keep_sequence in (True, True, False):
        x = keras.layers.LSTM(256, return_sequences=keep_sequence)(x)

    x = keras.layers.Dense(200)(x)
    x = keras.layers.Dense(200)(x)
    return CustomModel(inputs=inputs, outputs=x)

In [None]:
def modACRNN():
    """Build the modified attention CRNN: three stacked LSTMs plus attention.

    With two max-pools the (128, 128, 1) input becomes a (30, 30, 128)
    feature map, reshaped into a 30-step sequence. All three LSTMs return
    the full sequence so attention can pool over time; the pooled vector
    goes through two 200-unit dense layers.

    Returns:
        CustomModel mapping a (128, 128, 1) input to a 200-dim embedding.
    """
    inputs = keras.Input(shape=(128, 128, 1))
    conv1 = keras.layers.Conv2D(filters=32, kernel_size=[3, 3], activation=tf.nn.relu)(inputs)
    conv2 = keras.layers.Conv2D(filters=64, kernel_size=[3, 3], activation=tf.nn.relu)(conv1)
    pool2 = keras.layers.MaxPool2D(padding='SAME')(conv2)
    conv3 = keras.layers.Conv2D(filters=128, kernel_size=[3, 3], activation=tf.nn.relu)(pool2)
    pool3 = keras.layers.MaxPool2D(padding='SAME')(conv3)

    # Merge the column axis (frequency, per the original note) with the channels.
    reshape = keras.layers.Reshape((-1, 30*128))(pool3)
    lstm1 = keras.layers.LSTM(256, return_sequences=True)(reshape)
    lstm2 = keras.layers.LSTM(256, return_sequences=True)(lstm1)
    # The last LSTM must also return the sequence. With a single (hidden,)
    # vector the Dot layer below would see mismatched axes (256 vs 1) and
    # the model could not be built.
    lstm3 = keras.layers.LSTM(256, return_sequences=True)(lstm2)  # (time=30, hidden=256)

    # lstm (time, hidden) x W (hidden, 1) -> one score per time step: (time, 1).
    attention_score1 = keras.layers.Dense(1, activation='tanh')(lstm3)
    # Normalise across the time axis; the default axis=-1 would act on the
    # size-1 last axis and turn every weight into 1.0.
    attention_score2 = keras.layers.Softmax(axis=1)(attention_score1)
    # Weighted sum over time: (time, hidden) . (time, 1) -> (hidden, 1).
    attention = keras.layers.Dot(axes=(1, 1))([lstm3, attention_score2])
    flatten = keras.layers.Flatten()(attention)  # (hidden=256,)

    fc1 = keras.layers.Dense(200)(flatten)
    fc2 = keras.layers.Dense(200)(fc1)
    return CustomModel(inputs=inputs, outputs=fc2)

In [None]:
def ADRNN():
    """Build the attention + dilated-convolution residual recurrent network.

    Original author's note: this was (probably) the best-performing model in
    last year's project, with the bidirectional LSTM replaced by two forward
    LSTMs.

    Structure: a 64-filter conv and max-pool, then a residual block of three
    dilated conv/batch-norm/LeakyReLU layers with a dilated-conv shortcut.
    The (64, 64, 128) result is reshaped to a 64-step sequence, projected to
    512 features, run through two LSTMs and pooled over time with attention.

    Returns:
        CustomModel mapping a (128, 128, 1) input to a 200-dim embedding.
    """
    inputs = keras.Input(shape=(128, 128, 1))
    conv1 = keras.layers.Conv2D(filters=64, kernel_size=[3, 3], padding='same', activation=tf.nn.leaky_relu)(inputs)
    pool1 = keras.layers.MaxPool2D()(conv1)

    # Main branch: three dilated convolutions, each with batch norm + LeakyReLU.
    conv2 = keras.layers.Conv2D(filters=128, kernel_size=[3, 3], dilation_rate=2, padding='same')(pool1)
    conv2 = keras.layers.BatchNormalization()(conv2)
    conv2 = keras.layers.LeakyReLU()(conv2)
    conv3 = keras.layers.Conv2D(filters=128, kernel_size=[3, 3], dilation_rate=2, padding='same')(conv2)
    conv3 = keras.layers.BatchNormalization()(conv3)
    conv3 = keras.layers.LeakyReLU()(conv3)
    conv4 = keras.layers.Conv2D(filters=128, kernel_size=[3, 3], dilation_rate=2, padding='same')(conv3)
    conv4 = keras.layers.BatchNormalization()(conv4)
    conv4 = keras.layers.LeakyReLU()(conv4)

    # Shortcut branch: one dilated convolution so channel counts match for Add.
    conv_res = keras.layers.Conv2D(filters=128, kernel_size=[3, 3], dilation_rate=2, padding='same')(pool1)
    conv_res = keras.layers.BatchNormalization()(conv_res)

    res = keras.layers.Add()([conv_res, conv4])
    res = keras.layers.LeakyReLU()(res)

    # Merge the column axis (frequency, per the original note) with the channels.
    reshape = keras.layers.Reshape((-1, 64*128))(res)
    linear = keras.layers.Dense(512)(reshape)

    # Attention (ACRNN-style) head. The earlier CRNN-style head was a single
    # Bidirectional(LSTM(128, dropout=0.5)) followed by Dense(128).
    lstm1 = keras.layers.LSTM(256, return_sequences=True, dropout=0.5)(linear)
    lstm2 = keras.layers.LSTM(256, return_sequences=True, dropout=0.5)(lstm1)  # (time=64, hidden=256)

    # lstm (time, hidden) x W (hidden, 1) -> one score per time step: (time, 1).
    attention_score1 = keras.layers.Dense(1, activation='tanh')(lstm2)
    # Normalise across the time axis; the default axis=-1 would act on the
    # size-1 last axis and turn every weight into 1.0.
    attention_score2 = keras.layers.Softmax(axis=1)(attention_score1)
    # Weighted sum over time: (time, hidden) . (time, 1) -> (hidden, 1).
    attention = keras.layers.Dot(axes=(1, 1))([lstm2, attention_score2])
    flatten = keras.layers.Flatten()(attention)  # (hidden=256,)

    fc = keras.layers.Dense(200, activation=tf.nn.leaky_relu)(flatten)
    drop = keras.layers.Dropout(0.5)(fc)
    output = keras.layers.Dense(200)(drop)
    return CustomModel(inputs=inputs, outputs=output)

In [1]:
def tiny_CRNN():
    """Build the small CRNN variant.

    Two 'same'-padded convolutions (8 and 16 filters) and one max-pool turn
    the (128, 128, 1) input into a (64, 64, 16) feature map, which is read as
    a 64-step sequence by a single unrolled LSTM and projected to 200 units.
    """
    inputs = keras.Input(shape=(128, 128, 1))

    x = inputs
    for n_filters in (8, 16):
        x = keras.layers.Conv2D(filters=n_filters, kernel_size=[3, 3], padding='same', activation=tf.nn.relu)(x)
    x = keras.layers.MaxPool2D(padding='SAME')(x)

    # Merge the column axis (frequency, per the original note) with the channels.
    x = keras.layers.Reshape((-1, 64*16))(x)
    # unroll=True is kept for conversion to TFLite Micro (original author's note).
    x = keras.layers.LSTM(64, return_sequences=False, unroll=True)(x)

    embedding = keras.layers.Dense(200)(x)
    return CustomModel(inputs=inputs, outputs=embedding)

def tiny_CNN():
    """Build the small CNN variant.

    Three ('same'-padded 8-filter conv + ReLU, max-pool) stages reduce the
    (128, 128, 1) input to (16, 16, 8). The flattened features pass through
    a 50-unit bottleneck and then a 200-unit output layer.

    NOTE: the 50-unit layer used to be created and then discarded, because
    the output layer was wired to ``flatten`` directly. It is now part of the
    graph, so checkpoints saved from the old wiring will not load.

    Returns:
        CustomModel mapping a (128, 128, 1) input to a 200-dim embedding.
    """
    inputs = keras.Input(shape=(128, 128, 1))
    conv1 = keras.layers.Conv2D(filters=8, kernel_size=[3, 3], padding='same', activation=tf.nn.relu)(inputs)
    pool1 = keras.layers.MaxPool2D(padding='SAME')(conv1)
    conv2 = keras.layers.Conv2D(filters=8, kernel_size=[3, 3], padding='same', activation=tf.nn.relu)(pool1)
    pool2 = keras.layers.MaxPool2D(padding='SAME')(conv2)
    conv3 = keras.layers.Conv2D(filters=8, kernel_size=[3, 3], padding='same', activation=tf.nn.relu)(pool2)
    pool3 = keras.layers.MaxPool2D(padding='SAME')(conv3)
    flatten = keras.layers.Flatten()(pool3)
    # Bottleneck first, then the output layer fed from the bottleneck.
    bottleneck = keras.layers.Dense(50)(flatten)
    fc = keras.layers.Dense(200)(bottleneck)
    return CustomModel(inputs=inputs, outputs=fc)

In [None]:
def get_network(model_name):
    """Build and return the network registered under ``model_name``.

    Args:
        model_name: one of 'naive_model', 'CNN', 'CRNN', 'ACRNN', 'modACRNN',
            'modCRNN', 'ADRNN', 'tiny_CRNN' or 'tiny_CNN'. 'naive_model' is an
            alias for 'CNN'.

    Returns:
        A freshly built model from the matching builder function.

    Raises:
        ValueError: if ``model_name`` is not a known name. Previously this
            fell through and returned ``None``.
    """
    if model_name in ('naive_model', 'CNN'):
        return CNN()
    if model_name == 'CRNN':
        return CRNN()
    if model_name == 'ACRNN':
        return ACRNN()
    if model_name == 'modACRNN':
        return modACRNN()
    if model_name == 'modCRNN':
        return modCRNN()
    if model_name == 'ADRNN':
        return ADRNN()
    if model_name == 'tiny_CRNN':
        return tiny_CRNN()
    if model_name == 'tiny_CNN':
        return tiny_CNN()
    raise ValueError(f"Unknown model name: {model_name!r}")