<a href="https://colab.research.google.com/github/arjasc5231/moodots/blob/ACRNN/SER/CNN/ACRNNdeep_ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab filesystem; the dataset and the local
# `kfold` helper module used in later cells are read from under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import sys
# Make the project's helper modules on Drive importable. This must run before
# the `kfold` import below, which is resolved from this directory.
sys.path.append("/content/drive/MyDrive/team_runner/colab/emoDB/CNN")
from kfold import kfold

In [None]:
# Load the pre-split arrays from Drive and merge them back into a single
# (X, Y) pair; the k-fold routine called later receives the merged arrays.
# NOTE: allow_pickle=True unpickles Python objects, so only load trusted files.
X_train, X_test, Y_train, Y_test = np.load(
    "/content/drive/MyDrive/team_runner/colab/dataset/emoDB/emo_mel_3d.npy",
    allow_pickle=True,
)  # recorded result with this file: 0.8070+-0.0354
# Alternative dataset file; recorded accuracy 0.8049+-0.0349 (pulled down a lot by a single 0.74 run):
#X_train, X_test, Y_train, Y_test = np.load("/content/drive/MyDrive/team_runner/colab/dataset/emoDB/emo_mel_more.npy", allow_pickle=True)

# Stack train and test along the sample axis (axis 0 is np.concatenate's default).
X = np.concatenate([X_train, X_test])
# Left disabled; would append a trailing axis of size 1:
#X = np.expand_dims(X, axis=-1)
Y = np.concatenate([Y_train, Y_test])

# Show both shapes, one per line.
print(X.shape, Y.shape, sep="\n")

(911, 128, 128, 3)
(911,)


In [None]:
"""
attention 이론은 https://wikidocs.net/22893 참고
Bahdanau방식 https://github.com/philipperemy/keras-attention-mechanism 참고
ACRNN 논문에서는 LSTM의 마지막 출력값도 사용하지 않은 특이한 attention을 사용...
  논문 코드 https://github.com/xuanjihe/speech-emotion-recognition/blob/master/crnn.py 에서 u_omega의 역할을 이해 못했음 일단 빼고 구현
https://stackoverflow.com/questions/42918446/how-to-add-an-attention-mechanism-in-keras?answertab=votes#tab-top repeatvector를 이용하기도 하네
https://github.com/keras-team/keras/issues/1472 여기도 참고
"""


def create_model():
    """Build the CNN + bidirectional-LSTM + attention classifier (ACRNN-style).

    Pipeline: three Conv2D/MaxPool stages -> one feature vector per sequence
    step -> Dense projection -> bidirectional LSTM -> attention-weighted sum
    over the sequence axis -> Dense head.

    Returns:
        keras.Model: maps a (128, 128, 3) input to 7 raw class scores (the
        final Dense layer has no activation, so the outputs are unnormalized).
    """
    inputs = keras.Input(shape=(128, 128, 3))
    #inputs = keras.Input(shape=(128, 128, 1))

    # Each 'same'-padded conv keeps the spatial size; each default 2x2 max-pool
    # halves it: 128 -> 64 -> 32 -> 16.
    conv1 = keras.layers.Conv2D(filters=32, kernel_size=[3, 3], padding='same', activation=tf.nn.relu)(inputs)
    pool1 = keras.layers.MaxPool2D()(conv1)
    conv2 = keras.layers.Conv2D(filters=64, kernel_size=[3, 3], padding='same', activation=tf.nn.relu)(pool1)
    pool2 = keras.layers.MaxPool2D()(conv2)
    conv3 = keras.layers.Conv2D(filters=128, kernel_size=[3, 3], padding='same', activation=tf.nn.relu)(pool2)
    pool3 = keras.layers.MaxPool2D()(conv3)  # (16, 16, 128)
    # Author's note: stopping at conv2 did not seem to make much difference.

    # Swap the two spatial axes so that the original second spatial axis
    # becomes the sequence axis, then merge the remaining spatial axis with
    # the channels: (16, 16, 128) -> (16, 16*128).
    # NOTE(review): the original comment calls the merged axis the frequency
    # axis; confirm against how the spectrograms are laid out on disk.
    trans = keras.layers.Permute((2,1,3))(pool3)
    reshape = keras.layers.Reshape((-1, 16*128))(trans)
    linear = keras.layers.Dense(512)(reshape)
    lstm = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True, dropout=0.5))(linear) # (time=16, hidden=256)

    # One scalar score per step: lstm (time, hidden) @ W (hidden, 1) -> (time, 1).
    attention_score1 = keras.layers.Dense(1, activation='tanh')(lstm)
    # Normalize the scores ACROSS THE SEQUENCE AXIS. The layer default is
    # axis=-1, which here is the size-1 score axis; softmax over a single
    # element is always 1.0, so the default turns the attention into a plain
    # unweighted sum over steps.
    attention_score2 = keras.layers.Softmax(axis=1)(attention_score1)
    # Weighted sum over steps: (time, hidden) . (time, 1) -> (hidden, 1).
    attention = keras.layers.Dot(axes=(1,1))([lstm, attention_score2])
    flatten = keras.layers.Flatten()(attention)  # (hidden=256,)

    # Author's note (recorded before the softmax-axis fix): enabling dropout
    # here in addition to drop2 gave 0.7586 +- 0.0238.
    #drop1 = keras.layers.Dropout(0.5)(flatten)
    # NOTE(review): `fc` has no activation, so fc -> output composes two linear
    # maps (with dropout in between); confirm whether a nonlinearity was intended.
    fc = keras.layers.Dense(128)(flatten)
    drop2 = keras.layers.Dropout(0.5)(fc)
    output = keras.layers.Dense(7)(drop2)
    return keras.Model(inputs=inputs, outputs=output)

In [None]:
# Build one instance of the network and print its layer-by-layer summary
# (output shapes and parameter counts) as a sanity check.
model = create_model()
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
conv2d_18 (Conv2D)              (None, 128, 128, 32) 896         input_7[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_18 (MaxPooling2D) (None, 64, 64, 32)   0           conv2d_18[0][0]                  
__________________________________________________________________________________________________
conv2d_19 (Conv2D)              (None, 64, 64, 64)   18496       max_pooling2d_18[0][0]           
____________________________________________________________________________________________

In [None]:
# Run cross-validation with the project-local `kfold` helper, which receives
# the model factory itself (not a built model).
# NOTE(review): the positional arguments are not documented here; judging by
# the printed log they presumably include k=5 folds, 120 epochs and a report
# every 10 epochs, with 0.001 / 100 likely learning rate / batch size. Confirm
# against kfold.py.
kfold(5, create_model, X, Y, 0.001, 100, 120, 10) # Accuracy is lower than expected. Either attention is being used incorrectly (try following the paper's model.py function exactly), or the input size should be increased to take advantage of attention.

Learning started. k=5
<<fold 1>>
Epoch	loss		train acc	test acc
10  	1.0543  	0.6232  	0.5933
20  	0.6268  	0.8229  	0.6704
30  	0.3257  	0.9205  	0.6934
40  	0.1446  	0.9700  	0.7295
50  	0.1148  	0.9800  	0.6763
60  	0.1879  	0.9375  	0.7445
70  	0.0390  	0.9962  	0.7917
80  	0.0043  	1.0000  	0.7936
90  	0.0016  	1.0000  	0.7996
100  	0.0028  	1.0000  	0.7857
110  	0.0007  	1.0000  	0.7716
120  	0.0100  	1.0000  	0.7054
max train accuracy: 1.0000
max test accuracy: 0.8017
<<fold 2>>
Epoch	loss		train acc	test acc
10  	0.9347  	0.7147  	0.6127
20  	0.4353  	0.8844  	0.6893
30  	0.2183  	0.9550  	0.7098
40  	0.1495  	0.9762  	0.6954
50  	0.1001  	0.9788  	0.7176
60  	0.0230  	1.0000  	0.7309
70  	0.0011  	1.0000  	0.7359
80  	0.0011  	1.0000  	0.7087
90  	0.0049  	1.0000  	0.7115
100  	0.0008  	1.0000  	0.7165
110  	0.0013  	1.0000  	0.7237
120  	0.0002  	1.0000  	0.7298
max train accuracy: 1.0000
max test accuracy: 0.7480
<<fold 3>>
Epoch	loss		train acc	test acc
10  	0.9802  	0.6755