# Pembangunan Model Gabungan Langsung - Sistem Pengenal Emosi Cakapan

## Library Preparation

In [None]:
!pip install arff
!pip install imbalanced-learn

Collecting arff
  Downloading https://files.pythonhosted.org/packages/50/de/62d4446c5a6e459052c2f2d9490c370ddb6abc0766547b4cef585913598d/arff-0.9.tar.gz
Building wheels for collected packages: arff
  Building wheel for arff (setup.py) ... [?25l[?25hdone
  Created wheel for arff: filename=arff-0.9-cp37-none-any.whl size=4970 sha256=f41daa417196a16af8dc0628f03ee58b9cc6001c7ca259fda4b5a180a273798b
  Stored in directory: /root/.cache/pip/wheels/04/d0/70/2c73afedd3ac25c6085b528742c69b9587cbdfa67e5194583b
Successfully built arff
Installing collected packages: arff
Successfully installed arff-0.9


In [None]:
import arff
import joblib
from io import StringIO
from google.colab import drive
from google.colab import files

import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow import keras
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from keras.models import Sequential
from keras import layers
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.feature_extraction.text import TfidfVectorizer
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler



In [None]:
# Mount Google Drive into the Colab VM so the dataset and saved models are reachable.
drive.mount('/content/gdrive')
# Project root on Drive; the data and model paths used in this notebook are built from this prefix.
drive_dir = "/content/gdrive/MyDrive/Teknik Informatika/Semester 7/TUGAS AKHIR/TA 2"

Mounted at /content/gdrive


## Load Data

### Audio

In [None]:
def read_arff_data(data):
  """Collect the rows of an ARFF row iterator into a NumPy array.

  The first row produced by ``data`` is discarded and reading starts from the
  second one. For every remaining row the value in column 0 and every
  empty-string value are dropped. Reading stops when the iterator is exhausted
  or at the first falsy (e.g. empty) row.

  NOTE(review): the first row is skipped unconditionally, exactly as before;
  confirm that it is not a real sample.

  Args:
    data: An iterator of indexable, sized rows (here the result of
      ``arff.load``).

  Returns:
    ``np.ndarray`` built from the per-row value lists. An iterator with fewer
    than two rows gives an empty array.

  Raises:
    Whatever the underlying iterator raises while producing a row. Only normal
    exhaustion ends the read; parse errors are no longer swallowed and turned
    into a silently truncated result.
  """
  next(data, None)  # discard the first row
  row = next(data, None)
  list_data = []
  while row:
    # Column 0 and empty-string cells are not part of the feature vector.
    list_data.append([row[j] for j in range(1, len(row)) if row[j] != ''])
    # next(..., None) only absorbs StopIteration; real errors propagate.
    row = next(data, None)
  return np.array(list_data)

In [None]:
# Load the acoustic feature table from the eGeMAPS ARFF file on Drive.
feature_akustik = read_arff_data(arff.load(drive_dir + '/Eksperimen/Data/eGeMAPS.arff'))
# Last expression of the cell: display the array shape.
feature_akustik.shape

(10822, 88)

### Textual

In [None]:
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code — only load trusted files.
data = joblib.load(drive_dir + '/Eksperimen/Data/stopword.pkl')
# NOTE(review): the vectorizer is fitted on every transcript, before the train/test
# split made later in this notebook, so test-set vocabulary/IDF statistics leak into the features.
tfidf_vectorizer = TfidfVectorizer()
feature_tfidf = tfidf_vectorizer.fit_transform(data['Transkripsi'])
# Convert the sparse TF-IDF matrix into a dense ndarray.
feature_leksikal = np.array(feature_tfidf.todense())
feature_leksikal.shape

(10822, 5017)

## Prepare Data Pemodelan

### Scaling Data Akustik

In [None]:
# Rescale the acoustic features with MinMaxScaler (default feature range).
# NOTE(review): the scaler is fitted on all rows, before the train/test split made
# later in this notebook, so test-set minima/maxima influence the training features.
scaler = MinMaxScaler()
# Rebinds feature_akustik to its scaled version.
feature_akustik = scaler.fit_transform(feature_akustik)
feature_akustik.shape

(10822, 88)

### Construct Concatenated Features

In [None]:
def concat_features(feature_1, feature_2):
  """Join two feature tables row by row.

  Row ``i`` of the result is the values of ``feature_1[i]`` followed by the
  values of ``feature_2[i]``. The number of rows is taken from ``feature_1``.

  Args:
    feature_1: Indexable collection of iterable rows (left part).
    feature_2: Indexable collection of iterable rows (right part).

  Returns:
    ``np.ndarray`` with ``dtype=object`` holding the joined rows.
  """
  merged_rows = [
      list(feature_1[row]) + list(feature_2[row])
      for row in range(len(feature_1))
  ]
  return np.array(merged_rows, dtype=object)

In [None]:
# Feature-level fusion input: lexical (TF-IDF) columns first, acoustic columns after them.
feature_concat = concat_features(feature_leksikal, feature_akustik)
feature_concat.shape

(10822, 5105)

### Construct Sequential Data 

In [None]:
def construct_sequential(id, data):
  """Group consecutive rows of ``data`` into sequences.

  A new sequence starts at every position whose ID ends with ``'001'``. Rows
  that come before the first such ID are collected into a leading group that
  is dropped from the result.

  Args:
    id: Indexable collection of string IDs, one per row of ``data``.
    data: Indexable collection of rows, aligned with ``id``.

  Returns:
    ``np.ndarray`` with ``dtype=object`` containing the grouped sequences.
  """
  # groups[0] is the (possibly empty) leading group that precedes the first '001'.
  groups = [[]]
  for pos in range(len(id)):
    if id[pos].endswith('001'):
      groups.append([])
    groups[-1].append(data[pos])

  grouped = np.array(groups, dtype=object)
  return grouped[1:]

In [None]:
def construct_abstract(id, abstraksi):
  """Pick one label per sequence: the label found where a sequence starts.

  Args:
    id: Indexable collection of string IDs.
    abstraksi: Indexable collection of labels aligned with ``id``.

  Returns:
    ``np.ndarray`` with ``dtype=object`` holding ``abstraksi[i]`` for every
    ``i`` whose ID ends with ``'001'``, in order.
  """
  first_labels = [
      abstraksi[pos]
      for pos in range(len(id))
      if id[pos].endswith('001')
  ]
  return np.array(first_labels, dtype=object)

In [None]:
# NOTE: `id` shadows the Python builtin; the name is kept because a later cell reads it.
id = data['ID Ucapan']
# Group each feature view into per-conversation sequences with the same boundaries.
sequential_features_akustik = construct_sequential(id, feature_akustik)
sequential_features_leksikal = construct_sequential(id, feature_leksikal)
sequential_features_concat = construct_sequential(id, feature_concat)
print(
    sequential_features_akustik.shape,
    sequential_features_leksikal.shape,
    sequential_features_concat.shape,
)

(2003,) (2003,) (2003,)


In [None]:
# One label per sequence: the 'Abstraksi Emosi' value stored at each sequence's first row.
abstraksi = construct_abstract(id, data['Abstraksi Emosi'])
print(abstraksi.shape)

(2003,)


### Label One Hot Encoding

In [None]:
# NOTE(review): `sparse=False` was renamed to `sparse_output` in newer scikit-learn
# releases — confirm against the installed version before upgrading.
encoder_cakapan = OneHotEncoder(sparse=False)
# The encoder expects a 2-D column; this cell rebinds `abstraksi`, so re-running it
# would encode the already-encoded labels.
abstraksi = abstraksi.reshape(-1, 1)
abstraksi = encoder_cakapan.fit_transform(abstraksi)
abstraksi.shape

(2003, 6)

### Split Train and Test Data

In [None]:
# Working aliases for the three feature views and the one-hot labels;
# the X_* names are rebound to padded arrays in the next cell.
X_akustik = sequential_features_akustik
X_leksikal = sequential_features_leksikal
X_concat = sequential_features_concat
y = abstraksi

In [None]:
# Pad every sequence at the end ("post") to a common length so each view becomes
# one dense float32 array.
X_akustik = tf.keras.preprocessing.sequence.pad_sequences(X_akustik, padding="post", dtype='float32')
X_leksikal = tf.keras.preprocessing.sequence.pad_sequences(X_leksikal, padding="post", dtype='float32')
X_concat = tf.keras.preprocessing.sequence.pad_sequences(X_concat, padding="post", dtype='float32')
# The label rows already share one length; this call keeps the shape and yields float32.
y = tf.keras.preprocessing.sequence.pad_sequences(y, padding="post", dtype='float32')
print("Akustik", X_akustik.shape, y.shape)
print("Leksikal", X_leksikal.shape, y.shape)
print("Concat", X_concat.shape, y.shape)

Akustik (2003, 15, 88) (2003, 6)
Leksikal (2003, 15, 5017) (2003, 6)
Concat (2003, 15, 5105) (2003, 6)


In [None]:
# Single stratified 85/15 split. The resulting index arrays are reused for the
# acoustic, lexical and concatenated views so all models see the same test set.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.15, random_state=42)
# n_splits=1, so the generator yields exactly one (train, test) pair.
train_index, test_index = next(sss.split(X_concat, y))
print("Train data", train_index.shape)
print("Test data", test_index.shape)

Train data (1702,)
Test data (301,)


#### Eliminate Minority Label

In [None]:
def get_label_idxs(list, label):
  """Find the positions in ``list`` whose value equals any entry of ``label``.

  Args:
    list: Indexable collection of values to scan.
    label: Iterable of values to look for.

  Returns:
    ``np.ndarray`` of matching positions in ascending order. A position is
    emitted once per matching entry of ``label``.
  """
  matches = [
      pos
      for pos in range(len(list))
      for wanted in label
      if list[pos] == wanted
  ]
  return np.array(matches)

In [None]:
# Decode the one-hot labels back to their string form.
dec_abstraksi = encoder_cakapan.inverse_transform(abstraksi)
# Positions of the sequences labelled "Jijik" or "Takut", which are removed for the 4-class setup.
del_idxs = get_label_idxs(dec_abstraksi, ["Jijik", "Takut"])

In [None]:
# Drop the selected sequences from the un-padded feature views.
X_akustik_4 = np.delete(sequential_features_akustik, del_idxs)
X_leksikal_4 = np.delete(sequential_features_leksikal, del_idxs)
X_concat_4 = np.delete(sequential_features_concat, del_idxs)
# np.delete is called without `axis`, so the decoded label array is flattened before deletion.
label_4 = np.delete(dec_abstraksi, del_idxs)
print(X_akustik_4.shape, X_leksikal_4.shape, X_concat_4.shape, label_4.shape)

(1936,) (1936,) (1936,) (1936,)


In [None]:
# Separate encoder for the reduced label set.
# NOTE(review): `sparse=False` was renamed to `sparse_output` in newer scikit-learn
# releases — confirm against the installed version before upgrading.
encoder_cakapan_4 = OneHotEncoder(sparse=False)
# Rebinds label_4 (column vector, then one-hot matrix); re-running this cell re-encodes it.
label_4 = label_4.reshape(-1, 1)
label_4 = encoder_cakapan_4.fit_transform(label_4)
label_4.shape

(1936, 4)

In [None]:
# Pad the reduced feature views at the end ("post") into dense float32 arrays.
X_akustik_4 = tf.keras.preprocessing.sequence.pad_sequences(X_akustik_4, padding="post", dtype='float32')
X_leksikal_4 = tf.keras.preprocessing.sequence.pad_sequences(X_leksikal_4, padding="post", dtype='float32')
X_concat_4 = tf.keras.preprocessing.sequence.pad_sequences(X_concat_4, padding="post", dtype='float32')
# Label rows already share one length; the call produces the float32 label matrix y_4.
y_4 = tf.keras.preprocessing.sequence.pad_sequences(label_4, padding="post", dtype='float32')
print(X_akustik_4.shape, X_leksikal_4.shape, X_concat_4.shape, y_4.shape)

(1936, 15, 88) (1936, 15, 5017) (1936, 15, 5105) (1936, 4)


In [None]:
# Single stratified 85/15 split for the reduced label set; the index arrays are
# reused for every *_4 feature view.
sss_4 = StratifiedShuffleSplit(n_splits=1, test_size=0.15, random_state=42)
# n_splits=1, so the generator yields exactly one (train, test) pair.
train_index_4, test_index_4 = next(sss_4.split(X_akustik_4, y_4))
print("Train data", train_index_4.shape)
print("Test data", test_index_4.shape)

Train data (1645,)
Test data (291,)


## 1. Model Gabungan Level Fitur

### Modelling Preparation

#### Define Model

In [None]:
def get_gru_1(shape, label_shape):
  """Build the plain GRU classifier: Masking -> GRU(128) -> softmax.

  Args:
    shape: Input shape handed to the Masking layer as ``input_shape``.
    label_shape: Number of units of the softmax output layer.

  Returns:
    An uncompiled ``Sequential`` model.
  """
  return Sequential([
      # mask_value=0 matches the zero padding added to the sequences.
      layers.Masking(mask_value=0, input_shape=shape),
      layers.GRU(128),
      layers.Dense(label_shape, activation='softmax'),
  ])

def get_lstm_1(shape, label_shape):
  """Build the plain LSTM classifier: Masking -> LSTM(128) -> softmax.

  Args:
    shape: Input shape handed to the Masking layer as ``input_shape``.
    label_shape: Number of units of the softmax output layer.

  Returns:
    An uncompiled ``Sequential`` model.
  """
  return Sequential([
      # mask_value=0 matches the zero padding added to the sequences.
      layers.Masking(mask_value=0, input_shape=shape),
      layers.LSTM(128),
      layers.Dense(label_shape, activation='softmax'),
  ])

In [None]:
def get_gru_2(shape, label_shape):
  """Build the GRU classifier with dropout: Masking -> GRU(128) -> Dropout(0.2) -> softmax.

  Args:
    shape: Input shape handed to the Masking layer as ``input_shape``.
    label_shape: Number of units of the softmax output layer.

  Returns:
    An uncompiled ``Sequential`` model.
  """
  return Sequential([
      # mask_value=0 matches the zero padding added to the sequences.
      layers.Masking(mask_value=0, input_shape=shape),
      layers.GRU(128),
      layers.Dropout(0.2),
      layers.Dense(label_shape, activation='softmax'),
  ])

def get_lstm_2(shape, label_shape):
  """Build the LSTM classifier with dropout: Masking -> LSTM(256) -> Dropout(0.2) -> softmax.

  Args:
    shape: Input shape handed to the Masking layer as ``input_shape``.
    label_shape: Number of units of the softmax output layer.

  Returns:
    An uncompiled ``Sequential`` model.
  """
  return Sequential([
      # mask_value=0 matches the zero padding added to the sequences.
      layers.Masking(mask_value=0, input_shape=shape),
      # NOTE(review): 256 units here, while the other builders in this notebook use 128 — confirm intentional.
      layers.LSTM(256),
      layers.Dropout(0.2),
      layers.Dense(label_shape, activation='softmax'),
  ])

In [None]:
def get_gru_3(shape, label_shape):
  """Build the GRU classifier with a hidden dense layer.

  Layers: Masking -> GRU(128) -> Dropout(0.2) -> Dense(128, relu) -> softmax.

  Args:
    shape: Input shape handed to the Masking layer as ``input_shape``.
    label_shape: Number of units of the softmax output layer.

  Returns:
    An uncompiled ``Sequential`` model.
  """
  return Sequential([
      # mask_value=0 matches the zero padding added to the sequences.
      layers.Masking(mask_value=0, input_shape=shape),
      layers.GRU(128),
      layers.Dropout(0.2),
      layers.Dense(128, activation='relu'),
      layers.Dense(label_shape, activation='softmax'),
  ])

def get_lstm_3(shape, label_shape):
  """Build the LSTM classifier with a hidden dense layer.

  Layers: Masking -> LSTM(128) -> Dropout(0.2) -> Dense(128, relu) -> softmax.

  Args:
    shape: Input shape handed to the Masking layer as ``input_shape``.
    label_shape: Number of units of the softmax output layer.

  Returns:
    An uncompiled ``Sequential`` model.
  """
  return Sequential([
      # mask_value=0 matches the zero padding added to the sequences.
      layers.Masking(mask_value=0, input_shape=shape),
      layers.LSTM(128),
      layers.Dropout(0.2),
      layers.Dense(128, activation='relu'),
      layers.Dense(label_shape, activation='softmax'),
  ])

#### Define Training Parameter

In [None]:
# Global training settings; the training helpers below read these names directly.
batch_size = 32
epochs = 20
# NOTE(review): validation_split is not referenced by any training helper in this
# notebook (they pass validation_data explicitly).
validation_split = 0.2
# Rebound to an Adam instance further down, in the "Evaluate Model" section.
optimizer = 'adam'
loss = 'categorical_crossentropy'
metrics = ['accuracy']
# Stop when validation accuracy has not improved by at least 1e-4 for 3 epochs and
# roll back to the best weights seen.
callbacks = [EarlyStopping(monitor='val_accuracy', mode='max', min_delta=0.0001, patience=3, restore_best_weights=True)]

#### Define Experiment Scheme

In [None]:
def create_model(shape, label_shape, mode):
  """Build the architecture named by ``mode``.

  ``train_5fold_cv`` calls this helper, but no definition existed in the
  notebook. The mapping below follows the model names used in the comparison
  cell ("gru 1" ... "lstm 3").
  NOTE(review): presumably this matches the lost original helper — confirm.

  Args:
    shape: Input shape forwarded to the builder.
    label_shape: Number of output classes forwarded to the builder.
    mode: One of "gru 1", "gru 2", "gru 3", "lstm 1", "lstm 2", "lstm 3".

  Returns:
    The uncompiled model returned by the matching ``get_*`` builder.

  Raises:
    ValueError: If ``mode`` is not a known model name.
  """
  builders = {
      "gru 1": get_gru_1,
      "gru 2": get_gru_2,
      "gru 3": get_gru_3,
      "lstm 1": get_lstm_1,
      "lstm 2": get_lstm_2,
      "lstm 3": get_lstm_3,
  }
  if mode not in builders:
    raise ValueError("Unknown model name: " + repr(mode))
  return builders[mode](shape, label_shape)


def train_5fold_cv(X, y, shape, label_shape, mode):
  """Train and score one architecture with shuffled 5-fold cross-validation.

  Training settings (``loss``, ``optimizer``, ``metrics``, ``epochs``,
  ``batch_size``, ``callbacks``) are read from the notebook's global
  parameters. A fresh model is built for every fold.

  NOTE(review): each fold's validation part drives early stopping and is also
  the data the fold is scored on, so the reported scores are optimistic.

  Args:
    X: Feature array indexable by integer index arrays.
    y: One-hot label array aligned with ``X``.
    shape: Model input shape.
    label_shape: Number of output classes.
    mode: Model name understood by ``create_model``.

  Returns:
    Tuple ``(loss_score, accuracy_score)``: the fold means rounded to four
    decimals.
  """
  loss_score = []
  accuracy_score = []

  cv = KFold(n_splits=5, random_state=42, shuffle=True)

  for train_index, val_index in cv.split(X):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    model = create_model(shape, label_shape, mode)
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    model.fit(X_train, y_train,
              validation_data=(X_val, y_val),
              epochs=epochs,
              batch_size=batch_size,
              callbacks=callbacks)

    l, a = model.evaluate(X_val, y_val)
    loss_score.append(l)
    accuracy_score.append(a)

  loss_score = np.round(np.mean(loss_score), 4)
  accuracy_score = np.round(np.mean(accuracy_score), 4)
  print("Loss Score : ", loss_score, "Accuracy Score : ", accuracy_score)
  return loss_score, accuracy_score

In [None]:
def train_stratified_random_sampling(X_train, y_train, X_test, y_test, shape, label_shape, return_sequences, encoder=None):
  """Train one model on the train split and score it on the test split.

  The callers in this notebook pass a model name (e.g. ``"lstm 3"``) as the
  seventh argument and a fitted label encoder as the eighth. The previous
  definition accepted only seven arguments and called builders that are not
  defined in the notebook. Both call forms are supported now.

  Training settings (``loss``, ``optimizer``, ``metrics``, ``epochs``,
  ``batch_size``, ``callbacks``) are read from the notebook's global
  parameters.

  NOTE(review): the test split is also used as ``validation_data`` for early
  stopping, so the reported test scores are optimistic.

  Args:
    X_train: Training features.
    y_train: One-hot training labels.
    X_test: Test features.
    y_test: One-hot test labels.
    shape: Model input shape.
    label_shape: Number of output classes.
    return_sequences: Either a model name ("gru 1", "gru 2", "gru 3",
      "lstm 1", "lstm 2", "lstm 3"), or — legacy form — a truthy/falsy flag
      selecting ``get_contextual_model`` / ``get_classification_model``.
      NOTE(review): the two legacy builders are not defined in this notebook.
    encoder: Optional fitted encoder with ``inverse_transform``. When given,
      a classification report and a confusion matrix for the test split are
      printed after the scores.

  Returns:
    Tuple ``(model, accuracy_score)`` with the trained model and its test
    accuracy rounded to four decimals.

  Raises:
    ValueError: If ``return_sequences`` is a string that is not a known
      model name.
  """
  if isinstance(return_sequences, str):
    builders = {
        "gru 1": get_gru_1,
        "gru 2": get_gru_2,
        "gru 3": get_gru_3,
        "lstm 1": get_lstm_1,
        "lstm 2": get_lstm_2,
        "lstm 3": get_lstm_3,
    }
    if return_sequences not in builders:
      raise ValueError("Unknown model name: " + repr(return_sequences))
    model = builders[return_sequences](shape, label_shape)
  elif return_sequences:
    model = get_contextual_model(shape, label_shape)
  else:
    model = get_classification_model(shape, label_shape)

  model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
  model.fit(X_train, y_train,
            validation_data=(X_test, y_test),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks)

  loss_score, accuracy_score = model.evaluate(X_test, y_test)
  loss_score = np.round(loss_score, 4)
  accuracy_score = np.round(accuracy_score, 4)

  print("Loss Score : ", loss_score, "Accuracy Score : ", accuracy_score)

  if encoder is not None:
    # Per-class report for this run, in the same form as the evaluation cells.
    y_pred = encoder.inverse_transform(model.predict(X_test))
    y_true = encoder.inverse_transform(y_test)
    print(classification_report(y_true, y_pred, digits=4))
    print(confusion_matrix(y_true, y_pred))

  return model, accuracy_score

### Model Comparison

In [None]:
# Names of the architectures compared in the experiment below.
model = ["gru 1", "gru 2", "gru 3", "lstm 1", "lstm 2", "lstm 3"]
# Number of times the whole comparison is repeated.
n = 5

In [None]:
def run_tahap_4(X, label, model, train_index, n):
  """Repeat the 5-fold comparison of every architecture ``n`` times.

  Args:
    X: Full feature array.
    label: One-hot label array aligned with ``X``.
    model: Iterable of model names passed to ``train_5fold_cv``.
    train_index: Index array selecting the training portion of ``X``/``label``.
    n: Number of repetitions.

  Returns:
    List of ``(repetition, scores)`` tuples, where ``scores`` is a list of
    ``(model_name, loss_score, accuracy_score)`` tuples in ``model`` order.
  """
  # The training subset is the same for every repetition and model.
  X_train, y_train = X[train_index], label[train_index]
  performances = []
  for i in range(n):
    p = []
    for m in model:
      # The class count comes from the labels passed in, not from the notebook-global `y`.
      loss_score, accuracy_score = train_5fold_cv(X_train, y_train, X_train.shape[1:], y_train.shape[1], m)
      p.append((m, loss_score, accuracy_score))
    performances.append((i, p))
  return performances

In [None]:
# Run the full comparison on the concatenated features, using the training split only.
performances = run_tahap_4(X_concat, y, model, train_index, n)

Epoch 1/10
Epoch 2/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss Score :  1.4443 Accuracy Score :  0.3725
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss Score :  1.4381 Accuracy Score :  0.3743
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 1/10
Epo

In [None]:
# Show the accuracy of every architecture for each repetition.
# Rows follow the order of the `model` list.
for p in performances:
  print("Training ke- : ", p[0])
  res = pd.DataFrame(p[1], columns=['Model', 'Loss Score', 'Accuracy Score'])
  print(res['Accuracy Score'])

Training ke- :  0
0    0.3725
1    0.3743
2    0.3743
3    0.3737
4    0.3749
5    0.3737
Name: Accuracy Score, dtype: float64
Training ke- :  1
0    0.3713
1    0.3731
2    0.3590
3    0.3737
4    0.3737
5    0.3743
Name: Accuracy Score, dtype: float64
Training ke- :  2
0    0.3702
1    0.3725
2    0.3725
3    0.3737
4    0.3737
5    0.3737
Name: Accuracy Score, dtype: float64
Training ke- :  3
0    0.3760
1    0.3737
2    0.3743
3    0.3737
4    0.3749
5    0.3755
Name: Accuracy Score, dtype: float64
Training ke- :  4
0    0.3731
1    0.3725
2    0.3749
3    0.3725
4    0.3737
5    0.3755
Name: Accuracy Score, dtype: float64


In [None]:
# Average each architecture's scores over the repetitions actually recorded in
# `performances` (instead of dividing by the notebook-global `n`), then rank by accuracy.
score_columns = ['Model', 'Loss Score', 'Accuracy Score']
all_runs = pd.concat(
    [pd.DataFrame(run_scores, columns=score_columns) for _, run_scores in performances],
    ignore_index=True,
)
# sort=False keeps the architectures in the order they were evaluated.
res = all_runs.groupby('Model', sort=False, as_index=False)[['Loss Score', 'Accuracy Score']].mean()
print(res.sort_values(by='Accuracy Score', ascending=False, ignore_index=True))

    Model  Loss Score  Accuracy Score
0  lstm 3     1.42546         0.37454
1  lstm 2     1.43442         0.37418
2  lstm 1     1.43624         0.37346
3   gru 2     1.43360         0.37322
4   gru 1     1.43412         0.37262
5   gru 3     1.42700         0.37100


#### Training Model

In [None]:
def run_tahap_5(X_train, y_train, X_test, y_test, n, encoder):
  """Train ``n`` models and return the one with the best test accuracy.

  NOTE(review): the literal ``2`` passed as the model selector is kept from the
  original code; its meaning cannot be determined from this notebook — confirm
  which architecture was intended.

  Args:
    X_train: Training features.
    y_train: One-hot training labels.
    X_test: Test features.
    y_test: One-hot test labels.
    n: Number of training runs.
    encoder: Label encoder forwarded to the training helper.

  Returns:
    The best model, or ``None`` when ``n`` is 0.
  """
  best_model = None
  best_acc = 0
  for i in range(n):
    # The class count comes from the labels passed in, not from the notebook-global `y`.
    model, accuracy_score = train_stratified_random_sampling(X_train, y_train, X_test, y_test, X_train.shape[1:], y_train.shape[1], 2, encoder)
    # The first run is always kept so best_model is bound even for a 0-accuracy run.
    if best_model is None or best_acc < accuracy_score:
      best_acc = accuracy_score
      best_model = model
  return best_model

In [None]:
# One training run on the 6-class concatenated features.
best_model = run_tahap_5(X_concat[train_index], y[train_index], X_concat[test_index], y[test_index], 1, encoder_cakapan)

In [None]:
# Persist the model to Drive. The same file path is written again later, in the "Evaluate Model" section.
best_model.save(drive_dir + "/Eksperimen/Model/model_gabungan_langsung_6.h5")

### Evaluate Model

#### 6 Kelas Emosi

In [None]:
# Rebinds the global `optimizer` read by the training helpers, so every model
# compiled after this cell shares this one Adam instance.
# NOTE(review): learning_rate=0.1 is far above Keras' Adam default (0.001) — confirm intentional.
optimizer = keras.optimizers.Adam(learning_rate=0.1) 

In [None]:
def run_model(X_train, y_train, X_test, y_test, n, encoder):
  """Train the "lstm 3" architecture ``n`` times and keep the best run.

  NOTE(review): runs are ranked by accuracy on the test split, so the accuracy
  of the returned model is an optimistic estimate.

  Args:
    X_train: Training features.
    y_train: One-hot training labels.
    X_test: Test features.
    y_test: One-hot test labels.
    n: Number of training runs.
    encoder: Label encoder forwarded to the training helper.

  Returns:
    The model with the highest test accuracy, or ``None`` when ``n`` is 0.
  """
  best_model = None
  best_acc = 0
  for i in range(n):
    model, accuracy_score = train_stratified_random_sampling(X_train, y_train, X_test, y_test, X_train.shape[1:], y_train.shape[1], "lstm 3", encoder)
    # The first run is always kept so best_model is bound even for a 0-accuracy run.
    if best_model is None or best_acc < accuracy_score:
      best_acc = accuracy_score
      best_model = model
  return best_model

In [None]:
# Train the 6-class feature-level fusion model (a single run).
model_level_fitur = run_model(X_concat[train_index], y[train_index], X_concat[test_index], y[test_index], 1, encoder_cakapan)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Loss Score :  1.4634 Accuracy Score :  0.5216
              precision    recall  f1-score   support

       Jijik     0.0000    0.0000    0.0000         1
       Marah     0.8000    0.1905    0.3077        42
       Sedih     0.4435    0.6395    0.5238        86
      Senang     0.6705    0.5221    0.5871       113
       Takut     0.0000    0.0000    0.0000         9
    Terkejut     0.4430    0.7000    0.5426        50

    accuracy                         0.5216       301
   macro avg     0.3928    0.3420    0.3269       301
weighted avg     0.5636    0.5216    0.5031       301

[[ 0  0  0  0  0  1]
 [ 0  8 19  5  0 10]
 [ 0  1 55 19  0 11]
 [ 0  0 37 59  0 17]
 [ 0  0  3  1  0  5]
 [ 0  1 10  4  0 35]]


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Class probabilities for the test conversations.
pred = model_level_fitur.predict(X_concat[test_index])
# Decode the model output and the one-hot ground truth back to label strings.
y_pred = encoder_cakapan.inverse_transform(pred)
y_true = encoder_cakapan.inverse_transform(y[test_index])
print(classification_report(y_true, y_pred, digits=4))

              precision    recall  f1-score   support

       Jijik     0.0000    0.0000    0.0000         1
       Marah     0.6129    0.4524    0.5205        42
       Sedih     0.4795    0.4070    0.4403        86
      Senang     0.5745    0.7168    0.6378       113
       Takut     0.0000    0.0000    0.0000         9
    Terkejut     0.5179    0.5800    0.5472        50

    accuracy                         0.5449       301
   macro avg     0.3641    0.3594    0.3576       301
weighted avg     0.5242    0.5449    0.5288       301



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Persist the 6-class feature-level model; this writes to the same path as the earlier best_model.save call.
model_level_fitur.save(drive_dir + "/Eksperimen/Model/model_gabungan_langsung_6.h5")

In [None]:
# Reload the saved model from Drive and re-evaluate it on the test split.
model_level_fitur = keras.models.load_model(drive_dir + "/Eksperimen/Model/model_gabungan_langsung_6.h5")
# Kept under its own name: the final results section reads pred_gabungan_fitur.
pred_gabungan_fitur = model_level_fitur.predict(X_concat[test_index])
y_pred = encoder_cakapan.inverse_transform(pred_gabungan_fitur)
y_true = encoder_cakapan.inverse_transform(y[test_index])
print(classification_report(y_true, y_pred, digits=4))

              precision    recall  f1-score   support

       Jijik     0.0000    0.0000    0.0000         1
       Marah     0.6129    0.4524    0.5205        42
       Sedih     0.4795    0.4070    0.4403        86
      Senang     0.5745    0.7168    0.6378       113
       Takut     0.0000    0.0000    0.0000         9
    Terkejut     0.5179    0.5800    0.5472        50

    accuracy                         0.5449       301
   macro avg     0.3641    0.3594    0.3576       301
weighted avg     0.5242    0.5449    0.5288       301



  _warn_prf(average, modifier, msg_start, len(result))


#### 4 Kelas Emosi

In [None]:
# Train the 4-class feature-level fusion model 10 times and keep the run with the best test accuracy.
model_level_fitur_4 = run_model(X_concat_4[train_index_4], y_4[train_index_4], X_concat_4[test_index_4], y_4[test_index_4], 10, encoder_cakapan_4)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Loss Score :  1.1112 Accuracy Score :  0.5395
              precision    recall  f1-score   support

       Marah     0.6154    0.1905    0.2909        42
       Sedih     0.5000    0.4186    0.4557        86
      Senang     0.5414    0.7522    0.6296       113
    Terkejut     0.5714    0.5600    0.5657        50

    accuracy                         0.5395       291
   macro avg     0.5571    0.4803    0.4855       291
weighted avg     0.5450    0.5395    0.5183       291

[[ 8 12 14  8]
 [ 3 36 42  5]
 [ 2 18 85  8]
 [ 0  6 16 28]]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Loss Score :  1.0903 Accuracy Score :  0.5498
              precision    recall  f1-score   support

       Marah     0.4667    0.5000    0.4828        42
       Sedih     0.5667    0.1977    0.2931        86
      Senang     0.5613    0.7699    0.6493       113
    Terkejut     0.5738    0.7000    0.63

In [None]:
# Evaluate the kept 4-class model on the 4-class test split.
pred = model_level_fitur_4.predict(X_concat_4[test_index_4])
# Decode the model output and the one-hot ground truth back to label strings.
y_pred = encoder_cakapan_4.inverse_transform(pred)
y_true = encoder_cakapan_4.inverse_transform(y_4[test_index_4])
print(classification_report(y_true, y_pred, digits=4))
print(confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

       Marah     0.6061    0.4762    0.5333        42
       Sedih     0.5062    0.4767    0.4910        86
      Senang     0.6325    0.6549    0.6435       113
    Terkejut     0.5333    0.6400    0.5818        50

    accuracy                         0.5739       291
   macro avg     0.5695    0.5620    0.5624       291
weighted avg     0.5743    0.5739    0.5719       291

[[20 12  4  6]
 [ 5 41 29 11]
 [ 5 23 74 11]
 [ 3  5 10 32]]


In [None]:
# Persist the 4-class feature-level model to Drive.
model_level_fitur_4.save(drive_dir + "/Eksperimen/Model/model_gabungan_langsung_4.h5")

## 2. Model Gabungan Level Keputusan

### 6 Kelas

#### Load Model

In [None]:
# Load the two saved single-modality 6-class models from Drive; they are not trained in this notebook.
model_akustik = keras.models.load_model(drive_dir + "/Eksperimen/Model/model_akustik_langsung_6.h5")
model_leksikal = keras.models.load_model(drive_dir + "/Eksperimen/Model/model_leksikal_langsung_6.h5")

#### Evaluation Model

In [None]:
# Class probabilities of each single-modality model on the shared test indices.
pred_akustik = model_akustik.predict(X_akustik[test_index])
pred_leksikal = model_leksikal.predict(X_leksikal[test_index])

In [None]:
# Report for the acoustic model.
y_pred = encoder_cakapan.inverse_transform(pred_akustik)
y_true = encoder_cakapan.inverse_transform(y[test_index])
print(classification_report(y_true, y_pred, digits=4))
print(confusion_matrix(y_true, y_pred))

# Report for the lexical model (same ground truth).
y_pred = encoder_cakapan.inverse_transform(pred_leksikal)
y_true = encoder_cakapan.inverse_transform(y[test_index])
print(classification_report(y_true, y_pred, digits=4))
print(confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

       Jijik     0.0000    0.0000    0.0000         1
       Marah     0.5217    0.2857    0.3692        42
       Sedih     0.5250    0.2442    0.3333        86
      Senang     0.4009    0.7522    0.5231       113
       Takut     0.0000    0.0000    0.0000         9
    Terkejut     0.5000    0.2600    0.3421        50

    accuracy                         0.4352       301
   macro avg     0.3246    0.2570    0.2613       301
weighted avg     0.4564    0.4352    0.4000       301

[[ 0  0  0  0  0  1]
 [ 0 12  3 26  0  1]
 [ 0  3 21 61  0  1]
 [ 0  6 12 85  0 10]
 [ 0  0  0  9  0  0]
 [ 0  2  4 31  0 13]]
              precision    recall  f1-score   support

       Jijik     0.0000    0.0000    0.0000         1
       Marah     0.7083    0.4048    0.5152        42
       Sedih     0.4227    0.4767    0.4481        86
      Senang     0.5796    0.8053    0.6741       113
       Takut     0.0000    0.0000    0.0000         9
    T

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Decision-level fusion: for every test sample keep the probability vector of
# the model whose top probability is strictly higher; ties go to the lexical model.
pred_gabungan_keputusan = []
for i in range(len(pred_akustik)):
  conf_akustik, conf_leksikal = pred_akustik[i].max(), pred_leksikal[i].max()
  akustik_menang = conf_akustik > conf_leksikal
  print("Akustik" if akustik_menang else "Leksikal", conf_akustik, conf_leksikal)
  pred_gabungan_keputusan.append(pred_akustik[i] if akustik_menang else pred_leksikal[i])
pred_gabungan_keputusan = np.array(pred_gabungan_keputusan)

In [None]:
# Report for the 6-class decision-level fusion.
y_pred = encoder_cakapan.inverse_transform(pred_gabungan_keputusan)
y_true = encoder_cakapan.inverse_transform(y[test_index])
print(classification_report(y_true, y_pred, digits=4))
print(confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

       Jijik     0.0000    0.0000    0.0000         1
       Marah     0.6000    0.2143    0.3158        42
       Sedih     0.4821    0.3140    0.3803        86
      Senang     0.4593    0.8496    0.5963       113
       Takut     0.0000    0.0000    0.0000         9
    Terkejut     0.6190    0.2600    0.3662        50

    accuracy                         0.4817       301
   macro avg     0.3601    0.2730    0.2764       301
weighted avg     0.4967    0.4817    0.4374       301

[[ 0  0  0  0  0  1]
 [ 0  9 11 21  0  1]
 [ 0  2 27 56  0  1]
 [ 0  2 10 96  0  5]
 [ 0  0  0  9  0  0]
 [ 0  2  8 27  0 13]]


  _warn_prf(average, modifier, msg_start, len(result))


### 4 Kelas

#### Load Model

In [None]:
# Load the two saved single-modality 4-class models from Drive; they are not trained in this notebook.
model_akustik_4 = keras.models.load_model(drive_dir + "/Eksperimen/Model/model_akustik_langsung_4.h5")
model_leksikal_4 = keras.models.load_model(drive_dir + "/Eksperimen/Model/model_leksikal_langsung_4.h5")

#### Evaluation Model

In [None]:
# Class probabilities of each single-modality model on the shared 4-class test indices.
pred_akustik_4 = model_akustik_4.predict(X_akustik_4[test_index_4])
pred_leksikal_4 = model_leksikal_4.predict(X_leksikal_4[test_index_4])

In [None]:
# Report for the 4-class acoustic model.
y_pred = encoder_cakapan_4.inverse_transform(pred_akustik_4)
y_true = encoder_cakapan_4.inverse_transform(y_4[test_index_4])
print(classification_report(y_true, y_pred, digits=4))
print(confusion_matrix(y_true, y_pred))

# Report for the 4-class lexical model (same ground truth).
y_pred = encoder_cakapan_4.inverse_transform(pred_leksikal_4)
y_true = encoder_cakapan_4.inverse_transform(y_4[test_index_4])
print(classification_report(y_true, y_pred, digits=4))
print(confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

       Marah     0.4483    0.3095    0.3662        42
       Sedih     0.4464    0.2907    0.3521        86
      Senang     0.4286    0.7168    0.5364       113
    Terkejut     0.4706    0.1600    0.2388        50

    accuracy                         0.4364       291
   macro avg     0.4485    0.3693    0.3734       291
weighted avg     0.4439    0.4364    0.4062       291

[[13  5 21  3]
 [ 4 25 56  1]
 [ 8 19 81  5]
 [ 4  7 31  8]]
              precision    recall  f1-score   support

       Marah     0.5526    0.5000    0.5250        42
       Sedih     0.4375    0.4070    0.4217        86
      Senang     0.6202    0.7080    0.6612       113
    Terkejut     0.5455    0.4800    0.5106        50

    accuracy                         0.5498       291
   macro avg     0.5389    0.5237    0.5296       291
weighted avg     0.5436    0.5498    0.5449       291

[[21 13  5  3]
 [ 8 35 34  9]
 [ 3 22 80  8]
 [ 6 10 10 24]]


In [None]:
# Decision-level fusion (4 classes): for every test sample keep the probability
# vector of the model whose top probability is strictly higher; ties go to the lexical model.
pred_gabungan_4 = []
for i in range(len(pred_akustik_4)):
  conf_akustik_4, conf_leksikal_4 = pred_akustik_4[i].max(), pred_leksikal_4[i].max()
  akustik_menang_4 = conf_akustik_4 > conf_leksikal_4
  print("Akustik" if akustik_menang_4 else "Leksikal", conf_akustik_4, conf_leksikal_4)
  pred_gabungan_4.append(pred_akustik_4[i] if akustik_menang_4 else pred_leksikal_4[i])
pred_gabungan_4 = np.array(pred_gabungan_4)

In [None]:
# Report for the 4-class decision-level fusion.
# Note: this leaves `y_true` bound to the 4-class test labels.
y_pred = encoder_cakapan_4.inverse_transform(pred_gabungan_4)
y_true = encoder_cakapan_4.inverse_transform(y_4[test_index_4])
print(classification_report(y_true, y_pred, digits=4))
print(confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

       Marah     0.5429    0.4524    0.4935        42
       Sedih     0.4348    0.3488    0.3871        86
      Senang     0.5541    0.7699    0.6444       113
    Terkejut     0.7333    0.4400    0.5500        50

    accuracy                         0.5430       291
   macro avg     0.5663    0.5028    0.5188       291
weighted avg     0.5480    0.5430    0.5304       291

[[19 11 10  2]
 [ 9 30 45  2]
 [ 3 19 87  4]
 [ 4  9 15 22]]


## Hasil Akhir Pengenalan Emosi

### 6 kelas emosi

In [None]:
# Confidence of each 6-class prediction: the highest class probability per test sample.
# Must run before the next cell, which rebinds the pred_* names to label strings.
pred_akustik_proba = pred_akustik.max(axis=1)
pred_leksikal_proba = pred_leksikal.max(axis=1)
pred_gabungan_fitur_proba = pred_gabungan_fitur.max(axis=1)
pred_gabungan_keputusan_proba = pred_gabungan_keputusan.max(axis=1)

In [None]:
# Decode every 6-class probability matrix into a flat vector of label strings.
# reshape(-1) replaces the hard-coded test-set size (301).
# NOTE: this rebinds the pred_* names from probabilities to labels, so the cell
# cannot be run twice without re-running the prediction cells.
pred_akustik = encoder_cakapan.inverse_transform(pred_akustik).reshape(-1)
pred_leksikal = encoder_cakapan.inverse_transform(pred_leksikal).reshape(-1)
pred_gabungan_fitur = encoder_cakapan.inverse_transform(pred_gabungan_fitur).reshape(-1)
pred_gabungan_keputusan = encoder_cakapan.inverse_transform(pred_gabungan_keputusan).reshape(-1)
# Recompute the 6-class ground truth here: by this point `y_true` was last bound
# to the 4-class test labels, which have a different length.
true = encoder_cakapan.inverse_transform(y[test_index]).reshape(-1)

In [None]:
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code — only load trusted files.
raw = joblib.load(drive_dir + '/Eksperimen/Data/basic.pkl')


def _conversation_texts(utterance_texts, positions):
  """Return one transcript string per selected conversation.

  ``positions`` (``test_index``) refers to conversations, while the transcript
  columns hold one row per utterance. Indexing the utterance rows directly
  with ``positions`` therefore returns text unrelated to the predictions.
  The utterances are grouped with the same ``construct_sequential`` boundaries
  used for the features, and each selected conversation is joined with " | ".

  NOTE(review): assumes a column with as many rows as ``id`` is aligned
  row-for-row with ``id``; confirm for basic.pkl. A column with a different
  length is treated as already conversation-level and indexed by position.
  """
  if len(utterance_texts) != len(id):
    return np.asarray(utterance_texts)[positions]
  grouped = construct_sequential(id, utterance_texts)
  return [" | ".join(str(text) for text in grouped[pos]) for pos in positions]


result = { 'Transkripsi': _conversation_texts(raw['Transkripsi'], test_index),
          'Preprocessed' : _conversation_texts(data['Transkripsi'], test_index),
          'Abstraksi Emosi' : true,
          'Prediksi Model Akustik' : pred_akustik,
          'Confidence Level Model Akustik' : pred_akustik_proba,
          'Prediksi Model Leksikal' : pred_leksikal,
          'Confidence Level Model Leksikal' : pred_leksikal_proba,
          'Prediksi Model Gabungan Level Fitur' : pred_gabungan_fitur,
          'Confidence Level Model Gabungan Level Fitur' : pred_gabungan_fitur_proba,
          'Prediksi Model Gabungan Level Keputusan' : pred_gabungan_keputusan,
          'Confidence Level Model Gabungan Level Keputusan' : pred_gabungan_keputusan_proba,
          }
# Keep the conversation position as the row index, as the original Series-based frame did.
df = pd.DataFrame(result, index=test_index)
# Write the table inside the Colab VM and trigger a browser download.
df.to_excel("output.xlsx")
files.download('output.xlsx')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>