# 4. Training and evaluation

4.1 Preparation <br>
4.2 Data selection (optional) <br>
4.3.Training and evaluation of the model <br>
4.4 Results <br>

## 4.1 Preparation

In [1]:
# Select which GPU to use

import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0" # "0" -> usage of first GPU, "1" -> usage of second GPU

In [250]:
# Import data

import numpy as np
X_train = np.load('numpy_arrays/combination/X_train.npy')
y_train = np.load('numpy_arrays/combination/y_train.npy')
X_dev = np.load('numpy_arrays/combination/X_dev.npy')
y_dev = np.load('numpy_arrays/combination/y_dev.npy')
X_test = np.load('numpy_arrays/combination/X_test.npy')
y_test = np.load('numpy_arrays/combination/y_test.npy')

## 4.2 Data selection (optional)

In [231]:
# Select whether to use tdnn or whisper embeddings for training, skip to use the combined embeddings

datasets = [X_train, X_dev, X_test]

for n, ds in enumerate(datasets):
    new = []
    
    for i in range(len(ds)):
        new.append(ds[i][512:]) # tdnn -> [0:512]; whisper -> [512:]
        
    new = np.array(new)
    datasets[n] = new
    
X_train = datasets[0]
X_dev = datasets[1]
X_test = datasets[2]

## 4.3 Training and evaluation of the model

In [252]:
# Create the model

import tensorflow
from tensorflow.keras import layers, models

input_shape = X_train.shape[1:2]
print(input_shape)
model = models.Sequential([
    layers.Input(shape=input_shape),
    #layers.Reshape((1792, 1), input_shape=(1792,)), # for convolution
    layers.Dropout(0.4),
    layers.Dense(1000, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(102, name="output", activation='relu'),
])

model.summary()

(1792,)
Model: "sequential_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout_31 (Dropout)         (None, 1792)              0         
_________________________________________________________________
dense_9 (Dense)              (None, 1000)              1793000   
_________________________________________________________________
dropout_32 (Dropout)         (None, 1000)              0         
_________________________________________________________________
output (Dense)               (None, 102)               102102    
Total params: 1,895,102
Trainable params: 1,895,102
Non-trainable params: 0
_________________________________________________________________


In [253]:
# Compile the model

import tensorflow as tf
model.compile(
    optimizer=tf.keras.optimizers.Adam(), # lr ändern, andere optimizer
    loss='categorical_crossentropy', #tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), #
    metrics=['accuracy'],
)


In [256]:
# Train the model

history = model.fit(
    X_train,
    y_train,
    #validation_split=0.3,
    validation_data=(X_dev, y_dev),
    epochs=20,
    batch_size=4096, # 2048, 4096, 8192, 16384, 32768
    callbacks=tf.keras.callbacks.EarlyStopping(verbose=1, patience=4, monitor='val_accuracy', restore_best_weights=True),
    shuffle=True,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 00015: early stopping


In [None]:
# Evaluate the model

model.evaluate(X_test, y_test)

## 4.4 Results

### Best results

In [None]:
# training with tdnn, whisper and combined empeddings
# tdnn ca 76-77%; whisper 87.53%; combination 90.01%

### Other results

In [None]:
# 0.3; loss: 0.5441 - accuracy: 0.8782 - val_loss: 0.8528 - val_accuracy: 0.7716

In [42]:
model_current = model
model_current.evaluate(X_test, y_test)



[0.8029866218566895, 0.7869811058044434]

In [None]:
# 0.3; batch_size=8192; loss: 0.6918 - accuracy: 0.8746 - val_loss: 1.0480 - val_accuracy: 0.7869

In [47]:
model_current2 = model
model_current2.evaluate(X_test, y_test)



[0.9772291779518127, 0.8004883527755737]

In [None]:
# 0.3; batch_size=8192; loss: 0.6118 - accuracy: 0.8917 - val_loss: 0.9530 - val_accuracy: 0.7926

In [50]:
model_current3 = model
model_current3.evaluate(X_test, y_test)



[0.8937516212463379, 0.8069271445274353]

In [59]:
# 0.3; batch_size=32768; loss: 0.5478 - accuracy: 0.8710 - val_loss: 0.7211 - val_accuracy: 0.8256

In [60]:
model_current4 = model
model_current4.evaluate(X_test, y_test)



[0.6521162986755371, 0.8330934047698975]

In [70]:
# 0.4; batch_size=16384; loss: 0.6065 - accuracy: 0.8722 - val_loss: 0.7078 - val_accuracy: 0.8283

In [71]:
model_current5 = model
model_current5.evaluate(X_test, y_test)



[0.6582101583480835, 0.8400591015815735]

In [78]:
# dropout=0.4; batch_size=4096; loss: 0.6669 - accuracy: 0.9093 - val_loss: 0.9642 - val_accuracy: 0.8384

In [146]:
model_current6 = model
model_current6.evaluate(X_test, y_test)



[0.8455692529678345, 0.8539133667945862]