# Initial Setup

In [2]:
from keras import callbacks, models
from datahandler.constants import location_labels, activity_labels
from model.transformer import make_transformer_model_v1, make_transformer_model_v3
from utils import print_line_divider
import tensorflow as tf
from datahandler.data_preprocessing_from_jupyter import load_all_raw_multitask_data


In [3]:
# DATA Loader
# A single loader call returns the feature array and the two label arrays
# (context and activity) for both the train and the test split.
print_line_divider()
print("Preparing data...")
(
    x_train,
    y_context_train,
    y_activity_train,
    x_test,
    y_context_test,
    y_activity_test,
) = load_all_raw_multitask_data()

# Show the array shapes so a mismatch between features and labels is visible early.
print(
    f"Train data shape: {x_train.shape!s}"
    f" | Train label shape: {y_context_train.shape!s} and {y_activity_train.shape!s}"
)
print(
    f"Test data shape: {x_test.shape!s}"
    f" | Test label shape: {y_context_test.shape!s} and {y_activity_test.shape!s}"
)
print_line_divider()


****************************************************
Preparing data...
Loading from file: /Users/duc.letran/Desktop/FINAL PROJECT/context_transformer/data/v4/mix_labeled/mm5_datacollection.csv (1/25)
Loading from file: /Users/duc.letran/Desktop/FINAL PROJECT/context_transformer/data/v4/mix_labeled/pp1_datacollection.csv (2/25)
Loading from file: /Users/duc.letran/Desktop/FINAL PROJECT/context_transformer/data/v4/mix_labeled/ds2_datacollection.csv (3/25)
Loading from file: /Users/duc.letran/Desktop/FINAL PROJECT/context_transformer/data/v4/mix_labeled/ds1_datacollection.csv (4/25)
Loading from file: /Users/duc.letran/Desktop/FINAL PROJECT/context_transformer/data/v4/mix_labeled/tt4_datacollection.csv (5/25)
Loading from file: /Users/duc.letran/Desktop/FINAL PROJECT/context_transformer/data/v4/mix_labeled/os4_datacollection.csv (6/25)
Loading from file: /Users/duc.letran/Desktop/FINAL PROJECT/context_transformer/data/v4/mix_labeled/pp2_datacollection.csv (7/25)
Loading from file: /Users/

In [4]:
# Per-sample shape handed to the model builders: the leading axis of x_train is
# dropped, and a 2-D array gets a trailing singleton axis appended.
input_shape = (x_train.shape[1], 1) if len(x_train.shape) == 2 else x_train.shape[1:]
print(f"Input shape: {input_shape!s}")

Input shape: (40, 9)


In [5]:
def check_distribution(labels_list, set_name, is_context):
    """Print how many times each label occurs in ``labels_list``.

    Args:
        labels_list: Iterable of hashable label indices.
        set_name: Text appended to the "Distribution for set " heading.
        is_context: When true the indices are looked up in ``location_labels``,
            otherwise in ``activity_labels``.

    Labels are reported in the order in which they first appear in
    ``labels_list``. Nothing is returned; the result is only printed.
    """
    tally = {}
    for label in labels_list:
        tally[label] = tally.get(label, 0) + 1

    print_line_divider()
    print("Distribution for set " + set_name + "\n")

    label_names = location_labels if is_context else activity_labels
    for raw_label in tally:
        # Labels may arrive as floats or NumPy scalars; a plain int is needed to
        # index the name table and is also used to read the count back.
        index = int(raw_label)
        print(label_names[index] + " count is " + str(tally[index]))


# Report the label balance of each (split, task) pair: train first, then test,
# with the context labels before the activity labels inside each split.
for split_labels, split_name, uses_context_labels in (
    (y_context_train, "Training set for context detection", True),
    (y_activity_train, "Training set for activity detection", False),
    (y_context_test, "Testing set for context detection", True),
    (y_activity_test, "Testing set for activity detection", False),
):
    check_distribution(split_labels, split_name, uses_context_labels)

****************************************************
Distribution for set Training set for context detection

holdinginhand count is 1280
insidethepantpocket count is 1279
calling count is 1261
beingusedinhand count is 1307
insidethebag count is 1268
****************************************************
Distribution for set Training set for activity detection

standing count is 1982
walking count is 4413
****************************************************
Distribution for set Testing set for context detection

holdinginhand count is 220
insidethepantpocket count is 224
calling count is 240
beingusedinhand count is 194
insidethebag count is 232
****************************************************
Distribution for set Testing set for activity detection

standing count is 333
walking count is 777


# Main Training Configuration

In [6]:
# TRAINING CONFIGURATION
# Seed the Python, NumPy and TensorFlow random generators so that repeated runs
# of this notebook start from the same random state.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Windowing parameters. They are not referenced by the cells visible in this
# notebook; window_size equals the first axis of the printed input shape (40, 9).
window_time_in_seconds = 2
window_size = 40

epochs = 300
batch_size = 32
# Fraction of the training arrays held out by Keras for validation. With the
# printed split sizes (6395 train / 1110 test samples) this yields a validation
# set of roughly the same size as the test set.
validation_split = 15 / 85

# Shared optimizer configuration (Adam, initial learning rate 0.005).
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.005,
    name="Adam",
)

# Both tasks use integer class labels, hence the sparse cross-entropy loss.
context_loss_function = "sparse_categorical_crossentropy"
activity_loss_function = "sparse_categorical_crossentropy"

callback_list = [
    # Halve the learning rate after 50 epochs without val_loss improvement,
    # never going below 1e-4.
    callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=50, min_lr=0.0001),
    # Stop training after 150 epochs without val_loss improvement.
    callbacks.EarlyStopping(monitor="val_loss", patience=150, verbose=1)
]

# Multi-task learning: joint context and activity training

In [8]:
# EVALUATION
from audio.audio import play_training_is_complete
def test_model(context_ratio, activity_ratio, id):
    saved_model = "model/multitask" + str(id) + ".h5"
    _, model2 = make_transformer_model_v3(
        input_shape=input_shape,
        head_size=64,
        num_heads=4,
        ff_dim=4,
        num_transformer_blocks=1,
        mlp_dropout=0.4,
        dropout=0.25,
    )
    model2.compile(
        optimizer=optimizer,
        loss={'context_output': context_loss_function, 'activity_output': activity_loss_function},
        loss_weights={'context_output': context_ratio, 'activity_output': activity_ratio},
        metrics=["sparse_categorical_accuracy"],
    )
    model2.fit(
        x_train,
        {'context_output': y_context_train, 'activity_output': y_activity_train},
        batch_size=batch_size,
        epochs=epochs,
        callbacks=callback_list + [callbacks.ModelCheckpoint(saved_model, save_best_only=True, monitor="val_loss")],
        validation_split=validation_split,
        verbose=1,
        shuffle=True
    )
    model2 = models.load_model(saved_model)
    _, _, _, context_acc, _ = model2.evaluate(x_test,
                                              {'context_output': y_context_test, 'activity_output': y_activity_test})
    print("Accuracy for context for ratio " + str(context_ratio) + " - " + str(activity_ratio) + " : ", context_acc)
    play_training_is_complete()


In [9]:
# Equal loss weights for both heads; the checkpoint is written to model/multitask4.h5.
test_model(context_ratio=1, activity_ratio=1, id=4)

Epoch 1/300


2022-06-25 14:00:50.555237: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 7

Input #0, wav, from '/var/folders/ph/8yv0q18n4mb1439tnf3rzcf40000gp/T/tmpo520xut0.wav':
  Duration: 00:00:02.27, bitrate: 705 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 1 channels, s16, 705 kb/s
   2.19 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




# Single-task training: context-only model

In [7]:
# Build the single-task transformer that is trained on the context labels only.
_, normal_model = make_transformer_model_v1(
    input_shape=input_shape,
    head_size=64,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=1,
    mlp_units=[128],
    mlp_dropout=0.4,
    dropout=0.25,
)
normal_model.summary()

# Compile with a private copy of the shared optimizer configuration. The shared
# instance is also used by the multi-task section above; reusing the same
# object would carry its accumulated optimizer state into this model, and newer
# Keras versions may reject an optimizer already built for another model.
normal_model.compile(
    optimizer=optimizer.__class__.from_config(optimizer.get_config()),
    loss=context_loss_function,
    metrics=["sparse_categorical_accuracy"],
)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 40, 9)]      0           []                               
                                                                                                  
 layer_normalization (LayerNorm  (None, 40, 9)       18          ['input_1[0][0]']                
 alization)                                                                                       
                                                                                                  
 multi_head_attention (MultiHea  (None, 40, 9)       9993        ['layer_normalization[0][0]',    
 dAttention)                                                      'layer_normalization[0][0]']    
                                                                                              

2022-06-25 14:49:46.784854: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
from audio.audio import play_training_is_complete

# Path of the checkpoint holding the weights with the lowest validation loss.
SAVED_BEST_MODEL_2 = "model/best_model_2.h5"

# Checkpoint callback specific to this run; appended to the shared callbacks below.
best_checkpoint = callbacks.ModelCheckpoint(SAVED_BEST_MODEL_2, save_best_only=True, monitor="val_loss")

hist = normal_model.fit(
    x=x_train,
    y=y_context_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[*callback_list, best_checkpoint],
    validation_split=validation_split,
    verbose=1,
    shuffle=True,
)
# Audible notification once fit() has returned.
play_training_is_complete()

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

Input #0, wav, from '/var/folders/ph/8yv0q18n4mb1439tnf3rzcf40000gp/T/tmp25bwgzm1.wav':
  Duration: 00:00:02.27, bitrate: 705 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 1 channels, s16, 705 kb/s
   2.20 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




In [9]:
# Score the saved best checkpoint rather than the weights of the final epoch.
normal_model = models.load_model(SAVED_BEST_MODEL_2)
# The model was compiled with a single metric, so evaluate() yields [loss, accuracy].
_, acc = normal_model.evaluate(x=x_test, y=y_context_test)
print("Accuracy for context: ", acc)

Accuracy for context:  0.8477477431297302
