In [1]:
# !pip install seaborn

In [2]:
import os
import time
import datetime
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score, confusion_matrix

import tensorflow as tf
from tensorflow import keras
from tqdm.keras import TqdmCallback

import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')

import Model

2024-07-13 01:48:36.081150: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
def extract_features(X, feature):
    """Select a named subset of the feature columns along the last axis of X.

    X is indexed with three axes; the last axis is sliced as follows:
    columns 0-19 -> mfcc, 20 -> zcr, 21-32 -> chroma, 33 -> rms,
    34 -> centroid, 35 -> bandwidth, 36 -> rolloff.

    Parameters
    ----------
    X : array indexable as X[:, :, k] (the last axis needs at least 37 entries,
        since every slice is taken before the option is inspected).
    feature : str
        One of "full", "mfcc", "chroma", "mfcc_and_chroma", or a
        "without_*" option naming the group(s) left out.

    Returns
    -------
    X itself for "full", a slice of X for "mfcc"/"chroma", otherwise a new
    array with the chosen groups concatenated along axis 2.

    Raises
    ------
    ValueError
        If `feature` is not one of the supported options.
    """
    # All groups are sliced eagerly; single columns get their last axis back
    # so that everything can be concatenated along axis 2.
    parts = {
        "mfcc": X[:, :, :20],
        "zcr": X[:, :, 20][:, :, np.newaxis],
        "chroma": X[:, :, 21:33],
        "rms": X[:, :, 33][:, :, np.newaxis],
        "centroid": X[:, :, 34][:, :, np.newaxis],
        "bandwidth": X[:, :, 35][:, :, np.newaxis],
        "rolloff": X[:, :, 36][:, :, np.newaxis],
    }

    # Option name -> ordered groups to concatenate.
    layouts = {
        "without_mfcc": ("zcr", "chroma", "rms", "centroid", "bandwidth", "rolloff"),
        "without_chroma": ("mfcc", "zcr", "rms", "centroid", "bandwidth", "rolloff"),
        "without_mfcc_and_chroma": ("zcr", "rms", "centroid", "bandwidth", "rolloff"),
        "without_zcr": ("mfcc", "chroma", "rms", "centroid", "bandwidth", "rolloff"),
        "without_rms": ("mfcc", "chroma", "zcr", "centroid", "bandwidth", "rolloff"),
        "without_spec_cent": ("mfcc", "chroma", "zcr", "rms", "bandwidth", "rolloff"),
        "without_spec_band": ("mfcc", "chroma", "zcr", "rms", "centroid", "rolloff"),
        "without_spec_roll": ("mfcc", "chroma", "zcr", "rms", "centroid", "bandwidth"),
        "mfcc_and_chroma": ("mfcc", "chroma"),
    }

    if feature == "full":
        return X
    if not isinstance(feature, str):
        raise ValueError("Invalid feature option")
    if feature in ("mfcc", "chroma"):
        return parts[feature]
    if feature not in layouts:
        raise ValueError("Invalid feature option")
    return np.concatenate([parts[group] for group in layouts[feature]], axis=2)

def extract_features_all(feature, X_train, X_val, y_train, y_val, X_train_val=None, X_test=None):
    """Apply `extract_features` with the same option to all four data splits.

    Parameters
    ----------
    feature : str
        Option forwarded to `extract_features`.
    X_train, X_val : arrays
        Training and validation splits.
    y_train, y_val :
        Accepted for call compatibility; not used.
    X_train_val, X_test : arrays, optional
        Train+validation and test splits. When omitted, the notebook-level
        globals of the same name are used, which is what this function
        always did before these parameters existed. Pass them explicitly to
        avoid depending on notebook state.

    Returns
    -------
    tuple
        (train, train_val, test, val) feature arrays, in that order.
    """
    # Fall back to the notebook namespace only when the caller did not pass
    # the split; the parameter shadows the global, hence the explicit lookup.
    if X_train_val is None:
        X_train_val = globals()["X_train_val"]
    if X_test is None:
        X_test = globals()["X_test"]

    return (
        extract_features(X_train, feature),
        extract_features(X_train_val, feature),
        extract_features(X_test, feature),
        extract_features(X_val, feature),
    )

def log(log_data):
    """Append one result row to Log/Log.csv.

    Parameters
    ----------
    log_data : dict
        Column name -> value for a single row.

    The Log/ directory is created if missing. The header row is written only
    when the file does not exist yet or is empty; later calls append data
    rows without a header, so every call should use the same keys in the
    same order.
    """
    log_file = "Log/Log.csv"
    os.makedirs(os.path.dirname(log_file), exist_ok=True)

    needs_header = not os.path.exists(log_file) or os.path.getsize(log_file) == 0
    row = pd.DataFrame([log_data])
    # mode='a' creates the file on first use and appends afterwards.
    row.to_csv(log_file, mode='a', index=False, header=needs_header)

def format_seconds_to_hhmmss(seconds):
    """Format a duration in seconds as "hh:mm:ss".

    Parameters
    ----------
    seconds : int or float
        Duration in seconds; any fractional part is dropped.

    Returns
    -------
    str
        Zero-padded "hh:mm:ss". The hour field keeps counting past 24
        (e.g. 90000 -> "25:00:00") instead of wrapping at one day, and
        negative durations are prefixed with "-".
    """
    delta = datetime.timedelta(seconds=seconds)
    is_negative = delta < datetime.timedelta(0)
    delta = abs(delta)

    # timedelta.seconds alone only covers the part below one day, so the
    # whole days have to be added back in.
    total_seconds = delta.days * 86400 + delta.seconds
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)

    sign = "-" if is_negative and total_seconds > 0 else ""
    return f"{sign}{hours:02}:{minutes:02}:{secs:02}"

In [4]:
def _make_optimizer(name, lr):
    # Build a fresh Keras optimizer instance for `name` with learning rate `lr`.
    optimizer_classes = {
        "Adam": keras.optimizers.Adam,
        "SGD": keras.optimizers.SGD,
        "RMSprop": keras.optimizers.RMSprop,
    }
    if not isinstance(name, str) or name not in optimizer_classes:
        raise ValueError(f"Invalid optimizer option: {name!r}")
    return optimizer_classes[name](learning_rate=lr)


def _save_history_plot(path, epochs, train_values, val_values, metric, last_epoch, ylim=None):
    # Plot the train/validation curves of one metric on its own figure and save it to `path`.
    fig, ax = plt.subplots()
    ax.plot(epochs, train_values, label=f'train_{metric} (last: {train_values[-1]:.3f})')
    ax.plot(epochs, val_values, label=f'val_{metric} (last: {val_values[-1]:.3f})')
    ax.set_title(f'Model {metric.capitalize()} (Epochs: {last_epoch})')
    if ylim is not None:
        ax.set_ylim(*ylim)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric.capitalize())
    ax.legend()
    fig.savefig(path)
    # Close explicitly so repeated train() calls do not accumulate open figures.
    plt.close(fig)


def train(title, create_model, optimizer, lr, batch, X_train, X_train_val, X_val, X_test):
    """Cross-validate and train a model, then save plots and append a log row.

    Steps:
      1. 5-fold stratified cross-validation on X_train_val; a new model is
         built, compiled and fitted per fold, and loss / accuracy /
         weighted F1 on the held-out fold are recorded.
      2. A final model is fitted on X_train with (X_val, y_val) as
         validation data, timed, and evaluated on X_test.
      3. Accuracy, loss and confusion-matrix figures are written to
         "Log/{title}_*.png", and one row of metrics is appended via `log`.

    Parameters
    ----------
    title : str
        Run label; printed and used in the output file names and log row.
    create_model : callable
        Called with the training array; must return an object exposing
        compile / fit / evaluate / predict (used here as a Keras model).
    optimizer : str
        "Adam", "SGD" or "RMSprop". A fresh optimizer instance is created
        for every model.
    lr : float
        Learning rate passed to the optimizer.
    batch : int
        Batch size passed to `fit`.
    X_train, X_train_val, X_val, X_test : arrays
        Feature arrays for the respective splits.

    Raises
    ------
    ValueError
        If `optimizer` is not one of the supported names.

    Notes
    -----
    The labels y_train, y_train_val, y_val and y_test are NOT parameters;
    they are read from the notebook's global namespace and must be aligned
    with the corresponding X arrays.
    """
    print(f"-> {title}")

    n_splits = 5
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    fold_accuracies = []
    fold_losses = []
    fold_f1_scores = []
    for i, (train_index, val_index) in enumerate(skf.split(X_train_val, y_train_val), start=1):
        print(f"Fold-{i}")
        X_train_fold, X_test_fold = X_train_val[train_index], X_train_val[val_index]
        y_train_fold, y_test_fold = y_train_val[train_index], y_train_val[val_index]

        model = create_model(X_train_fold)
        model.compile(
            optimizer=_make_optimizer(optimizer, lr),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
        # No validation data is passed to fit() inside a fold, so early
        # stopping watches the training accuracy.
        early_stopping = keras.callbacks.EarlyStopping(monitor='accuracy', patience=5)
        model.fit(
            X_train_fold, y_train_fold,
            epochs=200, batch_size=batch,
            callbacks=[early_stopping, TqdmCallback(verbose=0)],
            verbose=0,
        )

        fold_loss, fold_acc = model.evaluate(X_test_fold, y_test_fold)
        y_pred_fold = np.argmax(model.predict(X_test_fold), axis=1)
        fold_f1 = f1_score(y_test_fold, y_pred_fold, average='weighted')
        fold_accuracies.append(fold_acc)
        fold_losses.append(fold_loss)
        fold_f1_scores.append(fold_f1)

    model = create_model(X_train)
    model.compile(
        optimizer=_make_optimizer(optimizer, lr),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    # NOTE(review): validation data is available here, yet stopping is still
    # driven by training accuracy; kept as-is so results stay comparable.
    early_stopping = keras.callbacks.EarlyStopping(monitor='accuracy', patience=5)
    start_time = time.time()
    history = model.fit(
        X_train, y_train,
        epochs=200, batch_size=batch,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping, TqdmCallback(verbose=0)],
        verbose=0,
    )
    end_time = time.time()

    # Extract the data
    train_accuracy = history.history['accuracy']
    val_accuracy = history.history['val_accuracy']
    train_loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(train_accuracy) + 1)
    last_epoch = len(train_accuracy)
    last_train_acc = train_accuracy[-1]
    last_val_acc = val_accuracy[-1]
    last_train_loss = train_loss[-1]
    last_val_loss = val_loss[-1]
    training_time = format_seconds_to_hhmmss(end_time - start_time)

    test_loss, test_acc = model.evaluate(X_test, y_test)
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    f1 = f1_score(y_test, y_pred, average='weighted')
    cm = confusion_matrix(y_test, y_pred)

    # savefig() does not create missing directories.
    os.makedirs("Log", exist_ok=True)

    _save_history_plot(
        f"Log/{title}_model_accuracy.png",
        epochs, train_accuracy, val_accuracy, "accuracy", last_epoch, ylim=(0, 1),
    )
    _save_history_plot(
        f"Log/{title}_model_loss.png",
        epochs, train_loss, val_loss, "loss", last_epoch,
    )

    fig, ax = plt.subplots(figsize=(10, 8))
    maqams = ["Bayati", "Hijaz", "Jiharkah", "Nahawand", "Rast", "Saba", "Sikah"]
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=maqams, yticklabels=maqams, ax=ax)
    ax.set_title('Confusion Matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    # File name spelling kept unchanged so existing outputs keep their names.
    fig.savefig(f"Log/{title}_confussion_matrix.png")
    plt.close(fig)

    log_data = {
        "title": title,
        "epoch_stopped": last_epoch,
        "training_time": training_time,
        "train_acc": last_train_acc,
        "train_loss": last_train_loss,
        "val_acc": last_val_acc,
        "val_loss": last_val_loss,
        "test_acc": test_acc,
        "test_loss": test_loss,
        "test_f1_score": f1,
        "fold_mean_acc": np.mean(fold_accuracies),
        "fold_mean_loss": np.mean(fold_losses),
        "fold_mean_f1_score": np.mean(fold_f1_scores)
    }
    per_fold = zip(fold_accuracies, fold_losses, fold_f1_scores)
    for fold, (acc_k, loss_k, f1_k) in enumerate(per_fold, start=1):
        log_data[f"fold_{fold}_acc"] = acc_k
        log_data[f"fold_{fold}_loss"] = loss_k
        log_data[f"fold_{fold}_f1_score"] = f1_k
    log(log_data)

In [5]:
# Load the feature/label arrays stored in the .npz archive.
dataset = np.load("Dataset/Mujawwad1_(hop=5).npz")

features = dataset["features"]
labels = dataset["labels"]

X = features
y = labels

# Standardise each feature column using statistics taken over axes 0 and 1.
# NOTE(review): the statistics are computed on the full dataset before the
# split, so test samples contribute to the normalisation - confirm whether
# that is acceptable for the reported results.
mean = np.mean(X, axis=(0, 1))
std = np.std(X, axis=(0, 1))
# A constant column has std == 0; divide by 1 there instead of producing NaN/inf.
std = np.where(std == 0, 1.0, std)
X = (X - mean) / std

# Fixed seed so the train/val/test membership is the same on every run
# (the K-fold split inside train() is already seeded).
SPLIT_SEED = 42
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=SPLIT_SEED
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.2, stratify=y_train_val, random_state=SPLIT_SEED
)

# Keep references to the untouched splits; later cells overwrite X_train etc.
X_train_temp = X_train
X_train_val_temp = X_train_val
X_val_temp = X_val
X_test_temp = X_test

In [None]:
# Feature subsets to evaluate; each name is an option of extract_features.
features = [
    "full",
    "without_mfcc",
    "without_chroma",
    "without_mfcc_and_chroma",
    "without_zcr",
    "without_rms",
    "without_spec_cent",
    "without_spec_band",
    "without_spec_roll",
    "mfcc",
    "chroma",
    "mfcc_and_chroma",
]

for feature in features:
    # Restore the untouched splits first: the names below are overwritten in
    # every iteration, and extract_features_all (as called here) picks up
    # X_train_val and X_test from the notebook namespace.
    X_train, X_train_val = X_train_temp, X_train_val_temp
    X_val, X_test = X_val_temp, X_test_temp

    X_train, X_train_val, X_test, X_val = extract_features_all(feature, X_train, X_val, y_train, y_val)

    # Average over axis 1, then append a trailing singleton axis to each split.
    X_train, X_train_val, X_test, X_val = (
        np.mean(split, axis=1)[..., np.newaxis]
        for split in (X_train, X_train_val, X_test, X_val)
    )

    train(f"feature={feature}", Model.deep_ann_1d, "Adam", 0.0001, 64, X_train, X_train_val, X_val, X_test)

-> feature=full
Fold-1


2024-07-13 01:48:47.696560: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1928] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2835 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB MIG 7g.40gb, pci bus id: 0000:4e:00.0, compute capability: 8.0


0epoch [00:00, ?epoch/s]

I0000 00:00:1720810129.540901  561827 service.cc:145] XLA service 0x7f959c003b40 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1720810129.540964  561827 service.cc:153]   StreamExecutor device (0): NVIDIA A100-SXM4-40GB MIG 7g.40gb, Compute Capability 8.0
2024-07-13 01:48:49.643125: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-07-13 01:48:51.410780: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907
I0000 00:00:1720810135.408605  561827 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.9696 - loss: 0.1264 
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
Fold-2


0epoch [00:00, ?epoch/s]

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9754 - loss: 0.0634
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
Fold-3


0epoch [00:00, ?epoch/s]

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9659 - loss: 0.1708
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
Fold-4


0epoch [00:00, ?epoch/s]

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9800 - loss: 0.0784
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
Fold-5


0epoch [00:00, ?epoch/s]

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.9628 - loss: 0.1194
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step


0epoch [00:00, ?epoch/s]

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.9786 - loss: 0.0938
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step
-> feature=without_mfcc
Fold-1


0epoch [00:00, ?epoch/s]

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9361 - loss: 0.1804
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
Fold-2


0epoch [00:00, ?epoch/s]

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9641 - loss: 0.1171
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Fold-3


0epoch [00:00, ?epoch/s]

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9506 - loss: 0.1689
