In [1]:
%load_ext autoreload
%autoreload 2

# Classifying Music Note sounds using Few Shot Deep Learning

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
import math

from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
import numpy as np

#### utils

In [114]:
def fft(f):
    Ni = len(f)
    Mi = int(Ni / 2)
    if Mi <= 2:
        return [f[0] + f[1] + f[2] + f[3], 
               f[0] - 1j*f[1] - f[2] + 1j*f[3],
               f[0] - f[1] + f[2] - f[3],
               f[0] + 1j*f[1] - f[2] - 1j*f[3]]
    
    wn = math.cos(2*math.pi/Ni) - 1j*math.sin(2*math.pi/Ni)
    fe = [f[i] for i in range(Ni) if i % 2 == 0]
    fo = [f[i] for i in range(Ni) if i % 2 == 1]
    Fe = fft(fe)
    Fo = fft(fo)
    return [np.around(Fe[i] + (wn**i)*Fo[i], decimals=10) for i in range(Mi)] + [np.around(Fe[i] - (wn**i)*Fo[i], decimals=10) for i in range(Mi)]

def get_audio_data(filename):
    fs = 2**12 # sample rate
    tp = 2 # sampling duration
    N = n = fs*tp # number of samples
    
    # Extract data and sampling rate from file
    recording, fs = librosa.load(filename, sr=fs, duration=tp)
    recording = recording.reshape((-1, 1))

    n = len(recording)        
    tp = int(n / fs)

    if tp < 2:
        pad_width = N - recording.shape[0]
        recording = np.pad(recording, pad_width=((0, pad_width), (0, 0)), mode='constant')

        n = len(recording)
        tp = int(n / fs)

    N = fs*tp # number of samples
    x = [round(float(recording[i]), 10) for i in range(n)] # input sequence
    return x, tp, n

def get_frequency_amplitude(x, tp, N):
    _X = fft(x) # discrete Fourier transform
    X = [round(Xi/N, 10) for Xi in _X] # frequency spectrum
    X_amp = [np.absolute(Xi) for Xi in X] # amplitude spectrum

    M = int(N/2)
    ti = [i*tp/N for i in range(N)]
    fi = [i/tp for i in range(M)]
    X_amp = np.array(X_amp[:M])*2
    
    return ti, fi, X_amp

def extract_features(filepath):
    # try:
    audio_features = get_audio_data(filepath)
    if not audio_features:
        return

    x, tp, N = audio_features
    ti, fi, X_amp = get_frequency_amplitude(x, tp, N)
    return X_amp
    
    # except Exception as e:
    #     print("Error encountered while parsing file: ", file_name, e)
    #     return None 
    
# def extract_features(file_name):
   
# #     try:
#     audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
#     mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
#     mfccsscaled = np.mean(mfccs.T, axis=0)
        
# #     except Exception as e:
# #         print("Error encountered while parsing file: ", file, e)
# #         return None 
     
#     return mfccsscaled
    

#### Load Preprocessed data 

In [116]:
# Load various imports 
import pandas as pd
import os
import librosa

# Set the path to the full UrbanSound dataset 
DATA_DIR = os.path.join("data", "guitar_sample")
# DATA_DIR = os.path.join("/content/drive/My Drive/Colab Notebooks/data", "guitar_sample")

# feature list
features = []

# Iterate through each sound file and extract the features 
for folder in tqdm(os.listdir(DATA_DIR)):
    for file in os.listdir(os.path.join(DATA_DIR, folder)):
        class_label = folder
        file_name = os.path.join(os.path.join(DATA_DIR, folder, file))
        
        data = extract_features(file_name)
        if data is None:
            continue
        
        data = np.array(data).reshape((64, 64))
#         pad_width = (96*96*3) - data.shape[0]
        
#         data = np.pad(data, pad_width=((0, pad_width), (0, 0)), mode='constant')
        features.append([data, class_label])

# Convert into a Panda dataframe 
featuresdf = pd.DataFrame(features, columns=['feature','class_label'])

print('Finished feature extraction from ', len(featuresdf), ' files') 

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12.0), HTML(value='')))


Finished feature extraction from  251  files


In [117]:
featuresdf.head()

Unnamed: 0,feature,class_label
0,"[[7.2154e-06, 1.1226262854574536e-05, 9.577476...",0A
1,"[[1.25874e-05, 2.9857376931003163e-05, 4.90201...",0A
2,"[[5.00442e-05, 1.584144500100922e-05, 3.131867...",0A
3,"[[5.59316e-05, 2.868965890978838e-05, 2.260507...",0A
4,"[[2.30266e-05, 1.0840161257103143e-05, 2.55471...",0A


In [118]:
from itertools import combinations
from math import factorial

def number_of_combinations(n, r):
    return int(factorial(n) / (factorial(n - r) * factorial(r)))

def prepare_data_pair(X, y, labels):
    data = [[], [], []]
    data_labels = [[], [], []]
    
    for label in labels:
        label = f"1{label}"
        semilabel = f"0{label}"

        indices = np.array(list(range(len(y))))
        similar_indices = indices[y == label]
        train_half_size = number_of_combinations(len(similar_indices), 2)

        semisimilar_indices = indices[y == semilabel][:train_half_size]

        dissimilar_indices = indices[(y != label) & (y != semilabel)]
        np.random.shuffle(dissimilar_indices)

        dissimilar_indices = dissimilar_indices[:train_half_size - len(semisimilar_indices)]
        dissimilar_indices = np.concatenate([semisimilar_indices, dissimilar_indices])

        np.random.shuffle(dissimilar_indices)
        it = iter(dissimilar_indices)

        for i, j in combinations(similar_indices, 2):
            z = next(it)
            for index, value in enumerate([z, i, j]):
                data[index].append(X[value])
                data_labels[index].append(y[value])
            
        print(y[z], y[i], y[j])
    
    data = np.array(data)
    data_labels = np.array(data_labels)
    return data, data_labels

In [119]:
os.listdir(DATA_DIR)

['0A', '0B', '0D', '0EH', '0EL', '0G', '1A', '1B', '1D', '1EH', '1EL', '1G']

In [120]:
# split the dataset 
from sklearn.model_selection import train_test_split 

# Convert features and corresponding classification labels into numpy arrays
input_data = np.array(featuresdf.feature.tolist())
input_labels = np.array(featuresdf.class_label.tolist())

# split train and test data
x_train, x_test, y_train, y_test = train_test_split(input_data, input_labels, test_size=0.2, random_state = 42)

# labels
labels = ["EH", "D", "A", "B", "G", "EL"]

# prepare data set pairs
X, y = prepare_data_pair(x_train, y_train, labels)

0EH 1EH 1EH
1EH 1D 1D
0D 1A 1A
0D 1B 1B
1A 1G 1G
1D 1EL 1EL


In [121]:
X.shape, y.shape

((3, 759, 64, 64), (3, 759))

In [122]:
x_train.shape, y_train.shape

((200, 64, 64), (200,))

In [123]:
x_test.shape, y_test.shape

((51, 64, 64), (51,))

In [124]:
for i in range(3):
    print(y[:, i])

['0D' '1EH' '1EH']
['0EL' '1EH' '1EH']
['0D' '1EH' '1EH']


### Convolutional Neural Network (CNN) model architecture 

In [125]:
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Dense, Dropout, Flatten, Lambda, LayerNormalization, Layer
K.clear_session()

def build_base_network(input_shape):
    model = Sequential()
    
    model.add(Conv1D(3, kernel_size=3, activation='relu', input_shape=input_shape, padding="VALID"))
#     model.add(LayerNormalization(axis=1))
    model.add(MaxPooling1D(pool_size=2))
    
    model.add(Conv1D(3, kernel_size=3, activation='relu', padding="VALID"))
#     model.add(LayerNormalization(axis=1))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.2))
    
#     model.add(Conv1D(32, kernel_size=3, activation='relu'))
# #     model.add(LayerNormalization(axis=1))
#     model.add(MaxPooling1D(pool_size=2))
#     model.add(Dropout(0.2))
    
#     model.add(Conv1D(32, kernel_size=3, activation='relu'))
# #     model.add(LayerNormalization(axis=1))
#     model.add(MaxPooling1D(pool_size=2))
#     model.add(Dropout(0.2))
    
#     model.add(Conv1D(32, kernel_size=3, activation='relu'))
# #     model.add(LayerNormalization(axis=1))
#     model.add(MaxPooling1D(pool_size=2))
#     model.add(Dropout(0.2))
    
#     model.add(Conv1D(32, kernel_size=3, activation='relu'))
# #     model.add(LayerNormalization(axis=1))
#     model.add(MaxPooling1D(pool_size=2))
#     model.add(Dropout(0.2))

#     model.add(Conv1D(32, kernel_size=3, activation='relu'))
# #     model.add(LayerNormalization(axis=1))
#     model.add(MaxPooling1D(pool_size=2))
#     model.add(Dropout(0.2))

#     model.add(Conv1D(32, kernel_size=3, activation='relu'))
# #     model.add(LayerNormalization(axis=1))
#     model.add(MaxPooling1D(pool_size=2))
#     model.add(Dropout(0.2))
    
#     model.add(Conv1D(32, kernel_size=3, activation='relu'))
# #     model.add(LayerNormalization(axis=1))
#     model.add(MaxPooling1D(pool_size=2))
#     model.add(Dropout(0.2))
    
    model.add(Flatten())
    
#     model.add(Dense(1024))
#     model.add(LayerNormalization())
#     model.add(Dropout(0.3))
    
#     model.add(Dense(512))
#     model.add(LayerNormalization())
#     model.add(Dropout(0.3))
    
#     model.add(Dense(256))
#     model.add(LayerNormalization())
#     model.add(Dropout(0.3))
    
    model.add(Dense(128))
    return model

def distance(emb1, emb2):
    return np.sum(np.square(emb1 - emb2))

### Compiling the model 

For compiling our model, we will use the same three parameters as the previous model: 

In [126]:
# from model import create_model as build_base_network

input_dim = X.shape[2:]

# We only test DenseNet-121 in this script for demo purpose
import tensorflow as tf
# base_network = tf.keras.applications.DenseNet121(
#     include_top=False, weights=None, input_tensor=None, input_shape=input_dim,
#     pooling="max", classes=128
# )

base_network = build_base_network(input_dim)

audio_n = Input(shape=input_dim)
audio_a = Input(shape=input_dim)
audio_p = Input(shape=input_dim)

feat_vecs_n = base_network(audio_n)
feat_vecs_a = base_network(audio_a)
feat_vecs_p = base_network(audio_p)

class TripletLossLayer(Layer):
    def __init__(self, alpha, **kwargs):
        self.alpha = alpha
        super(TripletLossLayer, self).__init__(**kwargs)
    
    def triplet_loss(self, inputs):
        a, p, n = inputs
        p_dist = K.sum(K.square(a-p), axis=-1)
        n_dist = K.sum(K.square(a-n), axis=-1)
        return K.sum(K.maximum(p_dist - n_dist + self.alpha, 0), axis=0)
    
    def call(self, inputs):
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss

# Layer that computes the triplet loss from anchor, positive and negative embedding vectors
difference = TripletLossLayer(alpha=0.2, name='triplet_loss_layer')([feat_vecs_n, feat_vecs_a, feat_vecs_p])

# initialize training params
epochs = 128
batch_size = 32

optimizer = Adam()
# optimizer = RMSprop()

# initialize the network
model = Model(inputs=[audio_n, audio_a, audio_p], outputs=difference)
model.compile(loss=None, optimizer=optimizer)

In [127]:
# Display model architecture summary 
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 64, 64)]     0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 64, 64)]     0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 64, 64)]     0                                            
__________________________________________________________________________________________________
sequential (Sequential)         (None, 128)          6113        input_1[0][0]                    
                                                                 input_2[0][0]         

In [128]:
model.layers[3].summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 62, 3)             579       
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 31, 3)             0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 29, 3)             30        
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 14, 3)             0         
_________________________________________________________________
dropout (Dropout)            (None, 14, 3)             0         
_________________________________________________________________
flatten (Flatten)            (None, 42)                0         
_________________________________________________________________
dense (Dense)                (None, 128)               5

### Training 

Here we will train the model. As training a CNN can take a sigificant amount of time, we will start with a low number of epochs and a low batch size. If we can see from the output that the model is converging, we will increase both numbers.  

In [129]:
from tensorflow.keras.callbacks import ModelCheckpoint 
from time import time

checkpointer = ModelCheckpoint(
    filepath='saved_models/weights.best.basic_cnn.hdf5', 
    verbose=1, 
    save_best_only=True
)

start = time()
model.fit(
    [X[0], X[1], X[2]], 
    None, 
    batch_size=batch_size, 
    epochs=epochs, 
    validation_split=0.25,
#     callbacks=[checkpointer], 
    verbose=1
)

duration = (time() - start)/60
print("Training completed in time: ", duration, "min")

Epoch 1/128
Epoch 2/128
Epoch 3/128
Epoch 4/128
Epoch 5/128
Epoch 6/128
Epoch 7/128
Epoch 8/128
Epoch 9/128
Epoch 10/128
Epoch 11/128
Epoch 12/128
Epoch 13/128
Epoch 14/128
Epoch 15/128
Epoch 16/128
Epoch 17/128
Epoch 18/128
Epoch 19/128
Epoch 20/128
Epoch 21/128
Epoch 22/128
Epoch 23/128
Epoch 24/128
Epoch 25/128
Epoch 26/128
Epoch 27/128
Epoch 28/128
Epoch 29/128
Epoch 30/128
Epoch 31/128
Epoch 32/128
Epoch 33/128
Epoch 34/128
Epoch 35/128
Epoch 36/128
Epoch 37/128
Epoch 38/128
Epoch 39/128
Epoch 40/128
Epoch 41/128
Epoch 42/128
Epoch 43/128
Epoch 44/128
Epoch 45/128
Epoch 46/128
Epoch 47/128
Epoch 48/128
Epoch 49/128
Epoch 50/128
Epoch 51/128
Epoch 52/128
Epoch 53/128
Epoch 54/128
Epoch 55/128
Epoch 56/128
Epoch 57/128
Epoch 58/128
Epoch 59/128
Epoch 60/128
Epoch 61/128
Epoch 62/128
Epoch 63/128
Epoch 64/128
Epoch 65/128
Epoch 66/128
Epoch 67/128
Epoch 68/128
Epoch 69/128
Epoch 70/128
Epoch 71/128
Epoch 72/128
Epoch 73/128
Epoch 74/128
Epoch 75/128
Epoch 76/128
Epoch 77/128
Epoch 78

Epoch 83/128
Epoch 84/128
Epoch 85/128
Epoch 86/128
Epoch 87/128
Epoch 88/128
Epoch 89/128
Epoch 90/128
Epoch 91/128
Epoch 92/128
Epoch 93/128
Epoch 94/128
Epoch 95/128
Epoch 96/128
Epoch 97/128
Epoch 98/128
Epoch 99/128
Epoch 100/128
Epoch 101/128
Epoch 102/128
Epoch 103/128
Epoch 104/128
Epoch 105/128
Epoch 106/128
Epoch 107/128
Epoch 108/128
Epoch 109/128
Epoch 110/128
Epoch 111/128
Epoch 112/128
Epoch 113/128
Epoch 114/128
Epoch 115/128
Epoch 116/128
Epoch 117/128
Epoch 118/128
Epoch 119/128
Epoch 120/128
Epoch 121/128
Epoch 122/128
Epoch 123/128
Epoch 124/128
Epoch 125/128
Epoch 126/128
Epoch 127/128
Epoch 128/128
Training completed in time:  0.6204944372177124 min


#### Evaluation

### Best freq treshold

In [130]:
from sklearn.metrics import f1_score, accuracy_score

use_test = False

distances = [] # squared L2 distance between pairs
identical = [] # 1 if same identity, 0 otherwise

if use_test:
    num = len(x_test)
    embedded = model.layers[3].predict(x_test)
    
else:
    num = len(x_train)
    embedded = model.layers[3].predict(x_train)

for i in range(num):
    for j in range(num):
        distances.append(distance(embedded[i], embedded[j]))
        if use_test:
            identical.append(1 if y_test[i] == y_test[j] else 0)
        else:
            identical.append(1 if y_train[i] == y_train[j] else 0)
            
distances = np.array(distances)
identical = np.array(identical)

min_threshold = min(distances)
max_threshold = max(distances)
threshold_step = (max_threshold - min_threshold)/1000
thresholds = np.arange(min_threshold, max_threshold, threshold_step)

f1_scores = [f1_score(identical, distances < t) for t in thresholds]
acc_scores = [accuracy_score(identical, distances < t) for t in thresholds]

# max f1
opt_idx = np.argmax(f1_scores)
opt_f1 = np.max(f1_scores)

# Threshold at maximal F1 score
opt_tau = thresholds[opt_idx]

# Accuracy at maximal F1 score
opt_acc = accuracy_score(identical, distances < opt_tau)

# Plot F1 score and accuracy as function of distance threshold
plt.plot(thresholds, f1_scores, label='F1 score');
plt.plot(thresholds, acc_scores, label='Accuracy');
plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
plt.title(f'Max: Acc={opt_acc:.2f}, f1={opt_f1:.2f} at threshold {opt_tau:.8f}');
plt.xlabel('Distance threshold')
plt.legend();

  thresholds = np.arange(min_threshold, max_threshold, threshold_step)


ValueError: arange: cannot compute length

In [None]:
dist_pos = distances[identical == 1]
dist_neg = distances[identical == 0]

plt.figure(figsize=(12,4))

plt.subplot(121)
plt.hist(dist_pos)
plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
plt.title('Distances (pos. pairs)')
plt.legend();

plt.subplot(122)
plt.hist(dist_neg)
plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
plt.title('Distances (neg. pairs)')
plt.legend();

### Testing Evaluation

In [192]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report

distances = [] # squared L2 distance between pairs
identical = [] # 1 if same identity, 0 otherwise
prediction = []

num = len(x_test)
embedded = model.layers[3].predict(x_test)

for i in range(num):
    for j in range(num):
        distances.append(distance(embedded[i], embedded[j]))
        identical.append(1 if y_test[i] == y_test[j] else 0)
        prediction.append(1 if distances[-1] < opt_tau else 0)
        
distances = np.array(distances)
identical = np.array(identical)
prediction = np.array(prediction)

print("accuracy =", accuracy_score(identical, prediction))

print("\nclassification_report")
print("========================")
print(classification_report(identical, prediction))

print("\nconfusion matrix")
print("========================")
print(confusion_matrix(identical, prediction))

accuracy = 0.8723567858515956

classification_report
              precision    recall  f1-score   support

           0       0.92      0.94      0.93      2320
           1       0.39      0.32      0.35       281

    accuracy                           0.87      2601
   macro avg       0.65      0.63      0.64      2601
weighted avg       0.86      0.87      0.87      2601


confusion matrix
[[2180  140]
 [ 192   89]]


### Testing (new data) Evaluation

In [193]:
x_test_sample = []
y_test_sample = []

# sample_dirs = ["EH", "D", "A", "B", "G", "EL"]
sample_dirs = ["A"]

sample_label = "A"
sample_filepath = os.path.join("data", "sampleA.wav")
sample_vector = extract_features(sample_filepath).reshape((1, -1, 1))
sample_embedded = model.layers[3].predict(sample_vector)

for label in tqdm(sample_dirs):
    print(label, end=", ")
    labeldir= os.path.join("data", "old_guitar_sample", label)

    for filename in (os.listdir(labeldir)):
        anchor_filepath = os.path.join("data", "old_guitar_sample", label, filename)
        
        x_test_sample.append(extract_features(anchor_filepath).reshape((-1, 1)))
        y_test_sample.append(label)

print()
x_test_sample = np.array(x_test_sample)
y_test_sample = np.array(y_test_sample)

print(x_test_sample.shape)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

A, 

(10, 40, 1)


In [194]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report

distances = [] # squared L2 distance between pairs
identical = [] # 1 if same identity, 0 otherwise
prediction = []

num = len(x_test_sample)
embedded = model.layers[3].predict(x_test_sample)

for i in range(num):
    for j in range(num):
        distances.append(distance(embedded[i], embedded[j]))
        identical.append(1 if y_test_sample[i] == y_test_sample[j] else 0)
        prediction.append(1 if distances[-1] < opt_tau else 0)
        
distances = np.array(distances)
identical = np.array(identical)
prediction = np.array(prediction)

print("accuracy =", accuracy_score(identical, prediction))

print("\nclassification_report")
print("========================")
print(classification_report(identical, prediction))

print("\nconfusion matrix")
print("========================")
print(confusion_matrix(identical, prediction))

accuracy = 0.18

classification_report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.18      0.31       100

    accuracy                           0.18       100
   macro avg       0.50      0.09      0.15       100
weighted avg       1.00      0.18      0.31       100


confusion matrix
[[ 0  0]
 [82 18]]


  _warn_prf(average, modifier, msg_start, len(result))


### Testing (sample) Evaluation

In [195]:
x_test_sample = []
y_test_sample = []

sample_dirs = ["1EH", "1D", "1A", "1B", "1G", "1EL"]

sample_label = "A"
sample_filepath = os.path.join("/content/drive/My Drive/Colab Notebooks/data", "sampleA.wav")
sample_vector = extract_features(sample_filepath).reshape((1, -1, 1))
sample_embedded = model.layers[3].predict(sample_vector)

for label in tqdm(sample_dirs):
    print(label, end=", ")
    labeldir= os.path.join(DATA_DIR, label)

    for filename in (os.listdir(labeldir)):
        anchor_filepath = os.path.join(DATA_DIR, label, filename)
        
        x_test_sample.append(extract_features(anchor_filepath).reshape((-1, 1)))
        y_test_sample.append(1 if label.startswith("1") and label.endswith(sample_label) else 0)

print()
x_test_sample = np.array(x_test_sample)
y_test_sample = np.array(y_test_sample)

print(x_test_sample.shape)



Error encountered while parsing file:  G Sample 9.wav


AttributeError: 'NoneType' object has no attribute 'reshape'

In [None]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report

distances = [] # squared L2 distance between pairs
prediction = []

num = len(x_test_sample)
embedded = model.layers[3].predict(x_test_sample)

for i in range(num):
    distances.append(distance(embedded[i], sample_embedded))
    prediction.append(1 if distances[-1] < opt_tau else 0)

identical = y_test_sample
distances = np.array(distances)
prediction = np.array(prediction)

print("accuracy =", accuracy_score(identical, prediction))

print("\nclassification_report")
print("========================")
print(classification_report(identical, prediction))

print("\nconfusion matrix")
print("========================")
print(confusion_matrix(identical, prediction))

In [None]:
prediction.shape, identical.shape

((118,), (118,))

In [39]:
# weights path
# weights_path = os.path.join("C:\\Users\\christian\\Documents\\christian\\work\\python\\guitar_music_note_recognizer\\music_note_recognizer\\static\\music_note_recognizer\\weights", f'{label}_weights.h5')
# weights_path = "saved_models/triplet_128_32.h5"

# load weights
# model.save_weights(weights_path)