In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

import librosa
from tqdm import tqdm
from IPython.display import Audio
from pandas.api.types import is_numeric_dtype

import sys
sys.path.append('../')
import default_style

# Locations of the preprocessed dataset, relative to the notebook's working directory.
TS_DATASET_FOLDER = os.path.join("..", "dataset")
TS_PREPROC_FOLDER = os.path.join(TS_DATASET_FOLDER, "preprocessed_traces")
DF_PREPROC_FILE = os.path.join(TS_PREPROC_FOLDER, "preproc_ts.df")

# Names of the trace arrays to load; each one is read from "<name>.npy".
INTERESTING_TRACES = ["clean_trace"]


# Metadata table (despite the ".df" extension the file is parsed as CSV).
df = pd.read_csv(DF_PREPROC_FILE)

# NOTE(review): allow_pickle=True makes np.load unpickle arbitrary objects;
# only use it on trace files produced by this project's own preprocessing.
traces = {
    trace_name: np.load(
        os.path.join(TS_PREPROC_FOLDER, f"{trace_name}.npy"),
        allow_pickle=True,
    )
    for trace_name in INTERESTING_TRACES
}

# traces["syll_labels"] = np.repeat(np.arange(7), len(traces["syllables_fourier"])//7)
# print(traces["syll_labels"].shape)

# 6000.0 -- presumably 48 kHz audio decimated by 8; TODO confirm against the preprocessing step.
SAMPLING_RATE = 48_000 / 8

# Enable IPython's autoreload extension in mode 2 so that edits to imported
# local modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from djanloo_fourier import FixedResolutionSTFTransformer

# Transform every clean trace with a transformer configured for 289 spectral
# points and no spectral padding; the result is padded to a common length later.
fixedtr = FixedResolutionSTFTransformer(
    n_spectral_points=289,
    pad_spectra=False,
)
STFTs = fixedtr.transform(traces["clean_trace"])

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2452/2452 [00:08<00:00, 277.09it/s]


In [3]:
from keras.layers import Conv1D, Dense, Flatten, AveragePooling1D
from keras.models import Sequential

def build_model(input_shape, n_categories,
                n_conv_blocks=2, n_dense_blocks=2, 
                filters=16, kernel_size=5, pool_size=2, 
                dense_nodes=256):

    model = Sequential()
    # Conv1d + temporal average pooling
    for block_id in range(n_conv_blocks):
        conv = Conv1D(filters, kernel_size, 
                      input_shape=input_shape,
                      activation="relu", name=f"conv_{block_id}")
        
        pool = AveragePooling1D(name=f"avg_pooling_{block_id}")
        
        model.add(conv)
        model.add(pool)
        
    # Flattens stuff :3
    model.add(Flatten())
    
    # A good dose of Dense layers never hurted anybody
    for block_id in range(n_dense_blocks):
        model.add(Dense(dense_nodes, activation="relu", name=f"dense_{block_id}"))
    
    # Let the garbage out
    model.add(Dense(n_categories, activation="softmax", name="output"))
    
    model.compile(optimizer="adam", 
                  loss="sparse_categorical_crossentropy", 
                  metrics=['accuracy'])
    return model

2023-06-29 17:01:26.104690: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-29 17:01:26.170726: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-29 17:01:26.171769: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

enc = LabelEncoder()
subset = np.ones(len(df)).astype(bool)#(df.vocal_channel == "song")
labels = np.array([f"{s}{e}{vc}" for s,e,vc in zip(df.sex, df.emotion, df.vocal_channel)])

y = enc.fit_transform(labels[subset])
X = pad_sequences(STFTs)[subset]
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2)

print(np.unique(y),"tot", len(np.unique(y)), "samples are", len(y))
print(np.unique(ytrain),"tot", len(np.unique(ytrain)), "samples are", len(ytrain))
print(np.unique(ytest),"tot", len(np.unique(ytest)), "samples are", len(ytest))


test_params = {'n_conv_blocks': 3, 'filters': 5, 'kernel_size': 5, 'pool_size': 5, 'n_dense_blocks': 1, 'dense_nodes': 41}
test_model = build_model(Xtrain.shape[1:], len(np.unique(y)), **test_params)
test_model.summary()
hist = test_model.fit(Xtrain, ytrain, epochs=20, validation_split=0.2)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27] tot 28 samples are 2452
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27] tot 28 samples are 1961
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27] tot 28 samples are 491
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_0 (Conv1D)             (None, 102, 5)            7230      
                                                                 
 avg_pooling_0 (AveragePooli  (None, 51, 5)            0         
 ng1D)                                                           
                                                                 
 conv_1 (Conv1D)             (None, 47, 5)             130       
                                                                 
 avg_pooling_1 (AveragePooli  (None, 23, 5)           

## Tuning

In [5]:
# In the tuning the spectral resolution is optimized too
from djanloo_fourier import FixedResolutionSTFTransformer
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score

# Build one class label per recording by concatenating sex, emotion and
# vocal channel, then map the label strings to integer class ids.
labels = np.array([
    f"{sex}{emotion}{channel}"
    for sex, emotion, channel in zip(df.sex, df.emotion, df.vocal_channel)
])
enc = LabelEncoder().fit(labels)
y = enc.transform(labels)


def objective(trial, train_idxs, val_idxs):
    """Optuna objective: train one CNN configuration and score it on validation data.

    Besides the network hyper-parameters, the spectral resolution of the STFT
    is tuned too, so the dataset is recomputed from the raw traces on each trial.
    Reads the module-level ``traces`` (raw signals) and ``y`` (encoded labels).

    Parameters
    ----------
    trial : optuna trial
        Used to sample epochs, n_spectral_points, the ``build_model`` arguments
        and the batch size.
    train_idxs, val_idxs : index arrays
        Rows of the dataset used for fitting and for scoring, respectively.

    Returns
    -------
    float
        Accuracy of the fitted model on the validation rows.
    """
    epochs = trial.suggest_int('epochs', 15, 40)

    ## Really gets the dataset of STFTs
    fixedtr = FixedResolutionSTFTransformer(
        n_spectral_points=trial.suggest_int('n_spectral_points', 50, 300),
        pad_spectra=False,
        verbose=False,
    )

    # Gets STFTs
    STFTs = fixedtr.transform(traces["clean_trace"])
    # pad_sequences defaults to dtype='int32', which would truncate non-integer
    # spectra; float32 keeps the values as they are (assumes STFTs are
    # real-valued -- TODO confirm against FixedResolutionSTFTransformer).
    X = pad_sequences(STFTs, dtype="float32")

    ## Concerning the model ...
    pars = dict(
        ## Conv stuff
        n_conv_blocks=trial.suggest_int('n_conv_blocks', 1, 4),
        filters=trial.suggest_int('filters', 2, 16),
        kernel_size=trial.suggest_int('kernel_size', 3, 9),
        pool_size=trial.suggest_int('pool_size', 2, 8),

        ## Dense stuff
        n_dense_blocks=trial.suggest_int('n_dense_blocks', 1, 4),
        dense_nodes=trial.suggest_int('dense_nodes', 16, 256),
    )
    print(f"running model having {pars} with SpRes = {fixedtr.n_spectral_points} for epochs = {epochs}")

    model = build_model(X.shape[1:], len(np.unique(y)), **pars)
    model.fit(
        X[train_idxs], y[train_idxs],
        epochs=epochs,
        # Sampled only once the model has been built successfully.
        batch_size=trial.suggest_int('batch_size', 16, 32),
        verbose=False,
    )

    ## Estimate accuracy
    y_pred = np.argmax(model.predict(X[val_idxs], verbose=False), axis=1)
    return accuracy_score(y[val_idxs], y_pred)

In [6]:
## Splits the dataset
# Shuffled row indices of df, cut at 70% and 80%: the three parts are
# train (70%), validation (10%) and test (20%).
# A seeded generator keeps the partition identical across kernel restarts, so
# tuning results and the held-out test rows stay comparable between runs.
TUNING_SPLIT_SEED = 42
idxs = np.random.default_rng(TUNING_SPLIT_SEED).permutation(len(df))
train_idxs, val_idxs, test_idxs = np.split(idxs, (len(df)*np.array([0.7, 0.8])).astype(int))

In [None]:
import optuna
# In-memory study that maximises the accuracy returned by `objective`.
study = optuna.create_study(direction='maximize')


def trial_function(trial):
    """Run `objective` on the fixed train/validation indices for one trial."""
    return objective(trial, train_idxs, val_idxs)


# ValueError raised by a trial is caught so the remaining trials still run.
study.optimize(
    trial_function,
    n_trials=150,
    n_jobs=-1,
    catch=(ValueError,),
)

[I 2023-06-29 17:28:59,589] A new study created in memory with name: no-name-207e32f6-26c8-4492-810e-deb43c833a30


running model having {'n_conv_blocks': 3, 'filters': 15, 'kernel_size': 9, 'pool_size': 2, 'n_dense_blocks': 2, 'dense_nodes': 155} with SpRes = 85 for epochs = 38


2023-06-29 17:29:11.751476: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 207704640 exceeds 10% of free system memory.


running model having {'n_conv_blocks': 2, 'filters': 16, 'kernel_size': 4, 'pool_size': 2, 'n_dense_blocks': 4, 'dense_nodes': 44} with SpRes = 194 for epochs = 25


2023-06-29 17:29:16.518437: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 209063712 exceeds 10% of free system memory.


running model having {'n_conv_blocks': 4, 'filters': 5, 'kernel_size': 3, 'pool_size': 5, 'n_dense_blocks': 1, 'dense_nodes': 230} with SpRes = 192 for epochs = 31


## Results

In [None]:
# Hyper-parameters of the best trial found by the Optuna study.
best_params = study.best_params
print(best_params)
# NOTE(review): the commented-out refit below looks stale. best_params also
# holds 'epochs', 'batch_size' and 'n_spectral_points', which build_model does
# not accept, and X_train / X_val / y_train / y_val are not defined in the
# visible notebook. Rework it before re-enabling.
# cnn = build_model((54,54), 2, **best_params)
# cnn.summary()
# cnn.fit( np.concatenate((X_train, X_val)), np.concatenate((y_train, y_val)), 
#             epochs=best_params["epochs"], 
#             batch_size=best_params["batch_size"])

In [9]:
# from sklearn.metrics import accuracy_score

# ypred = np.argmax(test_model.predict(Xtest), axis=1)

# print(accuracy_score(ytest, ypred))

In [10]:
# from sklearn.metrics import confusion_matrix

# plt.matshow(confusion_matrix(ytest, ypred))
# plt.xticks(np.unique(ytest), labels = enc.inverse_transform(np.unique(ytest)), rotation=90);
# plt.yticks(np.unique(ytest), labels = enc.inverse_transform(np.unique(ytest)), rotation=0);

# plt.title(f"accuracy = {accuracy_score(ytest, ypred)*100:.2f}")
# plt.grid(ls="")