# Audio Classification - Free Spoken Digit Dataset (FSDD, an "audio MNIST")

In [32]:
import shutil
from pathlib import Path
import pickle

from tqdm import tqdm
import wisardpkg as wp
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import scipy.io.wavfile as wav
from pyAudioAnalysis import audioSegmentation
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures

from free_spoken_digit_dataset.utils.fsdd import FSDD
from free_spoken_digit_dataset.utils.spectogramer import dir_to_spectrogram


def load_audio_files(audio_dir):
    """Read every ``.wav`` file found directly inside ``audio_dir``.

    Files are visited in sorted order so that the returned lists (and
    anything derived from their order, such as unshuffled cross-validation
    folds) are reproducible across operating systems; ``Path.iterdir`` alone
    yields entries in arbitrary order. Entries that are not regular files or
    do not have a ``.wav`` suffix (case-insensitive) are skipped instead of
    being handed to the WAV reader.

    Parameters
    ----------
    audio_dir : str or pathlib.Path
        Directory containing the recordings.

    Returns
    -------
    sample_rate_list : list
        Sample rate of each file, as returned by ``scipy.io.wavfile.read``.
    samples_list : list
        Sample data of each file, as returned by ``scipy.io.wavfile.read``.
    names : list of str
        File name (without directory) of each file.
    """
    sample_rate_list = list()
    samples_list = list()
    names = list()

    files = sorted(
        x for x in Path(audio_dir).iterdir()
        if x.is_file() and x.suffix.lower() == '.wav'
    )
    for file in files:
        sample_rate, samples = wav.read(file)
        sample_rate_list.append(sample_rate)
        samples_list.append(samples)
        names.append(file.name)

    return sample_rate_list, samples_list, names


def pad_samples(samples, max_n_samples=None):
    """Bring every recording to the same length along its first axis.

    Shorter recordings are zero-padded at the end; longer ones are truncated.
    Padding keeps the recording's dtype and pads only the first axis, so
    multi-dimensional recordings are not flattened.

    Parameters
    ----------
    samples : sequence of array-like
        The recordings to equalise.
    max_n_samples : int, optional
        Target length. When ``None`` the length of the longest recording is
        used (0 for an empty ``samples``). An explicit ``0`` is honoured and
        truncates everything to length zero.

    Returns
    -------
    list
        One entry per input recording, each of length ``max_n_samples``.
    """
    if max_n_samples is None:
        max_n_samples = max((len(samples_i) for samples_i in samples), default=0)

    padded_samples = []
    for samples_i in samples:
        missing = max_n_samples - len(samples_i)
        if missing > 0:
            arr = np.asarray(samples_i)
            # Pad only at the end of axis 0; leave any further axes untouched.
            pad_width = [(0, missing)] + [(0, 0)] * (arr.ndim - 1)
            padded_samples.append(np.pad(arr, pad_width, mode='constant'))
        else:
            padded_samples.append(samples_i[:max_n_samples])
    return padded_samples


def plot_amplitude(samples, sample_rate, names, i):
    """Plot the waveform of recording ``i`` against time in seconds.

    ``sample_rate`` may be a single value shared by all recordings or a list
    holding one rate per recording; in the latter case entry ``i`` is used.
    The figure title shows the index and ``names[i]``.
    """
    rate = sample_rate[i] if isinstance(sample_rate, list) else sample_rate

    signal = samples[i]
    # Convert sample indices to seconds.
    time_axis = np.arange(len(signal)) / rate

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(time_axis, signal)
    ax.set_title(f'Sample {i} - {names[i]}')
    ax.set_xlabel('Seconds')
    ax.set_ylabel('Amplitude')
    plt.show()


def split_train_and_test(samples, labels, file_names):
    """Split recordings into train and test sets by recording number.

    The recording number is the integer between the last ``_`` of the file
    name and the following ``.``. Numbers 0 to 4 go to the test set;
    everything else goes to the training set.

    Returns ``(X_train, y_train, X_test, y_test)`` as plain lists.
    """
    X_train, y_train = [], []
    X_test, y_test = [], []

    for idx, file_name in enumerate(file_names):
        last_token = file_name.rsplit('_', 1)[-1]
        rec_number = int(last_token.partition('.')[0])

        if 0 <= rec_number <= 4:
            target_x, target_y = X_test, y_test
        else:
            target_x, target_y = X_train, y_train

        target_x.append(samples[idx])
        target_y.append(labels[idx])

    return X_train, y_train, X_test, y_test

## Loading and cleaning data

In [122]:
# Directory holding the dataset's recordings.
audio_dir = Path('free_spoken_digit_dataset') / 'recordings'
sample_rates, samples, names = load_audio_files(audio_dir)

Let's derive the labels from the recordings' file names.

In [123]:
# The label is the first character of each file name, parsed as an integer.
y = [int(name[:1]) for name in names]
np.unique(y)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

Now we need to split the dataset into training and test sets.

In [126]:
def split_train_and_test(samples, sample_rates, labels, file_names):
    """Split recordings, labels and sample rates into train and test sets.

    The recording number is the integer between the last ``_`` of the file
    name and the following ``.``. Recordings numbered 0 to 4 form the test
    set; all others form the training set.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, train_sample_rates,
        test_sample_rates)``, each a plain list.
    """
    test_numbers = (0, 1, 2, 3, 4)

    # Each partition collects (recordings, labels, sample rates).
    train_part = ([], [], [])
    test_part = ([], [], [])

    for idx, file_name in enumerate(file_names):
        number_text = file_name.split('_')[-1].split('.')[0]
        part = test_part if int(number_text) in test_numbers else train_part
        part[0].append(samples[idx])
        part[1].append(labels[idx])
        part[2].append(sample_rates[idx])

    X_train, y_train, train_sample_rates = train_part
    X_test, y_test, test_sample_rates = test_part
    return X_train, y_train, X_test, y_test, train_sample_rates, test_sample_rates

# Recordings numbered 0-4 become the test set; the rest is used for training.
(X_train, y_train, X_test, y_test,
 train_sample_rates, test_sample_rates) = split_train_and_test(
    samples=samples,
    sample_rates=sample_rates,
    labels=y,
    file_names=names,
)

In [127]:
# The recordings have different lengths, so the list is ragged. Building an
# array from a ragged list without dtype=object is deprecated and raises a
# ValueError on recent NumPy versions.
np.array(X_test, dtype=object).shape

  np.array(X_test).shape


(300,)

In [135]:
# Number of training recordings.
len(X_train)

2700

In [136]:
# Number of training labels; should match the number of training recordings.
len(y_train)

2700

## Feature Extraction and hyperparameter optimization

In [152]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from typing import List, Union, Optional
from time import time
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import ParameterGrid
from sklearn.utils.validation import check_is_fitted
from python_speech_features import mfcc
from tqdm import tqdm

# Globally silence NumPy's divide-by-zero warnings for the rest of the session.
# NOTE(review): presumably these are triggered during feature extraction or
# z-scoring; confirm, since this also hides genuine divisions by zero.
np.seterr(divide='ignore')

class DatabaseZScorer(BaseEstimator, TransformerMixin):
    """Z-score normaliser whose statistics are pooled over all recordings.

    ``fit`` concatenates every item of the input along axis 0 and computes
    the mean and standard deviation along that axis. ``transform`` then
    applies ``(x_i - mean) / std`` to each item separately and returns a list.

    A standard deviation of exactly zero (a constant feature) is replaced by
    1 so that the feature maps to 0 instead of producing inf/NaN.
    """

    def __init__(self):
        # Statistics learned by fit(); None until the scorer has been fitted.
        self._mean = None
        self._std = None

    def fit(self, x, y=None):
        """Learn the pooled mean and standard deviation from ``x``.

        ``x`` is a sequence of arrays that can be concatenated along axis 0;
        ``y`` is accepted for scikit-learn API compatibility and ignored.
        Returns ``self``.
        """
        stacked = np.concatenate(x, axis=0)
        self._mean = np.mean(stacked, axis=0)
        std = np.std(stacked, axis=0)
        # Guard against division by zero for constant features.
        self._std = np.where(std == 0, 1.0, std)
        return self

    def transform(self, x):
        """Return a list with every item of ``x`` z-scored using the fitted statistics."""
        return [(x_i - self._mean) / self._std for x_i in x]

    
def extract_features(audio_signals, sample_rates, n_filters_mfcc, window_size,
                     window_step, append_sum=True, verbose=False):
    """Compute an MFCC feature matrix for every recording.

    ``audio_signals`` and ``sample_rates`` are consumed in lockstep (pairing
    stops at the shorter of the two). ``n_filters_mfcc`` is used both as the
    number of cepstral coefficients and as the number of filters;
    ``window_size`` and ``window_step`` are forwarded as ``winlen`` and
    ``winstep``. A Hamming window and ``nfft=1024`` are always used.

    When ``append_sum`` is true, the cumulative sum of the MFCCs along axis 0
    is returned instead of the raw MFCCs (axis 0 is presumably the frame
    axis - TODO confirm against python_speech_features). With ``verbose`` a
    tqdm progress bar is shown.

    Returns a list with one feature array per processed recording.
    """
    pairs = zip(audio_signals, sample_rates)
    if verbose:
        pairs = tqdm(pairs, desc='Extracting Features')

    features = []
    for signal, rate in pairs:
        coefficients = mfcc(signal, samplerate=rate, nfft=1024,
                            winlen=window_size, winstep=window_step,
                            numcep=n_filters_mfcc, nfilt=n_filters_mfcc,
                            winfunc=np.hamming)
        features.append(np.cumsum(coefficients, axis=0) if append_sum else coefficients)

    return features


def tanh(x):
    """Apply the hyperbolic tangent element-wise to every item of ``x``.

    Returns a new list with one ``np.tanh`` result per input item.
    """
    squashed = []
    for sample in x:
        squashed.append(np.tanh(sample))
    return squashed


def _preprocess_fold(train_audio, train_rates, val_audio, val_rates, params):
    """Run the preprocessing pipeline for one fold; return encoded (train, val) lists."""
    # 1) Extract MFCCs from audio recordings and use Appendsum (or not)
    X_train = extract_features(train_audio, train_rates, params['n_filters_mfcc'],
                               params['window_size'], params['window_step'],
                               params['append_sum'], verbose=False)
    X_val = extract_features(val_audio, val_rates, params['n_filters_mfcc'],
                             params['window_size'], params['window_step'],
                             params['append_sum'], verbose=False)

    # 2) Database Z-score (statistics come from the training part only)
    z_scorer = DatabaseZScorer()
    X_train = z_scorer.fit_transform(X_train)
    X_val = z_scorer.transform(X_val)

    # 3) Hyperbolic tangent (scaling features to the (-1, +1) interval)
    X_train = tanh(X_train)
    X_val = tanh(X_val)

    # 4) Applying kernel canvas to each recording's feature set
    dimension = X_train[0].shape[1]
    kernel_canvas = wp.KernelCanvas(dimension, params['n_kernels'], bitsBykernel=16,
                                    activationDegree=0.075)
    X_train = [kernel_canvas.transform(rec_features) for rec_features in X_train]
    X_val = [kernel_canvas.transform(rec_features) for rec_features in X_val]

    return X_train, X_val


def cross_validation_wisard(audio_recordings, y, sample_rates, n_splits=5, addressSize=[50],
                            bleachingActivated=[True], n_filters_mfcc=[20],
                            window_size=[0.05], window_step=[0.05], append_sum=[True],
                            n_kernels=[10]):
    """Grid-search WiSARD hyperparameters with stratified k-fold cross-validation.

    For every combination in the grid and every fold, the recordings are
    preprocessed (MFCC extraction, z-scoring, tanh, kernel canvas), a WiSARD
    model is trained on the training part and accuracy is measured on both
    the training and the validation part. Wall-clock times of each stage are
    recorded as well.

    Parameters
    ----------
    audio_recordings : sequence
        Raw recordings, indexable by integer position.
    y : array-like
        One label per recording; converted to a NumPy array if necessary.
    sample_rates : sequence
        Sample rates indexable by the same positions as ``audio_recordings``.
        Each fold takes the rates at its own indices, so every recording is
        paired with its own rate.
    n_splits : int
        Number of folds of the (unshuffled) ``StratifiedKFold``.
    addressSize, bleachingActivated, n_filters_mfcc, window_size,
    window_step, append_sum, n_kernels : list
        Candidate values for each hyperparameter; the grid is their
        Cartesian product. The list defaults are never mutated.

    Returns
    -------
    grid_search_results : pandas.DataFrame
        One row per combination holding the parameters, the mean/std of each
        timing and accuracy across folds, and a ``rank`` column (1 = highest
        mean validation accuracy).
    best_params : dict or None
        First combination that reached the highest mean validation accuracy.
    """
    grid_search_results = []
    best_params = None
    best_accuracy = -1

    if not isinstance(y, np.ndarray):
        y = np.array(y)

    param_grid_dict = dict()
    param_grid_dict['addressSize'] = addressSize
    param_grid_dict['bleachingActivated'] = bleachingActivated
    param_grid_dict['n_filters_mfcc'] = n_filters_mfcc
    param_grid_dict['window_size'] = window_size
    param_grid_dict['window_step'] = window_step
    param_grid_dict['append_sum'] = append_sum
    param_grid_dict['n_kernels'] = n_kernels

    skf = StratifiedKFold(n_splits=n_splits)

    param_grid = ParameterGrid(param_grid_dict)

    param_grid_combinations = len(list(param_grid))
    n_combs = n_splits * param_grid_combinations

    # The context manager closes the progress bar even if a fold raises.
    with tqdm(total=n_combs, position=0, leave=True) as progress:
        for params in param_grid:
            print(params)
            preprocessing_time = []
            training_time = []
            training_prediction_time = []
            validation_prediction_time = []
            training_accuracy = []
            validation_accuracy = []
            for train_index, val_index in skf.split(np.zeros(len(audio_recordings)), y):
                X_train = [audio_recordings[i] for i in train_index]
                train_rates = [sample_rates[i] for i in train_index]
                y_train = y[train_index]
                y_train_str = [str(l) for l in y_train]

                X_val = [audio_recordings[i] for i in val_index]
                val_rates = [sample_rates[i] for i in val_index]
                y_val = y[val_index]
                y_val_str = [str(l) for l in y_val]

                # Preprocessing
                start = time()
                X_train, X_val = _preprocess_fold(X_train, train_rates, X_val, val_rates, params)
                end = time()
                preprocessing_time.append(end-start)

                # training
                model = wp.Wisard(params['addressSize'], bleachingActivated=params['bleachingActivated'],
                                  ignoreZero=True)

                start = time()
                model.train(X_train, y_train_str)
                end = time()
                training_time.append(end-start)

                start = time()
                pred_train = model.classify(X_train)
                end = time()
                training_prediction_time.append(end-start)

                start = time()
                pred_val = model.classify(X_val)
                end = time()
                validation_prediction_time.append(end-start)

                training_accuracy.append(accuracy_score(y_train_str, pred_train))

                validation_accuracy.append(accuracy_score(y_val_str, pred_val))

                progress.update()

            cv_result = {'preprocessing_time_mean': np.mean(preprocessing_time),
                         'preprocessing_time_std': np.std(preprocessing_time),
                         'training_time_mean': np.mean(training_time),
                         'training_time_std': np.std(training_time),
                         'training_prediction_time_mean': np.mean(training_prediction_time),
                         'training_prediction_time_std': np.std(training_prediction_time),
                         'validation_prediction_time_mean': np.mean(validation_prediction_time),
                         'validation_prediction_time_std': np.std(validation_prediction_time),
                         'training_accuracy_mean': np.mean(training_accuracy),
                         'training_accuracy_std': np.std(training_accuracy),
                         'validation_accuracy_mean': np.mean(validation_accuracy),
                         'validation_accuracy_std': np.std(validation_accuracy)}

            cv_result = {**params, **cv_result}

            grid_search_results.append(cv_result)

            # Strict comparison: on ties the earlier combination is kept.
            if (cv_result['validation_accuracy_mean'] > best_accuracy):
                best_accuracy = cv_result['validation_accuracy_mean']
                best_params = params

            print(f"Train acc. mean: {cv_result['training_accuracy_mean']:.4f} | Validation acc. mean: {cv_result['validation_accuracy_mean']:.4f}\n")

    grid_search_results = pd.DataFrame(grid_search_results)
    grid_search_results['rank'] = grid_search_results['validation_accuracy_mean'].rank(ascending=False)

    return grid_search_results, best_params

In [155]:
# Pass the sample rates of the training recordings: `sample_rates` covers the
# whole dataset and is not index-aligned with `X_train`.
ws_gs_results, ws_best_params = cross_validation_wisard(X_train, y_train, train_sample_rates, n_splits=5, addressSize=[5, 10, 15],
                                                        bleachingActivated=[True], n_filters_mfcc=[10, 12, 14],
                                                        window_size=[0.1], window_step=[0.02],
                                                        append_sum=[True], n_kernels=[2**10, 2**11, 2**12])

  0%|          | 0/135 [00:00<?, ?it/s]

{'addressSize': 5, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 10, 'n_kernels': 1024, 'window_size': 0.1, 'window_step': 0.02}


  4%|▎         | 5/135 [01:20<34:58, 16.14s/it]

Train acc. mean: 0.9302 | Validation acc. mean: 0.6511

{'addressSize': 5, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 10, 'n_kernels': 2048, 'window_size': 0.1, 'window_step': 0.02}


  7%|▋         | 10/135 [03:24<48:26, 23.25s/it]

Train acc. mean: 0.9576 | Validation acc. mean: 0.6674

{'addressSize': 5, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 10, 'n_kernels': 4096, 'window_size': 0.1, 'window_step': 0.02}


 11%|█         | 15/135 [07:04<1:21:17, 40.65s/it]

Train acc. mean: 0.9737 | Validation acc. mean: 0.6926

{'addressSize': 5, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 12, 'n_kernels': 1024, 'window_size': 0.1, 'window_step': 0.02}


 15%|█▍        | 20/135 [08:25<38:53, 20.29s/it]  

Train acc. mean: 0.9390 | Validation acc. mean: 0.6230

{'addressSize': 5, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 12, 'n_kernels': 2048, 'window_size': 0.1, 'window_step': 0.02}


 19%|█▊        | 25/135 [10:29<44:26, 24.24s/it]

Train acc. mean: 0.9725 | Validation acc. mean: 0.6707

{'addressSize': 5, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 12, 'n_kernels': 4096, 'window_size': 0.1, 'window_step': 0.02}


 22%|██▏       | 30/135 [14:10<1:11:29, 40.85s/it]

Train acc. mean: 0.9876 | Validation acc. mean: 0.7107

{'addressSize': 5, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 14, 'n_kernels': 1024, 'window_size': 0.1, 'window_step': 0.02}


 26%|██▌       | 35/135 [15:34<34:54, 20.94s/it]  

Train acc. mean: 0.9386 | Validation acc. mean: 0.5774

{'addressSize': 5, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 14, 'n_kernels': 2048, 'window_size': 0.1, 'window_step': 0.02}


 30%|██▉       | 40/135 [17:50<40:12, 25.39s/it]

Train acc. mean: 0.9775 | Validation acc. mean: 0.6459

{'addressSize': 5, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 14, 'n_kernels': 4096, 'window_size': 0.1, 'window_step': 0.02}


 33%|███▎      | 45/135 [21:30<1:01:24, 40.94s/it]

Train acc. mean: 0.9870 | Validation acc. mean: 0.6819

{'addressSize': 10, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 10, 'n_kernels': 1024, 'window_size': 0.1, 'window_step': 0.02}


 37%|███▋      | 50/135 [22:49<28:14, 19.93s/it]  

Train acc. mean: 0.9996 | Validation acc. mean: 0.7089

{'addressSize': 10, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 10, 'n_kernels': 2048, 'window_size': 0.1, 'window_step': 0.02}


 41%|████      | 55/135 [24:47<30:41, 23.02s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7363

{'addressSize': 10, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 10, 'n_kernels': 4096, 'window_size': 0.1, 'window_step': 0.02}


 44%|████▍     | 60/135 [28:15<47:18, 37.85s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7385

{'addressSize': 10, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 12, 'n_kernels': 1024, 'window_size': 0.1, 'window_step': 0.02}


 63%|██████▎   | 57/90 [1:25:11<49:19, 89.67s/it]
 73%|███████▎  | 44/60 [2:07:53<46:30, 174.40s/it]
  8%|▊         | 11/135 [42:43<8:01:32, 233.00s/it]
 48%|████▊     | 65/135 [29:33<22:10, 19.00s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7393

{'addressSize': 10, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 12, 'n_kernels': 2048, 'window_size': 0.1, 'window_step': 0.02}


 52%|█████▏    | 70/135 [31:28<24:06, 22.25s/it]

Train acc. mean: 0.9997 | Validation acc. mean: 0.7507

{'addressSize': 10, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 12, 'n_kernels': 4096, 'window_size': 0.1, 'window_step': 0.02}


 56%|█████▌    | 75/135 [34:44<36:28, 36.48s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7556

{'addressSize': 10, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 14, 'n_kernels': 1024, 'window_size': 0.1, 'window_step': 0.02}


 59%|█████▉    | 80/135 [36:04<18:11, 19.85s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7378

{'addressSize': 10, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 14, 'n_kernels': 2048, 'window_size': 0.1, 'window_step': 0.02}


 63%|██████▎   | 85/135 [38:00<18:35, 22.31s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7341

{'addressSize': 10, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 14, 'n_kernels': 4096, 'window_size': 0.1, 'window_step': 0.02}


 67%|██████▋   | 90/135 [41:23<27:50, 37.13s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7474

{'addressSize': 15, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 10, 'n_kernels': 1024, 'window_size': 0.1, 'window_step': 0.02}


 70%|███████   | 95/135 [42:35<12:03, 18.10s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7081

{'addressSize': 15, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 10, 'n_kernels': 2048, 'window_size': 0.1, 'window_step': 0.02}


 74%|███████▍  | 100/135 [44:24<12:22, 21.22s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7211

{'addressSize': 15, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 10, 'n_kernels': 4096, 'window_size': 0.1, 'window_step': 0.02}


 78%|███████▊  | 105/135 [47:42<18:11, 36.39s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7263

{'addressSize': 15, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 12, 'n_kernels': 1024, 'window_size': 0.1, 'window_step': 0.02}


 81%|████████▏ | 110/135 [48:55<07:35, 18.22s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7100

{'addressSize': 15, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 12, 'n_kernels': 2048, 'window_size': 0.1, 'window_step': 0.02}


 85%|████████▌ | 115/135 [50:46<07:09, 21.49s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7367

{'addressSize': 15, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 12, 'n_kernels': 4096, 'window_size': 0.1, 'window_step': 0.02}


 89%|████████▉ | 120/135 [54:00<08:59, 35.95s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7374

{'addressSize': 15, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 14, 'n_kernels': 1024, 'window_size': 0.1, 'window_step': 0.02}


 93%|█████████▎| 125/135 [55:14<03:03, 18.32s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.6907

{'addressSize': 15, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 14, 'n_kernels': 2048, 'window_size': 0.1, 'window_step': 0.02}


 96%|█████████▋| 130/135 [1:24:10<21:45, 261.11s/it]

Train acc. mean: 1.0000 | Validation acc. mean: 0.7230

{'addressSize': 15, 'append_sum': True, 'bleachingActivated': True, 'n_filters_mfcc': 14, 'n_kernels': 4096, 'window_size': 0.1, 'window_step': 0.02}


100%|██████████| 135/135 [1:27:37<00:00, 78.38s/it] 

Train acc. mean: 1.0000 | Validation acc. mean: 0.7378



In [159]:
# Order the grid-search table from best to worst mean validation accuracy.
ws_gs_results = ws_gs_results.sort_values('rank').reset_index(drop=True)

# Create the output directory if needed; open() would fail if it is missing.
Path('results').mkdir(parents=True, exist_ok=True)
with open('results/wisard2_gs_results.pickle', 'wb') as file:
    pickle.dump(ws_gs_results, file)

ws_gs_results

Unnamed: 0,addressSize,append_sum,bleachingActivated,n_filters_mfcc,n_kernels,window_size,window_step,preprocessing_time_mean,preprocessing_time_std,training_time_mean,training_time_std,training_prediction_time_mean,training_prediction_time_std,validation_prediction_time_mean,validation_prediction_time_std,training_accuracy_mean,training_accuracy_std,validation_accuracy_mean,validation_accuracy_std,rank
0,10,True,True,12,4096,0.1,0.02,34.217883,0.19387,0.834785,0.024041,3.316096,0.083141,0.835686,0.065985,1.0,0.0,0.755556,0.134578,1.0
1,10,True,True,12,2048,0.1,0.02,20.519602,0.56007,0.386201,0.017447,1.54887,0.047177,0.397534,0.051123,0.999722,0.000227,0.750741,0.136398,2.0
2,10,True,True,14,4096,0.1,0.02,35.641275,1.769299,0.837768,0.038955,3.202037,0.050685,0.802793,0.022124,1.0,0.0,0.747407,0.156849,3.0
3,10,True,True,12,1024,0.1,0.02,14.404395,1.197035,0.162248,0.004765,0.73643,0.009323,0.179325,0.004245,1.0,0.0,0.739259,0.142626,4.0
4,10,True,True,10,4096,0.1,0.02,36.2831,3.108483,0.834777,0.103134,3.50192,0.1954,0.827572,0.030283,1.0,0.0,0.738519,0.118098,5.0
5,10,True,True,14,1024,0.1,0.02,14.68176,1.336084,0.171968,0.022872,0.794263,0.106235,0.195674,0.030458,1.0,0.0,0.737778,0.137719,6.0
6,15,True,True,14,4096,0.1,0.02,36.337497,1.894035,0.845935,0.068528,3.165967,0.196738,0.767153,0.05843,1.0,0.0,0.737778,0.143962,7.0
7,15,True,True,12,4096,0.1,0.02,34.108434,0.150173,0.792295,0.106649,3.051481,0.08524,0.736825,0.034037,1.0,0.0,0.737407,0.127191,8.0
8,15,True,True,12,2048,0.1,0.02,19.905715,0.193215,0.362232,0.028394,1.422981,0.025245,0.356856,0.022906,1.0,0.0,0.736667,0.126786,9.0
9,10,True,True,10,2048,0.1,0.02,21.201812,0.084619,0.39873,0.063502,1.606004,0.107351,0.390187,0.024133,1.0,0.0,0.736296,0.129097,10.0


In [160]:
# Create the output directory if needed; open() would fail if it is missing.
Path('results').mkdir(parents=True, exist_ok=True)
with open('results/wisard2_gs_best_params.pickle', 'wb') as file:
    pickle.dump(ws_best_params, file)

ws_best_params

{'addressSize': 10,
 'append_sum': True,
 'bleachingActivated': True,
 'n_filters_mfcc': 12,
 'n_kernels': 4096,
 'window_size': 0.1,
 'window_step': 0.02}

In [161]:
# Reload the best hyperparameters saved by the grid search.
with open('results/wisard2_gs_best_params.pickle', 'rb') as file:
    ws_best_params = pickle.loads(file.read())

In [164]:
# Final evaluation: train on the whole training set with the best
# hyperparameters and measure accuracy on the held-out test set.

# Preprocessing
y_train_str = [str(l) for l in y_train]
y_test_str = [str(l) for l in y_test]

# 1) Extract MFCCs from audio recordings and use Appendsum (or not).
#    Each split uses its own sample rates: `sample_rates` covers the whole
#    dataset and is not index-aligned with X_train or X_test.
X_train_preprocessed = extract_features(X_train, train_sample_rates, ws_best_params['n_filters_mfcc'], ws_best_params['window_size'],
                                        ws_best_params['window_step'], ws_best_params['append_sum'], verbose=False)
X_test_preprocessed = extract_features(X_test, test_sample_rates, ws_best_params['n_filters_mfcc'], ws_best_params['window_size'],
                                       ws_best_params['window_step'], ws_best_params['append_sum'], verbose=False)

# 2) Database Z-score (statistics are fitted on the training set only)
z_scorer = DatabaseZScorer()
X_train_preprocessed = z_scorer.fit_transform(X_train_preprocessed)
X_test_preprocessed = z_scorer.transform(X_test_preprocessed)

# 3) Hyperbolic tangent (scaling features to the (-1, +1) interval)
X_train_preprocessed = tanh(X_train_preprocessed)
X_test_preprocessed = tanh(X_test_preprocessed)

# 4) Applying kernel canvas to each recording's feature set
dimension = X_train_preprocessed[0].shape[1]
kernel_canvas = wp.KernelCanvas(dimension, ws_best_params['n_kernels'],
                                bitsBykernel = 16, activationDegree = 0.075)

X_train_preprocessed = [kernel_canvas.transform(rec_features) for rec_features in X_train_preprocessed]
X_test_preprocessed = [kernel_canvas.transform(rec_features) for rec_features in X_test_preprocessed]

# training
model = wp.Wisard(ws_best_params['addressSize'],
                  bleachingActivated=ws_best_params['bleachingActivated'],
                  ignoreZero=True)

model.train(X_train_preprocessed, y_train_str)

pred_train = model.classify(X_train_preprocessed)
pred_test = model.classify(X_test_preprocessed)

training_accuracy = accuracy_score(y_train_str, pred_train)
test_accuracy = accuracy_score(y_test_str, pred_test)

print(f'Training Accuracy: {training_accuracy:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')

Training Accuracy: 1.0000
Test Accuracy: 0.9233


## Results visualization

In [None]:
# Reload the grid-search table saved earlier.
with open('results/wisard2_gs_results.pickle', 'rb') as file:
    ws_gs_results = pickle.loads(file.read())

ws_gs_results

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Distribution of the mean validation accuracy for each number of kernels.
fig, ax = plt.subplots(figsize=(5, 8))
sns.boxplot(data=ws_gs_results, x="n_kernels", y="validation_accuracy_mean", ax=ax)
ax.set_ylabel('Validation Accuracy')
ax.set_title('5-folds CV results')