### Model testing
Evaluating how the models perform in different configurations, i.e. with different combinations of input features and target labels.

In [1]:
import copy
from sklearn.preprocessing import LabelEncoder
from src.myscripts import prepare_data
import pandas as pd
import contextlib
import sys
from sklearn.metrics import accuracy_score
from src.myscripts.model import Conv1DClassifier
import os
import torch
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import config
from src.myscripts.train import ModelTrainer
@contextlib.contextmanager
def suppress_stdout():
    """Context manager that discards everything written to ``sys.stdout``.

    While the ``with`` body runs, ``sys.stdout`` is pointed at the OS null
    device. The stream that was active on entry is put back on exit, also
    when the body raises.
    """
    with open(os.devnull, 'w') as null_stream:
        # redirect_stdout swaps sys.stdout on entry and restores it on exit.
        with contextlib.redirect_stdout(null_stream):
            yield



def evaluate_avarage_model_accuracy(model, device, data, num_epochs:int, early_stopping_rounds:int, num_of_samples:int, seed=None):
    """Estimate a model's test accuracy by averaging over several independent trainings.

    Each round deep-copies ``model``, asks
    ``prepare_data.prepare_data_for_model`` for a fresh split of ``data``
    (using a newly drawn ``random_state``), trains the copy with
    ``ModelTrainer`` and scores it on the test part of that split with
    ``accuracy_score``.

    Args:
        model: Model to evaluate. A deep copy is handed to the trainer in
            every round, so each round starts from the same initial state.
        device: Torch device passed to ``ModelTrainer``; the test inputs are
            moved to it before prediction.
        data: Dataset forwarded unchanged to
            ``prepare_data.prepare_data_for_model``.
        num_epochs: Forwarded to ``train_model`` as ``epochs``.
        early_stopping_rounds: Forwarded to ``train_model``.
        num_of_samples: Number of train/evaluate rounds to average over;
            must be at least 1.
        seed: Optional seed for the sequence of per-round split seeds. With
            the default ``None`` the split seeds are drawn from NumPy's
            global random state. This function does not seed torch, so the
            value only makes the data splits repeatable.

    Returns:
        The mean of the per-round test accuracies.

    Raises:
        ValueError: If ``num_of_samples`` is smaller than 1.
    """
    if num_of_samples < 1:
        # Without this guard no round runs and the final division fails with
        # an opaque ZeroDivisionError (or yields a meaningless value).
        raise ValueError(f"num_of_samples must be at least 1, got {num_of_samples}")

    # Both np.random and RandomState expose randint(low, high).
    split_seed_source = np.random if seed is None else np.random.RandomState(seed)
    accuracies = []

    for i in range(num_of_samples):
        model_trainer = ModelTrainer(copy.deepcopy(model), device)
        # NOTE(review): -1 and [0.7, 0.2, 0.1] are presumably the label column
        # index and the train/validation/test fractions - confirm against
        # prepare_data_for_model.
        x_train, y_train, x_val, y_val, x_test, y_test = prepare_data.prepare_data_for_model(
            data, -1, LabelEncoder, [0.7, 0.2, 0.1],
            random_state=split_seed_source.randint(0, 10000), save_to_npy=False)

        train_dataset = TensorDataset(
            torch.tensor(x_train.astype(np.float32), dtype=torch.float32),
            torch.tensor(y_train.astype("long"), dtype=torch.long))
        val_dataset = TensorDataset(
            torch.tensor(x_val.astype(np.float32), dtype=torch.float32),
            torch.tensor(y_val.astype("long"), dtype=torch.long))
        x_test_tensor = torch.tensor(x_test.astype(np.float32)).to(device)

        # Labels that arrive as a 2-D array with several columns are reduced
        # to class indices; anything else is used as-is.
        y_test = y_test.astype("long")
        if len(y_test.shape) > 1 and y_test.shape[1] > 1:
            true_labels = np.argmax(y_test, axis=1)
        else:
            true_labels = y_test

        # The trainer's own console output is silenced so only the progress
        # line below is shown.
        with suppress_stdout():
            model_trainer.train_model(train_dataset, val_dataset, epochs=num_epochs, early_stopping_rounds=early_stopping_rounds)

        sample_model = model_trainer.get_trained_model()
        sample_model.eval()
        with torch.no_grad():
            outputs = sample_model(x_test_tensor)
            _, predictions = torch.max(outputs, 1)

        accuracies.append(accuracy_score(true_labels, predictions.cpu().numpy()))
        # "\r" plus the erase-line escape rewrites the same console line.
        print(f"\rSamples done: {i+1}/{num_of_samples}\033[K", end="")

    average_accuracy = sum(accuracies) / num_of_samples
    print("\n--------------")
    print(f"Avarage accuracy of model in {num_of_samples} samples is: {average_accuracy}")
    return average_accuracy
    

C:\Users\Bcom_\Documents\Projekty\Rozpoznawanie_dzwiekow_gitarowych\data\prepared_data


##### Testing different dataset combinations

In [2]:
from src.myscripts import func

n_fft = 2048 # Number of samples taken into each window on which the transform is computed
hop_lenght = 1024 # Number of samples the window is shifted by after each transform (the DataFrame size depends on this)
sr = 22050 # Number of samples per second (the DataFrame size depends on this)

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
def test_feature_and_label_combination(model_class, device, early_stopping_rounds=3, num_epochs=15, num_of_samples=3, continue_on_error=False):
    """Measure the average accuracy of ``model_class`` for each label/feature selection.

    Label and feature selections are the contiguous slices of the lists
    defined below, ordered shortest first. Non-adjacent selections such as
    ``['mel', 'contrast']`` are therefore never evaluated.
    NOTE(review): confirm that skipping non-adjacent selections is intended.

    For every (labels, features) pair the function builds the dataset with
    ``func.get_feature_combination_dataframe``, derives the model input shape
    from one data split and the class count from the ``label`` column, builds
    a model and scores it with ``evaluate_avarage_model_accuracy``.

    Reads the notebook-level names ``n_fft``, ``hop_lenght``, ``sr`` and
    ``config.SOUNDS_DATA_DIR_PATH``.

    Args:
        model_class: Callable invoked as
            ``model_class(num_classes=..., input_shape=...)``.
        device: Torch device forwarded to the evaluation.
        early_stopping_rounds: Forwarded to the evaluation.
        num_epochs: Forwarded to the evaluation.
        num_of_samples: Number of trainings averaged per pair.
        continue_on_error: When ``False`` (default) a ``ValueError`` raised
            for any pair propagates and aborts the sweep. When ``True`` the
            error is printed, NaN is recorded as the accuracy for that pair
            and the sweep continues, so earlier results are not lost.

    Returns:
        ``pandas.DataFrame`` with columns ``label_combination``,
        ``feature_combination`` and ``avarage_model_accuracy``, one row per
        evaluated pair.
    """
    labels = ['sound', 'string', 'pluck', 'sound_type']
    features = ['mel', 'chroma', 'contrast']

    def contiguous_slices(items):
        # Every slice items[i:j]; the stable sort puts shorter slices first
        # while keeping start order within each length.
        slices = [items[i:j] for i in range(len(items)) for j in range(i + 1, len(items) + 1)]
        slices.sort(key=len)
        return slices

    label_combinations = contiguous_slices(labels)
    feature_combinations = contiguous_slices(features)

    # Rows are collected here and turned into a frame once at the end instead
    # of growing a DataFrame row by row.
    records = []
    for label_combination in label_combinations:
        for feature_combination in feature_combinations:
            try:
                data = func.get_feature_combination_dataframe(
                    feature_combination, label_combination,
                    n_fft, hop_lenght, sr, config.SOUNDS_DATA_DIR_PATH)
                # One split is made only to learn the per-sample input shape.
                x_train, _, _, _, _, _ = prepare_data.prepare_data_for_model(
                    data, -1, LabelEncoder, [0.7, 0.2, 0.1], save_to_npy=False)
                input_shape = x_train.shape[1:]
                num_of_classes = data["label"].nunique()
                model = model_class(num_classes=num_of_classes, input_shape=input_shape)
                avg_val = evaluate_avarage_model_accuracy(
                    model, device, data,
                    early_stopping_rounds=early_stopping_rounds,
                    num_epochs=num_epochs,
                    num_of_samples=num_of_samples)
            except ValueError as error:
                if not continue_on_error:
                    raise
                print(f"Skipped {label_combination}+{feature_combination}: {error}")
                avg_val = float("nan")
            else:
                print(f"Finished {label_combination}+{feature_combination}")
            records.append({
                'label_combination': label_combination,
                'feature_combination': feature_combination,
                'avarage_model_accuracy': avg_val,
            })

    return pd.DataFrame(records, columns=['label_combination', 'feature_combination', 'avarage_model_accuracy'])

In [5]:
# Run the sweep with Conv1DClassifier, averaging 3 trainings per combination;
# the trailing expression displays the results table.
result = test_feature_and_label_combination(Conv1DClassifier, device, num_of_samples=3)
result

Samples done: 3/3[K
--------------
Avarage accuracy of model in 3 samples is: 0.9931506849315067
Finished ['sound']+['mel']
Samples done: 3/3[K
--------------
Avarage accuracy of model in 3 samples is: 0.8447488584474886
Finished ['sound']+['chroma']
Samples done: 3/3[K
--------------
Avarage accuracy of model in 3 samples is: 0.1735159817351598
Finished ['sound']+['contrast']
Samples done: 3/3[K
--------------
Avarage accuracy of model in 3 samples is: 0.9863013698630136
Finished ['sound']+['mel', 'chroma']
Samples done: 3/3[K
--------------
Avarage accuracy of model in 3 samples is: 0.7191780821917808
Finished ['sound']+['chroma', 'contrast']
Samples done: 3/3[K
--------------
Avarage accuracy of model in 3 samples is: 0.9931506849315067
Finished ['sound']+['mel', 'chroma', 'contrast']
Samples done: 3/3[K
--------------
Avarage accuracy of model in 3 samples is: 0.9794520547945206
Finished ['string']+['mel']
Samples done: 3/3[K
--------------
Avarage accuracy of model in 3 sa

ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.