## Imports

In [1]:
# Put the project's source directory on the import path before any other import runs.
# The entry is relative, so it is resolved against the kernel's working directory.
import sys
sys.path.append("../src") 

In [2]:
import pandas as pd
from data.wav_data_reader import WavDataReader
from data.signal_dataset import SignalDataset
from features.independent_wav_feature_extractor import IndependentWavFeatureExtractor
from features.features_dataset import FeaturesDataset
import data.preparation_eurythmy_data as ped
from models.fully_connected_classifier import FullyConnectedClassifier
from evaluation.hyperparameter_tuner import HyperparameterTuner

## Reader

In [3]:
# Folder holding the WAV files to load, relative to the notebook's working directory.
# Forward slashes are understood on Windows as well as POSIX systems (and match the
# "../src" entry used for sys.path), whereas a backslash-separated path only works on Windows.
test_folder = "../data/interim/testing"

In [4]:
# Reader over the folder configured above; its accessors are queried when the
# SignalDataset is built.
reader = WavDataReader(folder=test_folder)

## Signal Dataset

In [5]:
# Build the signal dataset from what the reader exposes: its values become the
# signals, its keys become the labels, and its sample rate is forwarded unchanged.
signal_dataset = SignalDataset(
    signals=reader.get_values(),
    labels=reader.get_keys(),
    sample_rate=reader.get_sample_rate(),
)

In [6]:
# The return value is discarded, so this call is made for its effect on `signal_dataset`.
# NOTE(review): presumably derives the eurythmy label columns from the recording keys —
# confirm against SignalDataset.
signal_dataset.process_dataset_adding_eurythmy_labels()

In [7]:
# Choose which label acts as the prediction target.
# NOTE(review): 5 is a magic index — confirm which eurythmy label it selects.
signal_dataset.set_index_target(5)
targets = signal_dataset.get_targets()

# Collapse the targets into a strict binary label for the two-class classifier:
# any positive value becomes 1 and everything else becomes 0, so the result can
# only ever contain the class indices 0 and 1.
normalized_targets = [1 if x > 0 else 0 for x in targets]

In [8]:
# Unpack the dataset into its two parts; `signals` feeds the feature extractor.
signals, labels = signal_dataset.get_data()

## Feature Extractor

In [9]:
%%time
extractor= IndependentWavFeatureExtractor(sample_rate= 10000)
all_features, feature_labels= extractor.extract_features_multiple_waveforms(waveforms= signals, mfccs= True, temporal= True, statistical= True)

CPU times: total: 5min 50s
Wall time: 1min 59s


## Features Dataset

In [10]:
# Pair the extracted features with the binary targets and the feature names.
feat_dataset = FeaturesDataset(
    features=all_features,
    targets=normalized_targets,
    feature_labels=feature_labels,
)

# Run the dataset's preprocessing; the steps it applied are reported in the printed log.
feat_dataset.process_features()

# Last expression of the cell, so its value is shown as the cell output.
feat_dataset.shape()

Removed columns with NaNs: ['hurst', 'hurst_r2']
The Features were properly normalized using 'zscore' method.
Outliers have been treated based on the 1.5 * IQR criterion.
Reduced features from 38 to 6.
Preprocessing complete. Features have been cleaned, normalized, outliers treated, and reduced.


  svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / df


(1348, 6)

In [18]:
# Count of feature labels reported by the dataset; reused below as the network input size.
num_feat = len(feat_dataset.get_labels())

# Preview of the processed features.
feat_dataset.head()

Unnamed: 0,mfcc_1_avg,zero_crossing_rate,root_mean_square_energy,mean,skewness,kurtosis
0,-0.966794,-0.498299,0.030602,0.824374,-1.468362,0.110034
1,0.052383,-0.498299,-0.104927,0.72283,-0.361677,-0.092082
2,-0.428445,-0.498299,0.153809,0.904056,1.64112,0.110034
3,0.653225,-0.498299,0.512618,1.15212,-0.207797,-0.101856
4,-1.04836,-0.498299,-0.003933,0.799949,1.162457,0.012307


In [12]:
# Produce the train / validation / test loaders with a fixed seed for repeatability.
# NOTE(review): presumably 30% of the data is held out and then divided evenly between
# validation and test — confirm against split_dataset_in_loaders.
train_loader, val_loader, test_loader = feat_dataset.split_dataset_in_loaders(
    test_size=0.3,
    val_size=0.5,
    random_state=42,
)

## Search

In [20]:
# Network dimensions and training budget shared by every candidate in the search.
input_size = num_feat
output_size = 2
num_epochs = 5

# Candidate values for each hyperparameter handed to the tuner.
param_grid = dict(
    learning_rate=[0.1, 0.01],
    dense_units=[64, 128],
    dense_layers=[1, 2],
    dropout_rate=[0.25, 0.5],
)

tuner = HyperparameterTuner(
    FullyConnectedClassifier,
    param_grid,
    train_loader,
    val_loader,
    num_epochs,
    input_size,
    output_size,
)

# tune() hands back the winning configuration together with the per-candidate results.
best_params, all_results = tuner.tune()

Epoch 1/5, Training Loss: 0.4968, Training Accuracy: 0.7561, Validation Loss: 0.4181
Epoch 2/5, Training Loss: 0.3919, Training Accuracy: 0.8070, Validation Loss: 0.4245
Epoch 3/5, Training Loss: 0.3662, Training Accuracy: 0.8250, Validation Loss: 0.6467
Epoch 4/5, Training Loss: 0.4330, Training Accuracy: 0.8176, Validation Loss: 0.3990
Epoch 5/5, Training Loss: 0.3651, Training Accuracy: 0.8356, Validation Loss: 0.3861
Epoch 1/5, Training Loss: 0.5348, Training Accuracy: 0.7190, Validation Loss: 0.3824
Epoch 2/5, Training Loss: 0.4174, Training Accuracy: 0.8028, Validation Loss: 0.4389
Epoch 3/5, Training Loss: 0.4103, Training Accuracy: 0.8049, Validation Loss: 0.5529
Epoch 4/5, Training Loss: 0.4137, Training Accuracy: 0.8038, Validation Loss: 0.3675
Epoch 5/5, Training Loss: 0.4219, Training Accuracy: 0.7996, Validation Loss: 0.4612
Epoch 1/5, Training Loss: 0.6602, Training Accuracy: 0.6299, Validation Loss: 0.4180
Epoch 2/5, Training Loss: 0.5101, Training Accuracy: 0.7253, Vali

In [None]:
# Show the best configuration returned by tuner.tune().
print("Best Hyperparameters: \n", best_params)

In [None]:
# Display the first 15 entries of the results returned by tuner.tune().
all_results.head(15)

In [None]:
# Scratch cell: intentionally contains no code, so it cannot raise NameError and halt "Run All".

In [None]:
# The tuner result is bound to `best_params`; no `best_hyperparameters` name exists in this notebook.
print(best_params)

## Model

In [None]:
# Epoch budget for the final model fit below (same value as used for the search).
num_epochs = 5  # Number of training epochs

In [None]:
# Final classifier, configured by hand rather than from the search results.
model = FullyConnectedClassifier(
    input_size=num_feat,  # must equal the number of feature columns (6 after preprocessing)
    hidden_layers=[128, 64],  # two hidden layers with 128 and 64 units respectively
    output_size=2,  # one output per class of the binary target
    dropout_rate=0.5,
    learning_rate=0.001
)

In [None]:
# Fit the model on the training loader, passing the validation loader and the epoch budget.
model.train_model(train_loader, val_loader, num_epochs)

In [None]:
# Predictions for the held-out test loader; compared against the true labels below.
test_predictions = model.predict(test_loader)

In [None]:
# Flatten the ground-truth labels of every test batch into one list.
# The comprehension keeps its loop names local, so the notebook-level `labels`
# (from signal_dataset.get_data()) and IPython's `_` are left untouched.
# NOTE(review): assumes test_loader yields batches in the same order that
# model.predict consumed them (i.e. no shuffling) — confirm.
actual_labels = [
    label
    for _, batch_labels in test_loader
    for label in batch_labels.tolist()
]

In [None]:
# Accuracy = share of test samples whose prediction equals the true label.
correct_predictions = sum(
    predicted == expected
    for predicted, expected in zip(test_predictions, actual_labels)
)
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Macro averaging: every class contributes equally to each score.
precision, recall, f1 = (
    metric(actual_labels, test_predictions, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

print(
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1-Score: {f1:.4f}",
    sep="\n",
)

# Confusion matrix of true labels versus predictions.
conf_matrix = confusion_matrix(actual_labels, test_predictions)
print("Confusion Matrix:\n", conf_matrix)
