# Best Model Neural Network

In [1]:
import sys
sys.path.append("../src")

In [2]:
from features.features_dataset import FeaturesDataset
from models.fully_connected_classifier import FullyConnectedClassifier
from collections import Counter
from torch.utils.data import DataLoader

## Import Dataset

In [3]:
csv_path= r"..\data\processed\features_dataset.csv"
feat_dataset = FeaturesDataset.load_from_csv(csv_path, label_columns= ['id_measurement', 'id_performance', 'datetime', 'plant', 'generation', 'num_eurythmy', 'initial_second', 'eurythmy_letter'],
                                        variable_columns= ['mfcc_1_avg', 'mfcc_2_avg', 'mfcc_3_avg', 'mfcc_4_avg', 'mfcc_5_avg', 'mfcc_6_avg', 'mfcc_7_avg', 'mfcc_8_avg', 'mfcc_9_avg', 'mfcc_10_avg', 'mfcc_11_avg', 'mfcc_12_avg', 'mfcc_13_avg', 'mfcc_1_std', 'mfcc_2_std', 'mfcc_3_std', 'mfcc_4_std', 'mfcc_5_std', 'mfcc_6_std', 'mfcc_7_std', 'mfcc_8_std', 'mfcc_9_std', 'mfcc_10_std', 'mfcc_11_std', 'mfcc_12_std', 'mfcc_13_std', 'zero_crossing_rate', 'root_mean_square_energy', 'slope_sign_changes_ratio', 'hjorth_mobility', 'hjorth_complexity', 'mean', 'variance', 'standard_deviation', 'interquartile_range', 'skewness', 'kurtosis', 'dfa'],
                                        target_column=None)

In [4]:
feat_dataset.features.shape

(148682, 52)

In [5]:
feat_dataset_path= r"..\data\processed\features_dataset"
#feat_dataset = FeaturesDataset.load(file_path= feat_dataset_path)
feat_dataset.features.shape

(148682, 52)

## Prepare Data

In [6]:
feat_dataset.prepare_dataset(drop_constant= True, drop_flatness= True)

## RQ1

Is there any difference in the signals when someone is performing eurythmy?

In [7]:
rq1= feat_dataset.return_subset_given_research_question(1)

In [8]:
train_feat_dataset, _, test_feat_dataset= rq1.split_dataset(split_by_wav= False,test_size= 0.2, val_size= 0, random_state= True)

### Data preparation

In [9]:
# Reduce the features that are correlated in the training data
train_cols= train_feat_dataset.reduce_features(corr_threshold=0.8)
test_feat_dataset.keep_only_specified_variable_columns(train_cols)

# Normalize features ?????
normalization_params= train_feat_dataset.normalize_features()
test_feat_dataset.apply_normalization(normalization_params)

Reduced variable features from 38 to 14.
Variable features were properly normalized using 'zscore' method.
Applied z-score normalization.


In [10]:
train_feat_dataset.objective_features.head()

Unnamed: 0,mfcc_1_avg,mfcc_2_avg,mfcc_1_std,mfcc_2_std,zero_crossing_rate,root_mean_square_energy,slope_sign_changes_ratio,hjorth_mobility,hjorth_complexity,mean,variance,skewness,kurtosis,dfa
0,0.490502,0.521933,-1.153778,-0.729785,0.07279,-0.748774,-0.322884,0.6277,-0.551484,0.188651,-0.161166,-0.159396,-0.059971,-0.56062
1,0.519042,0.651012,0.089378,-0.716642,-0.092952,0.23542,-0.306998,-0.207315,-0.234163,0.994964,-0.169605,-0.079065,-0.039311,-0.076734
2,-0.285698,0.094385,0.798018,0.803377,0.07279,-0.877135,-0.370543,1.318135,-0.70399,-0.007876,-0.16367,-0.472937,-0.046654,-0.554425
3,0.252752,0.078736,0.269404,-0.289341,-0.092952,-0.038785,0.268876,0.379892,-0.603772,0.743943,-0.128771,0.793485,-0.028507,1.211738
4,0.551977,0.517722,-0.762758,-0.703435,-0.010081,-0.398437,-0.171966,0.125735,-0.420576,-0.464539,-0.146951,1.014841,-0.002596,0.03015


In [13]:
test_feat_dataset.objective_features.head()

Unnamed: 0,mfcc_1_avg,mfcc_2_avg,mfcc_1_std,mfcc_2_std,zero_crossing_rate,root_mean_square_energy,slope_sign_changes_ratio,hjorth_mobility,hjorth_complexity,mean,variance,skewness,kurtosis,dfa
0,-1.821271,-0.160021,1.272391,-0.045063,-0.092952,-0.56691,0.030583,-0.354435,-0.111343,0.381587,-0.176078,-1.56364,0.070001,1.244266
1,-0.589727,0.610047,-0.806714,-0.732582,-0.092952,-0.715897,-0.299055,-0.297326,-0.007276,-0.251565,-0.172534,-0.677664,-0.018627,-0.044473
2,-1.006415,0.675447,-0.725802,-0.762587,-0.092952,-0.82533,-0.3626,-0.365141,0.110774,-0.179422,-0.17721,-0.58296,-0.031017,-1.029386
3,-0.294411,0.002671,-1.812635,-0.256293,0.114226,-0.716813,0.161644,-0.201959,0.093364,-0.08404,-0.133621,0.805695,-0.008,1.416964
4,-0.11109,0.654667,0.137952,-0.762173,-0.092952,-0.22481,-0.283169,-0.162103,-0.286673,0.643638,-0.174237,0.859114,-0.015812,-0.808486


In [14]:
train_feat_dataset.print_target_distribution()
test_feat_dataset.print_target_distribution()

Counts and Percentages:
Class 0: Count = 34956, Percentage = 53.78%
Class 1: Count = 30047, Percentage = 46.22%
Counts and Percentages:
Class 0: Count = 8535, Percentage = 52.52%
Class 1: Count = 7716, Percentage = 47.48%


In [15]:
train_loader= DataLoader(train_feat_dataset, batch_size=32, shuffle=True)
test_loader= DataLoader(test_feat_dataset, batch_size=32, shuffle=True)

### Training

In [16]:
num_epochs = 50

In [17]:
input_size= len(train_feat_dataset.variable_columns)
output_size= 2
num_epochs = 50

parameters = {
    'learning_rate': 0.001,
    'dense_units': 128,
    'dense_layers': 1,
    'dropout_rate': 0.2,
    'early_stopping_patience': 7
}

model = FullyConnectedClassifier(input_size= input_size, output_size= output_size, parameters= parameters)

In [18]:
model.train_model(train_loader, test_loader, num_epochs)

Epoch 1/50, Training Loss: 0.6142, Training Accuracy: 0.6646, Validation Loss: 0.6064
Epoch 2/50, Training Loss: 0.6062, Training Accuracy: 0.6686, Validation Loss: 0.5996
Epoch 3/50, Training Loss: 0.6027, Training Accuracy: 0.6738, Validation Loss: 0.5986
Epoch 4/50, Training Loss: 0.6015, Training Accuracy: 0.6740, Validation Loss: 0.6006
Epoch 5/50, Training Loss: 0.6000, Training Accuracy: 0.6768, Validation Loss: 0.6007
Epoch 6/50, Training Loss: 0.5998, Training Accuracy: 0.6749, Validation Loss: 0.5956
Epoch 7/50, Training Loss: 0.5981, Training Accuracy: 0.6760, Validation Loss: 0.5932
Epoch 8/50, Training Loss: 0.5973, Training Accuracy: 0.6759, Validation Loss: 0.5937
Epoch 9/50, Training Loss: 0.5965, Training Accuracy: 0.6787, Validation Loss: 0.5924
Epoch 10/50, Training Loss: 0.5948, Training Accuracy: 0.6798, Validation Loss: 0.5934
Epoch 11/50, Training Loss: 0.5944, Training Accuracy: 0.6810, Validation Loss: 0.5909
Epoch 12/50, Training Loss: 0.5933, Training Accurac

### Evaluation

In [19]:
test_predictions = model.predict(test_loader)

In [20]:
actual_labels = []
for _, labels in test_loader:
    actual_labels.extend(labels.tolist())

In [21]:
correct_predictions = sum(p == t for p, t in zip(test_predictions, actual_labels))
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.4979


In [22]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Assuming your task is a classification task
precision = precision_score(actual_labels, test_predictions, average='macro')
recall = recall_score(actual_labels, test_predictions, average='macro')
f1 = f1_score(actual_labels, test_predictions, average='macro')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Confusion Matrix
conf_matrix = confusion_matrix(actual_labels, test_predictions)
print("Confusion Matrix:\n", conf_matrix)

Precision: 0.4989
Recall: 0.4989
F1-Score: 0.4979
Confusion Matrix:
 [[4085 4450]
 [3710 4006]]


In [23]:
klk

NameError: name 'klk' is not defined

## RQ2

In [None]:
### Data preparation

# Reduce the Dataset for the datapoints concerning rq2

rq2_train_feat_dataset= train_feat_dataset.copy()
processed_train_indexes, train_targets= ped.get_indexes_and_targets_by_rq(2, rq2_train_feat_dataset.features)
rq2_train_feat_dataset.features= rq2_train_feat_dataset.features.iloc[processed_train_indexes]
rq2_train_feat_dataset.features.reset_index(drop=True, inplace=True)

rq2_test_feat_dataset= test_feat_dataset.copy()
processed_test_indexes, test_targets= ped.get_indexes_and_targets_by_rq(2, rq2_test_feat_dataset.features)
rq2_test_feat_dataset.features= rq2_test_feat_dataset.features.iloc[processed_test_indexes]
rq2_test_feat_dataset.features.reset_index(drop=True, inplace=True)

# Reduce the features that are correlated in the training data
train_cols= rq2_train_feat_dataset.reduce_features(targets= train_targets, corr_threshold=0.75)
rq2_test_feat_dataset.keep_only_specified_variable_columns(train_cols)

# Normalize features
normalization_params= rq2_train_feat_dataset.normalize_features()
rq2_test_feat_dataset.apply_normalization(normalization_params)

rq2_train_feat_dataset.objective_features.head()

rq2_test_feat_dataset.objective_features.head()

train_loader= rq2_train_feat_dataset.get_variable_features_loader(train_targets)
test_loader= rq2_test_feat_dataset.get_variable_features_loader(test_targets)

# Train dataset
print_counts_and_percentages(train_targets)

# Test dataset
print_counts_and_percentages(test_targets)

### Training

num_epochs = 50

input_size= len(rq2_train_feat_dataset.variable_columns)
output_size= 3
num_epochs = 50

parameters = {
    'learning_rate': 0.0003,
    'dense_units': 128,
    'dense_layers': 2,
    'dropout_rate': 0.2
}

model = FullyConnectedClassifier(input_size= input_size, output_size= output_size, parameters= parameters)

model.train_model(train_loader, test_loader, num_epochs)

### Evaluation

test_predictions = model.predict(test_loader)

actual_labels = []
for _, labels in test_loader:
    actual_labels.extend(labels.tolist())

correct_predictions = sum(p == t for p, t in zip(test_predictions, actual_labels))
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")


from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Assuming your task is a classification task
precision = precision_score(actual_labels, test_predictions, average='macro')
recall = recall_score(actual_labels, test_predictions, average='macro')
f1 = f1_score(actual_labels, test_predictions, average='macro')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Confusion Matrix
conf_matrix = confusion_matrix(actual_labels, test_predictions)
print("Confusion Matrix:\n", conf_matrix)

## RQ3

In [None]:
### Data preparation

# Reduce the Dataset for the datapoints concerning rq3

rq3_train_feat_dataset= train_feat_dataset.copy()
processed_train_indexes, train_targets= ped.get_indexes_and_targets_by_rq(3, rq3_train_feat_dataset.features)
rq3_train_feat_dataset.features= rq3_train_feat_dataset.features.iloc[processed_train_indexes]
rq3_train_feat_dataset.features.reset_index(drop=True, inplace=True)

rq3_test_feat_dataset= test_feat_dataset.copy()
processed_test_indexes, test_targets= ped.get_indexes_and_targets_by_rq(3, rq3_test_feat_dataset.features)
rq3_test_feat_dataset.features= rq3_test_feat_dataset.features.iloc[processed_test_indexes]
rq3_test_feat_dataset.features.reset_index(drop=True, inplace=True)

# Reduce the features that are correlated in the training data
train_cols= rq3_train_feat_dataset.reduce_features(targets= train_targets, corr_threshold=0.75)
rq3_test_feat_dataset.keep_only_specified_variable_columns(train_cols)

# Normalize features
normalization_params= rq3_train_feat_dataset.normalize_features()
rq3_test_feat_dataset.apply_normalization(normalization_params)

rq3_train_feat_dataset.objective_features.head()

rq3_test_feat_dataset.objective_features.head()

train_loader= rq3_train_feat_dataset.get_variable_features_loader(train_targets)
test_loader= rq3_test_feat_dataset.get_variable_features_loader(test_targets)

# Train dataset
print_counts_and_percentages(train_targets)

# Test dataset
print_counts_and_percentages(test_targets)

### Training

num_epochs = 50

input_size= len(rq3_train_feat_dataset.variable_columns)
output_size= 4
num_epochs = 50

parameters = {
    'learning_rate': 0.0003,
    'dense_units': 64,
    'dense_layers': 1,
    'dropout_rate': 0
}

model = FullyConnectedClassifier(input_size= input_size, output_size= output_size, parameters= parameters)

model.train_model(train_loader, test_loader, num_epochs)

### Evaluation

test_predictions = model.predict(test_loader)

actual_labels = []
for _, labels in test_loader:
    actual_labels.extend(labels.tolist())

correct_predictions = sum(p == t for p, t in zip(test_predictions, actual_labels))
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")


from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Assuming your task is a classification task
precision = precision_score(actual_labels, test_predictions, average='macro')
recall = recall_score(actual_labels, test_predictions, average='macro')
f1 = f1_score(actual_labels, test_predictions, average='macro')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Confusion Matrix
conf_matrix = confusion_matrix(actual_labels, test_predictions)
print("Confusion Matrix:\n", conf_matrix)

## RQ4

In [None]:
### Data preparation

# Reduce the Dataset for the datapoints concerning rq4

rq4_train_feat_dataset= train_feat_dataset.copy()
processed_train_indexes, train_targets= ped.get_indexes_and_targets_by_rq(4, rq4_train_feat_dataset.features)
rq4_train_feat_dataset.features= rq4_train_feat_dataset.features.iloc[processed_train_indexes]
rq4_train_feat_dataset.features.reset_index(drop=True, inplace=True)

rq4_test_feat_dataset= test_feat_dataset.copy()
processed_test_indexes, test_targets= ped.get_indexes_and_targets_by_rq(4, rq4_test_feat_dataset.features)
rq4_test_feat_dataset.features= rq4_test_feat_dataset.features.iloc[processed_test_indexes]
rq4_test_feat_dataset.features.reset_index(drop=True, inplace=True)

# Reduce the features that are correlated in the training data
train_cols= rq4_train_feat_dataset.reduce_features(targets= train_targets, corr_threshold=0.75)
rq4_test_feat_dataset.keep_only_specified_variable_columns(train_cols)

# Normalize features
normalization_params= rq4_train_feat_dataset.normalize_features()
rq4_test_feat_dataset.apply_normalization(normalization_params)

rq4_train_feat_dataset.objective_features.head()

rq4_test_feat_dataset.objective_features.head()

train_loader= rq4_train_feat_dataset.get_variable_features_loader(train_targets)
test_loader= rq4_test_feat_dataset.get_variable_features_loader(test_targets)

# Train dataset
print_counts_and_percentages(train_targets)

# Test dataset
print_counts_and_percentages(test_targets)

### Training

num_epochs = 50

input_size= len(rq4_train_feat_dataset.variable_columns)
output_size= 4
num_epochs = 50

parameters = {
    'learning_rate': 0.0003,
    'dense_units': 64,
    'dense_layers': 2,
    'dropout_rate': 0.2
}

model = FullyConnectedClassifier(input_size= input_size, output_size= output_size, parameters= parameters)

model.train_model(train_loader, test_loader, num_epochs)

### Evaluation

test_predictions = model.predict(test_loader)

actual_labels = []
for _, labels in test_loader:
    actual_labels.extend(labels.tolist())

correct_predictions = sum(p == t for p, t in zip(test_predictions, actual_labels))
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")


from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Assuming your task is a classification task
precision = precision_score(actual_labels, test_predictions, average='macro')
recall = recall_score(actual_labels, test_predictions, average='macro')
f1 = f1_score(actual_labels, test_predictions, average='macro')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Confusion Matrix
conf_matrix = confusion_matrix(actual_labels, test_predictions)
print("Confusion Matrix:\n", conf_matrix)