# Best Model Neural Network

In [1]:
import sys
sys.path.append("../src")

In [2]:
import data.preparation_eurythmy_data as ped
from features.features_dataset import FeaturesDataset
from models.fully_connected_classifier import FullyConnectedClassifier
from collections import Counter

## Import Dataset

In [3]:
# Relative location of the processed features dataset.
# Forward slashes are used so the path resolves on every operating system; the
# previous raw backslash form was only valid on Windows (which accepts "/" too).
feat_dataset_path= "../data/processed/features_dataset"
feat_dataset = FeaturesDataset.load(file_path= feat_dataset_path)

In [4]:
# Size of the loaded features table as (rows, columns)
feat_dataset.features.shape

(148682, 52)

In [5]:
# Preview the first rows: measurement metadata columns followed by the extracted signal features
feat_dataset.features.head()

Unnamed: 0,id_measurement,id_performance,datetime,plant,generation,num_eurythmy,initial_second,eurythmy_letter,mfcc_1_avg,mfcc_2_avg,...,flatness_ratio_100,hjorth_mobility,hjorth_complexity,mean,variance,standard_deviation,interquartile_range,skewness,kurtosis,dfa
0,1,1,2023-04-29,salad,1,1,0.0,,-232.006348,87.030777,...,1.0,0.000128,8171.888932,1.013423,0.013814,0.117533,0.18195,0.224347,-0.509566,1.5868
1,1,1,2023-04-29,salad,1,1,1.0,,-250.255188,85.806961,...,0.9894,0.000129,8190.134755,0.865816,0.015855,0.125916,0.18195,0.418608,-0.598494,1.466508
2,1,1,2023-04-29,salad,1,1,2.0,,-278.646332,68.209419,...,0.9941,7.7e-05,13453.068166,1.28905,0.05486,0.234221,0.454875,-0.314866,-1.620937,1.336079
3,1,1,2023-04-29,salad,1,1,3.0,,-276.146942,74.985809,...,1.0,0.000112,9082.708501,1.374193,0.010778,0.103816,0.090975,-0.524587,0.561958,1.404778
4,1,1,2023-04-29,salad,1,1,4.0,,-299.724091,62.226551,...,1.0,8.5e-05,12209.774692,1.289022,0.029832,0.172718,0.272925,-0.852706,-0.735823,1.509514


## Prepare Data

In [6]:
# Rows with flatness_ratio_100 == 1 are treated as constant signals and removed
is_constant_signal = feat_dataset.features['flatness_ratio_100'] == 1
indexes_constant_value = feat_dataset.features.loc[is_constant_signal].index.tolist()
feat_dataset.drop_rows(indexes_constant_value)

# Remove the duration column and every flatness-ratio column from the dataset
columns = [
    'duration_seconds',
    'flatness_ratio_10000',
    'flatness_ratio_5000',
    'flatness_ratio_1000',
    'flatness_ratio_500',
    'flatness_ratio_100',
]
feat_dataset.drop_columns(columns_to_drop=columns)

In [7]:
# Size after dropping the constant-signal rows and the duration/flatness columns
feat_dataset.features.shape

(120478, 46)

In [8]:
# Get Train, Validation and Test Indexes
train_indexes, val_indexes, test_indexes= ped.get_train_val_test_indexes(df= feat_dataset.features)
# Train and validation rows are merged into a single training pool.
# NOTE(review): `+` only concatenates if the helper returns Python lists; with
# numpy arrays it would add element-wise instead — confirm the return type.
train_val_indexes= train_indexes + val_indexes

# Build the training dataset from the combined train + validation rows
# (positional .iloc selection, then the index is renumbered from 0)
train_feat_dataset= feat_dataset.copy()
train_feat_dataset.features= feat_dataset.features.iloc[train_val_indexes]
train_feat_dataset.features.reset_index(drop=True, inplace=True)

# Build the held-out test dataset (this is the test split, not a validation split)
test_feat_dataset= feat_dataset.copy()
test_feat_dataset.features= feat_dataset.features.iloc[test_indexes]
test_feat_dataset.features.reset_index(drop=True, inplace=True)

In [9]:
# The train and test datasets were built from copies above, so the reference to the full dataset is dropped here
del feat_dataset

## RQ1

Is there any difference in the signals when someone is performing eurythmy?

### Data preparation

In [10]:
# Reduce the Dataset for the datapoints concerning RQ1
# get_indexes_and_targets_by_rq(1, ...) supplies the row positions to keep and the
# targets for those rows; its selection rules live in the preparation module.

# Training side: work on a copy so train_feat_dataset stays reusable for the other RQs
rq1_train_feat_dataset= train_feat_dataset.copy()
processed_train_indexes, train_targets= ped.get_indexes_and_targets_by_rq(1, rq1_train_feat_dataset.features)
# Keep only the selected rows (positional lookup) and renumber the index from 0
rq1_train_feat_dataset.features= rq1_train_feat_dataset.features.iloc[processed_train_indexes]
rq1_train_feat_dataset.features.reset_index(drop=True, inplace=True)

# Test side: same procedure applied to a copy of the test dataset
rq1_test_feat_dataset= test_feat_dataset.copy()
processed_test_indexes, test_targets= ped.get_indexes_and_targets_by_rq(1, rq1_test_feat_dataset.features)
rq1_test_feat_dataset.features= rq1_test_feat_dataset.features.iloc[processed_test_indexes]
rq1_test_feat_dataset.features.reset_index(drop=True, inplace=True)

In [11]:
# Reduce the features that are correlated in the training data
# The selection is decided on the training set only; the returned columns are then
# imposed on the test set so both datasets expose the same variable columns.
train_cols= rq1_train_feat_dataset.reduce_features(targets= train_targets, corr_threshold=0.75)
rq1_test_feat_dataset.keep_only_specified_variable_columns(train_cols)

Reduced variable features from 38 to 12.


In [12]:
# Normalize features
# The parameters returned for the training set are re-applied to the test set,
# so the test data is scaled with training statistics rather than its own.
normalization_params= rq1_train_feat_dataset.normalize_features()
rq1_test_feat_dataset.apply_normalization(normalization_params)

Variable features were properly normalized using 'zscore' method.
Applied z-score normalization.


In [13]:
# Preview the training features after reduction and normalization
rq1_train_feat_dataset.objective_features.head()

Unnamed: 0,mfcc_1_avg,mfcc_1_std,zero_crossing_rate,root_mean_square_energy,slope_sign_changes_ratio,hjorth_mobility,hjorth_complexity,mean,standard_deviation,skewness,kurtosis,dfa
0,0.188361,1.299811,-0.64364,1.578563,-0.426619,-0.312965,-0.261353,2.028067,-0.56707,0.230537,-0.053963,-1.572177
1,0.316321,1.064484,-0.64364,1.215891,-0.367199,-0.3243,-0.250261,1.749495,-0.560078,-0.576037,-0.033285,-0.711235
2,0.431995,0.992463,-0.64364,1.434828,-0.353487,-0.489026,-0.133136,1.917698,-0.565244,0.209828,-0.02047,-0.387445
3,0.27233,1.256343,-0.64364,1.604177,-0.376341,-0.409596,-0.195232,2.047841,-0.573485,-1.291495,0.039514,-0.50619
4,0.520903,0.953421,-0.64364,1.219014,-0.307779,-0.32182,-0.249028,1.741855,-0.2686,-0.206831,-0.059281,-0.387946


In [14]:
# Preview the test features after applying the training column selection and normalization
rq1_test_feat_dataset.objective_features.head()

Unnamed: 0,mfcc_1_avg,mfcc_1_std,zero_crossing_rate,root_mean_square_energy,slope_sign_changes_ratio,hjorth_mobility,hjorth_complexity,mean,standard_deviation,skewness,kurtosis,dfa
0,0.534499,0.584997,0.815566,-0.30709,-0.412907,-0.178979,-0.144306,0.071243,0.665108,0.55766,-0.067981,-0.353997
1,0.424618,-1.543969,0.815566,-0.781438,-0.412907,1.364723,-0.575708,-0.11688,-0.346188,0.969524,-0.020295,-1.117752
2,0.797651,-1.197263,-0.64364,-0.204711,-0.380911,-0.676213,0.545226,0.644185,-0.407335,-1.076927,0.066848,-1.016913
3,1.001029,-1.041649,-0.64364,-0.416091,-0.358058,0.596411,-0.486407,0.458802,-0.294886,-0.331715,-0.032258,-0.543545
4,0.523638,0.880075,-0.64364,-0.062416,-0.399194,-0.516748,0.126405,0.696169,0.043693,-0.409966,-0.051108,-0.365152


In [15]:
# Build the loaders consumed by the model, pairing each dataset with its RQ1 targets.
# NOTE(review): whether these loaders shuffle is not visible from this notebook; the
# evaluation cells iterate test_loader more than once and depend on a stable order.
train_loader= rq1_train_feat_dataset.get_variable_features_loader(train_targets)
test_loader= rq1_test_feat_dataset.get_variable_features_loader(test_targets)

In [16]:
def print_counts_and_percentages(values):
    """Print how often each distinct value occurs and its share of the total.

    A header line is printed first, followed by one line per distinct value in
    order of first appearance, with the percentage shown to two decimals.

    Args:
        values: Iterable of hashable class labels.
    """
    tally = Counter(values)
    n_items = sum(tally.values())

    print("Counts and Percentages:")
    for label in tally:
        occurrences = tally[label]
        share = (occurrences / n_items) * 100
        print(f"Class {label}: Count = {occurrences}, Percentage = {share:.2f}%")

In [17]:
# Train dataset
print_counts_and_percentages(train_targets)

Counts and Percentages:
Class 1: Count = 30155, Percentage = 46.44%
Class 0: Count = 34785, Percentage = 53.56%


In [18]:
# Test dataset
print_counts_and_percentages(test_targets)

Counts and Percentages:
Class 1: Count = 7608, Percentage = 46.63%
Class 0: Count = 8706, Percentage = 53.37%


### Training

In [19]:
# Number of training epochs (the next cell assigns the same value again)
num_epochs = 50

In [20]:
# Network input width = number of variable feature columns kept after reduction
input_size= len(rq1_train_feat_dataset.variable_columns)
# Two output classes, matching the RQ1 targets 0 and 1
output_size= 2
# Same value as in the previous cell; this repeated assignment is redundant
num_epochs = 50

# Hyperparameters handed to FullyConnectedClassifier; how each key is interpreted
# is defined by that class, not by this notebook
parameters = {
    'learning_rate': 0.001,
    'dense_units': 128,
    'dense_layers': 1,
    'dropout_rate': 0.2
}

model = FullyConnectedClassifier(input_size= input_size, output_size= output_size, parameters= parameters)

In [21]:
# Train the classifier for num_epochs epochs.
# NOTE(review): test_loader is passed as the second argument, so the "Validation Loss"
# printed per epoch appears to be measured on the test set — confirm against train_model.
model.train_model(train_loader, test_loader, num_epochs)

Epoch 1/50, Training Loss: 0.6196, Training Accuracy: 0.6620, Validation Loss: 0.5993
Epoch 2/50, Training Loss: 0.6102, Training Accuracy: 0.6695, Validation Loss: 0.5959
Epoch 3/50, Training Loss: 0.6069, Training Accuracy: 0.6719, Validation Loss: 0.5942
Epoch 4/50, Training Loss: 0.6046, Training Accuracy: 0.6739, Validation Loss: 0.5942
Epoch 5/50, Training Loss: 0.6038, Training Accuracy: 0.6761, Validation Loss: 0.5904
Epoch 6/50, Training Loss: 0.6024, Training Accuracy: 0.6766, Validation Loss: 0.5963
Epoch 7/50, Training Loss: 0.6011, Training Accuracy: 0.6779, Validation Loss: 0.5945
Epoch 8/50, Training Loss: 0.6003, Training Accuracy: 0.6794, Validation Loss: 0.5949
Epoch 9/50, Training Loss: 0.5994, Training Accuracy: 0.6800, Validation Loss: 0.5844
Epoch 10/50, Training Loss: 0.5990, Training Accuracy: 0.6803, Validation Loss: 0.5923
Epoch 11/50, Training Loss: 0.5977, Training Accuracy: 0.6800, Validation Loss: 0.5888
Epoch 12/50, Training Loss: 0.5978, Training Accurac

### Evaluation

In [22]:
# Predictions for the test set, produced from one pass over test_loader
# (presumably one predicted class per sample — confirm against model.predict)
test_predictions = model.predict(test_loader)

In [23]:
# Collect the ground-truth labels by iterating test_loader a second time
# (the predictions were produced by a separate, earlier pass).
# NOTE(review): labels and predictions only line up if the loader yields samples in
# the same order on every pass. If it shuffles, the pairs are mismatched and every
# metric computed from them is meaningless. The saved test accuracy (~0.50) against
# a training accuracy of ~0.68 makes this worth checking — TODO confirm.
actual_labels = []
for _, labels in test_loader:
    actual_labels.extend(labels.tolist())

In [24]:
# Accuracy = share of positions where the prediction equals the label.
# Predictions and labels are paired purely by position in their sequences.
correct_predictions = sum(p == t for p, t in zip(test_predictions, actual_labels))
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.4969


In [25]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Macro-averaged scores: each class's score is computed separately and the
# unweighted mean is reported, so every class counts equally regardless of size
precision = precision_score(actual_labels, test_predictions, average='macro')
recall = recall_score(actual_labels, test_predictions, average='macro')
f1 = f1_score(actual_labels, test_predictions, average='macro')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Confusion Matrix (scikit-learn layout: rows are true classes, columns are predicted classes)
conf_matrix = confusion_matrix(actual_labels, test_predictions)
print("Confusion Matrix:\n", conf_matrix)

Precision: 0.4976
Recall: 0.4976
F1-Score: 0.4966
Confusion Matrix:
 [[4248 4458]
 [3749 3859]]


## RQ2

In [26]:
### Data preparation

# Reduce the Dataset for the datapoints concerning rq2
# get_indexes_and_targets_by_rq(2, ...) supplies the row positions to keep and their targets.
# train_targets / test_targets are overwritten here and now refer to RQ2.

rq2_train_feat_dataset= train_feat_dataset.copy()
processed_train_indexes, train_targets= ped.get_indexes_and_targets_by_rq(2, rq2_train_feat_dataset.features)
rq2_train_feat_dataset.features= rq2_train_feat_dataset.features.iloc[processed_train_indexes]
rq2_train_feat_dataset.features.reset_index(drop=True, inplace=True)

rq2_test_feat_dataset= test_feat_dataset.copy()
processed_test_indexes, test_targets= ped.get_indexes_and_targets_by_rq(2, rq2_test_feat_dataset.features)
rq2_test_feat_dataset.features= rq2_test_feat_dataset.features.iloc[processed_test_indexes]
rq2_test_feat_dataset.features.reset_index(drop=True, inplace=True)

# Reduce the features that are correlated in the training data
# (columns are chosen on the training set, then imposed on the test set)
train_cols= rq2_train_feat_dataset.reduce_features(targets= train_targets, corr_threshold=0.75)
rq2_test_feat_dataset.keep_only_specified_variable_columns(train_cols)

# Normalize features
# (parameters returned for the training set are re-applied to the test set)
normalization_params= rq2_train_feat_dataset.normalize_features()
rq2_test_feat_dataset.apply_normalization(normalization_params)

# NOTE: these two previews are not the last expression of the cell, so Jupyter
# does not display them; they have no visible effect here.
rq2_train_feat_dataset.objective_features.head()

rq2_test_feat_dataset.objective_features.head()

# train_loader / test_loader are rebound to the RQ2 data from here on
train_loader= rq2_train_feat_dataset.get_variable_features_loader(train_targets)
test_loader= rq2_test_feat_dataset.get_variable_features_loader(test_targets)

# Train dataset
print_counts_and_percentages(train_targets)

# Test dataset
print_counts_and_percentages(test_targets)

### Training

num_epochs = 50

# Network input width = number of variable feature columns kept after reduction
input_size= len(rq2_train_feat_dataset.variable_columns)
# Three output classes, matching the RQ2 targets 0, 1 and 2
output_size= 3
# Redundant: same value as assigned a few lines above
num_epochs = 50

# Hyperparameters handed to FullyConnectedClassifier; how each key is interpreted
# is defined by that class, not by this notebook
parameters = {
    'learning_rate': 0.0003,
    'dense_units': 128,
    'dense_layers': 2,
    'dropout_rate': 0.2
}

# `model` is rebound: the RQ1 model is no longer reachable under this name
model = FullyConnectedClassifier(input_size= input_size, output_size= output_size, parameters= parameters)

# NOTE(review): test_loader is passed as the second argument, so the "Validation Loss"
# in the training log appears to be measured on the test set — confirm against train_model.
model.train_model(train_loader, test_loader, num_epochs)

### Evaluation

# Predictions come from one pass over test_loader ...
test_predictions = model.predict(test_loader)

# ... and the ground-truth labels from a second, separate pass.
# NOTE(review): the two only line up if the loader yields samples in the same order
# on every pass; if it shuffles, all metrics below are computed on mismatched
# pairs — TODO confirm.
actual_labels = []
for _, labels in test_loader:
    actual_labels.extend(labels.tolist())

# Accuracy = share of positions where the prediction equals the label
correct_predictions = sum(p == t for p, t in zip(test_predictions, actual_labels))
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")


from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Macro-averaged scores: the unweighted mean of the per-class scores
precision = precision_score(actual_labels, test_predictions, average='macro')
recall = recall_score(actual_labels, test_predictions, average='macro')
f1 = f1_score(actual_labels, test_predictions, average='macro')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Confusion Matrix (scikit-learn layout: rows are true classes, columns are predicted classes)
conf_matrix = confusion_matrix(actual_labels, test_predictions)
print("Confusion Matrix:\n", conf_matrix)

Reduced variable features from 38 to 17.
Variable features were properly normalized using 'zscore' method.
Applied z-score normalization.
Counts and Percentages:
Class 0: Count = 9374, Percentage = 34.96%
Class 1: Count = 8409, Percentage = 31.36%
Class 2: Count = 9032, Percentage = 33.68%
Counts and Percentages:
Class 0: Count = 2366, Percentage = 34.54%
Class 1: Count = 2170, Percentage = 31.67%
Class 2: Count = 2315, Percentage = 33.79%
Epoch 1/50, Training Loss: 1.0999, Training Accuracy: 0.3492, Validation Loss: 1.1528
Epoch 2/50, Training Loss: 1.0972, Training Accuracy: 0.3572, Validation Loss: 1.1060
Epoch 3/50, Training Loss: 1.0959, Training Accuracy: 0.3645, Validation Loss: 1.1368
Epoch 4/50, Training Loss: 1.0954, Training Accuracy: 0.3620, Validation Loss: 1.1020
Epoch 5/50, Training Loss: 1.0947, Training Accuracy: 0.3667, Validation Loss: 1.1062
Epoch 6/50, Training Loss: 1.0937, Training Accuracy: 0.3667, Validation Loss: 1.1566
Epoch 7/50, Training Loss: 1.0937, Train

## RQ3

In [28]:
### Data preparation

# Reduce the Dataset for the datapoints concerning rq3
# get_indexes_and_targets_by_rq(3, ...) supplies the row positions to keep and their targets.
# train_targets / test_targets are overwritten here and now refer to RQ3.

rq3_train_feat_dataset= train_feat_dataset.copy()
processed_train_indexes, train_targets= ped.get_indexes_and_targets_by_rq(3, rq3_train_feat_dataset.features)
rq3_train_feat_dataset.features= rq3_train_feat_dataset.features.iloc[processed_train_indexes]
rq3_train_feat_dataset.features.reset_index(drop=True, inplace=True)

rq3_test_feat_dataset= test_feat_dataset.copy()
processed_test_indexes, test_targets= ped.get_indexes_and_targets_by_rq(3, rq3_test_feat_dataset.features)
rq3_test_feat_dataset.features= rq3_test_feat_dataset.features.iloc[processed_test_indexes]
rq3_test_feat_dataset.features.reset_index(drop=True, inplace=True)

# Reduce the features that are correlated in the training data
# (columns are chosen on the training set, then imposed on the test set)
train_cols= rq3_train_feat_dataset.reduce_features(targets= train_targets, corr_threshold=0.75)
rq3_test_feat_dataset.keep_only_specified_variable_columns(train_cols)

# Normalize features
# (parameters returned for the training set are re-applied to the test set)
normalization_params= rq3_train_feat_dataset.normalize_features()
rq3_test_feat_dataset.apply_normalization(normalization_params)

# NOTE: these two previews are not the last expression of the cell, so Jupyter
# does not display them; they have no visible effect here.
rq3_train_feat_dataset.objective_features.head()

rq3_test_feat_dataset.objective_features.head()

# train_loader / test_loader are rebound to the RQ3 data from here on
train_loader= rq3_train_feat_dataset.get_variable_features_loader(train_targets)
test_loader= rq3_test_feat_dataset.get_variable_features_loader(test_targets)

# Train dataset
print_counts_and_percentages(train_targets)

# Test dataset
print_counts_and_percentages(test_targets)

### Training

num_epochs = 50

# Network input width = number of variable feature columns kept after reduction
input_size= len(rq3_train_feat_dataset.variable_columns)
# Four output classes, matching the RQ3 targets 0 to 3
output_size= 4
# Redundant: same value as assigned a few lines above
num_epochs = 50

# Hyperparameters handed to FullyConnectedClassifier; how each key is interpreted
# is defined by that class, not by this notebook
parameters = {
    'learning_rate': 0.0003,
    'dense_units': 64,
    'dense_layers': 1,
    'dropout_rate': 0
}

# `model` is rebound: the previous RQ's model is no longer reachable under this name
model = FullyConnectedClassifier(input_size= input_size, output_size= output_size, parameters= parameters)

# NOTE(review): test_loader is passed as the second argument, so the "Validation Loss"
# in the training log appears to be measured on the test set — confirm against train_model.
# NOTE(review): the saved log shows training accuracy of about 0.63, close to the
# class-0 share of the training targets (63.08%); the model may be predicting mostly
# the majority class — check the confusion matrix.
model.train_model(train_loader, test_loader, num_epochs)

### Evaluation

# Predictions come from one pass over test_loader ...
test_predictions = model.predict(test_loader)

# ... and the ground-truth labels from a second, separate pass.
# NOTE(review): the two only line up if the loader yields samples in the same order
# on every pass; if it shuffles, all metrics below are computed on mismatched
# pairs — TODO confirm.
actual_labels = []
for _, labels in test_loader:
    actual_labels.extend(labels.tolist())

# Accuracy = share of positions where the prediction equals the label
correct_predictions = sum(p == t for p, t in zip(test_predictions, actual_labels))
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")


from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Macro-averaged scores: the unweighted mean of the per-class scores.
# NOTE(review): the saved output of this cell ends with a scikit-learn `_warn_prf`
# warning, which usually means a per-class score was undefined (for example a class
# that was never predicted). Consider passing zero_division explicitly and
# inspecting the confusion matrix — TODO confirm which metric triggered it.
precision = precision_score(actual_labels, test_predictions, average='macro')
recall = recall_score(actual_labels, test_predictions, average='macro')
f1 = f1_score(actual_labels, test_predictions, average='macro')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Confusion Matrix (scikit-learn layout: rows are true classes, columns are predicted classes)
conf_matrix = confusion_matrix(actual_labels, test_predictions)
print("Confusion Matrix:\n", conf_matrix)

Reduced variable features from 38 to 14.
Variable features were properly normalized using 'zscore' method.
Applied z-score normalization.
Counts and Percentages:
Class 0: Count = 19022, Percentage = 63.08%
Class 1: Count = 4076, Percentage = 13.52%
Class 2: Count = 3822, Percentage = 12.67%
Class 3: Count = 3235, Percentage = 10.73%
Counts and Percentages:
Class 0: Count = 4110, Percentage = 54.02%
Class 1: Count = 1602, Percentage = 21.06%
Class 2: Count = 1371, Percentage = 18.02%
Class 3: Count = 525, Percentage = 6.90%
Epoch 1/50, Training Loss: 1.1116, Training Accuracy: 0.6107, Validation Loss: 2.1743
Epoch 2/50, Training Loss: 1.0468, Training Accuracy: 0.6307, Validation Loss: 1.8690
Epoch 3/50, Training Loss: 1.0361, Training Accuracy: 0.6311, Validation Loss: 1.7982
Epoch 4/50, Training Loss: 1.0295, Training Accuracy: 0.6321, Validation Loss: 1.7735
Epoch 5/50, Training Loss: 1.0251, Training Accuracy: 0.6332, Validation Loss: 1.8492
Epoch 6/50, Training Loss: 1.0211, Traini

  _warn_prf(average, modifier, msg_start, len(result))


## RQ4

In [29]:
### Data preparation

# Reduce the Dataset for the datapoints concerning rq4
# get_indexes_and_targets_by_rq(4, ...) supplies the row positions to keep and their targets.
# train_targets / test_targets are overwritten here and now refer to RQ4.

rq4_train_feat_dataset= train_feat_dataset.copy()
processed_train_indexes, train_targets= ped.get_indexes_and_targets_by_rq(4, rq4_train_feat_dataset.features)
rq4_train_feat_dataset.features= rq4_train_feat_dataset.features.iloc[processed_train_indexes]
rq4_train_feat_dataset.features.reset_index(drop=True, inplace=True)

rq4_test_feat_dataset= test_feat_dataset.copy()
processed_test_indexes, test_targets= ped.get_indexes_and_targets_by_rq(4, rq4_test_feat_dataset.features)
rq4_test_feat_dataset.features= rq4_test_feat_dataset.features.iloc[processed_test_indexes]
rq4_test_feat_dataset.features.reset_index(drop=True, inplace=True)

# Reduce the features that are correlated in the training data
# (columns are chosen on the training set, then imposed on the test set)
train_cols= rq4_train_feat_dataset.reduce_features(targets= train_targets, corr_threshold=0.75)
rq4_test_feat_dataset.keep_only_specified_variable_columns(train_cols)

# Normalize features
# (parameters returned for the training set are re-applied to the test set)
normalization_params= rq4_train_feat_dataset.normalize_features()
rq4_test_feat_dataset.apply_normalization(normalization_params)

# NOTE: these two previews are not the last expression of the cell, so Jupyter
# does not display them; they have no visible effect here.
rq4_train_feat_dataset.objective_features.head()

rq4_test_feat_dataset.objective_features.head()

# train_loader / test_loader are rebound to the RQ4 data from here on
train_loader= rq4_train_feat_dataset.get_variable_features_loader(train_targets)
test_loader= rq4_test_feat_dataset.get_variable_features_loader(test_targets)

# Train dataset
print_counts_and_percentages(train_targets)

# Test dataset
print_counts_and_percentages(test_targets)

### Training

num_epochs = 50

# Network input width = number of variable feature columns kept after reduction
input_size= len(rq4_train_feat_dataset.variable_columns)
# Four output classes, matching the RQ4 targets 0 to 3
output_size= 4
# Redundant: same value as assigned a few lines above
num_epochs = 50

# Hyperparameters handed to FullyConnectedClassifier; how each key is interpreted
# is defined by that class, not by this notebook
parameters = {
    'learning_rate': 0.0003,
    'dense_units': 64,
    'dense_layers': 2,
    'dropout_rate': 0.2
}

# `model` is rebound: the previous RQ's model is no longer reachable under this name
model = FullyConnectedClassifier(input_size= input_size, output_size= output_size, parameters= parameters)

# NOTE(review): test_loader is passed as the second argument, so the "Validation Loss"
# in the training log appears to be measured on the test set — confirm against train_model.
model.train_model(train_loader, test_loader, num_epochs)

### Evaluation

# Predictions come from one pass over test_loader ...
test_predictions = model.predict(test_loader)

# ... and the ground-truth labels from a second, separate pass.
# NOTE(review): the two only line up if the loader yields samples in the same order
# on every pass; if it shuffles, all metrics below are computed on mismatched
# pairs — TODO confirm.
actual_labels = []
for _, labels in test_loader:
    actual_labels.extend(labels.tolist())

# Accuracy = share of positions where the prediction equals the label
correct_predictions = sum(p == t for p, t in zip(test_predictions, actual_labels))
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")


from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Macro-averaged scores: the unweighted mean of the per-class scores
precision = precision_score(actual_labels, test_predictions, average='macro')
recall = recall_score(actual_labels, test_predictions, average='macro')
f1 = f1_score(actual_labels, test_predictions, average='macro')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Confusion Matrix (scikit-learn layout: rows are true classes, columns are predicted classes)
conf_matrix = confusion_matrix(actual_labels, test_predictions)
print("Confusion Matrix:\n", conf_matrix)

Reduced variable features from 38 to 17.
Variable features were properly normalized using 'zscore' method.
Applied z-score normalization.
Counts and Percentages:
Class 0: Count = 8082, Percentage = 26.80%
Class 1: Count = 7558, Percentage = 25.06%
Class 2: Count = 7516, Percentage = 24.92%
Class 3: Count = 6999, Percentage = 23.21%
Counts and Percentages:
Class 0: Count = 2010, Percentage = 26.42%
Class 1: Count = 1830, Percentage = 24.05%
Class 2: Count = 1922, Percentage = 25.26%
Class 3: Count = 1846, Percentage = 24.26%
Epoch 1/50, Training Loss: 1.3809, Training Accuracy: 0.2801, Validation Loss: 1.4324
Epoch 2/50, Training Loss: 1.3743, Training Accuracy: 0.2932, Validation Loss: 1.3976
Epoch 3/50, Training Loss: 1.3715, Training Accuracy: 0.2999, Validation Loss: 1.4166
Epoch 4/50, Training Loss: 1.3706, Training Accuracy: 0.2990, Validation Loss: 1.4107
Epoch 5/50, Training Loss: 1.3702, Training Accuracy: 0.2987, Validation Loss: 1.3926
Epoch 6/50, Training Loss: 1.3690, Train