# Experiment 1: Neural Network

In [1]:
import sys

# Make the modules under ../src (relative to the notebook's working directory)
# importable. The membership check keeps the cell idempotent: re-running it
# no longer appends a duplicate entry to sys.path each time.
src_dir = "../src"
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [2]:
import data.preparation_eurythmy_data as ped
from features.features_dataset import FeaturesDataset
from models.fully_connected_classifier import FullyConnectedClassifier
from evaluation.hyperparameter_tuner import HyperparameterTuner

## Import Dataset

In [3]:
# Load the processed features dataset from disk.
# Forward slashes are accepted as path separators on Windows, Linux and macOS,
# whereas a backslash-separated literal only resolves as a path on Windows.
# This also matches the "../src" style used for sys.path above.
feat_dataset_path = "../data/processed/features_dataset"
feat_dataset = FeaturesDataset.load(file_path=feat_dataset_path)

In [4]:
# Inspect the (rows, columns) shape of the loaded feature table.
feat_dataset.features.shape

(148682, 52)

## Prepare Data

In [5]:
# Columns removed before modelling: the segment duration and every
# flatness-ratio feature.
columns = [
    'duration_seconds',
    'flatness_ratio_10000',
    'flatness_ratio_5000',
    'flatness_ratio_1000',
    'flatness_ratio_500',
    'flatness_ratio_100',
]

# NOTE(review): the return value is unused, so drop_columns presumably mutates
# feat_dataset in place — confirm; if so, re-running this cell may fail because
# the columns are already gone.
feat_dataset.drop_columns(columns_to_drop=columns)

In [6]:
# Get the train and validation indexes. The third return value (presumably the
# test indexes, going by the helper's name) is discarded in this cell.
train_indexes, val_indexes, _ = ped.get_train_val_test_indexes(df=feat_dataset.features)

# Training subset: copy the dataset wrapper, keep only the training rows and
# renumber them from 0. The reset is chained onto the slice so a new frame is
# assigned in one step instead of mutating a freshly sliced frame in place.
train_feat_dataset = feat_dataset.copy()
train_feat_dataset.features = feat_dataset.features.iloc[train_indexes].reset_index(drop=True)

# Validation subset: same procedure with the validation rows.
val_feat_dataset = feat_dataset.copy()
val_feat_dataset.features = feat_dataset.features.iloc[val_indexes].reset_index(drop=True)

In [7]:
#del feat_dataset

## RQ1

Is there any difference in the signals when someone is performing eurythmy?

In [8]:
# Reduce both datasets to the data points concerning RQ1.
# NOTE: this cell overwrites each dataset's `features` with a filtered version
# of itself, so re-running it filters already-filtered data; re-run from the
# split cell instead.
research_question = 1

# Training set: select the relevant rows, get their targets, renumber from 0.
# reset_index is chained so a new frame is assigned rather than mutated in place.
processed_train_indexes, train_targets = ped.get_indexes_and_targets_by_rq(
    research_question, train_feat_dataset.features
)
train_feat_dataset.features = (
    train_feat_dataset.features.iloc[processed_train_indexes].reset_index(drop=True)
)

# Validation set: same procedure.
processed_val_indexes, val_targets = ped.get_indexes_and_targets_by_rq(
    research_question, val_feat_dataset.features
)
val_feat_dataset.features = (
    val_feat_dataset.features.iloc[processed_val_indexes].reset_index(drop=True)
)

In [9]:
# Number of target labels returned for the RQ1 training subset.
len(train_targets)

64740

In [10]:
# Select features on the training data only (correlation threshold 0.75), then
# restrict the validation set to the columns that were kept for training.
train_cols = train_feat_dataset.reduce_features(
    targets=train_targets,
    corr_threshold=0.75,
)
val_feat_dataset.keep_only_specified_variable_columns(train_cols)

Reduced variable features from 38 to 12.


In [11]:
# Normalize the training features and keep the returned parameters, then apply
# those same parameters to the validation features so both share one scaling.
normalization_params = train_feat_dataset.normalize_features()
val_feat_dataset.apply_normalization(normalization_params)

Variable features were properly normalized using 'zscore' method.
Applied z-score normalization.


In [12]:
# Preview the first rows of the training features after reduction and normalization.
train_feat_dataset.features.head()

Unnamed: 0,id_measurement,id_performance,datetime,plant,generation,num_eurythmy,initial_second,eurythmy_letter,mfcc_1_avg,zero_crossing_rate,root_mean_square_energy,slope_sign_changes_ratio,hjorth_mobility,hjorth_complexity,mean,variance,standard_deviation,skewness,kurtosis,dfa
0,2,1,2023-04-29,salad,1,1,17.0,A1,0.317104,-0.587715,1.830673,-0.352106,-0.508262,-0.075092,2.213391,-0.145385,-0.524891,-0.214881,-0.061647,-0.348185
1,2,1,2023-04-29,salad,1,1,18.0,A1,0.33313,-0.587715,1.720997,-0.357386,-0.324904,-0.228059,2.129181,-0.143647,-0.486652,0.203625,-0.064623,-0.988416
2,2,1,2023-04-29,salad,1,1,19.0,A1,0.426624,-0.587715,1.339804,-0.288754,-0.335506,-0.215895,1.838362,-0.143275,-0.479176,-0.517555,-0.044271,-0.340616
3,2,1,2023-04-29,salad,1,1,20.0,A1,0.511142,-0.587715,1.569921,-0.272916,-0.48957,-0.087459,2.01396,-0.143551,-0.4847,0.185108,-0.031657,-0.096986
4,2,1,2023-04-29,salad,1,1,21.0,A1,0.394482,-0.587715,1.747919,-0.299313,-0.415282,-0.155551,2.149823,-0.14398,-0.493511,-1.157265,0.027384,-0.186334


In [13]:
# Preview the first rows of the validation features after reduction and normalization.
val_feat_dataset.features.head()

Unnamed: 0,id_measurement,id_performance,datetime,plant,generation,num_eurythmy,initial_second,eurythmy_letter,mfcc_1_avg,zero_crossing_rate,root_mean_square_energy,slope_sign_changes_ratio,hjorth_mobility,hjorth_complexity,mean,variance,standard_deviation,skewness,kurtosis,dfa
0,1,1,2023-04-29,salad,1,1,17.0,A1,0.448185,-0.587715,1.187843,-0.415458,-0.453143,-0.109875,1.724492,-0.147885,-0.596814,0.023204,-0.083323,-0.229328
1,1,1,2023-04-29,salad,1,1,18.0,A1,0.30557,-0.587715,1.322777,-0.431296,-0.458243,-0.113484,1.827075,-0.147335,-0.578232,0.891125,-0.023108,0.011637
2,1,1,2023-04-29,salad,1,1,19.0,A1,0.700812,-0.587715,1.025525,-0.4049,-0.255659,-0.260343,1.597624,-0.141238,-0.44161,0.102503,-0.056845,-0.542518
3,1,1,2023-04-29,salad,1,1,20.0,A1,0.668969,-0.587715,0.961778,-0.383782,-0.286717,-0.22932,1.533873,-0.110891,-0.105789,-0.933839,-0.001726,-0.564408
4,1,1,2023-04-29,salad,1,1,21.0,A1,0.948951,-0.587715,0.054221,-0.309872,-0.339063,-0.028826,0.826939,-0.111503,-0.11079,-0.013554,-0.062082,-0.498555


In [14]:
# Build the loaders consumed by the tuner and the model: each pairs a dataset's
# variable features with the matching RQ1 targets.
train_loader = train_feat_dataset.get_variable_features_loader(train_targets)
val_loader = val_feat_dataset.get_variable_features_loader(val_targets)

## Hyperparameter Search

In [15]:
from collections import Counter

# Tally how many training targets fall into each class to check the balance.
count = Counter()
count.update(train_targets)
print(count)


Counter({1: 34053, 0: 30687})


In [None]:
# Fixed settings shared by every candidate configuration.
num_epochs = 5
output_size = 2
input_size = len(train_feat_dataset.variable_columns)

# Candidate values handed to the tuner for each hyperparameter.
param_grid = dict(
    learning_rate=[0.1, 0.01],
    dense_units=[64, 128],
    dense_layers=[1, 2],
    dropout_rate=[0.25, 0.5],
)

# NOTE(review): the recorded run of this cell logged `nan` training and
# validation losses from the first epoch for every configuration shown.
# Check the inputs for non-finite values and consider smaller learning rates
# before trusting any result of this search — cause not determinable from here.
tuner = HyperparameterTuner(
    FullyConnectedClassifier,
    param_grid,
    train_loader,
    val_loader,
    num_epochs,
    input_size,
    output_size,
)
best_params, all_results = tuner.tune()

Epoch 1/5, Training Loss: nan, Training Accuracy: 0.4740, Validation Loss: nan
Epoch 2/5, Training Loss: nan, Training Accuracy: 0.4740, Validation Loss: nan
Epoch 3/5, Training Loss: nan, Training Accuracy: 0.4740, Validation Loss: nan
Early stopping triggered
Early stopping triggered after 3 epochs
Epoch 1/5, Training Loss: nan, Training Accuracy: 0.4742, Validation Loss: nan
Epoch 2/5, Training Loss: nan, Training Accuracy: 0.4740, Validation Loss: nan
Epoch 3/5, Training Loss: nan, Training Accuracy: 0.4740, Validation Loss: nan
Early stopping triggered
Early stopping triggered after 3 epochs
Epoch 1/5, Training Loss: nan, Training Accuracy: 0.4741, Validation Loss: nan
Epoch 2/5, Training Loss: nan, Training Accuracy: 0.4740, Validation Loss: nan
Epoch 3/5, Training Loss: nan, Training Accuracy: 0.4740, Validation Loss: nan
Early stopping triggered
Early stopping triggered after 3 epochs
Epoch 1/5, Training Loss: nan, Training Accuracy: 0.4740, Validation Loss: nan


In [None]:
# Report the best hyperparameter combination returned by tuner.tune().
print("Best Hyperparameters: \n", best_params)

In [None]:
# Show up to the first 15 rows of the results returned by tuner.tune().
all_results.head(15)

In [None]:
# (Removed a stray undefined name that raised NameError and halted "Run All" at this cell.)

## Model

In [None]:
# Epoch budget for training the final model (same value as in the hyperparameter search cell).
num_epochs = 5  # Number of training epochs

In [None]:
# Build the final classifier.
model = FullyConnectedClassifier(
    # Derive the input width from the selected feature columns instead of
    # hard-coding it: 12 variable features remain after the correlation-based
    # reduction, so the previous fixed value of 5 did not match the loaders.
    input_size=len(train_feat_dataset.variable_columns),
    hidden_layers=[128, 64],  # Two hidden layers with 128 and 64 units respectively
    output_size=2,  # One output per class (the RQ1 targets are 0 and 1)
    dropout_rate=0.5,
    learning_rate=0.001
)

In [None]:
# Train the model with the training loader, passing the validation loader and the epoch budget.
model.train_model(train_loader, val_loader, num_epochs)

In [None]:
# Run the model on the test loader to obtain predictions.
# NOTE(review): `test_loader` is not defined in any visible cell (the third value
# returned by get_train_val_test_indexes is discarded), so this raises NameError
# on a fresh kernel. A test loader must first be built from the held-out rows,
# using the same RQ1 filtering, column selection and normalization parameters
# as the validation set.
test_predictions = model.predict(test_loader)

In [None]:
# Flatten the label batches yielded by the test loader into one plain list,
# in iteration order.
actual_labels = [
    label
    for _, batch_labels in test_loader
    for label in batch_labels.tolist()
]

In [None]:
# Accuracy: share of positions where the prediction equals the actual label.
correct_predictions = sum(
    predicted == expected
    for predicted, expected in zip(test_predictions, actual_labels)
)
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Compute every metric first, then report them together.
# The three scores are macro-averaged, i.e. averaged over classes with equal weight.
precision = precision_score(actual_labels, test_predictions, average='macro')
recall = recall_score(actual_labels, test_predictions, average='macro')
f1 = f1_score(actual_labels, test_predictions, average='macro')
conf_matrix = confusion_matrix(actual_labels, test_predictions)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Confusion matrix of actual labels against predicted labels.
print("Confusion Matrix:\n", conf_matrix)
