# Experiment Eurythmy Letter

In [5]:
from PlantReactivityAnalysis.features.features_dataset import FeaturesDataset
from PlantReactivityAnalysis.models.experiment import Experiment

In [None]:
import numpy as np
import pandas as pd
# Lift pandas' display truncation so wide/long tables render in full.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, None)

## Import Dataset

In [None]:
import os

# Assemble the path from its components instead of hard-coding Windows "\"
# separators, so the notebook also runs on POSIX systems. On Windows this
# yields exactly the same string as the previous raw literal.
norm_letters_signal_dataset_path = os.path.join(
    "..", "data", "processed", "feat_norm_letters_1_1_dataset_mean.pkl"
)
feat_dataset = FeaturesDataset.load(file_path=norm_letters_signal_dataset_path)
# Trailing expression: the cell output is the shape of the loaded features.
feat_dataset.features.shape

## Prepare Data

In [None]:
# Run the dataset's preparation step. Going by the flag names, constant
# columns are kept while flatness and NaN columns are dropped -- see
# FeaturesDataset.prepare_dataset for the exact rules.
feat_dataset.prepare_dataset(
    drop_constant=False,
    drop_flatness_columns=True,
    drop_nan_columns=True,
)

In [None]:
# Cell output: head of the feature table after the preparation step.
feat_dataset.features.head()

## RQ1

Is there any difference in the signals when someone is performing eurythmy?

In [None]:
# Select the subset of the dataset associated with research question 2.
# NOTE(review): the surrounding section is headed "RQ1" but the subset
# requested here is for research question 2 -- confirm which is intended.
rq2 = feat_dataset.return_subset_given_research_question(2)

In [None]:
# Split the RQ2 subset into train and test datasets. The middle return value
# is discarded; val_size=0 is passed, so presumably it is an empty
# validation split.
# NOTE(review): random_state receives the bool True rather than an integer
# seed -- confirm split_dataset interprets it as intended.
train_feat_dataset, _, test_feat_dataset = rq2.split_dataset(
    split_by_wav=False,
    test_size=0.2,
    val_size=0,
    random_state=True,
)

### Data preparation

In [None]:
# Choose the feature columns on the training split only, then restrict the
# test split to exactly those columns so both share the same schema.
train_cols, feat_stats = train_feat_dataset.reduce_features_based_on_target(
    corr_threshold=0.8,
)
test_feat_dataset.keep_only_specified_variable_columns(train_cols)

# TODO(review): normalisation is disabled for this experiment; decide whether
# it should be switched on.
# normalization_params = train_feat_dataset.normalize_features()
# test_feat_dataset.apply_normalization(normalization_params)

In [None]:
# Cell output: up to the first 150 rows of `feat_stats`, the second value
# returned by reduce_features_based_on_target.
feat_stats.head(150)

In [None]:
# Cell output: head of the training features after column reduction.
train_feat_dataset.features.head()

In [None]:
# Cell output: head of the test features, restricted to the training columns.
test_feat_dataset.features.head()

In [None]:
# Report the target distribution of the training split via the dataset's
# own helper.
train_feat_dataset.print_target_distribution()

In [None]:
# Report the target distribution of the test split, for comparison with the
# training split.
test_feat_dataset.print_target_distribution()

In [None]:
# Take the `objective_features` of each split; these are the tables handed
# to the Experiment.
train_df, test_df = (
    train_feat_dataset.objective_features,
    test_feat_dataset.objective_features,
)

In [None]:
# Set up the experiment on the train/test tables. The third argument is
# presumably the name of the label column -- confirm against Experiment.
experiment = Experiment(train_df, test_df, "target")

In [None]:
# Values to sweep for each classifier key, consumed by
# Experiment.run_all_models. What each array parametrises is decided inside
# Experiment -- presumably a regularisation strength for the SVMs and an
# estimator count for the tree ensembles; confirm there.
classifier_par_dict = dict(
    svm=np.array([0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0]),
    svm_rbf=np.array([0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0]),
    randomforest=np.array([10, 25, 50, 100, 200, 500]),
    gradientboosting=np.array([10, 25, 50, 100, 200, 500]),
    extratrees=np.array([10, 25, 50, 100, 200, 500]),
    gaussiannb=[],  # nothing to sweep for Naive Bayes in this experiment
)

experiment.run_all_models(classifier_par_dict)

In [None]:
# Print the best result of the runs above, selected with metric='accuracy'.
experiment.print_best_result(metric='accuracy')

## RQ1

In [None]:
# Build the RQ1 subset and split it. This rebinds train_feat_dataset and
# test_feat_dataset, replacing the splits created earlier in the notebook.
# NOTE(review): random_state receives the bool True rather than an integer
# seed -- confirm split_dataset interprets it as intended.
rq1 = feat_dataset.return_subset_given_research_question(1)
train_feat_dataset, _, test_feat_dataset = rq1.split_dataset(
    split_by_wav=False,
    test_size=0.2,
    val_size=0,
    random_state=True,
)

# Choose the feature columns on the training split only, then restrict the
# test split to exactly those columns.
train_cols, feat_stats = train_feat_dataset.reduce_features_based_on_target(
    corr_threshold=0.8,
)
test_feat_dataset.keep_only_specified_variable_columns(train_cols)

# Normalise the training split and apply the parameters it returns to the
# test split, so no test-set statistics are used.
# TODO(review): the original comment questioned whether to normalise here.
normalization_params = train_feat_dataset.normalize_features()
test_feat_dataset.apply_normalization(normalization_params)

In [None]:
# Cell output: up to the first 150 rows of `feat_stats` from the RQ1 column
# reduction.
feat_stats.head(150)

In [None]:
# Cell output: head of the RQ1 training features after reduction and
# normalisation.
train_feat_dataset.features.head()

In [None]:
# Cell output: head of the RQ1 test features after the training-derived
# column selection and normalisation were applied.
test_feat_dataset.features.head()

In [None]:
# Report the target distribution of the training split, then the test split.
train_feat_dataset.print_target_distribution()
test_feat_dataset.print_target_distribution()

# Take the tables handed to the Experiment from each split.
train_df, test_df = (
    train_feat_dataset.objective_features,
    test_feat_dataset.objective_features,
)

# Run every classifier over the grid in classifier_par_dict, which must
# already be defined by an earlier cell.
experiment = Experiment(train_df, test_df, "target")
experiment.run_all_models(classifier_par_dict)

In [None]:
# Print the best result of the RQ1 runs, selected with metric='accuracy'.
experiment.print_best_result(metric='accuracy')

## RQ3

In [None]:
### Data preparation
# NOTE(review): `ped` and `print_counts_and_percentages` are not imported or
# defined in the visible part of this notebook -- confirm where they come
# from before running this section.
# NOTE(review): train_feat_dataset / test_feat_dataset are whatever the last
# executed split left behind. Run top to bottom, that is the RQ1 split, which
# has already been column-reduced and normalised -- confirm that reducing and
# normalising it again below is intended.

# Restrict copies of the train/test datasets to the data points relevant to rq3

# Work on a copy so the source split is not modified.
rq3_train_feat_dataset= train_feat_dataset.copy()
# Positional row indexes to keep (used with .iloc below) and their targets.
processed_train_indexes, train_targets= ped.get_indexes_and_targets_by_rq(3, rq3_train_feat_dataset.features)
rq3_train_feat_dataset.features= rq3_train_feat_dataset.features.iloc[processed_train_indexes]
# Renumber the kept rows from 0, discarding the old index.
rq3_train_feat_dataset.features.reset_index(drop=True, inplace=True)

# Same filtering for the test split.
rq3_test_feat_dataset= test_feat_dataset.copy()
processed_test_indexes, test_targets= ped.get_indexes_and_targets_by_rq(3, rq3_test_feat_dataset.features)
rq3_test_feat_dataset.features= rq3_test_feat_dataset.features.iloc[processed_test_indexes]
rq3_test_feat_dataset.features.reset_index(drop=True, inplace=True)

# Choose the feature columns on the training data only, then keep the same
# columns in the test data.
train_cols= rq3_train_feat_dataset.reduce_features(targets= train_targets, corr_threshold=0.75)
rq3_test_feat_dataset.keep_only_specified_variable_columns(train_cols)

# Normalize the training features and apply the returned parameters to the
# test features.
normalization_params= rq3_train_feat_dataset.normalize_features()
rq3_test_feat_dataset.apply_normalization(normalization_params)

# NOTE(review): these bare .head() expressions only render when they are the
# last statement of a notebook cell.
rq3_train_feat_dataset.objective_features.head()

rq3_test_feat_dataset.objective_features.head()

# Build the loaders consumed by the training and evaluation code, pairing
# each dataset with its targets.
train_loader= rq3_train_feat_dataset.get_variable_features_loader(train_targets)
test_loader= rq3_test_feat_dataset.get_variable_features_loader(test_targets)

# Train dataset: target counts and percentages
print_counts_and_percentages(train_targets)

# Test dataset: target counts and percentages
print_counts_and_percentages(test_targets)

### Training
# NOTE(review): FullyConnectedClassifier is not imported in the visible part
# of this notebook -- confirm where it comes from.

# One network input per variable column kept in the training dataset.
input_size = len(rq3_train_feat_dataset.variable_columns)
output_size = 4
# Previously assigned twice with the same value; a single assignment suffices.
num_epochs = 50

parameters = {
    'learning_rate': 0.0003,
    'dense_units': 64,
    'dense_layers': 1,
    'dropout_rate': 0,
}

model = FullyConnectedClassifier(
    input_size=input_size,
    output_size=output_size,
    parameters=parameters,
)

# NOTE(review): the test loader is handed to train_model as well. If it
# drives model selection or early stopping there, the test metrics reported
# afterwards will be optimistic -- confirm how train_model uses it.
model.train_model(train_loader, test_loader, num_epochs)

### Evaluation

test_predictions = model.predict(test_loader)

# Flatten the label batches yielded by the loader into one list.
# NOTE(review): this is a second pass over test_loader; it assumes batches
# come back in the same order as during predict (no shuffling) -- otherwise
# predictions and labels are misaligned. Confirm.
actual_labels = [
    label
    for _, batch_labels in test_loader
    for label in batch_labels.tolist()
]

# Accuracy: share of positions where prediction and label agree.
correct_predictions = sum(
    predicted == expected
    for predicted, expected in zip(test_predictions, actual_labels)
)
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")


from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# average='macro' gives the unweighted mean of the per-class scores.
precision, recall, f1 = (
    score_fn(actual_labels, test_predictions, average='macro')
    for score_fn in (precision_score, recall_score, f1_score)
)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Confusion matrix of true labels against predictions.
conf_matrix = confusion_matrix(actual_labels, test_predictions)
print("Confusion Matrix:\n", conf_matrix)

## RQ4

In [None]:
### Data preparation
# NOTE(review): `ped` and `print_counts_and_percentages` are not imported or
# defined in the visible part of this notebook -- confirm where they come
# from before running this section.
# NOTE(review): train_feat_dataset / test_feat_dataset are whatever the last
# executed split left behind. Run top to bottom, that is the RQ1 split, which
# has already been column-reduced and normalised -- confirm that reducing and
# normalising it again below is intended.

# Restrict copies of the train/test datasets to the data points relevant to rq4

# Work on a copy so the source split is not modified.
rq4_train_feat_dataset= train_feat_dataset.copy()
# Positional row indexes to keep (used with .iloc below) and their targets.
processed_train_indexes, train_targets= ped.get_indexes_and_targets_by_rq(4, rq4_train_feat_dataset.features)
rq4_train_feat_dataset.features= rq4_train_feat_dataset.features.iloc[processed_train_indexes]
# Renumber the kept rows from 0, discarding the old index.
rq4_train_feat_dataset.features.reset_index(drop=True, inplace=True)

# Same filtering for the test split.
rq4_test_feat_dataset= test_feat_dataset.copy()
processed_test_indexes, test_targets= ped.get_indexes_and_targets_by_rq(4, rq4_test_feat_dataset.features)
rq4_test_feat_dataset.features= rq4_test_feat_dataset.features.iloc[processed_test_indexes]
rq4_test_feat_dataset.features.reset_index(drop=True, inplace=True)

# Choose the feature columns on the training data only, then keep the same
# columns in the test data.
train_cols= rq4_train_feat_dataset.reduce_features(targets= train_targets, corr_threshold=0.75)
rq4_test_feat_dataset.keep_only_specified_variable_columns(train_cols)

# Normalize the training features and apply the returned parameters to the
# test features.
normalization_params= rq4_train_feat_dataset.normalize_features()
rq4_test_feat_dataset.apply_normalization(normalization_params)

# NOTE(review): these bare .head() expressions only render when they are the
# last statement of a notebook cell.
rq4_train_feat_dataset.objective_features.head()

rq4_test_feat_dataset.objective_features.head()

# Build the loaders consumed by the training and evaluation code, pairing
# each dataset with its targets.
train_loader= rq4_train_feat_dataset.get_variable_features_loader(train_targets)
test_loader= rq4_test_feat_dataset.get_variable_features_loader(test_targets)

# Train dataset: target counts and percentages
print_counts_and_percentages(train_targets)

# Test dataset: target counts and percentages
print_counts_and_percentages(test_targets)

### Training
# NOTE(review): FullyConnectedClassifier is not imported in the visible part
# of this notebook -- confirm where it comes from.

# One network input per variable column kept in the training dataset.
input_size = len(rq4_train_feat_dataset.variable_columns)
output_size = 4
# Previously assigned twice with the same value; a single assignment suffices.
num_epochs = 50

parameters = {
    'learning_rate': 0.0003,
    'dense_units': 64,
    'dense_layers': 2,
    'dropout_rate': 0.2,
}

model = FullyConnectedClassifier(
    input_size=input_size,
    output_size=output_size,
    parameters=parameters,
)

# NOTE(review): the test loader is handed to train_model as well. If it
# drives model selection or early stopping there, the test metrics reported
# afterwards will be optimistic -- confirm how train_model uses it.
model.train_model(train_loader, test_loader, num_epochs)

### Evaluation

test_predictions = model.predict(test_loader)

# Flatten the label batches yielded by the loader into one list.
# NOTE(review): this is a second pass over test_loader; it assumes batches
# come back in the same order as during predict (no shuffling) -- otherwise
# predictions and labels are misaligned. Confirm.
actual_labels = [
    label
    for _, batch_labels in test_loader
    for label in batch_labels.tolist()
]

# Accuracy: share of positions where prediction and label agree.
correct_predictions = sum(
    predicted == expected
    for predicted, expected in zip(test_predictions, actual_labels)
)
accuracy = correct_predictions / len(actual_labels)
print(f"Accuracy: {accuracy:.4f}")


from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# average='macro' gives the unweighted mean of the per-class scores.
precision, recall, f1 = (
    score_fn(actual_labels, test_predictions, average='macro')
    for score_fn in (precision_score, recall_score, f1_score)
)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Confusion matrix of true labels against predictions.
conf_matrix = confusion_matrix(actual_labels, test_predictions)
print("Confusion Matrix:\n", conf_matrix)