# Experiment Eurythmy Letter

In [1]:
from PlantReactivityAnalysis.features.features_dataset import FeaturesDataset
from PlantReactivityAnalysis.models.experiment import Experiment
import PlantReactivityAnalysis.models.help_experiment as he
from PlantReactivityAnalysis.config import PROCESSED_DATA_DIR

In [2]:
import numpy as np
import pandas as pd
# Lift pandas' display truncation so every column and every row of a frame is
# rendered in cell output.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [3]:
# Relative path to a pickled dataset (judging by the file name, the normalised
# letters feature set -- confirm). It is resolved against the kernel's working
# directory.
# NOTE(review): no other visible cell reads this variable.
norm_letters_signal_dataset_path = "../data/processed/feat_norm_letters_1_1_dataset.pkl"

In [4]:
# Candidate parameter values per classifier; the dict is handed to
# Experiment.run_all_models in the experiment cell.
# NOTE(review): presumably the float grid is the SVM regularisation strength and
# the integer grid the number of estimators -- confirm against Experiment.
_svm_grid = [0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0]
_ensemble_grid = [10, 25, 50, 100, 200, 500]

# Every key receives its own freshly built array, so no two entries share memory.
classifier_par_dict = {
    **{name: np.array(_svm_grid) for name in ("svm", "svm_rbf")},
    **{
        name: np.array(_ensemble_grid)
        for name in ("randomforest", "gradientboosting", "extratrees")
    },
}

# Preparation

In [5]:
# Build the data for every research question via the project helper. The result
# is indexed as datasets[dataset_id][rq] and unpacked into a (train, test) pair
# by the experiment loop. The call logs its preprocessing steps to cell output.
datasets = he.collect_all_rqs_data()

Processing Dataset 1
Removing columns with NaN values: ['skewness', 'kurtosis']
Reduced variable features from initial count to 44.
                          1          0        p_value
mfcc_1_std        15.452480  22.122096  1.491333e-198
mfcc_2_std         0.526064   0.744023  1.968266e-197
mfcc_2_mean        1.884291   1.506518  4.069004e-190
mfcc_4_mean        0.333911   0.268655  1.863441e-183
delta mfcc_1_std  20.816261  29.577311  1.049811e-181
mfcc_3_mean        0.235375   0.189378  2.669175e-180
mfcc_6_mean        0.159752   0.128396  3.027117e-180
delta mfcc_2_std   0.717675   1.000060  1.038333e-179
mfcc_1_mean      -43.067787 -53.738009  1.079701e-176
mfcc_8_mean        0.087171   0.070146  2.010693e-167
Variable features were properly normalized using 'zscore' method.
Applied z-score normalization.
Reduced variable features from initial count to 50.
                                 0          2          1   p_value
chroma_11_std             0.001628   0.001748   0.001668  

# RQ experiments: train and evaluate every classifier per dataset and research question

In [6]:
# Run every classifier in `classifier_par_dict` for each (dataset, RQ) pair and
# collect the outcomes as results[dataset_id][rq_id].
# NOTE(review): the saved output of this cell also logs "Processing RQ 5", which
# this loop never runs -- the output looks stale; re-run the cell to refresh it.
dataset_ids = (1, 2, 3, 4, 5, 7)
research_questions = (1, 2)

results = {}
for dataset_id in dataset_ids:
    print(f"Now processing Dataset {dataset_id}...")
    per_rq_results = results.setdefault(dataset_id, {})
    for rq_id in research_questions:
        print(f"  Processing RQ {rq_id} within Dataset {dataset_id}...")
        # Each entry of `datasets` unpacks into a train/test pair of frames.
        train_df, test_df = datasets[dataset_id][rq_id]
        experiment = Experiment(train_df, test_df, 'target')
        experiment.run_all_models(classifier_par_dict)
        # Keep only the collected results; the Experiment object is rebound on
        # the next iteration.
        per_rq_results[rq_id] = experiment.results


Now processing Dataset 1...
  Processing RQ 1 within Dataset 1...
Running experiments for svm
Running experiments for svm_rbf
Running experiments for randomforest
Running experiments for gradientboosting
Running experiments for extratrees
  Processing RQ 2 within Dataset 1...
Running experiments for svm
Running experiments for svm_rbf
Running experiments for randomforest
Running experiments for gradientboosting
Running experiments for extratrees
  Processing RQ 5 within Dataset 1...
Running experiments for svm
Running experiments for svm_rbf
Running experiments for randomforest
Running experiments for gradientboosting
Running experiments for extratrees
Now processing Dataset 2...
  Processing RQ 1 within Dataset 2...
Running experiments for svm
Running experiments for svm_rbf
Running experiments for randomforest
Running experiments for gradientboosting
Running experiments for extratrees
  Processing RQ 2 within Dataset 2...
Running experiments for svm
Running experiments for svm_rbf
Ru

In [7]:
import pandas as pd

# Flatten the nested results[dataset][rq] structure into one row per experiment
# and persist it as CSV.

# Explicit column schema: keeps the header stable and guarantees that an empty
# `results` still yields a CSV with a header row instead of a column-less file.
RESULT_COLUMNS = [
    'Dataset', 'RQ', 'Model', 'Parameter',
    'F1 Score', 'Accuracy', 'Precision', 'Recall',
]

# Collect all rows first and build the DataFrame once.
rows = []
for ds, rqs in results.items():
    for rq, experiments in rqs.items():
        for experiment_result in experiments:
            # Each entry is unpacked positionally into exactly six values; any
            # other length raises ValueError here.
            model_name, param, f1, accuracy, precision, recall = experiment_result
            rows.append({
                'Dataset': ds,
                'RQ': rq,
                'Model': model_name,
                'Parameter': param,
                'F1 Score': f1,
                'Accuracy': accuracy,
                'Precision': precision,
                'Recall': recall
            })

df_results = pd.DataFrame(rows, columns=RESULT_COLUMNS)

# Relative path: the file is written to the kernel's current working directory.
csv_file_path = "results_wshl11_ct0.8.csv"
df_results.to_csv(csv_file_path, index=False)