# Experiment Eurythmy Letter

In [1]:
from PlantReactivityAnalysis.features.features_dataset import FeaturesDataset
from PlantReactivityAnalysis.models.experiment import Experiment
import PlantReactivityAnalysis.models.get_dataset as gd
import PlantReactivityAnalysis.models.parameters as param
from PlantReactivityAnalysis.config import PROCESSED_DATA_DIR

In [2]:
import numpy as np
import pandas as pd
# Lift pandas' display truncation: show every column and every row of a frame.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Preparation

In [3]:
# Feature-reduction settings forwarded to gd.collect_all_rqs_data
# (as corr_threshold and pca_dim respectively).
ct = 0.7
pca = 42

# Window size and hop length; written into every result row as
# 'Window Size' and 'Hop Length'.
ws, hl = 2, 2

# Research questions for which data is collected.
rqs = [1, 2]

# Paths of the feat_norm / feat_raw letter feature datasets (.pkl files).
norm_path = "../data/processed/feat_norm_letters_2_2_dataset.pkl"
raw_path = "../data/processed/feat_raw_letters_2_2_dataset.pkl"

In [4]:
# Collect the data for every dataset variant and research question; the result
# is indexed later in the notebook as datasets[dataset_id][rq].
datasets = gd.collect_all_rqs_data(
    norm_path,
    raw_path,
    rqs,
    corr_threshold=ct,
    pca_dim=pca,
)


PROCESSING DATASET 1
Removing columns with NaN values: ['kurtosis', 'skewness']

# Research Question:  1
-Train distribution-
Counts and Percentages:
Class 1: Count = 3576, Percentage = 50.35%
Class 0: Count = 3526, Percentage = 49.65%
-Test distribution-
Counts and Percentages:
Class 1: Count = 907, Percentage = 51.07%
Class 0: Count = 869, Percentage = 48.93%
Reduced variable features from initial count to 36.
Reduced features based on correlation threshold of 0.7
Variable features were properly normalized using 'zscore' method.
Applied z-score normalization.

# Research Question:  2
-Train distribution-
Counts and Percentages:
Class 2: Count = 1013, Percentage = 33.59%
Class 0: Count = 1004, Percentage = 33.29%
Class 1: Count = 999, Percentage = 33.12%
-Test distribution-
Counts and Percentages:
Class 0: Count = 257, Percentage = 34.04%
Class 1: Count = 256, Percentage = 33.91%
Class 2: Count = 242, Percentage = 32.05%
Reduced variable features from initial count to 35.
Reduced fea

In [5]:
# Display the dataset identifiers that were collected (the top-level keys of `datasets`).
datasets.keys()

dict_keys([1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 15])

# RQX: run all models for each dataset and research question

In [None]:
%%time

# Experiment results, filled in as results[dataset_id][rq].
results = {}

# Datasets whose entries unpack to (train_df, test_df) frames with a 'target'
# column; every other dataset unpacks to four separate feature/target objects.
frame_datasets = (1, 2, 3, 4, 5, 7)
# Datasets that are run with the NO_SCALING parameter grid.
no_scaling_datasets = (2, 4, 6, 8, 10, 12, 14, 16)

for ds in (1, 2, 3, 4, 5, 7):
    print(f"Now processing Dataset {ds}...")
    results[ds] = {}

    for rq in (1, 2):
        print(f"  Processing RQ {rq} within Dataset {ds}...")

        if ds not in frame_datasets:
            # Not reached with the dataset list iterated above; kept for runs
            # that include the remaining dataset identifiers.
            train_df, train_targets, test_df, test_targets = datasets[ds][rq]
            experiment = Experiment.from_arrays(train_df, train_targets, test_df, test_targets)
        else:
            train_df, test_df = datasets[ds][rq]
            experiment = Experiment(train_df, test_df, 'target')

        parameter_grid = (
            param.PARAMETER_GRID_NO_SCALING
            if ds in no_scaling_datasets
            else param.PARAMETER_GRID
        )
        experiment.run_all_models(parameter_grid)

        # Keep this experiment's results under its dataset and RQ identifiers.
        results[ds][rq] = experiment.results

Now processing Dataset 1...
  Processing RQ 1 within Dataset 1...
Running experiments for svm with params: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}




Running experiments for svm with params: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}




Running experiments for svm with params: {'C': 0.1, 'gamma': 'auto', 'kernel': 'linear'}




Running experiments for svm with params: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}


In [None]:
# Flatten results[dataset][rq] (a list of per-model result dicts) into one row
# per model/parameter combination, then build the DataFrame in a single step.
rows = []

# The inner mapping is deliberately NOT called `rqs`: a for-loop target stays
# bound after the loop, so using that name would overwrite the notebook-level
# `rqs` list and break a later re-run of the dataset-collection cell.
for ds, rq_results in results.items():
    for rq, experiments in rq_results.items():
        for experiment_result in experiments:
            rows.append({
                'Dataset': ds,
                'RQ': rq,
                'Window Size': ws,
                'Hop Length': hl,
                # Datasets 1-5 and 7 record the correlation threshold; any other
                # dataset records the PCA dimension instead.
                'Corr Tres/ PCA Dim': ct if ds in [1, 2, 3, 4, 5, 7] else pca,
                'Model': experiment_result['model_name'],
                # Stored as a string so the parameter dict fits in one CSV cell.
                'Parameter': str(experiment_result['parameters']),
                'F1 Score': experiment_result['f1'],
                'Accuracy': experiment_result['accuracy'],
                'Precision': experiment_result['precision'],
                'Recall': experiment_result['recall']
            })

# Convert the list of dictionaries into a DataFrame
df_results = pd.DataFrame(rows)
df_results.shape

In [None]:
def update_csv_with_new_rows(csv_path, new_data_df):
    """Append ``new_data_df`` to the CSV at ``csv_path``, dropping exact duplicate rows.

    If the file does not exist, or exists but is completely empty, it is
    created from ``new_data_df`` alone. Otherwise the existing rows are read,
    the new rows are appended, fully identical rows are removed, and the file
    is rewritten without an index column.

    Parameters
    ----------
    csv_path : str or path-like
        Location of the results CSV to create or update.
    new_data_df : pandas.DataFrame
        Rows to add to the file.

    Returns
    -------
    None
    """
    try:
        # Use the round-trip float parser so values written by an earlier run are
        # read back unchanged; otherwise drop_duplicates could fail to match
        # re-saved rows against freshly computed ones.
        existing_data_df = pd.read_csv(csv_path, float_precision="round_trip")
        print("read",csv_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # A missing file and a zero-byte file both mean "no previous results".
        existing_data_df = None

    if existing_data_df is None:
        # Nothing to merge with; avoid concatenating with a column-less frame.
        combined_df = new_data_df
    else:
        combined_df = pd.concat([existing_data_df, new_data_df], ignore_index=True)

    combined_df = combined_df.drop_duplicates()

    combined_df.to_csv(csv_path, index=False)

In [None]:
# Merge this run's rows into the results CSV (path is relative to the
# notebook's working directory).
results_file = "results.csv"
update_csv_with_new_rows(csv_path=results_file, new_data_df=df_results)