# Experiment Eurythmy Letter

In [1]:
from PlantReactivityAnalysis.features.features_dataset import FeaturesDataset
from PlantReactivityAnalysis.models.experiment import Experiment
import PlantReactivityAnalysis.models.help_experiment as he
from PlantReactivityAnalysis.data import preparation_eurythmy_data as ped
from PlantReactivityAnalysis.config import PROCESSED_DATA_DIR

In [2]:
import numpy as np
import pandas as pd
# Disable pandas' display truncation so wide/long DataFrames render in full.
# NOTE(review): with max_rows=None, displaying a whole DataFrame prints every
# row; the cells below only display .head()/.shape.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [14]:
def split_dataframe_by_column(df, column_name):
    """
    Partition a DataFrame into one sub-DataFrame per distinct value of a column.

    Parameters:
    - df: The DataFrame to partition.
    - column_name: Name of the column whose values define the groups.

    Returns:
    A dict mapping each group key found in ``column_name`` to the DataFrame
    holding exactly the rows of that group (as produced by ``df.groupby``).

    Raises:
    ValueError: if ``column_name`` is not one of ``df``'s columns.
    """
    # Guard clause: fail early with a clear message on an unknown column.
    if column_name not in df.columns:
        raise ValueError(f"Column '{column_name}' does not exist in the DataFrame")

    # groupby yields (key, sub-frame) pairs; collect them keyed by group value.
    groups = {}
    for key, subset in df.groupby(column_name):
        groups[key] = subset
    return groups

# Preparation

In [5]:
# Load the research-question datasets via the helper, with variable reduction on.
rqs = he.return_rqs_dataset8(reduce_variables=True)
# Pick one (train, test) pair by position.
# NOTE(review): index 2 is a magic number — presumably the research question
# matching this notebook's title; confirm against help_experiment.
train_df, test_df = rqs[2]

No columns with NaN values found.
Reduced variable features from initial count to 46.
                          0          1        p_value
mfcc_1_std        15.416838  22.146003  2.646781e-204
mfcc_2_std         0.525290   0.745021  9.891848e-203
mfcc_2_mean        1.888856   1.516630  2.184455e-189
delta mfcc_1_std  20.763484  29.587048  4.499103e-186
delta mfcc_2_std   0.716534   1.000642  8.950038e-184
mfcc_4_mean        0.334685   0.270270  2.790184e-183
mfcc_6_mean        0.160148   0.129038  1.549882e-181
mfcc_3_mean        0.235894   0.190567  4.109378e-180
mfcc_1_mean      -42.974151 -53.456395  2.173009e-175
mfcc_8_mean        0.087352   0.070636  5.356263e-165
Reduced variable features from initial count to 44.
                               1          2          0   p_value
delta chroma_11_std     0.002279   0.002419   0.002284  0.074853
chroma_11_std           0.001652   0.001749   0.001660  0.087699
chroma_5_std            0.017150   0.018078   0.017419  0.099979
spectral

In [6]:
# Models to run: classifier name -> array of parameter values to try.
# NOTE(review): what the value 25 configures is decided inside Experiment
# (not visible here) — TODO confirm.
classifier_dict = {"gradientboosting": np.array([25])}

In [12]:
# Build the experiment from the train/test split; 'target' is passed as the
# third argument (presumably the name of the label column — TODO confirm).
experiment = Experiment(train_df, test_df, 'target')
# Run every configured classifier; as the last expression, the returned
# result rows are displayed as the cell output.
experiment.run_all_models(classifier_dict)

Running experiments for gradientboosting


[['gradientboosting',
  25,
  0.4217330615247468,
  0.423956931359354,
  0.4269102943950642,
  0.42638033547124454]]

In [None]:
# Display the results stored on the experiment object.
experiment.results

In [8]:
# Size of the training split as (rows, columns).
train_df.shape

(2970, 44)

In [9]:
# Preview the first rows of the training split (all columns are shown because
# display.max_columns is set to None).
train_df.head()

Unnamed: 0,lib_mfcc_1_avg,lib_mfcc_2_avg,lib_mfcc_4_avg,lib_mfcc_12_avg,lib_mfcc_1_std,lib_mfcc_5_std,lib_mfcc_8_std,energy_mean,mfcc_8_mean,mfcc_13_mean,delta zcr_mean,delta energy_mean,delta energy_entropy_mean,delta spectral_centroid_mean,delta spectral_flux_mean,delta mfcc_2_mean,delta mfcc_5_mean,delta mfcc_6_mean,delta mfcc_7_mean,delta mfcc_8_mean,delta mfcc_9_mean,delta mfcc_10_mean,delta mfcc_11_mean,delta mfcc_12_mean,delta mfcc_13_mean,delta chroma_6_mean,delta chroma_10_mean,delta chroma_11_mean,delta chroma_12_mean,delta chroma_std_mean,energy_std,mfcc_1_std,mfcc_6_std,delta energy_std,delta spectral_spread_std,delta mfcc_12_std,root_mean_square_energy,hjorth_complexity,mean,standard_deviation,skewness,kurtosis,dfa,target
0,-776.309998,86.060974,23.172543,7.902058,120.53434,2.312711,1.74168,0.106585,0.096086,0.030481,0.0,0.0001470027,0.000705,-0.0002497087,0.001075609,0.0002362249,0.0001023123,4.928159e-05,7.497258e-06,-1.707825e-05,-1.607678e-05,6.000628e-05,0.0001341058,0.0001072729,6.727113e-05,-6.062407e-08,-4.436804e-07,-4.661667e-06,-3.905648e-08,8.835428e-06,0.093743,14.509349,0.091944,0.059102,0.065408,0.124661,0.0044,124839.645686,-0.004377,0.000444,0.63733,0.455754,1.757668,1
1,-792.596924,73.78022,24.048126,8.708837,136.193954,2.16679,1.775125,0.104566,0.074015,0.0258,0.0,-0.002775878,2e-06,0.0001665509,6.142319e-05,0.0008636309,3.925636e-05,0.001127212,-0.003612936,0.002024341,-0.0007200446,-0.001496126,0.001617775,-0.001174874,-8.521532e-05,2.280058e-08,2.363979e-08,3.190568e-08,1.326887e-08,-4.479047e-08,0.060994,26.851827,0.117764,0.032561,0.08155,0.113877,0.003996,90587.061788,0.00398,0.000362,1.142747,0.631335,1.68427,2
2,-826.375854,90.584808,24.940731,7.33463,76.89016,0.776385,0.746597,0.151983,0.092959,0.027754,-2.258755e-21,-0.0001930891,0.000108,-0.0001912139,1.4456029999999998e-19,-0.00951404,-0.00071876,-0.0007101341,-0.0002941834,-0.0003591397,-0.0002593669,-0.0002898664,-0.0002410989,-0.0002792512,-7.141641e-05,-3.186763e-08,-3.092798e-08,-7.323363e-07,-1.03634e-08,1.26728e-06,0.070575,17.768181,0.104896,0.033476,0.062835,0.143744,0.000881,98283.385905,0.000808,0.000353,0.531268,0.18874,1.621047,2
3,-1049.983276,50.861671,15.531037,6.613408,77.024918,5.343124,1.680854,0.040055,0.008891,0.002699,0.0,-3.144186e-19,0.0,-3.469447e-20,-2.7755579999999997e-19,-6.661469e-18,-4.1660259999999997e-19,-1.387779e-18,2.776574e-19,6.352747e-23,-6.958376e-20,-6.94567e-20,-2.0837009999999997e-19,5.550077999999999e-19,3.121158e-19,-8.470328999999999e-24,1.355253e-22,2.5410990000000003e-23,-3.176374e-24,0.0,0.04165,9.988303,0.044767,0.019474,0.042211,0.042299,3.4e-05,32533.609203,-1.8e-05,2.9e-05,2.64477,6.587729,1.468923,0
4,-881.179993,90.793633,25.05669,7.507318,69.704567,0.846274,0.834742,0.179755,0.054859,0.017813,0.0,0.001498211,0.000995,-0.0002283996,9.443432e-07,-0.0005688721,3.47695e-05,-9.219531e-06,-1.91947e-05,-2.818086e-05,1.788957e-06,-0.0001159624,-0.0002193859,-9.538159e-05,0.0002396018,-6.079562e-08,-3.235218e-07,-4.324128e-06,-5.892047e-08,1.422494e-05,0.078562,32.385263,0.119483,0.043177,0.108367,0.129496,0.000514,112075.42597,0.000497,0.000134,-0.61581,-0.987699,1.489194,0


In [10]:
# Size of the test split as (rows, columns).
test_df.shape

(743, 44)

In [11]:
# Preview the first rows of the test split.
test_df.head()

Unnamed: 0,lib_mfcc_1_avg,lib_mfcc_2_avg,lib_mfcc_4_avg,lib_mfcc_12_avg,lib_mfcc_1_std,lib_mfcc_5_std,lib_mfcc_8_std,energy_mean,mfcc_8_mean,mfcc_13_mean,delta zcr_mean,delta energy_mean,delta energy_entropy_mean,delta spectral_centroid_mean,delta spectral_flux_mean,delta mfcc_2_mean,delta mfcc_5_mean,delta mfcc_6_mean,delta mfcc_7_mean,delta mfcc_8_mean,delta mfcc_9_mean,delta mfcc_10_mean,delta mfcc_11_mean,delta mfcc_12_mean,delta mfcc_13_mean,delta chroma_6_mean,delta chroma_10_mean,delta chroma_11_mean,delta chroma_12_mean,delta chroma_std_mean,energy_std,mfcc_1_std,mfcc_6_std,delta energy_std,delta spectral_spread_std,delta mfcc_12_std,root_mean_square_energy,hjorth_complexity,mean,standard_deviation,skewness,kurtosis,dfa,target
0,-768.647644,66.990852,23.601892,8.82514,139.091187,2.007238,1.707127,0.27586,0.073849,0.030248,0.0,0.0006892289,6e-06,-4.754639e-05,1.675585e-19,-0.01049239,-0.0008120072,-0.0008931953,-0.0004812198,-0.0004991088,-0.0003329097,-0.0003228703,-0.000169,-0.0001495171,-8.440908e-05,-1.780974e-09,-4.841088e-10,-2.48668e-08,-4.843327e-10,7.924979e-08,0.060812,26.358909,0.113785,0.016807,0.038336,0.119083,0.005927,127144.209617,-0.00579,0.001264,0.291101,-1.320139,1.804212,2
1,-856.196533,88.784843,24.8967,8.193879,122.625008,1.56159,1.278324,0.050538,0.046057,0.012623,0.0,-6.473683e-05,0.000162,-0.0002791167,-2.8460309999999997e-19,-0.01098183,-0.0008119108,-0.0008761976,-0.0004526582,-0.0004366514,-0.0001567509,-0.0001416648,-0.000126,-0.0001326772,5.740205e-05,-6.432865e-08,-1.398401e-07,-1.210684e-06,-2.226893e-08,2.560992e-06,0.046224,30.738998,0.105628,0.021271,0.114482,0.070904,0.001295,105612.230275,0.001284,0.000165,2.843798,8.700087,1.629488,2
2,-940.576904,56.885136,18.783936,8.416083,152.826523,4.36981,1.25589,0.02045,0.020828,0.006496,-2.464096e-21,-3.942553e-20,0.0,-1.5770209999999998e-19,-1.892426e-18,-2.523234e-18,-1.5770209999999998e-19,-5.992680999999999e-19,3.154043e-19,-7.885106999999999e-19,-7.885106999999999e-20,-5.519575e-19,0.0,-3.154043e-19,7.096595999999999e-19,6.314246e-22,-4.312168e-21,2.2792889999999998e-20,-3.542138e-22,-9.856383e-21,0.035277,15.500842,0.048919,0.03398,0.051486,0.040664,0.000904,108339.337432,0.000903,3.4e-05,-3.569493,14.383784,1.487234,0
3,-894.871033,83.199226,23.424358,7.308936,112.452423,2.316043,1.477738,0.04017,0.05684,0.015853,0.0,0.0002710215,-3.2e-05,1.35059e-05,0.0001570236,-0.0004306191,0.000218619,0.000283239,0.0002844917,-7.758419e-05,-0.0001840912,-0.0003103421,-0.000408,-0.0009487186,-0.001303548,1.595696e-09,-2.679286e-09,-4.718513e-09,4.223158e-09,-3.899067e-07,0.036395,25.000644,0.104595,0.014297,0.080747,0.097805,0.000473,64582.153821,-0.000337,0.000331,3.189482,10.77359,1.604007,0
4,-811.067993,91.078011,25.483477,7.821971,92.668579,1.033853,1.103412,0.117457,0.099872,0.035709,0.0,0.000195002,-9.1e-05,0.0001421597,3.161428e-06,0.009611083,0.0007251604,0.0007725882,0.0003983535,0.0004395243,0.0002774826,0.0002906857,0.000134,9.065136e-05,3.684287e-05,9.654571e-09,5.030737e-08,2.512017e-07,5.396347e-09,-1.065091e-06,0.08397,18.972635,0.107904,0.039336,0.066117,0.142091,0.001636,127138.888515,-0.001621,0.000222,0.203918,-0.341088,1.654199,2


In [15]:
# Split the training data into one DataFrame per value of the 'plant' column.
# NOTE(review): this currently raises ValueError — train_df (as displayed by
# train_df.head()) has no 'plant' column. Load a split that keeps the plant
# identifier, or split on an existing column, before relying on train_dfs.
train_dfs= split_dataframe_by_column(train_df,'plant')

ValueError: Column 'plant' does not exist in the DataFrame

In [None]:
# NOTE(review): `klk` is not defined in any visible cell, so running this cell
# raises NameError. It looks like a stray token or a deliberate "stop here"
# marker for Run All — confirm and remove or replace with an explicit stop.
klk

# RQX

In [None]:
# Run all configured models for every (dataset, research question) pair and
# collect the outcomes as results[dataset][rq].
# NOTE(review): `datasets` and `classifier_par_dict` are not defined in any
# visible cell of this notebook; both must already exist in the kernel.
results = {}

for ds in range(1, 9):
    print(f"Now processing Dataset {ds}...")
    # One inner mapping per dataset, keyed by research-question number.
    per_rq = results[ds] = {}
    for rq in (1, 2, 5):
        print(f"  Processing RQ {rq} within Dataset {ds}...")
        # Each entry of `datasets` is expected to unpack into a (train, test) pair.
        train_df, test_df = datasets[ds][rq]
        experiment = Experiment(train_df, test_df, 'target')
        experiment.run_all_models(classifier_par_dict)

        # Keep this run's results under its dataset / RQ identifiers.
        per_rq[rq] = experiment.results



In [None]:
import pandas as pd

# Flatten results[dataset][rq] -> list of result rows into one record per
# model run. Each result row is unpacked as
# (model name, parameter, F1, accuracy, precision, recall); a row with a
# different number of fields raises ValueError rather than being silently
# mis-labelled.
#
# A comprehension is used on purpose: its loop variables stay local, so this
# cell no longer rebinds the notebook-level `rqs` (the datasets loaded in the
# preparation section) or leaves other loop variables behind in the kernel.
rows = [
    {
        'Dataset': ds,
        'RQ': rq,
        'Model': model_name,
        'Parameter': param,
        'F1 Score': f1,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
    }
    for ds, rq_results in results.items()
    for rq, experiment_rows in rq_results.items()
    for model_name, param, f1, accuracy, precision, recall in experiment_rows
]

# Build the table in one go. The explicit column list fixes the column order
# and keeps the header present even when there are no result rows.
df_results = pd.DataFrame(
    rows,
    columns=[
        'Dataset', 'RQ', 'Model', 'Parameter',
        'F1 Score', 'Accuracy', 'Precision', 'Recall',
    ],
)

# Save the table as CSV (path is relative to the current working directory).
csv_file_path = "experiment_results.csv"
df_results.to_csv(csv_file_path, index=False)