# Explore Features

In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pandas.plotting import table
from scipy import stats

from PlantReactivityAnalysis.config import FEATURES_LETTERS_DIR, FEATURES_ONE_SEC_DIR, FIGURES_DIR
from PlantReactivityAnalysis.features.features_dataset import FeaturesDataset
from PlantReactivityAnalysis.visualization.visualize import export_df_to_image_formatted

In [2]:
# Lift pandas' truncation limits so wide/long frames are shown in full.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [3]:
def compute_stats_and_test(df, target_column, alpha=0.05):
    """Compare every feature across the classes found in ``target_column``.

    For each column other than ``target_column`` the per-class mean is
    computed and a single omnibus test is run across the classes: one-way
    ANOVA when all groups pass the Shapiro-Wilk normality check and Levene's
    equal-variance check, Kruskal-Wallis otherwise.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per observation. Must contain ``target_column``; every other
        column is treated as a numeric feature.
    target_column : str
        Name of the column holding the class labels. Rows whose label is
        missing are ignored.
    alpha : float, default 0.05
        Significance level used for the two assumption checks (a check
        passes when its p-value is strictly greater than ``alpha``).

    Returns
    -------
    pandas.DataFrame
        Columns ``Feature``, one ``Class_<label>_Avg`` per class (labels in
        sorted order), ``P_Value`` and ``Test_Used``. Rows are sorted by
        ascending ``P_Value`` with NaN last; the index keeps each feature's
        original position. With fewer than two classes ``P_Value`` is NaN
        and ``Test_Used`` is ``'Not applicable'``.

    Notes
    -----
    P-values are not corrected for multiple comparisons. SciPy documents
    that Shapiro-Wilk p-values may be inaccurate for groups larger than
    5000 observations.
    """
    target = df[target_column]
    # Rows without a label cannot belong to any class; keeping NaN would
    # create a phantom class whose equality mask never matches a row.
    unique_classes = np.unique(target.dropna())
    avg_columns = [f'Class_{val}_Avg' for val in unique_classes]
    columns = ['Feature'] + avg_columns + ['P_Value', 'Test_Used']

    # Class membership does not depend on the feature, so build the masks once.
    class_masks = [target == val for val in unique_classes]
    comparable = len(unique_classes) > 1  # a test needs at least two groups

    records = []
    for feature in df.columns.drop(target_column):
        per_class = [df.loc[mask, feature] for mask in class_masks]
        averages = [values.mean() for values in per_class]

        if comparable:
            groups = [values.dropna() for values in per_class]
            # Shapiro-Wilk is only evaluated on groups with more than 3
            # values; smaller groups are skipped and do not fail the check.
            normality_pass = all(
                stats.shapiro(group)[1] > alpha for group in groups if len(group) > 3
            )
            # Levene's test is only needed once normality already holds.
            if normality_pass and stats.levene(*groups)[1] > alpha:
                p_value = stats.f_oneway(*groups)[1]
                test_used = 'ANOVA'
            else:
                p_value = stats.kruskal(*groups)[1]
                test_used = 'Kruskal-Wallis'
        else:
            p_value = np.nan
            test_used = 'Not applicable'

        records.append([feature] + averages + [p_value, test_used])

    # Build the frame in one go: avoids repeated row insertion and lets
    # pandas infer numeric dtypes for the averages and p-values.
    stats_df = pd.DataFrame(records, columns=columns)

    # Sort by P_Value while handling missing values (datasets with a single class)
    return stats_df.sort_values(by='P_Value', ascending=True, na_position='last')

## Data Import

In [4]:
# Load the pre-computed feature dataset stored under FEATURES_LETTERS_DIR.
# NOTE(review): "ws1_hl0.5" in the file name presumably encodes window size / hop length — confirm.
# NOTE(review): the file is a .pkl; if FeaturesDataset.load unpickles it, only load trusted files.
file_path = FEATURES_LETTERS_DIR / "features_dataset_norm_letters_ws1_hl0.5.pkl"
dataset= FeaturesDataset.load(file_path)

Dataset loaded from C:\Users\alvar\Documents\GitHub\Plant-Reactivity-Analysis\data\processed\segmented_by_letters\features_dataset_norm_letters_ws1_hl0.5.pkl. Shape: (8878, 187)


In [5]:
# Finalise the loaded dataset; the return value is not used, so the call appears to
# modify `dataset` in place. The recorded output reports NaN-containing columns being removed.
dataset.make_final_dataset()

Removing columns with NaN values: ['skewness', 'kurtosis']


In [6]:
# Sanity check: (rows, columns) of the feature table after finalisation.
dataset.features.shape

(8638, 179)

In [7]:
# Preview the first rows of the feature table (metadata columns followed by the extracted features).
dataset.features.head()

Unnamed: 0,id_measurement,id_performance,datetime,plant,generation,num_eurythmy,eurythmy_letter,cepstra_1_avg,cepstra_2_avg,cepstra_3_avg,cepstra_4_avg,cepstra_5_avg,cepstra_6_avg,cepstra_7_avg,cepstra_8_avg,cepstra_9_avg,cepstra_10_avg,cepstra_11_avg,cepstra_12_avg,cepstra_13_avg,cepstra_1_std,cepstra_2_std,cepstra_3_std,cepstra_4_std,cepstra_5_std,cepstra_6_std,cepstra_7_std,cepstra_8_std,cepstra_9_std,cepstra_10_std,cepstra_11_std,cepstra_12_std,cepstra_13_std,zcr_mean,energy_mean,energy_entropy_mean,spectral_centroid_mean,spectral_spread_mean,spectral_entropy_mean,spectral_flux_mean,spectral_rolloff_mean,mfcc_1_mean,mfcc_2_mean,mfcc_3_mean,mfcc_4_mean,mfcc_5_mean,mfcc_6_mean,mfcc_7_mean,mfcc_8_mean,mfcc_9_mean,mfcc_10_mean,mfcc_11_mean,mfcc_12_mean,mfcc_13_mean,chroma_1_mean,chroma_2_mean,chroma_3_mean,chroma_4_mean,chroma_5_mean,chroma_6_mean,chroma_7_mean,chroma_8_mean,chroma_9_mean,chroma_10_mean,chroma_11_mean,chroma_12_mean,chroma_std_mean,delta zcr_mean,delta energy_mean,delta energy_entropy_mean,delta spectral_centroid_mean,delta spectral_spread_mean,delta spectral_entropy_mean,delta spectral_flux_mean,delta spectral_rolloff_mean,delta mfcc_1_mean,delta mfcc_2_mean,delta mfcc_3_mean,delta mfcc_4_mean,delta mfcc_5_mean,delta mfcc_6_mean,delta mfcc_7_mean,delta mfcc_8_mean,delta mfcc_9_mean,delta mfcc_10_mean,delta mfcc_11_mean,delta mfcc_12_mean,delta mfcc_13_mean,delta chroma_1_mean,delta chroma_2_mean,delta chroma_3_mean,delta chroma_4_mean,delta chroma_5_mean,delta chroma_6_mean,delta chroma_7_mean,delta chroma_8_mean,delta chroma_9_mean,delta chroma_10_mean,delta chroma_11_mean,delta chroma_12_mean,delta 
chroma_std_mean,zcr_std,energy_std,energy_entropy_std,spectral_centroid_std,spectral_spread_std,spectral_entropy_std,spectral_flux_std,spectral_rolloff_std,mfcc_1_std,mfcc_2_std,mfcc_3_std,mfcc_4_std,mfcc_5_std,mfcc_6_std,mfcc_7_std,mfcc_8_std,mfcc_9_std,mfcc_10_std,mfcc_11_std,mfcc_12_std,mfcc_13_std,chroma_1_std,chroma_2_std,chroma_3_std,chroma_4_std,chroma_5_std,chroma_6_std,chroma_7_std,chroma_8_std,chroma_9_std,chroma_10_std,chroma_11_std,chroma_12_std,chroma_std_std,delta zcr_std,delta energy_std,delta energy_entropy_std,delta spectral_centroid_std,delta spectral_spread_std,delta spectral_entropy_std,delta spectral_flux_std,delta spectral_rolloff_std,delta mfcc_1_std,delta mfcc_2_std,delta mfcc_3_std,delta mfcc_4_std,delta mfcc_5_std,delta mfcc_6_std,delta mfcc_7_std,delta mfcc_8_std,delta mfcc_9_std,delta mfcc_10_std,delta mfcc_11_std,delta mfcc_12_std,delta mfcc_13_std,delta chroma_1_std,delta chroma_2_std,delta chroma_3_std,delta chroma_4_std,delta chroma_5_std,delta chroma_6_std,delta chroma_7_std,delta chroma_8_std,delta chroma_9_std,delta chroma_10_std,delta chroma_11_std,delta chroma_12_std,delta chroma_std_std,zero_crossing_rate,root_mean_square_energy,slope_sign_changes_ratio,hjorth_mobility,hjorth_complexity,mean,variance,standard_deviation,interquartile_range,dfa
0,1,1,2023-04-29,salad,1,1,A1,0.389257,0.355223,0.315986,0.303726,0.29309,0.287535,0.27395,0.261706,0.257738,0.259543,0.254311,0.243789,0.235699,1.732068,1.13102,1.039882,0.974513,0.963384,0.934806,0.90145,0.919531,0.873621,0.818862,0.795026,0.768581,0.717852,3.9e-05,0.088304,3.261203,0.032223,0.081098,0.004826,0.018708,0.000211,-62.296443,1.235709,0.155466,0.2112,0.092647,0.102716,0.061738,0.059721,0.037312,0.03064,0.018006,0.026201,0.015822,4.4e-05,8e-06,0.00137,4.3e-05,0.196797,2.4e-05,0.00558,3e-06,1e-05,3.4e-05,0.000326,1.2e-05,0.054775,0.0,-7.2e-05,-0.001124,0.000538,0.001182,6.3e-05,0.0002189057,0.0,0.393135,0.013044,0.001649,0.002161,0.000401,-0.000133,-0.000671,0.000761,0.00147,0.000749,-0.00025,0.000223,5.4e-05,8.808522e-07,1.245783e-07,1.5e-05,1.390759e-07,-4.3e-05,2.6653e-07,8.3e-05,6.089822e-08,2.136081e-07,6.515224e-07,2.399732e-06,1.328375e-07,-1.396324e-05,0.000155,0.053899,0.09915,0.019595,0.044021,0.010882,0.027826,0.00089,17.71012,0.626208,0.098801,0.128861,0.079956,0.096823,0.064831,0.082795,0.058029,0.062318,0.064395,0.061512,0.060276,0.000123,2.2e-05,0.00395,0.000125,0.007404,7e-05,0.011644,6e-06,2.1e-05,6.6e-05,0.001041,3.5e-05,0.00069,0.000226,0.05179,0.124222,0.021412,0.048003,0.014509,0.033412,0.001294,20.364791,0.738132,0.124926,0.155772,0.101129,0.130712,0.09025,0.114492,0.08103,0.087288,0.094701,0.086756,0.087719,0.000167,3.1e-05,0.005555,0.000169,0.009925,9.6e-05,0.015216,8e-06,2.8e-05,8.5e-05,0.00148,4.7e-05,0.000866,1.7e-05,1.443897,0.018133,1.7e-05,80464.14567,-0.069488,2.080011,1.442224,1.082166,1.652581
1,1,1,2023-04-29,salad,1,1,G1,0.398116,0.366071,0.342363,0.339818,0.326214,0.313437,0.303411,0.285781,0.265579,0.278489,0.286063,0.280911,0.27184,1.66542,1.357589,1.297799,1.25557,1.221819,1.186201,1.19418,1.200104,1.18857,1.16916,1.162435,1.157783,1.145291,6.1e-05,0.044348,3.25253,0.03074,0.064558,0.007691,0.056778,0.000467,-73.804381,0.840326,0.103055,0.143203,0.059229,0.075125,0.042734,0.043807,0.030341,0.041312,0.024302,0.0179,0.003372,7.2e-05,1.3e-05,0.003591,5.1e-05,0.19494,3.7e-05,0.007489,8e-06,1.9e-05,0.000105,0.000353,1.8e-05,0.054922,0.0,-0.003808,-0.000826,0.000482,0.001036,4.9e-05,0.00107481,0.0,0.187979,0.006684,0.000802,0.001168,0.000668,0.000815,0.000672,0.000716,0.000428,0.000174,3.8e-05,0.000196,0.000204,5.680567e-07,3.409592e-08,2.2e-05,1.67236e-07,-3e-05,2.507267e-07,4.7e-05,6.422247e-08,7.683687e-08,1.556722e-07,5.281775e-07,7.262784e-08,-9.759584e-06,0.000222,0.027613,0.137673,0.030222,0.066607,0.01693,0.081965,0.001472,24.863888,0.840115,0.1227,0.161991,0.096781,0.091749,0.075389,0.085462,0.070977,0.089754,0.085306,0.066612,0.06941,0.000189,2.9e-05,0.01009,0.000129,0.012504,0.000106,0.017602,1.9e-05,4.4e-05,0.00026,0.000861,4.8e-05,0.001542,0.000325,0.011718,0.187065,0.037112,0.083608,0.02334,0.112736,0.002114,31.386856,1.06662,0.159812,0.204403,0.129304,0.124058,0.110494,0.118117,0.102211,0.130722,0.123358,0.092624,0.099943,0.000273,4.1e-05,0.014192,0.00018,0.017603,0.000152,0.024818,2.7e-05,6.3e-05,0.000355,0.001203,6.9e-05,0.002257,6e-06,0.732309,0.002644,2.1e-05,54194.021625,0.579218,0.200783,0.448089,0.144289,1.640983
2,1,1,2023-04-29,salad,1,1,D1,0.419543,0.390929,0.36941,0.356822,0.337849,0.322247,0.31019,0.30629,0.301611,0.300712,0.298487,0.29322,0.287879,1.74749,1.40123,1.209881,1.099018,1.072264,0.995665,0.939697,0.94076,0.956239,0.949017,0.812542,0.794373,0.735466,8.9e-05,0.199841,3.250993,0.045377,0.128382,0.007561,0.026341,0.000511,-35.565006,2.097352,0.266039,0.372387,0.174122,0.187017,0.090341,0.084718,0.049184,0.074682,0.044365,0.049061,0.041187,5.3e-05,1.2e-05,0.001889,6.2e-05,0.194729,5.1e-05,0.009594,7e-06,1.9e-05,5.5e-05,0.000522,1.4e-05,0.055066,0.0,0.000749,0.000142,-4.6e-05,-6.7e-05,-6e-06,-0.0001128802,0.0,0.007481,3e-06,7.7e-05,0.000391,0.000959,0.001465,0.001078,-0.000214,-0.001,-0.001106,-0.000771,0.000344,0.000403,-1.352994e-08,-1.526116e-08,-3e-06,-9.231683e-08,6e-06,-1.673815e-08,-1.1e-05,-6.088499e-09,-3.72732e-08,1.353775e-08,4.718429e-07,-1.064933e-08,1.856277e-06,0.000298,0.134757,0.135781,0.026961,0.046496,0.016714,0.036898,0.001637,14.466541,0.514074,0.158313,0.168099,0.125613,0.10857,0.129274,0.139768,0.129965,0.131533,0.123465,0.106441,0.099328,0.00013,3.5e-05,0.004742,0.000179,0.013179,0.000144,0.024359,1.9e-05,4.5e-05,0.00012,0.001393,3.4e-05,0.002813,0.000407,0.070984,0.154807,0.026999,0.054341,0.020091,0.048197,0.002127,21.665968,0.774335,0.236108,0.251097,0.185558,0.159615,0.186253,0.195774,0.184927,0.188494,0.186097,0.155339,0.14225,0.000161,4.7e-05,0.005858,0.000232,0.015779,0.00019,0.029291,2.5e-05,5.6e-05,0.00015,0.001731,4.2e-05,0.004106,3.3e-05,1.401224,0.010444,2e-05,59588.992615,-1.019622,0.923799,0.961145,1.515033,1.679972
3,1,1,2023-04-29,salad,1,1,A2,0.403103,0.380859,0.34552,0.296437,0.303942,0.314854,0.309574,0.298721,0.291939,0.290317,0.282787,0.272253,0.267667,1.709369,1.350423,1.236405,1.16479,1.117035,1.004801,0.99475,1.022494,0.982894,0.930718,0.901839,0.818645,0.751906,8.4e-05,0.125743,3.234316,0.044631,0.123611,0.007564,0.025688,0.000478,-37.377053,2.115316,0.2636,0.373572,0.164805,0.171138,0.0933,0.096777,0.057405,0.058771,0.042997,0.050144,0.034386,7.7e-05,1.3e-05,0.001865,4.6e-05,0.194253,3.3e-05,0.011012,6e-06,2e-05,7e-05,0.000374,1.6e-05,0.054661,0.0,-0.000213,0.001048,-0.000499,-0.001331,-5.9e-05,5.5838e-06,0.0,-0.564227,-0.019478,-0.002359,-0.003431,-0.001445,-0.001665,-0.001109,-0.000952,-0.00064,-0.000619,-0.000209,-0.000306,-0.000566,-2.282429e-07,-7.451886e-08,-1.2e-05,-5.145155e-07,4.2e-05,-1.270376e-07,-8.5e-05,-2.679488e-08,-1.250775e-07,-3.217276e-07,-1.742538e-06,-1.257309e-07,1.331985e-05,0.000211,0.074523,0.117373,0.024525,0.044786,0.012692,0.03681,0.001291,15.108996,0.524597,0.119276,0.128743,0.094552,0.10091,0.083469,0.09824,0.092863,0.104456,0.088897,0.086755,0.079926,0.000177,2.6e-05,0.003996,9.2e-05,0.011103,7.1e-05,0.02123,1.2e-05,3.9e-05,0.00014,0.000773,3e-05,0.001936,0.000255,0.034025,0.141802,0.024051,0.047024,0.01586,0.046305,0.001885,20.131753,0.70729,0.159955,0.17176,0.135574,0.156839,0.11887,0.138968,0.133419,0.139178,0.133851,0.119447,0.111485,0.000245,3.7e-05,0.005623,0.000129,0.014914,9.6e-05,0.028512,1.5e-05,5.1e-05,0.000183,0.001044,3.9e-05,0.0025,6.7e-05,1.631849,0.012356,1.5e-05,87704.209952,-0.912563,1.83016,1.352834,1.442888,1.672634
4,1,1,2023-04-29,salad,1,1,G2,0.378751,0.379956,0.361267,0.336593,0.323019,0.311692,0.303833,0.296744,0.291505,0.285436,0.279096,0.272286,0.26511,1.692541,1.057462,0.982597,0.85713,0.764011,0.762795,0.74549,0.686835,0.684741,0.689881,0.663426,0.624241,0.596304,2.8e-05,0.135619,3.299527,0.022182,0.058285,0.003291,0.019599,0.000233,-69.559411,0.981367,0.104998,0.168913,0.076558,0.085247,0.039533,0.042674,0.031397,0.025925,0.01963,0.023688,0.022486,4.4e-05,4e-06,0.00105,2e-05,0.197666,1.2e-05,0.00409,3e-06,9e-06,2.6e-05,0.000189,4e-06,0.055324,0.0,-0.001916,7.1e-05,-0.000176,-0.000689,-7e-06,-6.884562e-07,0.0,-0.380254,-0.012674,-0.00181,-0.002465,-0.00099,-0.000528,0.000197,-0.000207,-0.000662,-0.000256,0.000175,-0.000286,-0.000345,-5.427917e-08,-4.536108e-09,-2e-06,-1.631209e-08,3e-06,-2.955832e-08,-4e-06,-6.603539e-09,-1.294648e-08,-6.699816e-08,-3.129957e-07,-1.187722e-08,8.928822e-07,0.000121,0.071046,0.04664,0.021223,0.054367,0.009365,0.031474,0.000873,24.009154,0.808268,0.132273,0.158725,0.077649,0.087787,0.075609,0.059313,0.053535,0.05543,0.052982,0.051416,0.04791,0.000161,1.1e-05,0.00339,6e-05,0.007996,3.8e-05,0.014355,1.1e-05,3e-05,7.4e-05,0.000655,1.2e-05,0.001801,0.000176,0.02803,0.048219,0.02173,0.065004,0.011498,0.034853,0.001145,32.133532,1.093845,0.190261,0.218257,0.105405,0.117502,0.106197,0.083109,0.077216,0.079361,0.07351,0.075481,0.069756,0.000223,1.3e-05,0.004282,7.4e-05,0.010778,5.2e-05,0.019752,1.4e-05,3.8e-05,9.1e-05,0.000874,1.5e-05,0.002508,1.7e-05,0.77145,0.003511,2.2e-05,52998.819272,0.371457,0.457155,0.676132,0.577155,1.734061


In [8]:
# Bind `df` to the dataset's `label_features` attribute; `df` is rebound to other frames in later sections.
df= dataset.label_features

## Exploration

In [9]:
# Print a 1-based numbered listing of the column names in `df`.
for i, column in enumerate(df.columns, start=1):
    print("{}. {}".format(i, column))

1. id_measurement
2. id_performance
3. datetime
4. plant
5. generation
6. num_eurythmy
7. eurythmy_letter


## RQ 1: Eurythmy/Control

In [10]:
# Select the subset of the dataset associated with research question 1.
rq = dataset.return_subset_given_research_question(1)
# Reduce the feature set on `rq` using a 0.8 correlation threshold. The returned column list is
# kept in `train_cols` but is not read again in this section.
# NOTE(review): presumably drops features correlated above the threshold — confirm in FeaturesDataset.
train_cols, _ = rq.reduce_features_based_on_target(corr_threshold=0.8)
# `objective_features` holds the retained features plus the `target` column (see the describe() output).
# This rebinds the notebook-global `df`.
df= rq.objective_features
df.describe()

Reduced variable features from initial count to 44.


Unnamed: 0,cepstra_4_avg,cepstra_1_std,cepstra_8_std,energy_mean,mfcc_9_mean,mfcc_11_mean,mfcc_12_mean,mfcc_13_mean,chroma_std_mean,delta zcr_mean,delta energy_mean,delta energy_entropy_mean,delta spectral_spread_mean,delta spectral_flux_mean,delta spectral_rolloff_mean,delta mfcc_2_mean,delta mfcc_3_mean,delta mfcc_5_mean,delta mfcc_6_mean,delta mfcc_7_mean,delta mfcc_8_mean,delta mfcc_9_mean,delta mfcc_10_mean,delta mfcc_11_mean,delta mfcc_12_mean,delta mfcc_13_mean,delta chroma_3_mean,delta chroma_4_mean,delta chroma_std_mean,energy_std,delta energy_std,delta spectral_centroid_std,delta spectral_spread_std,delta mfcc_12_std,delta chroma_8_std,root_mean_square_energy,slope_sign_changes_ratio,hjorth_mobility,hjorth_complexity,mean,variance,interquartile_range,dfa,target
count,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0,8638.0
mean,0.231959,1.434192,0.764238,0.142223,0.048298,0.036143,0.038542,0.027227,0.055073,-1.222496e-07,0.000102,-4e-06,-5.638984e-07,0.000117,5.290581e-08,-1.5e-05,1e-06,3.50099e-06,3.172874e-06,-1.321584e-05,-4.770658e-06,3e-06,9.827149e-06,9e-06,5e-06,-1e-06,1.113864e-06,2.483149e-08,1.602757e-07,0.082603,0.047114,0.037737,0.069874,0.119684,4.320477e-05,0.805437,0.01929,1.897173e-05,107338.8,-0.018244,0.58697,0.759324,1.667287,0.49699
std,0.05442,0.165175,0.291974,0.089543,0.01517,0.012283,0.012281,0.010203,0.001354,1.633872e-05,0.001918,0.001779,0.0005489848,0.000867,3.939303e-05,0.006784,0.001154,0.0008894905,0.0009178815,0.0008050021,0.0008626725,0.000864,0.0008499533,0.000807,0.000746,0.000694,0.0001122345,2.935303e-06,4.27788e-05,0.033434,0.026559,0.015117,0.025925,0.025451,3.492923e-05,0.509805,0.041758,2.922485e-05,93429.53,0.566863,1.107337,0.606975,0.124139,0.50002
min,0.009944,0.590801,0.140652,0.002094,-0.004899,-0.007581,-0.003782,-0.004399,0.052889,-0.000897249,-0.006225,-0.019428,-0.001919619,-0.004247,-0.001345455,-0.028958,-0.010954,-0.005533875,-0.0066899,-0.006450216,-0.00683276,-0.00826,-0.006850025,-0.006727,-0.004956,-0.004803,-0.001088257,-3.05837e-05,-0.0002692723,0.001722,0.002389,0.003753,0.014094,0.005947,4.143687e-07,0.029174,0.000218,9.278176e-07,1016.612,-3.808845,0.0001,0.0,0.764225,0.0
25%,0.197538,1.336546,0.597539,0.078015,0.03848,0.028356,0.030303,0.020677,0.054749,0.0,-0.000772,-6e-05,-0.0003630463,-0.000126,-1.1242439999999999e-20,-0.000731,-0.000581,-0.0004601364,-0.0004858454,-0.0003747912,-0.0003959626,-0.000315,-0.000340178,-0.000304,-0.000301,-0.000271,-9.85044e-06,-2.179706e-07,-1.755783e-06,0.057566,0.026966,0.026622,0.050191,0.105731,2.067888e-05,0.47107,0.004502,1.017558e-05,57033.58,-0.34413,0.088234,0.313764,1.589192,0.0
50%,0.232286,1.440971,0.731661,0.123342,0.050869,0.037886,0.040325,0.028057,0.055021,0.0,-5.9e-05,3.3e-05,-1.375512e-12,-4e-06,0.0,0.000102,2.9e-05,9.163763e-07,3.850384e-07,-2.817397e-07,-8.335121e-08,-4e-06,-5.399829e-07,-1.5e-05,-1e-05,-2.3e-05,-6.368817e-07,-1.289684e-08,2.387282e-07,0.078574,0.041561,0.036774,0.062503,0.121796,3.680635e-05,0.708769,0.007873,1.538935e-05,81712.72,-0.024362,0.271246,0.621321,1.66853,0.0
75%,0.266607,1.542298,0.869689,0.18619,0.059624,0.04497,0.047836,0.034038,0.055259,0.0,0.000562,0.000494,0.0003439316,0.000117,0.0,0.004435,0.000657,0.0005027486,0.0005050579,0.0003655214,0.0003869813,0.000322,0.0003388676,0.000299,0.000283,0.000251,7.080609e-07,1.370065e-08,4.533512e-06,0.103552,0.061728,0.047787,0.086752,0.136344,5.703008e-05,1.015839,0.017222,2.24816e-05,123016.1,0.289359,0.638104,1.045749,1.757299,1.0
max,0.532413,2.19756,3.550581,0.720828,0.183647,0.179352,0.073263,0.274418,0.09937,0.0006596526,0.009937,0.010587,0.00208721,0.007813,0.001258333,0.021624,0.005686,0.004794982,0.004833218,0.005314793,0.007078407,0.005396,0.006633932,0.004887,0.004684,0.004634,0.00138682,8.38311e-05,0.0004998233,0.245661,0.225082,0.103615,0.162913,0.220658,0.001019565,5.05252,0.704454,0.00164289,1377426.0,3.937813,23.489277,6.483383,1.982658,1.0


In [11]:
# Per-feature class comparison for RQ 1, relabelled for presentation and ordered by p-value.
# The labels are positional: the first average column is shown as "Control", the second as "Eurythmy".
stats_df = (
    compute_stats_and_test(df, 'target')
    .set_axis(['Feature', 'Control Avg', 'Eurythmy Avg', 'P-Value', 'Test Used'], axis=1)
    .sort_values(by='P-Value', ascending=True)
)
stats_df.head(30)

Unnamed: 0,Feature,Control Avg,Eurythmy Avg,P-Value,Test Used
36,slope_sign_changes_ratio,0.0297199,0.008734502,0.0,Kruskal-Wallis
4,mfcc_9_mean,0.05344916,0.04308518,1.8074609999999999e-239,Kruskal-Wallis
6,mfcc_12_mean,0.04260532,0.03442938,5.805848999999999e-225,Kruskal-Wallis
7,mfcc_13_mean,0.03033443,0.02408293,2.285189e-207,Kruskal-Wallis
5,mfcc_11_mean,0.04004473,0.03219476,6.011778e-206,Kruskal-Wallis
32,delta spectral_spread_std,0.06324601,0.07658217,8.097108e-135,Kruskal-Wallis
42,dfa,1.696072,1.638154,1.88616e-123,Kruskal-Wallis
30,delta energy_std,0.05086293,0.04332017,1.65845e-35,Kruskal-Wallis
40,variance,0.6926854,0.4799748,8.633461000000001e-33,Kruskal-Wallis
41,interquartile_range,0.8214935,0.6964012,6.493563e-27,Kruskal-Wallis


In [12]:
# Keep only the features whose p-value is below 0.05 and export that table as an image.
filtered_df = stats_df.loc[stats_df['P-Value'] < 0.05]
# One wider column for the feature name, then four equal-width columns.
col_widths = [0.17] + [0.1] * 4
export_df_to_image_formatted(
    filtered_df,
    FIGURES_DIR / 'statistical_test_rq1.png',
    col_widths=col_widths,
)

DataFrame exported as image to C:\Users\alvar\Documents\GitHub\Plant-Reactivity-Analysis\reports\figures\statistical_test_rq1.png


## RQ 2: A/G/D

In [13]:
# Select the subset of the dataset associated with research question 2.
rq = dataset.return_subset_given_research_question(2)
# Reduce the feature set on `rq` using a 0.8 correlation threshold. The returned column list is
# kept in `train_cols` but is not read again in this section.
# NOTE(review): presumably drops features correlated above the threshold — confirm in FeaturesDataset.
train_cols, _ = rq.reduce_features_based_on_target(corr_threshold=0.8)
# `objective_features` holds the retained features plus the `target` column (see the describe() output).
# This rebinds the notebook-global `df`.
df= rq.objective_features
df.describe()

Reduced variable features from initial count to 40.


Unnamed: 0,cepstra_13_avg,cepstra_1_std,cepstra_13_std,energy_mean,mfcc_8_mean,mfcc_12_mean,chroma_std_mean,delta zcr_mean,delta energy_mean,delta energy_entropy_mean,delta spectral_spread_mean,delta spectral_flux_mean,delta mfcc_3_mean,delta mfcc_5_mean,delta mfcc_6_mean,delta mfcc_7_mean,delta mfcc_8_mean,delta mfcc_9_mean,delta mfcc_10_mean,delta mfcc_11_mean,delta mfcc_12_mean,delta mfcc_13_mean,delta chroma_2_mean,delta chroma_std_mean,energy_std,delta energy_std,delta spectral_flux_std,delta mfcc_6_std,delta mfcc_10_std,delta mfcc_12_std,delta chroma_std_std,zero_crossing_rate,slope_sign_changes_ratio,hjorth_mobility,hjorth_complexity,mean,variance,interquartile_range,dfa,target
count,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0,3616.0
mean,0.174513,1.443657,0.63016,0.154024,0.071363,0.034435,0.054992,7.689739e-08,9.9e-05,-2.7e-05,2.452891e-07,0.000115,7e-06,-5.734676e-06,-8.65125e-06,-1.287735e-05,-1.092832e-06,6e-06,1.8e-05,1e-05,8e-06,7.529046e-07,1.009644e-08,-3.666785e-07,0.082351,0.043539,0.106524,0.150478,0.136578,0.12044,0.004313,4.4e-05,0.008743,1.9e-05,99439.087701,-0.002468,0.493052,0.708254,1.638426,0.996405
std,0.050189,0.156185,0.381466,0.095398,0.023187,0.012403,0.000483,4.114426e-06,0.002012,0.001717,0.0005857751,0.000919,0.001236,0.0009363422,0.0009721632,0.0008176873,0.0009088456,0.000904,0.000876,0.000831,0.000774,0.0006945639,7.238308e-07,3.622337e-05,0.033873,0.024205,0.073252,0.028612,0.031264,0.027578,0.00295,4.9e-05,0.018475,1.3e-05,72025.316223,0.629004,0.986088,0.592286,0.119902,0.818012
min,-0.075761,0.756275,0.08131,0.006364,0.00231,-0.002345,0.053028,-1.821825e-05,-0.005625,-0.017037,-0.001919619,-0.004247,-0.006916,-0.004927854,-0.0066899,-0.005364207,-0.00683276,-0.007249,-0.005703,-0.004398,-0.004956,-0.004201057,-1.183658e-05,-0.0002363723,0.013444,0.002389,0.003358,0.01675,0.00603,0.005947,0.000113,0.0,0.000218,2e-06,11604.964763,-3.808845,0.0001,0.0,1.234314,0.0
25%,0.144119,1.345983,0.422623,0.08455,0.055328,0.025697,0.054741,0.0,-0.00088,-4e-05,-0.0004007578,-0.000123,-0.000721,-0.0005406208,-0.0005671199,-0.0004018771,-0.0004203112,-0.000307,-0.000359,-0.000292,-0.000279,-0.0002460028,-3.680576e-08,-1.192823e-06,0.05779,0.025686,0.052424,0.134378,0.118586,0.105837,0.002105,8e-06,0.003565,1e-05,56996.716963,-0.38695,0.075421,0.274521,1.559054,0.0
50%,0.177595,1.44793,0.541005,0.135107,0.073829,0.035275,0.055006,0.0,-5.2e-05,2e-05,-6.805454e-20,-2e-06,1e-06,3.407417e-20,6.623489999999999e-19,-7.07056e-08,-4.2500729999999994e-19,-9e-06,-5e-06,-9e-06,-8e-06,-2.352699e-05,-1.957651e-09,1.467224e-07,0.077129,0.038159,0.088219,0.153352,0.139655,0.124101,0.00372,3e-05,0.00551,1.5e-05,82106.266751,-0.041074,0.225921,0.563464,1.636036,1.0
75%,0.206973,1.55126,0.687633,0.201444,0.089441,0.043746,0.055223,0.0,0.000638,0.000336,0.0003781165,9.8e-05,0.00077,0.0005615642,0.0005530776,0.0003684981,0.0004067538,0.000302,0.000335,0.00028,0.000272,0.0002272288,2.942008e-09,3.173249e-06,0.102986,0.05583,0.141998,0.169386,0.157606,0.138776,0.00592,6.4e-05,0.008668,2.2e-05,118559.788605,0.352626,0.543399,0.985947,1.72046,2.0
max,0.346045,2.19756,3.55467,0.720828,0.124238,0.070404,0.058679,5.01002e-05,0.007962,0.01043,0.00208721,0.007617,0.00504,0.004775662,0.004833218,0.005314793,0.007078407,0.004889,0.005138,0.004887,0.004684,0.004167054,2.375785e-05,0.0004998233,0.245661,0.225082,0.447232,0.229727,0.237491,0.205196,0.019091,0.000423,0.362309,0.000122,838028.974783,3.937813,15.408607,6.483383,1.955223,2.0


In [14]:
# Per-feature class comparison for RQ 2, ordered by p-value and then relabelled for presentation.
# The labels are positional: the three average columns are shown as "A", "G" and "D" in that order.
stats_df = (
    compute_stats_and_test(df, 'target')
    .sort_values(by='P_Value', ascending=True)
    .set_axis(['Feature', 'A Avg', 'G Avg', 'D Avg', 'P-Value', 'Test Used'], axis=1)
)
stats_df.head(9)

Unnamed: 0,Feature,A Avg,G Avg,D Avg,P-Value,Test Used
1,cepstra_1_std,1.425599,1.488751,1.41704,1.1519579999999999e-36,Kruskal-Wallis
0,cepstra_13_avg,0.170089,0.18454,0.169008,1.6742000000000002e-17,Kruskal-Wallis
35,mean,0.060413,-0.012824,-0.055726,6.290599e-05,Kruskal-Wallis
24,energy_std,0.080766,0.085103,0.081216,0.001334879,Kruskal-Wallis
34,hjorth_complexity,102281.98667,95432.981245,100551.593337,0.0034245,Kruskal-Wallis
33,hjorth_mobility,1.8e-05,2e-05,1.9e-05,0.004754483,Kruskal-Wallis
8,delta energy_mean,0.000126,-2.1e-05,0.000192,0.007168732,Kruskal-Wallis
25,delta energy_std,0.042461,0.045158,0.04302,0.01395831,Kruskal-Wallis
17,delta mfcc_9_mean,1.7e-05,-4e-05,4.1e-05,0.02420865,Kruskal-Wallis


In [15]:
# Keep only the features whose p-value is below 0.05 and export that table as an image.
filtered_df = stats_df.loc[stats_df['P-Value'] < 0.05]
# One wider column for the feature name, then five equal-width columns.
col_widths = [0.15] + [0.1] * 5
export_df_to_image_formatted(
    filtered_df,
    FIGURES_DIR / 'statistical_test_rq2.png',
    col_widths=col_widths,
)

DataFrame exported as image to C:\Users\alvar\Documents\GitHub\Plant-Reactivity-Analysis\reports\figures\statistical_test_rq2.png


## RQ 5: salad/tomato/basil

In [16]:
# Switch to the dataset stored under FEATURES_ONE_SEC_DIR. This rebinds `file_path` and `dataset`,
# replacing the dataset loaded at the top of the notebook.
# NOTE(review): "ws0.1_hl0.1" in the file name presumably encodes window size / hop length — confirm.
# NOTE(review): the file is a .pkl; if FeaturesDataset.load unpickles it, only load trusted files.
file_path = FEATURES_ONE_SEC_DIR / "features_dataset_norm_1s_ws0.1_hl0.1.pkl"
dataset= FeaturesDataset.load(file_path)
# Finalise the dataset (return value unused), then show the resulting (rows, columns).
dataset.make_final_dataset()
dataset.features.shape

Dataset loaded from C:\Users\alvar\Documents\GitHub\Plant-Reactivity-Analysis\data\processed\segmented_by_one_second\features_dataset_norm_1s_ws0.1_hl0.1.pkl. Shape: (148682, 188)
Removing columns with NaN values: ['skewness', 'kurtosis']


(134182, 180)

In [17]:
# Preview the first rows of the one-second feature table.
dataset.features.head()

Unnamed: 0,id_measurement,id_performance,datetime,plant,generation,num_eurythmy,initial_second,eurythmy_letter,cepstra_1_avg,cepstra_2_avg,cepstra_3_avg,cepstra_4_avg,cepstra_5_avg,cepstra_6_avg,cepstra_7_avg,cepstra_8_avg,cepstra_9_avg,cepstra_10_avg,cepstra_11_avg,cepstra_12_avg,cepstra_13_avg,cepstra_1_std,cepstra_2_std,cepstra_3_std,cepstra_4_std,cepstra_5_std,cepstra_6_std,cepstra_7_std,cepstra_8_std,cepstra_9_std,cepstra_10_std,cepstra_11_std,cepstra_12_std,cepstra_13_std,zcr_mean,energy_mean,energy_entropy_mean,spectral_centroid_mean,spectral_spread_mean,spectral_entropy_mean,spectral_flux_mean,spectral_rolloff_mean,mfcc_1_mean,mfcc_2_mean,mfcc_3_mean,mfcc_4_mean,mfcc_5_mean,mfcc_6_mean,mfcc_7_mean,mfcc_8_mean,mfcc_9_mean,mfcc_10_mean,mfcc_11_mean,mfcc_12_mean,mfcc_13_mean,chroma_1_mean,chroma_2_mean,chroma_3_mean,chroma_4_mean,chroma_5_mean,chroma_6_mean,chroma_7_mean,chroma_8_mean,chroma_9_mean,chroma_10_mean,chroma_11_mean,chroma_12_mean,chroma_std_mean,delta zcr_mean,delta energy_mean,delta energy_entropy_mean,delta spectral_centroid_mean,delta spectral_spread_mean,delta spectral_entropy_mean,delta spectral_flux_mean,delta spectral_rolloff_mean,delta mfcc_1_mean,delta mfcc_2_mean,delta mfcc_3_mean,delta mfcc_4_mean,delta mfcc_5_mean,delta mfcc_6_mean,delta mfcc_7_mean,delta mfcc_8_mean,delta mfcc_9_mean,delta mfcc_10_mean,delta mfcc_11_mean,delta mfcc_12_mean,delta mfcc_13_mean,delta chroma_1_mean,delta chroma_2_mean,delta chroma_3_mean,delta chroma_4_mean,delta chroma_5_mean,delta chroma_6_mean,delta chroma_7_mean,delta chroma_8_mean,delta chroma_9_mean,delta chroma_10_mean,delta chroma_11_mean,delta chroma_12_mean,delta 
chroma_std_mean,zcr_std,energy_std,energy_entropy_std,spectral_centroid_std,spectral_spread_std,spectral_entropy_std,spectral_flux_std,spectral_rolloff_std,mfcc_1_std,mfcc_2_std,mfcc_3_std,mfcc_4_std,mfcc_5_std,mfcc_6_std,mfcc_7_std,mfcc_8_std,mfcc_9_std,mfcc_10_std,mfcc_11_std,mfcc_12_std,mfcc_13_std,chroma_1_std,chroma_2_std,chroma_3_std,chroma_4_std,chroma_5_std,chroma_6_std,chroma_7_std,chroma_8_std,chroma_9_std,chroma_10_std,chroma_11_std,chroma_12_std,chroma_std_std,delta zcr_std,delta energy_std,delta energy_entropy_std,delta spectral_centroid_std,delta spectral_spread_std,delta spectral_entropy_std,delta spectral_flux_std,delta spectral_rolloff_std,delta mfcc_1_std,delta mfcc_2_std,delta mfcc_3_std,delta mfcc_4_std,delta mfcc_5_std,delta mfcc_6_std,delta mfcc_7_std,delta mfcc_8_std,delta mfcc_9_std,delta mfcc_10_std,delta mfcc_11_std,delta mfcc_12_std,delta mfcc_13_std,delta chroma_1_std,delta chroma_2_std,delta chroma_3_std,delta chroma_4_std,delta chroma_5_std,delta chroma_6_std,delta chroma_7_std,delta chroma_8_std,delta chroma_9_std,delta chroma_10_std,delta chroma_11_std,delta chroma_12_std,delta chroma_std_std,zero_crossing_rate,root_mean_square_energy,slope_sign_changes_ratio,hjorth_mobility,hjorth_complexity,mean,variance,standard_deviation,interquartile_range,dfa
0,1,1,2023-04-29,salad,1,1,1.0,,0.443816,0.385157,0.321426,0.302246,0.274912,0.261801,0.24501,0.233681,0.222931,0.212461,0.204595,0.196226,0.188478,1.472362,1.850312,1.86243,1.873392,1.888793,1.899191,1.911235,1.934584,1.940255,1.944491,1.959687,1.964461,1.968887,0.000301,0.164519,3.184429,0.052803,0.089168,0.030024,0.368747,0.003,-69.735895,0.882347,0.108476,0.168822,0.088998,0.084346,0.026148,0.037402,0.026578,0.013253,0.016368,0.024552,0.014478,0.000148,5.1e-05,0.008176,0.000177,0.173422,0.000211,0.051121,2.5e-05,0.000136,0.000371,0.002072,5.7e-05,0.058895,0.0,0.009676,0.054039,-0.006269,-0.011371,-0.002882,-2.775558e-18,-0.0004,-3.650123,-0.112098,-0.012534,-0.019378,-0.007897,-0.00749,-0.00226,-0.002381,-0.001655,-0.002445,-0.000171,0.000373,0.000328,-5.130712e-06,-3.478963e-06,-0.001428,-2.21071e-05,0.001937,-2.097858e-05,-0.002818,-1.526609e-06,-5.174397e-06,-6.074244e-05,-0.0001528351,-2.499173e-06,0.000494,0.000301,0.052613,0.130032,0.04076,0.068871,0.029409,0.099481,0.003,21.965489,0.683487,0.083404,0.130043,0.073204,0.069373,0.034448,0.032612,0.027463,0.019879,0.016577,0.025293,0.035041,0.000144,5e-05,0.008052,0.000175,0.026274,0.00021,0.050575,2.5e-05,0.000134,0.000368,0.002045,5.6e-05,0.007499,0.000601,0.058558,0.213492,0.083294,0.146668,0.056551,0.137043,0.0056,47.58111,1.453645,0.179341,0.279466,0.154305,0.146238,0.054668,0.067612,0.048761,0.030588,0.032819,0.050217,0.0631,0.000287,9.7e-05,0.0148,0.00033,0.050915,0.000399,0.098879,4.9e-05,0.000265,0.000679,0.003964,0.00011,0.012545,0.0,0.684184,0.0024,0.000102,10430.355407,0.677048,0.009713,0.098554,0.216433,1.477967
1,1,1,2023-04-29,salad,1,1,3.0,,0.461998,0.403159,0.340146,0.319861,0.29371,0.279697,0.263125,0.252419,0.240283,0.231621,0.222047,0.214773,0.206867,1.536649,3.551294,3.601346,3.608772,3.650179,3.661265,3.680525,3.673886,3.705169,3.707612,3.717843,3.711027,3.733217,0.000301,0.09162,3.233607,0.028,0.047857,0.01955,0.149778,0.0022,-80.916525,0.576465,0.075878,0.10552,0.045656,0.038823,0.011815,0.021011,0.022596,0.017585,0.002661,0.015452,0.016334,0.000246,3.4e-05,0.008891,0.000292,0.185402,9e-05,0.02193,2.2e-05,6.1e-05,0.0003,0.001591,4.4e-05,0.056155,0.0,0.035539,-0.001155,0.002033,0.007109,8.8e-05,0.0001415231,0.0,3.589483,0.112679,0.013008,0.020185,0.009251,0.009756,0.004466,0.003597,0.001653,0.003066,0.00138,0.001502,0.000766,4.577879e-07,4.251761e-08,3.2e-05,6.169778e-07,-4.1e-05,1.163833e-07,6.4e-05,2.105381e-08,3.33939e-07,4.901655e-07,8.296955e-07,1.541847e-07,-1.4e-05,0.000301,0.024035,0.076543,0.014082,0.01692,0.018934,0.024339,0.0022,3.721991,0.119955,0.021353,0.02664,0.012469,0.01864,0.018443,0.011421,0.013556,0.007493,0.012011,0.007328,0.012772,0.000244,3.4e-05,0.008794,0.000289,0.014189,8.7e-05,0.02114,2.2e-05,5.9e-05,0.000298,0.001556,4.3e-05,0.002432,0.000601,0.01258,0.156338,0.030791,0.041677,0.038069,0.173203,0.0044,11.033465,0.352589,0.055714,0.072743,0.031693,0.029633,0.033239,0.015862,0.028765,0.018053,0.022815,0.016157,0.02631,0.000488,6.8e-05,0.017629,0.00058,0.028484,0.000176,0.042391,4.3e-05,0.000119,0.000596,0.003122,8.7e-05,0.004898,0.0,0.856583,0.0016,9.6e-05,10756.775509,0.848875,0.013146,0.114655,0.0,1.466434
2,1,1,2023-04-29,salad,1,1,4.0,,0.503524,0.443089,0.375127,0.356373,0.333688,0.322915,0.30614,0.292204,0.276309,0.267278,0.260839,0.257271,0.25036,1.600153,0.570633,0.393736,0.327095,0.366903,0.338681,0.299444,0.250763,0.26256,0.27385,0.265762,0.30246,0.283598,0.000902,0.301865,2.898995,0.086641,0.163474,0.040499,0.173609,0.0036,-40.556759,1.799058,0.189652,0.349123,0.130694,0.15776,0.044183,0.0796,0.035975,0.060196,0.031798,0.05175,0.031367,0.000489,4.8e-05,0.015003,0.000256,0.164799,0.000275,0.064128,5.4e-05,0.000153,0.000617,0.00177,0.000104,0.055432,0.0,0.020069,0.001729,-0.000576,-0.000766,-9.4e-05,5.426765e-06,0.0,-0.000543,-0.000122,0.000826,0.001531,0.001314,-0.000602,-0.003563,-0.004664,-0.001209,0.003378,0.003228,-0.000695,-0.001102,-2.586023e-07,-1.254436e-07,-4.2e-05,-6.121025e-07,6.8e-05,-4.444606e-07,-0.000113,3.05006e-08,-7.368124e-08,-1.785582e-06,-3.379681e-06,-8.230836e-08,2.2e-05,0.000301,0.069519,0.264191,0.019,0.028963,0.016309,0.063684,0.002,7.61268,0.295892,0.099493,0.060655,0.057355,0.043607,0.049868,0.036084,0.046832,0.021821,0.036633,0.015629,0.02723,0.000211,3.4e-05,0.009455,0.000176,0.018829,0.000182,0.037123,2.8e-05,7.2e-05,0.000432,0.001255,7.7e-05,0.003922,0.000802,0.139085,0.543647,0.047274,0.075865,0.039039,0.120318,0.0048,22.313688,0.817349,0.204259,0.145564,0.119993,0.105226,0.113521,0.073597,0.093907,0.050559,0.069454,0.038091,0.05243,0.000435,7.3e-05,0.018591,0.000345,0.032056,0.000359,0.055246,3.5e-05,0.000104,0.000809,0.002687,0.000165,0.006479,0.0,1.095662,0.005,0.000117,8896.555762,1.084073,0.025261,0.158937,0.216433,1.572073
3,1,1,2023-04-29,salad,1,1,6.0,,0.459805,0.401018,0.334116,0.310582,0.279953,0.263509,0.245208,0.235656,0.2259,0.221053,0.215237,0.211273,0.20557,1.382767,2.724857,2.767472,2.85323,2.834479,2.850008,2.822976,2.815756,2.844341,2.903739,2.860925,2.858915,2.888661,0.0001,0.19439,3.267505,0.033522,0.072002,0.006145,0.057922,0.0004,-70.502302,0.900905,0.140061,0.17957,0.100482,0.093122,0.038081,0.01401,0.001823,0.026215,0.040258,0.029401,0.007903,5.6e-05,9e-06,0.001456,5.9e-05,0.19709,2.8e-05,0.004637,7e-06,1.9e-05,8e-05,0.00027,7e-06,0.054423,0.0,0.045559,0.001249,-0.002644,-0.007995,-0.000164,0.002242049,0.0,-3.487587,-0.10845,-0.01212,-0.019913,-0.007053,-0.007628,-0.003099,-0.003741,-0.001735,-0.00187,-0.000392,-0.00199,-0.001492,-1.636026e-06,-1.30779e-07,-4.4e-05,-1.005706e-06,5.9e-05,-6.141069e-08,-7.3e-05,-1.884305e-07,-3.213274e-07,-8.028811e-07,-1.125677e-05,-1.028443e-07,2e-05,0.0001,0.043794,0.022007,0.014902,0.037118,0.002645,0.043889,0.0,14.214582,0.443047,0.082612,0.09952,0.063029,0.056043,0.039379,0.047241,0.052462,0.040814,0.022439,0.034267,0.011226,2.5e-05,4e-06,0.000561,4.2e-05,0.000826,1.3e-05,0.000852,6e-06,1.7e-05,6.2e-05,0.000136,2e-06,0.000261,0.0002,0.037851,0.0218,0.02716,0.06624,0.006116,0.076764,0.0004,24.74204,0.769821,0.152366,0.179144,0.11902,0.097849,0.060481,0.071042,0.079594,0.076616,0.042257,0.063382,0.01619,3.1e-05,9e-06,0.00134,9.6e-05,0.002397,2.5e-05,0.003225,1.2e-05,3.3e-05,0.000128,0.000274,7e-06,0.000683,0.00015,0.651818,0.0046,0.000123,8861.589754,0.556077,0.115645,0.340066,0.288578,1.582356
4,1,1,2023-04-29,salad,1,1,7.0,,0.37587,0.307929,0.2239,0.210054,0.180304,0.183507,0.171052,0.135535,0.060958,0.077808,0.067893,0.078155,0.084868,1.505017,0.730727,0.406101,0.342104,0.376048,0.353054,0.347724,0.311127,0.32293,0.251039,0.23182,0.263277,0.261275,0.0001,0.350812,3.207922,0.065882,0.159765,0.019219,0.03608,0.0016,-31.001112,2.197635,0.289094,0.39802,0.197757,0.214455,0.110285,0.124878,0.065293,0.081207,0.080729,0.037003,-0.014297,0.000332,2.7e-05,0.003575,0.000163,0.189001,7.7e-05,0.018613,2.2e-05,7.1e-05,0.000301,0.001256,6.2e-05,0.05525,0.0,0.016904,0.002181,-0.001328,-0.001628,-0.000217,0.00107659,0.0,-0.035102,-0.004012,-0.001222,0.001762,0.009574,0.010083,0.001003,-0.003292,0.008889,0.011255,0.006015,0.010968,0.005663,-8.358992e-07,-2.511769e-07,-4e-05,-7.389726e-07,8e-05,-2.020531e-06,-0.000116,-1.25924e-07,-5.325569e-07,-3.791884e-06,-1.202293e-05,-5.202581e-07,2.6e-05,0.0001,0.095461,0.056385,0.015422,0.021588,0.015034,0.014914,0.0016,4.081132,0.139982,0.048133,0.05016,0.037542,0.038786,0.041463,0.070169,0.072403,0.044619,0.056413,0.073035,0.033518,0.000296,2.4e-05,0.002698,0.000139,0.008805,6.1e-05,0.014675,1.8e-05,6.5e-05,0.000261,0.00111,5.7e-05,0.001444,0.0002,0.104267,0.055666,0.01584,0.028217,0.024478,0.023345,0.0032,7.910899,0.266712,0.093074,0.096041,0.059437,0.064295,0.087738,0.116041,0.130228,0.090843,0.066942,0.107365,0.071901,0.000563,3.8e-05,0.004367,0.000236,0.015351,0.000114,0.025731,3.2e-05,0.000113,0.000469,0.00215,0.000108,0.002658,0.0001,1.140662,0.0124,5.3e-05,25708.144298,-0.948113,0.402191,0.634185,1.154311,1.656375


In [18]:
# Pull the subset of `dataset` tied to research question 5, then reduce its
# feature set with a 0.8 correlation threshold.
# NOTE(review): presumably this prunes highly inter-correlated features while
# keeping those most related to the target -- confirm against FeaturesDataset.
rq = dataset.return_subset_given_research_question(5)
train_cols, _ = rq.reduce_features_based_on_target(corr_threshold=0.8)

# Summarise the remaining feature table; the bare last expression renders it
# as the cell output.
df = rq.objective_features
df.describe()

  df.loc[:, "plant"] = df["plant"].replace(plant_mapping).astype(int)


Reduced variable features from initial count to 61.


Unnamed: 0,cepstra_4_avg,cepstra_1_std,zcr_mean,energy_mean,mfcc_3_mean,mfcc_7_mean,mfcc_8_mean,mfcc_9_mean,mfcc_10_mean,mfcc_11_mean,mfcc_12_mean,mfcc_13_mean,chroma_std_mean,delta zcr_mean,delta energy_mean,delta energy_entropy_mean,delta spectral_centroid_mean,delta spectral_flux_mean,delta mfcc_3_mean,delta mfcc_4_mean,delta mfcc_5_mean,delta mfcc_6_mean,delta mfcc_7_mean,delta mfcc_8_mean,delta mfcc_9_mean,delta mfcc_10_mean,delta mfcc_11_mean,delta mfcc_12_mean,delta mfcc_13_mean,delta chroma_8_mean,delta chroma_11_mean,delta chroma_12_mean,delta chroma_std_mean,energy_std,energy_entropy_std,spectral_spread_std,mfcc_5_std,mfcc_6_std,mfcc_9_std,mfcc_10_std,mfcc_13_std,delta energy_std,delta spectral_flux_std,delta spectral_rolloff_std,delta mfcc_3_std,delta mfcc_7_std,delta mfcc_8_std,delta mfcc_11_std,delta mfcc_12_std,delta chroma_2_std,delta chroma_3_std,delta chroma_11_std,root_mean_square_energy,slope_sign_changes_ratio,hjorth_mobility,hjorth_complexity,mean,variance,interquartile_range,dfa,target
count,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0,134182.0
mean,0.259014,1.337116,0.00049,0.219701,0.231794,0.084105,0.085871,0.051566,0.056913,0.038511,0.041128,0.029058,0.055078,1.056651e-07,-0.0001832448,-9.1e-05,1e-06,0.004945652,-3e-05,-1.4e-05,-1e-05,1.4e-05,2.2e-05,-2e-06,-1.4e-05,1.3e-05,2.3e-05,6e-06,-5e-06,1.392027e-08,-2.736484e-07,-1.073233e-08,-1e-06,0.06223,0.1300926,0.032366,0.051588,0.053772,0.04696,0.04786,0.03929,0.0813,0.113849,0.004191,0.109564,0.080451,0.084512,0.077979,0.073932,7.866991e-05,0.013579,0.002572458,0.744447,0.020873,0.000134,11517.510795,0.007463,0.1895401,0.393519,1.651291,0.940573
std,0.107282,0.275986,0.002507,0.10386,0.063868,0.031284,0.032014,0.027264,0.027962,0.02519,0.023744,0.021466,0.002502,0.000137943,0.02391764,0.021365,0.003308,0.008694181,0.010371,0.012371,0.007972,0.008251,0.007363,0.007742,0.007727,0.007749,0.007395,0.006899,0.00631,5.967317e-06,0.0002837043,1.213557e-05,0.00053,0.027234,0.07965894,0.016238,0.020256,0.021095,0.020428,0.020566,0.017133,0.043881,0.078158,0.004088,0.050724,0.033385,0.035349,0.034986,0.032156,7.267296e-05,0.009578,0.001943882,0.710136,0.047907,0.000108,9173.113112,0.932137,1.215252,0.570432,0.178901,0.808287
min,-0.563071,0.110639,0.0,0.001101,-0.349651,-0.080875,-0.160629,-0.171677,-0.183981,-0.103568,-0.12681,-0.178124,0.047377,-0.007715431,-0.04999845,-0.166093,-0.023984,-1.5265570000000002e-17,-0.114326,-0.061046,-0.057859,-0.06096,-0.0708,-0.058014,-0.064805,-0.064944,-0.065445,-0.054382,-0.051932,-0.0002913588,-0.004264865,-0.0004142274,-0.004326,6.3e-05,1.709553e-08,3e-05,3.6e-05,6.8e-05,6e-06,0.000112,1.4e-05,0.000169,0.003931,0.0,0.000647,0.000731,0.00022,4.5e-05,1.2e-05,1.003297e-07,1.9e-05,7.330272e-07,0.005233,0.0002,3e-06,185.766976,-11.236789,6.129511e-07,0.0,-2.537159,0.0
25%,0.200312,1.163892,0.0001,0.147265,0.195039,0.064807,0.065851,0.03458,0.039273,0.022524,0.025484,0.014963,0.053688,0.0,-0.01779898,-0.004327,-0.001964,0.0001522494,-0.003715,-0.003652,-0.003668,-0.003911,-0.003597,-0.003753,-0.002954,-0.003093,-0.002634,-0.002595,-0.002367,-2.376822e-07,-1.442763e-05,-5.188101e-07,-5.9e-05,0.041728,0.07204166,0.02058,0.037235,0.038837,0.032646,0.033394,0.027233,0.047749,0.05785,0.002,0.071221,0.057082,0.059561,0.053419,0.051389,3.302672e-05,0.006271,0.001118136,0.305239,0.0046,9.1e-05,7348.691794,-0.452603,0.00723791,0.107444,1.520411,0.0
50%,0.275749,1.358303,0.000301,0.204555,0.250278,0.087823,0.089964,0.05312,0.058787,0.039117,0.041662,0.028656,0.054611,0.0,-1.387779e-18,0.0,0.0,0.001149831,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058532,0.1183441,0.027837,0.049351,0.051386,0.044568,0.045473,0.037934,0.075325,0.093373,0.0036,0.102779,0.075626,0.07933,0.074218,0.070594,6.253443e-05,0.011726,0.00213141,0.556527,0.0084,0.000114,9544.55599,-0.00997,0.0296317,0.236599,1.621,1.0
75%,0.332744,1.529284,0.000601,0.274323,0.280227,0.105535,0.108521,0.069562,0.075597,0.054743,0.056851,0.042425,0.056084,0.0,0.01701739,0.004245,0.001965,0.005545723,0.003636,0.003653,0.003645,0.003925,0.003615,0.003717,0.00293,0.00314,0.002668,0.002598,0.002353,2.356492e-07,1.402446e-05,5.255619e-07,5.9e-05,0.078158,0.1695572,0.04077,0.063753,0.066451,0.058748,0.059752,0.049722,0.107157,0.147137,0.0056,0.140952,0.098962,0.104548,0.098451,0.092915,0.0001035925,0.0188,0.003540654,0.948728,0.0182,0.000154,12348.681545,0.445461,0.1082047,0.473954,1.781714,2.0
max,0.611167,2.269204,0.200501,0.999508,0.530982,0.462162,0.415453,0.324845,0.23226,0.43348,0.161948,0.368223,0.114353,0.008817635,0.04999925,0.166042,0.01952,0.05325878,0.087053,0.06475,0.05851,0.064817,0.067318,0.059594,0.069063,0.060438,0.05581,0.054482,0.050417,0.0003016455,0.004993892,0.000428852,0.004271,0.278265,0.845097,0.117044,0.229381,0.20654,0.202759,0.196531,0.147768,0.424611,0.601972,0.1686,0.510559,0.332832,0.320493,0.339028,0.292712,0.002024261,0.141258,0.02090869,13.631042,0.7185,0.009244,341549.108094,11.428372,144.3455,17.253806,3.098123,2.0


In [23]:
# Per-feature class averages plus significance test, ranked so the features
# with the smallest p-values come first.
stats_df = (
    compute_stats_and_test(df, 'target')
    .sort_values(by='P_Value', ascending=True)
)

# Human-readable headers, assigned by position.
# NOTE(review): this assumes exactly three target classes whose sorted order
# corresponds to salad, tomato, basil -- verify against the plant mapping.
stats_df.columns = [
    'Feature',
    'Salad Avg',
    'Tomato Avg',
    'Basil Avg',
    'P-Value',
    'Test Used',
]
stats_df.head(9)



Unnamed: 0,Feature,Salad Avg,Tomato Avg,Basil Avg,P-Value,Test Used
59,dfa,1.651937,1.627146,1.678253,0.0,Kruskal-Wallis
53,slope_sign_changes_ratio,0.021972,0.015237,0.02603,3.539622e-198,Kruskal-Wallis
44,delta mfcc_3_std,0.108462,0.11364,0.106202,1.151653e-118,Kruskal-Wallis
37,mfcc_6_std,0.053131,0.055436,0.052632,1.11023e-105,Kruskal-Wallis
36,mfcc_5_std,0.050976,0.053142,0.050536,1.9216790000000002e-95,Kruskal-Wallis
2,zcr_mean,0.000376,0.000413,0.000714,4.920630999999999e-91,Kruskal-Wallis
39,mfcc_10_std,0.047171,0.049369,0.046953,3.366576e-83,Kruskal-Wallis
34,energy_entropy_std,0.125985,0.134221,0.130276,9.851245000000001e-83,Kruskal-Wallis
46,delta mfcc_8_std,0.083671,0.086957,0.082713,7.630383e-80,Kruskal-Wallis


In [25]:
# Display the first 39 rows of stats_df, i.e. the features with the smallest
# p-values, since the frame was sorted by ascending p-value when it was built.
stats_df.head(39)

Unnamed: 0,Feature,Salad Avg,Tomato Avg,Basil Avg,P-Value,Test Used
59,dfa,1.651937,1.627146,1.678253,0.0,Kruskal-Wallis
53,slope_sign_changes_ratio,0.021972,0.015237,0.02603,3.539622e-198,Kruskal-Wallis
44,delta mfcc_3_std,0.108462,0.11364,0.106202,1.151653e-118,Kruskal-Wallis
37,mfcc_6_std,0.053131,0.055436,0.052632,1.11023e-105,Kruskal-Wallis
36,mfcc_5_std,0.050976,0.053142,0.050536,1.9216790000000002e-95,Kruskal-Wallis
2,zcr_mean,0.000376,0.000413,0.000714,4.920630999999999e-91,Kruskal-Wallis
39,mfcc_10_std,0.047171,0.049369,0.046953,3.366576e-83,Kruskal-Wallis
34,energy_entropy_std,0.125985,0.134221,0.130276,9.851245000000001e-83,Kruskal-Wallis
46,delta mfcc_8_std,0.083671,0.086957,0.082713,7.630383e-80,Kruskal-Wallis
45,delta mfcc_7_std,0.07945,0.082771,0.078985,5.930491000000001e-75,Kruskal-Wallis
