In [10]:
# Inspect Cookie Theft Feature Set

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import math
from IPython.display import display

# show full dataframes and outputs
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 1000)
pd.set_option("display.max_colwidth", None)

# paths
file_path = "/Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/results/features/cookieTheft.csv"
save_dir = "/Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/results/plots/feature_distribution_cookieTheft"
os.makedirs(save_dir, exist_ok=True)

# load data
df = pd.read_csv(file_path)


# 1. Basic Information
print("Dataset shape:", df.shape)
print("\nFirst few rows:")
display(df.head())

# 2. Description 
print("\nDescriptive statistics (with variance):")
desc_stats = df.describe()
desc_stats.loc["var"] = df.var(numeric_only=True)
# coefficient of variation (std / mean)
mean = desc_stats.loc["mean"]
std = desc_stats.loc["std"]
cv = std / mean
desc_stats.loc["cv"] = cv
# display
display(desc_stats)
# save
desc_stats_path = os.path.join(save_dir, "descriptive_statistics.csv")
desc_stats.to_csv(desc_stats_path)
print(f"Saved descriptive statistics to: {desc_stats_path}")

# 3. Features with low variance 
std_threshold = 0.01
low_var_cols = df.loc[:, df.std() < std_threshold]
print(f"\nFeatures with low variance (std < {std_threshold}):")
display(pd.DataFrame(low_var_cols.columns, columns=["Low-Variance Features"]))


# 4. Distribution Plots (save all) 
df_subset = df.select_dtypes(include=[np.number]).drop(columns=["Subject_ID"], errors="ignore")
n_features = df_subset.shape[1]

# Plot settings
cols_per_row = 4
rows_per_page = 5
features_per_page = cols_per_row * rows_per_page
n_pages = math.ceil(n_features / features_per_page)

print(f"\nPlotting {n_features} features in {n_pages} pages...")

for i in range(n_pages):
    start = i * features_per_page
    end = min(start + features_per_page, n_features)
    df_page = df_subset.iloc[:, start:end]

    n_cols = min(df_page.shape[1], cols_per_row)
    n_rows = math.ceil(df_page.shape[1] / cols_per_row)

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 3))
    axes = axes.flatten()

    for j, col in enumerate(df_page.columns):
        ax = axes[j]
        ax.hist(df_page[col].dropna(), bins=30, color="steelblue", edgecolor="black")
        ax.set_title(col, fontsize=9)
        ax.tick_params(axis='x', labelsize=8)
        ax.tick_params(axis='y', labelsize=8)

    # Hide any empty axes
    for k in range(j + 1, len(axes)):
        fig.delaxes(axes[k])

    plt.tight_layout()
    save_path = os.path.join(save_dir, f"feature_distribution_page_{i + 1}.png")
    plt.savefig(save_path, dpi=300)
    plt.close()
    print(f"Saved: {save_path}")


Dataset shape: (1002, 129)

First few rows:


Unnamed: 0,Subject_ID,n_words,ttr,mattr,filler_word_ratio,concreteness_score,aoa_average,average_word_length,brunets_index,honores_statistic,guirauds_statistic,ADJ,ADP,ADV,AUX,CCONJ,DET,INTJ,NOUN,NUM,PART,PRON,PROPN,PUNCT,SCONJ,SYM,VERB,OTHER,NOUN/VERB,PRON/NOUN,DET/NOUN,AUX/VERB,OPEN/CLOSED,POS_ENTROPY,LEXICAL_DENSITY,speech_rate,pause_ratio,n_pauses,avg_pause_duration,articulation_rate,hesitation_ratio,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_amean,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_stddevNorm,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_percentile20.0,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_percentile50.0,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_percentile80.0,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,eGeMAPS_loudness_sma3_amean,eGeMAPS_loudness_sma3_stddevNorm,eGeMAPS_loudness_sma3_percentile20.0,eGeMAPS_loudness_sma3_percentile50.0,eGeMAPS_loudness_sma3_percentile80.0,eGeMAPS_loudness_sma3_pctlrange0-2,eGeMAPS_loudness_sma3_meanRisingSlope,eGeMAPS_loudness_sma3_stddevRisingSlope,eGeMAPS_loudness_sma3_meanFallingSlope,eGeMAPS_loudness_sma3_stddevFallingSlope,eGeMAPS_spectralFlux_sma3_amean,eGeMAPS_spectralFlux_sma3_stddevNorm,eGeMAPS_mfcc1_sma3_amean,eGeMAPS_mfcc1_sma3_stddevNorm,eGeMAPS_mfcc2_sma3_amean,eGeMAPS_mfcc2_sma3_stddevNorm,eGeMAPS_mfcc3_sma3_amean,eGeMAPS_mfcc3_sma3_stddevNorm,eGeMAPS_mfcc4_sma3_amean,eGeMAPS_mfcc4_sma3_stddevNorm,eGeMAPS_jitterLocal_sma3nz_amean,eGeMAPS_jitterLocal_sma3nz_stddevNorm,eGeMAPS_shimmerLocaldB_sma3nz_amean,eGeMAPS_shimmerLocaldB_sma3nz_stddevNorm,eGeMAPS_HNRdBACF_sma3nz_amean,eGeMAPS_HNRdBACF_sma3nz_stddevNorm,eGeMAPS_logRelF0-H1-H2_sma3nz_amean,eGeMAPS_logRelF0-H1-H2_sma3nz_stddevNorm,eGeMAPS_logRelF0-H1-A3_sma3nz_amean,eGeMAPS_logRelF0-H1-A3_sma3nz_stddevNorm,eGeMAPS_F1frequency_sma3nz_amean,eGeMAPS_F1frequency_sma3nz_stddevNorm,eGeMAPS_F1bandwidth_sma3nz_amean,eGeMAPS_F1bandwidth_sma3nz_stddevNorm,eGeMAPS_F1amplitudeLogRelF0_sma3nz_amean,eGeMAPS_F1amplitudeLogRelF0_sma3nz_stddevNorm,eGeMAPS_F2frequency_sma3nz_amean,eGeMAPS_F2frequency_sma3nz_stddevNorm,eGeMAPS_F2bandwidth_sma3nz_amean,eGeMAPS_F2bandwidth_sma3nz_stddevNorm,eGeMAPS_F2amplitudeLogRelF0_sma3nz_amean,eGeMAPS_F2amplitudeLogRelF0_sma3nz_stddevNorm,eGeMAPS_F3frequency_sma3nz_amean,eGeMAPS_F3frequency_sma3nz_stddevNorm,eGeMAPS_F3bandwidth_sma3nz_amean,eGeMAPS_F3bandwidth_sma3nz_stddevNorm,eGeMAPS_F3amplitudeLogRelF0_sma3nz_amean,eGeMAPS_F3amplitudeLogRelF0_sma3nz_stddevNorm,eGeMAPS_alphaRatioV_sma3nz_amean,eGeMAPS_alphaRatioV_sma3nz_stddevNorm,eGeMAPS_hammarbergIndexV_sma3nz_amean,eGeMAPS_hammarbergIndexV_sma3nz_stddevNorm,eGeMAPS_slopeV0-500_sma3nz_amean,eGeMAPS_slopeV0-500_sma3nz_stddevNorm,eGeMAPS_slopeV500-1500_sma3nz_amean,eGeMAPS_slopeV500-1500_sma3nz_stddevNorm,eGeMAPS_spectralFluxV_sma3nz_amean,eGeMAPS_spectralFluxV_sma3nz_stddevNorm,eGeMAPS_mfcc1V_sma3nz_amean,eGeMAPS_mfcc1V_sma3nz_stddevNorm,eGeMAPS_mfcc2V_sma3nz_amean,eGeMAPS_mfcc2V_sma3nz_stddevNorm,eGeMAPS_mfcc3V_sma3nz_amean,eGeMAPS_mfcc3V_sma3nz_stddevNorm,eGeMAPS_mfcc4V_sma3nz_amean,eGeMAPS_mfcc4V_sma3nz_stddevNorm,eGeMAPS_alphaRatioUV_sma3nz_amean,eGeMAPS_hammarbergIndexUV_sma3nz_amean,eGeMAPS_slopeUV0-500_sma3nz_amean,eGeMAPS_slopeUV500-1500_sma3nz_amean,eGeMAPS_spectralFluxUV_sma3nz_amean,eGeMAPS_loudnessPeaksPerSec,eGeMAPS_VoicedSegmentsPerSec,eGeMAPS_MeanVoicedSegmentLengthSec,eGeMAPS_StddevVoicedSegmentLengthSec,eGeMAPS_MeanUnvoicedSegmentLength,eGeMAPS_StddevUnvoicedSegmentLength,eGeMAPS_equivalentSoundLevel_dBp
0,41,136,0.507353,0.655632,0.066176,2.687043,4.330152,3.779412,11.506391,1412.38828,5.916701,0.029412,0.117647,0.022059,0.080882,0.044118,0.169118,0.066176,0.242647,0.022059,0.022059,0.073529,0.007353,0.0,0.014706,0.0,0.088235,0.0,2.75,0.30303,0.69697,0.916667,0.837838,2.27715,0.382353,1.954023,0.068534,5.0,0.954,2.098765,0.076293,30.817009,0.22373,28.411087,30.041494,32.661629,4.250542,427.188049,774.085144,174.086609,239.472656,0.862499,0.993167,0.086763,0.617918,1.522618,1.435855,16.277802,9.562093,10.811151,7.656923,0.514631,1.166487,18.421049,1.049356,10.575593,1.729366,12.798391,1.195275,6.610946,2.653372,0.043027,2.176285,1.252774,0.888459,5.438361,0.951855,-1.257739,-10.412514,14.940067,0.969057,568.254272,0.409201,1134.467285,0.249589,-86.784866,-1.014613,1632.154419,0.187431,849.561279,0.557897,-87.889977,-0.949533,2743.325439,0.101919,861.864685,0.746682,-91.086067,-0.885981,-14.526149,-0.837063,24.530426,0.522762,0.025526,1.447703,-0.021225,-0.70584,0.742242,0.843809,26.884993,0.575364,8.909099,1.982747,13.76005,1.261844,1.545999,11.198686,-6.210778,15.74011,-0.001507,7.1e-05,0.125035,2.600949,2.04199,0.300915,0.280535,0.194508,0.307409,-24.3531
1,43,239,0.598326,0.815158,0.033473,2.668586,4.643175,4.205021,11.186823,1957.83572,9.249901,0.046414,0.097046,0.059072,0.050633,0.042194,0.151899,0.033755,0.21097,0.004219,0.025316,0.109705,0.004219,0.0,0.016878,0.0,0.147679,0.0,1.428571,0.52,0.72,0.342857,1.008475,2.282526,0.464135,1.908641,0.162913,25.0,0.816,2.280752,0.15333,27.873039,0.244742,25.812769,27.155924,28.554972,2.742203,340.562378,537.497681,171.846725,464.85025,0.658265,0.951751,0.039011,0.551731,1.199214,1.160203,12.832315,7.387523,9.482431,6.010565,0.42203,1.058222,21.021639,1.003992,5.698454,2.815284,13.828911,1.267389,5.929585,2.648521,0.037601,2.02411,1.272696,0.774195,3.835264,1.214915,10.790902,1.027611,26.562941,0.535634,516.8255,0.489323,1210.08374,0.254181,-95.479485,-0.97358,1501.913086,0.211274,888.608704,0.38609,-97.749763,-0.896627,2577.019531,0.125055,785.443237,0.470797,-101.432716,-0.828769,-14.132645,-0.901226,25.145662,0.508163,0.038142,1.015851,-0.016618,-0.943615,0.646414,0.637726,33.077835,0.507497,5.693821,3.022819,14.535766,1.369926,2.816983,6.242447,-3.342535,13.453954,0.007203,0.006449,0.130028,2.795304,2.021413,0.270435,0.265281,0.224605,0.406454,-27.294552
2,44,198,0.575758,0.750067,0.040404,2.697018,4.451806,4.015152,11.252675,1883.94513,8.101627,0.046154,0.107692,0.035897,0.071795,0.015385,0.148718,0.046154,0.215385,0.0,0.041026,0.112821,0.0,0.0,0.015385,0.0,0.14359,0.0,1.5,0.52381,0.690476,0.5,0.95,2.230852,0.441026,2.419355,0.071114,8.0,0.7275,2.605606,0.071114,27.918575,0.361907,22.132271,24.227198,29.356083,7.223812,395.853729,658.716797,271.64505,512.599976,0.840524,1.043727,0.089164,0.555793,1.502993,1.413829,17.56761,11.005445,11.612098,7.944348,0.598704,1.26351,21.268024,1.113358,-4.088694,-4.350368,12.058125,1.828281,-7.565667,-2.495934,0.050745,1.492144,1.835078,0.665404,0.775902,5.435767,2.328639,5.52858,12.959708,1.344755,646.973999,0.501138,1104.198853,0.271441,-79.409836,-1.181893,1688.564453,0.234748,827.918762,0.398757,-78.959396,-1.113927,2699.875,0.160427,731.925598,0.396179,-79.028091,-1.103977,-7.491331,-1.669789,15.236876,0.892119,0.066417,0.550295,-0.017872,-0.959661,0.861576,0.938217,29.339031,0.794296,-7.625741,-2.461489,16.847227,1.427333,-12.151366,-1.549742,-0.460116,9.874438,0.051444,0.003439,0.14277,2.700721,2.7146,0.22536,0.194449,0.13974,0.21142,-24.828165
3,46,114,0.578947,0.680923,0.0,2.93875,4.328808,3.868421,10.724106,1645.205777,6.181466,0.04386,0.096491,0.035088,0.096491,0.026316,0.201754,0.0,0.254386,0.008772,0.026316,0.078947,0.0,0.0,0.008772,0.0,0.122807,0.0,2.071429,0.310345,0.793103,0.785714,0.83871,2.109641,0.45614,1.737901,0.262515,8.0,2.1525,2.35732,0.262515,25.038744,0.415952,19.999454,21.999908,24.687174,4.687719,624.014343,1083.536011,214.607788,493.373718,0.50175,0.992862,0.116312,0.291725,0.896589,0.780277,9.721425,7.404996,6.338171,4.631199,0.392455,1.347278,19.764307,0.93771,16.357203,0.690979,11.345924,1.183371,7.931947,1.80724,0.042715,1.637136,1.556482,0.687217,2.170117,1.858959,1.787336,8.576129,19.959976,0.819292,562.142517,0.537153,1147.540894,0.263045,-103.525993,-0.867837,1607.603271,0.232354,923.04248,0.479,-107.413284,-0.775127,2690.977783,0.147963,793.748779,0.480733,-108.697632,-0.751778,-18.130322,-0.811879,27.312174,0.563118,-0.018858,-2.14143,-0.018098,-0.858722,0.636629,0.925651,25.857025,0.779876,19.861088,0.607542,15.795684,0.981238,11.436509,1.517492,-8.518676,17.756212,0.000107,-0.005141,0.10588,2.119549,2.122137,0.243885,0.240412,0.215833,0.602715,-26.579708
4,49,123,0.674797,0.790811,0.03252,2.649515,4.605562,4.211382,10.186902,1997.056507,7.483858,0.03252,0.130081,0.056911,0.073171,0.03252,0.154472,0.03252,0.219512,0.00813,0.01626,0.097561,0.01626,0.0,0.00813,0.0,0.121951,0.0,1.8,0.444444,0.703704,0.6,0.921875,2.271229,0.430894,1.840215,0.048923,5.0,0.654,1.935788,0.044434,32.08876,0.316499,27.064182,28.450684,32.090248,5.026066,423.792267,828.299561,149.555603,143.719803,0.848443,0.844547,0.124208,0.690294,1.500597,1.37639,14.351561,7.650517,8.859582,5.93637,0.579782,1.021999,13.509811,1.775005,10.025176,1.70415,10.605463,1.672386,0.768006,20.14069,0.02845,1.757459,1.22742,0.739075,4.654982,1.069395,6.582744,1.660607,23.384291,0.756738,567.487976,0.56182,1070.817627,0.302767,-85.266678,-1.074322,1612.893066,0.240663,808.200012,0.48581,-85.960426,-0.968526,2721.64624,0.143145,782.349304,0.521367,-88.234657,-0.922448,-12.709201,-1.263602,23.15102,0.681525,0.050326,0.844885,-0.014914,-1.130568,0.7669,0.744587,22.16778,1.010629,15.216177,1.061156,14.804762,1.260087,0.469611,36.32309,3.770078,5.809832,0.056543,0.00508,0.252928,2.409098,2.291105,0.269477,0.27779,0.164519,0.294733,-23.506567



Descriptive statistics (with variance):


Unnamed: 0,Subject_ID,n_words,ttr,mattr,filler_word_ratio,concreteness_score,aoa_average,average_word_length,brunets_index,honores_statistic,guirauds_statistic,ADJ,ADP,ADV,AUX,CCONJ,DET,INTJ,NOUN,NUM,PART,PRON,PROPN,PUNCT,SCONJ,SYM,VERB,OTHER,NOUN/VERB,PRON/NOUN,DET/NOUN,AUX/VERB,OPEN/CLOSED,POS_ENTROPY,LEXICAL_DENSITY,speech_rate,pause_ratio,n_pauses,avg_pause_duration,articulation_rate,hesitation_ratio,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_amean,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_stddevNorm,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_percentile20.0,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_percentile50.0,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_percentile80.0,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,eGeMAPS_F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,eGeMAPS_loudness_sma3_amean,eGeMAPS_loudness_sma3_stddevNorm,eGeMAPS_loudness_sma3_percentile20.0,eGeMAPS_loudness_sma3_percentile50.0,eGeMAPS_loudness_sma3_percentile80.0,eGeMAPS_loudness_sma3_pctlrange0-2,eGeMAPS_loudness_sma3_meanRisingSlope,eGeMAPS_loudness_sma3_stddevRisingSlope,eGeMAPS_loudness_sma3_meanFallingSlope,eGeMAPS_loudness_sma3_stddevFallingSlope,eGeMAPS_spectralFlux_sma3_amean,eGeMAPS_spectralFlux_sma3_stddevNorm,eGeMAPS_mfcc1_sma3_amean,eGeMAPS_mfcc1_sma3_stddevNorm,eGeMAPS_mfcc2_sma3_amean,eGeMAPS_mfcc2_sma3_stddevNorm,eGeMAPS_mfcc3_sma3_amean,eGeMAPS_mfcc3_sma3_stddevNorm,eGeMAPS_mfcc4_sma3_amean,eGeMAPS_mfcc4_sma3_stddevNorm,eGeMAPS_jitterLocal_sma3nz_amean,eGeMAPS_jitterLocal_sma3nz_stddevNorm,eGeMAPS_shimmerLocaldB_sma3nz_amean,eGeMAPS_shimmerLocaldB_sma3nz_stddevNorm,eGeMAPS_HNRdBACF_sma3nz_amean,eGeMAPS_HNRdBACF_sma3nz_stddevNorm,eGeMAPS_logRelF0-H1-H2_sma3nz_amean,eGeMAPS_logRelF0-H1-H2_sma3nz_stddevNorm,eGeMAPS_logRelF0-H1-A3_sma3nz_amean,eGeMAPS_logRelF0-H1-A3_sma3nz_stddevNorm,eGeMAPS_F1frequency_sma3nz_amean,eGeMAPS_F1frequency_sma3nz_stddevNorm,eGeMAPS_F1bandwidth_sma3nz_amean,eGeMAPS_F1bandwidth_sma3nz_stddevNorm,eGeMAPS_F1amplitudeLogRelF0_sma3nz_amean,eGeMAPS_F1amplitudeLogRelF0_sma3nz_stddevNorm,eGeMAPS_F2frequency_sma3nz_amean,eGeMAPS_F2frequency_sma3nz_stddevNorm,eGeMAPS_F2bandwidth_sma3nz_amean,eGeMAPS_F2bandwidth_sma3nz_stddevNorm,eGeMAPS_F2amplitudeLogRelF0_sma3nz_amean,eGeMAPS_F2amplitudeLogRelF0_sma3nz_stddevNorm,eGeMAPS_F3frequency_sma3nz_amean,eGeMAPS_F3frequency_sma3nz_stddevNorm,eGeMAPS_F3bandwidth_sma3nz_amean,eGeMAPS_F3bandwidth_sma3nz_stddevNorm,eGeMAPS_F3amplitudeLogRelF0_sma3nz_amean,eGeMAPS_F3amplitudeLogRelF0_sma3nz_stddevNorm,eGeMAPS_alphaRatioV_sma3nz_amean,eGeMAPS_alphaRatioV_sma3nz_stddevNorm,eGeMAPS_hammarbergIndexV_sma3nz_amean,eGeMAPS_hammarbergIndexV_sma3nz_stddevNorm,eGeMAPS_slopeV0-500_sma3nz_amean,eGeMAPS_slopeV0-500_sma3nz_stddevNorm,eGeMAPS_slopeV500-1500_sma3nz_amean,eGeMAPS_slopeV500-1500_sma3nz_stddevNorm,eGeMAPS_spectralFluxV_sma3nz_amean,eGeMAPS_spectralFluxV_sma3nz_stddevNorm,eGeMAPS_mfcc1V_sma3nz_amean,eGeMAPS_mfcc1V_sma3nz_stddevNorm,eGeMAPS_mfcc2V_sma3nz_amean,eGeMAPS_mfcc2V_sma3nz_stddevNorm,eGeMAPS_mfcc3V_sma3nz_amean,eGeMAPS_mfcc3V_sma3nz_stddevNorm,eGeMAPS_mfcc4V_sma3nz_amean,eGeMAPS_mfcc4V_sma3nz_stddevNorm,eGeMAPS_alphaRatioUV_sma3nz_amean,eGeMAPS_hammarbergIndexUV_sma3nz_amean,eGeMAPS_slopeUV0-500_sma3nz_amean,eGeMAPS_slopeUV500-1500_sma3nz_amean,eGeMAPS_spectralFluxUV_sma3nz_amean,eGeMAPS_loudnessPeaksPerSec,eGeMAPS_VoicedSegmentsPerSec,eGeMAPS_MeanVoicedSegmentLengthSec,eGeMAPS_StddevVoicedSegmentLengthSec,eGeMAPS_MeanUnvoicedSegmentLength,eGeMAPS_StddevUnvoicedSegmentLength,eGeMAPS_equivalentSoundLevel_dBp
count,1002.0,1002.0,999.0,961.0,999.0,999.0,999.0,1002.0,1002.0,1002.0,1002.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,998.0,997.0,997.0,998.0,998.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
mean,721.701597,147.91517,0.584001,0.731351,0.031709,2.763207,4.378679,3.964941,10.812802,1731.550202,6.674944,0.03833,0.116583,0.035184,0.063501,0.044764,0.157695,0.027602,0.221233,0.009773,0.024884,0.106224,0.001547,0.0,0.017831,0.0,0.134812,3.851497e-05,1.72693,0.505571,0.717448,0.487117,0.85588,2.183118,0.429559,2.03159,0.151113,11.278278,0.986352,2.393821,0.152969,30.687908,0.293637,25.900163,28.88864,33.285928,7.385765,440.37168,688.442301,187.884092,327.927839,0.799479,0.948906,0.122794,0.603119,1.388581,1.265787,15.789807,10.337918,10.857401,7.537171,0.57845,1.228543,18.258408,1.473125,7.008408,0.877061,10.121738,4.686316,-1.152971,-1.113707,0.047398,1.846323,1.432928,0.76268,3.735566,1.614839,3.926487,2.491838,20.289908,0.979519,616.45495,0.462193,1152.080352,0.277581,-87.410896,-1.094146,1664.400314,0.212239,861.998798,0.44177,-88.201519,-0.998629,2741.672251,0.130502,788.31053,0.518988,-90.603875,-0.943413,-12.756828,-1.196716,22.77011,0.673631,0.059664,0.671771,-0.019197,-1.135804,0.837311,0.862491,26.704623,0.783151,7.851156,3.928345,12.64956,1.578481,-3.750608,17.744801,-2.013953,10.31514,0.041985,0.000168,0.169661,2.540357,2.059495,0.30839,0.289255,0.213714,0.36298,-24.498304
std,370.587246,171.9884,0.083442,0.046054,0.026547,0.12401,0.196983,0.285838,1.24143,467.1059,0.983161,0.019513,0.028181,0.022744,0.023662,0.019394,0.029729,0.040124,0.03713,0.010432,0.014495,0.034281,0.004908,0.0,0.013677,0.0,0.027442,0.0005465256,0.535701,0.21802,0.101829,0.199902,0.144519,0.133561,0.038618,0.418505,0.109489,30.956635,0.479574,0.414174,0.11247,3.921794,0.084735,4.763644,4.471256,5.626732,5.003764,155.862423,239.292212,61.172737,170.469634,0.218166,0.181521,0.081132,0.238295,0.375638,0.357266,4.205316,3.038243,3.084061,2.319197,0.180093,0.244214,6.250093,4.171362,6.527348,25.172502,6.884,69.382996,7.528588,44.290071,0.013445,0.254001,0.238343,0.101168,2.047607,14.239587,3.709689,65.878247,6.306128,1.440155,75.198316,0.080876,85.738619,0.045182,21.020903,0.235895,89.812125,0.026879,76.898981,0.061064,20.115457,0.196758,98.757792,0.01829,77.720408,0.079456,19.748387,0.181793,4.823966,3.623668,5.470171,0.331778,0.031311,4.495263,0.007744,2.062475,0.210555,0.145954,6.965392,1.089543,8.116801,43.712558,8.669622,13.855658,9.714406,513.371514,4.243905,4.67562,0.035635,0.005776,0.109895,0.539107,0.501484,0.168089,0.151535,0.121729,0.237219,2.617509
min,41.0,0.0,0.159326,0.46,0.0,2.14,2.310598,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.547009,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,18.452993,0.019474,12.769335,13.93343,21.383392,0.632465,0.0,0.0,-0.250567,0.0,0.042498,0.27719,0.001034,0.011393,0.033528,0.022397,0.767735,0.738782,0.569846,0.463305,0.009641,0.631363,-12.571771,-57.892872,-25.583231,-521.15802,-17.166439,-247.863876,-38.294849,-732.760437,0.0,0.0,0.931132,0.0,-4.009131,-242.655701,-17.707893,-1642.789795,-9.344255,-20.510687,472.149231,0.131236,694.506287,0.154604,-199.056351,-2.063615,1443.991821,0.093633,639.643677,0.180327,-199.063492,-1.751575,2502.074707,0.077588,434.125793,0.283649,-199.07843,-1.875676,-29.072258,-70.026283,-3.619826,-3.72392,-0.055191,-68.398827,-0.043531,-44.551914,0.048561,0.355168,-12.598892,-25.70694,-26.782782,-401.409576,-19.405558,-208.370056,-45.81699,-1694.050171,-22.09968,-4.643277,-0.067482,-0.023781,0.009275,0.603809,0.204745,0.03,0.0,0.055,0.025357,-54.582317
25%,404.5,112.25,0.534785,0.703214,0.012308,2.685513,4.262447,3.86622,10.412358,1463.157075,6.21623,0.025641,0.098888,0.018869,0.048387,0.032129,0.137423,0.006734,0.19838,0.0,0.014652,0.083969,0.0,0.0,0.007752,0.0,0.11642,0.0,1.36,0.354839,0.65,0.352941,0.761364,2.128419,0.407374,1.802873,0.070645,5.0,0.7045,2.147363,0.070507,27.775671,0.229291,21.996672,25.158352,29.259828,4.475284,332.05571,521.027557,144.043533,187.676399,0.66734,0.842104,0.06661,0.449263,1.193603,1.069348,13.200726,8.422399,8.897526,6.081458,0.484758,1.08252,14.505459,0.911735,3.292038,1.083831,5.507392,1.139704,-5.97757,-3.630025,0.038056,1.678127,1.264504,0.6981,2.390848,1.001252,1.621981,1.757192,16.590982,0.707356,563.507202,0.408612,1103.395325,0.248521,-99.588787,-1.219617,1604.982544,0.194619,814.304535,0.401731,-99.649864,-1.110101,2674.569092,0.117506,738.614014,0.464542,-101.902283,-1.045426,-15.914062,-1.397464,19.471414,0.527958,0.039338,0.420828,-0.024202,-1.200583,0.746774,0.763879,22.85454,0.591476,2.920722,0.846313,7.05386,0.958226,-10.094559,-2.897672,-4.559797,7.229814,0.017991,-0.003399,0.096531,2.196876,1.717828,0.244571,0.226176,0.148096,0.227051,-25.24888
50%,720.5,136.5,0.580645,0.733758,0.027397,2.759223,4.372503,3.971427,10.910388,1663.811063,6.764749,0.036232,0.117241,0.03252,0.061538,0.044444,0.157233,0.022222,0.219298,0.007752,0.023077,0.106557,0.0,0.0,0.015748,0.0,0.132948,0.0,1.647059,0.482759,0.72,0.470588,0.830254,2.204398,0.426829,2.03471,0.126115,9.0,0.879474,2.393696,0.125595,31.057894,0.276412,26.396784,29.820126,33.353642,5.727871,417.484161,687.715393,180.435791,297.35788,0.790288,0.923698,0.106253,0.612055,1.381434,1.26418,15.622244,10.004435,10.708197,7.256003,0.57588,1.191317,18.092093,1.176428,7.094447,1.731526,10.290643,1.60156,-0.89484,-1.379061,0.044801,1.843769,1.393304,0.757305,3.927483,1.267744,3.886805,2.741019,20.24959,0.861219,606.546692,0.468309,1155.844482,0.27547,-85.696037,-1.079582,1658.057861,0.211711,858.838318,0.444979,-86.632217,-0.97954,2734.971191,0.130298,789.818604,0.512057,-89.057121,-0.931326,-12.71522,-1.038622,22.532202,0.626166,0.061463,0.594598,-0.018856,-0.884103,0.834108,0.837909,26.980495,0.733602,7.936471,1.481673,12.672912,1.392429,-3.464989,-1.335409,-1.810858,10.119082,0.038833,3.1e-05,0.143985,2.533505,2.051061,0.29,0.268763,0.193377,0.312047,-24.240032
75%,1036.75,164.75,0.623164,0.761644,0.044709,2.837397,4.492005,4.090795,11.379011,1909.568836,7.243956,0.05,0.133739,0.048826,0.078478,0.056818,0.175983,0.039216,0.243243,0.01487,0.033585,0.128549,0.0,0.0,0.025316,0.0,0.151515,0.0,2.0,0.625,0.782609,0.6,0.919885,2.263444,0.449638,2.301025,0.202449,14.0,1.130119,2.650216,0.204508,33.718733,0.350961,29.833924,32.420788,36.260778,8.15231,529.886627,848.898987,220.131584,447.25206,0.914145,1.023064,0.164562,0.752084,1.586041,1.458466,18.092408,11.765285,12.342248,8.602327,0.660773,1.320061,22.248954,1.523197,11.26396,2.960296,14.484247,2.598837,4.105841,2.847843,0.054546,2.004632,1.556578,0.823395,5.190836,1.840226,6.336734,4.910144,24.280066,1.092827,653.052521,0.517332,1206.472778,0.302753,-73.933491,-0.948592,1713.569885,0.229835,911.256409,0.482552,-75.344398,-0.87293,2801.734375,0.142684,837.166046,0.566565,-78.256359,-0.829568,-9.928081,-0.785134,26.017357,0.755835,0.08279,0.990696,-0.014073,-0.678539,0.915663,0.92833,31.075162,0.923467,13.65563,2.996244,18.178354,2.165242,3.145819,2.052135,0.680723,12.975149,0.063973,0.003714,0.213447,2.872283,2.347495,0.346153,0.325491,0.245072,0.432107,-23.302952
max,1370.0,5222.0,1.0,0.850127,0.2,3.281311,5.106319,4.770492,16.8245,6584.537853,11.513434,0.14,0.233333,0.2,0.2,0.142857,0.304348,1.0,0.411765,0.090909,0.083333,0.208333,0.08,0.0,0.077922,0.0,0.333333,0.009090909,6.0,1.875,1.166667,2.0,1.8125,2.400292,0.590164,3.292568,0.814815,964.0,4.83,3.914989,0.935185,45.153229,0.629856,36.840073,49.378487,59.438019,39.233871,1154.244019,1464.762207,476.788879,1039.122314,2.075066,2.587249,0.579608,1.855637,3.77909,3.540408,40.847351,28.445217,30.26539,23.350899,2.438034,3.270432,36.436878,63.548008,27.794102,199.436691,37.27478,2149.995117,21.807888,801.908447,0.10366,2.91936,3.009204,1.134808,8.838799,193.59404,17.260689,619.411377,44.649761,20.664413,1069.874756,0.681511,1473.808105,0.557482,-26.75223,-0.07718,2250.544922,0.29892,1141.806763,0.661615,-35.315765,-0.076304,3242.627197,0.186783,1035.949951,0.866299,-38.507496,-0.075278,10.70048,61.47839,44.05373,4.612053,0.143928,39.498081,0.011373,29.497206,2.835877,1.886671,52.511398,7.670276,32.175331,855.819031,46.760551,181.123886,24.828976,15238.969727,12.814614,32.226513,0.148006,0.040268,1.034698,4.764786,4.045202,4.391176,3.830493,2.25,3.552456,-12.999346
var,137334.906871,29580.00978,0.006963,0.002121,0.000705,0.015378,0.038802,0.081704,1.541149,218187.921577,0.966605,0.000381,0.000794,0.000517,0.00056,0.000376,0.000884,0.00161,0.001379,0.000109,0.00021,0.001175,2.4e-05,0.0,0.000187,0.0,0.000753,2.986902e-07,0.286976,0.047533,0.010369,0.039961,0.020886,0.017838,0.001491,0.175146,0.011988,958.313265,0.229991,0.17154,0.01265,15.380468,0.00718,22.692307,19.992132,31.660112,25.037653,24293.095015,57260.762919,3742.103711,29059.895978,0.047596,0.03295,0.006582,0.056784,0.141104,0.127639,17.684687,9.230921,9.511433,5.378676,0.032434,0.059641,39.063659,17.400263,42.606267,633.654852,47.389454,4814.000178,56.679643,1961.610405,0.000181,0.064516,0.056807,0.010235,4.192694,202.765843,13.761789,4339.943455,39.767255,2.074047,5654.786749,0.006541,7351.110704,0.002041,441.878346,0.055647,8066.217754,0.000722,5913.453238,0.003729,404.631609,0.038714,9753.101494,0.000335,6040.461828,0.006313,389.998776,0.033049,23.270647,13.13097,29.92277,0.110077,0.00098,20.207386,6e-05,4.253804,0.044334,0.021303,48.516684,1.187103,65.882464,1910.787764,75.162338,191.979252,94.369682,263550.311459,18.010733,21.86142,0.00127,3.3e-05,0.012077,0.290637,0.251486,0.028254,0.022963,0.014818,0.056273,6.851355
cv,0.513491,1.16275,0.142881,0.062971,0.83722,0.044879,0.044987,0.072091,0.114811,0.269762,0.147291,0.509077,0.241729,0.64643,0.37262,0.433248,0.188522,1.453658,0.167833,1.067477,0.58251,0.322719,3.173021,,0.76702,,0.203558,14.18995,0.310204,0.431234,0.141932,0.410377,0.168854,0.061179,0.089902,0.205999,0.724552,2.744802,0.48621,0.173018,0.73525,0.127796,0.288571,0.183923,0.154776,0.169042,0.677488,0.353934,0.347585,0.325588,0.519839,0.272885,0.191295,0.660716,0.395104,0.270519,0.282248,0.266331,0.293893,0.284051,0.307701,0.311338,0.198783,0.342313,2.831642,0.93136,28.700962,0.68012,14.805445,-6.529728,-39.768142,0.28366,0.137571,0.166333,0.132649,0.548138,8.81796,0.944786,26.437616,0.310801,1.470267,0.121985,0.174984,0.074421,0.162772,-0.240484,-0.215597,0.053961,0.126646,0.08921,0.138226,-0.228062,-0.197028,0.036021,0.14015,0.098591,0.153099,-0.217964,-0.192697,-0.378148,-3.028009,0.240235,0.492522,0.524785,6.691656,-0.403387,-1.815872,0.251466,0.169224,0.260831,1.391229,1.033835,11.127474,0.685369,8.777845,-2.590088,28.930812,-2.107251,0.453277,0.848761,34.385458,0.647732,0.212217,0.243498,0.545055,0.52388,0.569588,0.653533,-0.106845


Saved descriptive statistics to: /Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/results/plots/feature_distribution_cookieTheft/descriptive_statistics.csv

Features with low variance (std < 0.01):


Unnamed: 0,Low-Variance Features
0,PROPN
1,PUNCT
2,SYM
3,OTHER
4,eGeMAPS_slopeV500-1500_sma3nz_amean
5,eGeMAPS_slopeUV500-1500_sma3nz_amean
