In [1]:
import pandas as pd
from collections import defaultdict
import librosa as lr
import ast
from scipy.stats import f_oneway
from tqdm import tqdm
import numpy as np
import os

In [2]:
# Audio loading settings: target sampling rate passed to the loader and the
# directory that the datalist's relative file paths are joined onto.
sr = 44100
data_root_dir = "/home/anonymous/Documents/lumen/Dataset/"

# Class label -> integer id, plus the reverse lookup (integer id -> label).
class_mappings = dict(cel=0, cla=1, flu=2, gac=3, gel=4, org=5, pia=6, sax=7, tru=8, vio=9, voi=10)
class_mappings_inv = dict(zip(class_mappings.values(), class_mappings.keys()))

# Read the training datalist and shuffle its rows; the fixed random_state keeps
# the shuffled order identical between runs, and the index is renumbered 0..n-1.
df_train = (
    pd.read_csv("/home/anonymous/Documents/lumen/Dataset/datalists/train.csv")
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [3]:
# Build per-class samples of summarised audio features for the ANOVA cells:
#
#     feature_means[feature_name][class_id] -> list with one entry per file
#
# Every training file contributes one entry to each class id listed in its
# `classes_id` column. An entry is either a single number (mean over the whole
# feature matrix) or a 1-D array (mean over axis 1, i.e. one value per feature
# row such as one per MFCC coefficient).
feature_means = defaultdict(dict)
for i, row in tqdm(df_train.iterrows(), total=len(df_train)):
    # Bind the returned rate to its own name so the loop does not keep
    # reassigning the notebook-level `sr` setting.
    y, loaded_sr = lr.load(os.path.join(data_root_dir, row['file_path']), sr=sr)
    # `classes_id` is stored in the CSV as the text of a Python literal.
    classes_id = ast.literal_eval(row['classes_id'])

    zcr = lr.feature.zero_crossing_rate(y=y)
    mfcc = lr.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    chroma = lr.feature.chroma_stft(y=y, sr=sr)
    tonnetz = lr.feature.tonnetz(y=y, sr=sr)
    rmse = lr.feature.rms(y=y)
    spec_centroid = lr.feature.spectral_centroid(y=y, sr=sr)
    spec_contrast = lr.feature.spectral_contrast(y=y, sr=sr)
    spec_bandwidth = lr.feature.spectral_bandwidth(y=y, sr=sr)
    spec_rolloff = lr.feature.spectral_rolloff(y=y, sr=sr)
    spec_flatness = lr.feature.spectral_flatness(y=y)
    poly_features = lr.feature.poly_features(y=y, sr=sr)
    spectrogram = lr.feature.melspectrogram(y=y, sr=sr)

    # Summarise each feature once per file; the summaries do not depend on the
    # class id, so they are computed outside the per-class loop. Key order here
    # fixes the key order of `feature_means`.
    file_means = {
        'zcr': zcr.mean(),
        'mfcc': np.mean(mfcc, axis=1),
        'chroma': np.mean(chroma, axis=1),
        'tonnetz': np.mean(tonnetz, axis=1),
        'rmse': np.mean(rmse),
        'spec_centroid': np.mean(spec_centroid),
        'spec_contrast': np.mean(spec_contrast, axis=1),
        'spec_bandwidth': np.mean(spec_bandwidth),
        'spec_rolloff': np.mean(spec_rolloff),
        'spec_flatness': np.mean(spec_flatness),
        'poly_features': np.mean(poly_features, axis=1),
        'spectrogram': np.mean(spectrogram, axis=1),
    }

    for class_id in classes_id:
        for feature_name, file_mean in file_means.items():
            # Append in place instead of rebuilding the list with `+`, which
            # copied the whole accumulated list for every file.
            feature_means[feature_name].setdefault(class_id, []).append(file_mean)


100%|██████████| 6705/6705 [56:42<00:00,  1.97it/s]


In [4]:
# One-way ANOVA on the per-file mean zero-crossing rate; each class id's list
# of values is passed to f_oneway as a separate group.
zcr_by_class = feature_means['zcr']
zcr_anova = f_oneway(*zcr_by_class.values())
zcr_anova

F_onewayResult(statistic=98.3682622650316, pvalue=7.219058358624183e-191)

In [5]:
# One-way ANOVA on the per-file mean MFCC vectors, grouped by class id. Every
# sample is an array of 13 coefficient means, so the result carries one
# statistic / p-value per coefficient.
mfcc_by_class = feature_means['mfcc']
mfcc_anova = f_oneway(*mfcc_by_class.values())
mfcc_anova

F_onewayResult(statistic=array([263.41249626, 111.89580813, 143.77657161, 192.32016538,
        77.85490668, 166.16300598,  67.96450158, 134.45499913,
        28.37737807,  70.54627709,  21.33032391,  58.86691445,
        36.22663587]), pvalue=array([0.00000000e+000, 4.60698840e-216, 7.82504395e-274, 0.00000000e+000,
       7.33708519e-152, 4.18496843e-313, 1.04927318e-132, 3.59864258e-257,
       6.58725529e-054, 9.97177052e-138, 1.25701151e-039, 6.90946655e-115,
       9.13722580e-070]))

In [6]:
# One-way ANOVA on the per-file mean chroma vectors, grouped by class id; the
# result holds one statistic / p-value per chroma row.
chroma_by_class = feature_means['chroma']
chroma_anova = f_oneway(*chroma_by_class.values())
chroma_anova

F_onewayResult(statistic=array([69.42959586, 71.47277001, 54.51550848, 70.02797273, 65.53690288,
       56.84114643, 89.02374546, 75.83277691, 85.21376006, 62.36744044,
       59.66775957, 88.23468   ]), pvalue=array([1.47595556e-135, 1.58596888e-139, 2.68015631e-106, 1.01333507e-136,
       5.71609017e-128, 6.80181886e-111, 3.21260223e-173, 5.80064487e-148,
       5.75918061e-166, 9.14101639e-122, 1.83384057e-116, 1.01468787e-171]))

In [7]:
# One-way ANOVA on the per-file mean tonnetz vectors, grouped by class id; the
# result holds one statistic / p-value per tonnetz row.
tonnetz_by_class = feature_means['tonnetz']
tonnetz_anova = f_oneway(*tonnetz_by_class.values())
tonnetz_anova

F_onewayResult(statistic=array([20.88617951, 24.90772735,  3.39897447,  1.89207563,  1.3530165 ,
        3.26862617]), pvalue=array([9.98207001e-39, 7.05371202e-47, 1.90253953e-04, 4.14814497e-02,
       1.95801589e-01, 3.14706777e-04]))

In [8]:
# One-way ANOVA on the per-file mean RMS value, with one group of scalar
# samples per class id.
rmse_by_class = feature_means['rmse']
rmse_anova = f_oneway(*rmse_by_class.values())
rmse_anova

F_onewayResult(statistic=205.05600595826243, pvalue=0.0)

In [9]:
# One-way ANOVA on the per-file mean spectral centroid, with one group of
# scalar samples per class id.
spec_centroid_by_class = feature_means['spec_centroid']
spec_centroid_anova = f_oneway(*spec_centroid_by_class.values())
spec_centroid_anova

F_onewayResult(statistic=141.9143720266767, pvalue=1.6122161486406474e-270)

In [10]:
# One-way ANOVA on the per-file mean spectral-contrast vectors, grouped by
# class id; the result holds one statistic / p-value per contrast row.
spec_contrast_by_class = feature_means['spec_contrast']
spec_contrast_anova = f_oneway(*spec_contrast_by_class.values())
spec_contrast_anova

F_onewayResult(statistic=array([ 46.73170666,  58.60064023, 135.64555024, 202.35370421,
       171.59339937, 144.05921697, 227.68815341]), pvalue=array([7.84273921e-091, 2.31099022e-114, 2.61588721e-259, 0.00000000e+000,
       1.77863633e-322, 2.46115305e-274, 0.00000000e+000]))

In [11]:
# One-way ANOVA on the per-file mean spectral bandwidth, with one group of
# scalar samples per class id.
spec_bandwidth_by_class = feature_means['spec_bandwidth']
spec_bandwidth_anova = f_oneway(*spec_bandwidth_by_class.values())
spec_bandwidth_anova

F_onewayResult(statistic=131.40235421585385, pvalue=1.1308754453114454e-251)

In [12]:
# One-way ANOVA on the per-file mean spectral roll-off, with one group of
# scalar samples per class id.
spec_rolloff_by_class = feature_means['spec_rolloff']
spec_rolloff_anova = f_oneway(*spec_rolloff_by_class.values())
spec_rolloff_anova

F_onewayResult(statistic=139.9069098507866, pvalue=6.139326253894501e-267)

In [13]:
# One-way ANOVA on the per-file mean spectral flatness, with one group of
# scalar samples per class id.
spec_flatness_by_class = feature_means['spec_flatness']
spec_flatness_anova = f_oneway(*spec_flatness_by_class.values())
spec_flatness_anova

F_onewayResult(statistic=11.987688973453903, pvalue=8.509910284345387e-21)

In [14]:
# One-way ANOVA on the per-file mean polynomial-fit coefficients, grouped by
# class id; the result holds one statistic / p-value per coefficient row.
poly_features_by_class = feature_means['poly_features']
poly_features_anova = f_oneway(*poly_features_by_class.values())
poly_features_anova

F_onewayResult(statistic=array([240.94773674, 242.83137278]), pvalue=array([0., 0.]))

In [15]:
# One-way ANOVA on the per-file mean mel-spectrogram rows, grouped by class
# id; the result holds one statistic / p-value per mel band.
spectrogram_by_class = feature_means['spectrogram']
spectrogram_anova = f_oneway(*spectrogram_by_class.values())
spectrogram_anova

F_onewayResult(statistic=array([41.75537364, 57.85840896, 57.42179583, 74.10208221, 91.66183861,
       74.75890879, 66.67781839, 74.73530628, 48.97513719, 38.89599972,
       49.53307349, 50.85118873, 38.58762198, 28.70970918, 28.38156956,
       37.42859212, 22.17178745, 29.54694631, 24.38518576, 22.91719479,
       22.99692871, 13.48991648, 11.10020987, 32.42836409, 30.0710868 ,
       26.39941706, 14.13202522,  9.74356475, 13.57779376, 11.93816256,
       18.78920444, 16.35053819, 15.92645646, 13.95640905, 14.17127979,
       15.07094709, 16.01326494, 10.93770957, 12.52261668, 22.57151183,
       13.39415706, 26.9746194 , 27.04471798, 27.65561136, 27.23581139,
       39.15385052, 17.23918481, 10.37905718, 18.65002922, 10.78040782,
       31.68823778, 12.01877714, 34.96198076, 39.67000647, 30.21316197,
       27.78004476, 59.28579067, 44.5039136 , 49.09986832, 80.09744816,
       19.6319797 , 59.56810149, 61.59261151, 62.94239944, 62.62148562,
       48.25335277, 72.753461  , 39.810