As a last analysis step, we control as tightly as possible for metadata such as software version, codec, bitrate, etc. To do this, we take the 10 recordings with the most submissions (all Beatles songs), since these have roughly 90-125 submissions each. This is still a relatively small sample size, but it is the best we can do given the current dataset.

In [26]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import numpy as np
# Use seaborn style defaults and set the default figure size
sns.set(rc={'figure.figsize': (15, 15)})

from tqdm.notebook import tqdm
# Enable tqdm's pandas integration. This is called on the class itself rather
# than on a throwaway `tqdm()` instance: instantiating a bar just to reach
# `.pandas()` renders an empty, never-closed progress bar as a side effect.
tqdm.pandas()

# Non-pooled variance
def get_variance(submission):
    """Return the variance of ``submission``, repeated once per row.

    ``submission.var()`` is computed and the result is repeated
    ``len(submission)`` times via ``.repeat``, so the output carries one
    copy of each variance value for every row in the input.
    """
    n_rows = len(submission)
    return submission.var().repeat(n_rows)

# Classifier columns to study, one (classifier, label) pair per line
cols = [
    ('danceability', 'danceable'),
    ('mood_acoustic', 'acoustic'),
    ('mood_aggressive', 'aggressive'),
    ('mood_electronic', 'electronic'),
    ('mood_happy', 'happy'),
    ('mood_party', 'party'),
    ('mood_relaxed', 'relaxed'),
    ('mood_sad', 'sad'),
]

# Read the AcousticBrainz HDF5 file from ./datasets (relative to the current
# working directory) into the variable 'acousticbrainz'
dataset_path = Path.cwd().joinpath('datasets', 'acousticbrainzV3.h5')
acousticbrainz = pd.read_hdf(dataset_path)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

In [79]:
## WITHOUT FILTERING ON METADATA, JUST THE VARIANCE OVER THE TOP 10

# Indexes for the top 10 songs: count submissions per first index level,
# sort descending and take the first ten positions
submission_counts = acousticbrainz.groupby(level=0).size()
indexes = submission_counts.sort_values(ascending=False).iloc[:10].index

# For each of the recordings in the top-10 most submitted, calculate the
# per-classifier variance and remember the sample size and recording id
var_tables = []
for idx in indexes:
    # Look the recording up once and reuse it for both variance and count
    submissions = acousticbrainz.loc[idx][cols]
    var_table = pd.DataFrame(submissions.var())
    var_table['n'] = len(submissions)
    var_table['mbid'] = idx
    var_tables.append(var_table)

# DataFrame to generate tabular data. DataFrame.append was removed in
# pandas 2.0 (and copied the whole frame on every call), so the pieces are
# concatenated once instead.
res_table = pd.concat(var_tables)

res_table = res_table.reset_index()
res_table.columns = ['classifier', 'variance', 'n', 'mbid']

# Sample size per recording, followed by the recording x classifier variances
display(res_table[['mbid', 'n']].drop_duplicates())
res_table.pivot(index='mbid', columns='classifier', values='variance')


Unnamed: 0,mbid,n
0,ee898790-133f-445a-874f-d996abd843af,126
8,b2b50082-0bd1-4702-9a95-3499a4e5781b,108
16,659b4269-fe81-40e4-86e9-12879c09c9e6,96
24,c1d63906-f64a-4cd1-9873-9f3a9f98883c,95
32,15127932-c879-466e-b0f8-a1c5022d16e7,94
40,b849acd4-0638-49ea-8e40-7391613d4890,93
48,917f2be3-065e-4d1e-8a76-1b50abd1ad95,92
56,00c47ea6-3a10-4a32-b1f1-990ac756c6a0,91
64,485bbe7f-d0f7-4ffe-8adb-0f1093dd2dbf,89
72,63dd7ef6-6d6e-44d5-a4d9-190e49223077,88


classifier,"(danceability, danceable)","(mood_acoustic, acoustic)","(mood_aggressive, aggressive)","(mood_electronic, electronic)","(mood_happy, happy)","(mood_party, party)","(mood_relaxed, relaxed)","(mood_sad, sad)"
mbid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
00c47ea6-3a10-4a32-b1f1-990ac756c6a0,0.04854,0.001512,0.050878,0.06807,0.002336,0.052556,0.062423,0.002365
15127932-c879-466e-b0f8-a1c5022d16e7,0.040385,0.00506,0.025516,0.019757,0.078789,0.041087,0.035886,0.013528
485bbe7f-d0f7-4ffe-8adb-0f1093dd2dbf,0.042691,0.001068,0.02499,0.003214,0.025167,0.029937,0.00816,0.000409
63dd7ef6-6d6e-44d5-a4d9-190e49223077,0.074302,0.017655,0.08222,0.091115,0.015626,0.068895,0.087663,0.016154
659b4269-fe81-40e4-86e9-12879c09c9e6,0.087369,0.026016,0.064329,0.093443,0.035157,0.050382,0.102194,0.009033
917f2be3-065e-4d1e-8a76-1b50abd1ad95,0.042939,0.119838,0.067951,0.0578,0.054998,0.033003,0.078627,0.069582
b2b50082-0bd1-4702-9a95-3499a4e5781b,0.020254,0.005618,0.023863,0.034075,0.047929,0.02732,0.063736,0.003393
b849acd4-0638-49ea-8e40-7391613d4890,0.007093,0.038768,0.008743,0.064232,0.091888,0.021096,0.021642,0.023737
c1d63906-f64a-4cd1-9873-9f3a9f98883c,0.069221,0.028242,0.063083,0.065344,0.024942,0.047327,0.074604,0.022236
ee898790-133f-445a-874f-d996abd843af,0.019651,0.043427,0.045461,0.03171,0.02935,0.040726,0.030455,0.009407


In [159]:
## FILTERING ON METADATA TO MAXIMALLY CONTROL THE DATA
# Restrict to the top-10 recordings and name the two index levels
filtered = acousticbrainz.loc[indexes]
filtered.index.names = ['mbid', 'subno']

# Count submissions per combination of codec, recording and Essentia
# version/build identifiers (bitrate is left out: grouping on it as well
# would lose too much data)
groupers = ['codec', 'mbid', 'essentia_low', 'essentia_git_sha_low', 'essentia_build_sha_low']
grouped = filtered.groupby(groupers).size()

# Keep only the combinations with more than 50 submissions, largest first
has_enough_submissions = grouped > 50
groups_to_analyze = pd.DataFrame(
    grouped[has_enough_submissions].sort_values(ascending=False)
)

display(groups_to_analyze)
groups_to_analyze = groups_to_analyze.reset_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,0
codec,mbid,essentia_low,essentia_git_sha_low,essentia_build_sha_low,Unnamed: 5_level_1
aac,ee898790-133f-445a-874f-d996abd843af,2.1-beta2,v2.1_beta2-1-ge3940c0,2d9f1f26377add8aeb1075a9c2973f962c4f09fd,118
aac,b2b50082-0bd1-4702-9a95-3499a4e5781b,2.1-beta2,v2.1_beta2-1-ge3940c0,2d9f1f26377add8aeb1075a9c2973f962c4f09fd,97
aac,00c47ea6-3a10-4a32-b1f1-990ac756c6a0,2.1-beta2,v2.1_beta2-1-ge3940c0,2d9f1f26377add8aeb1075a9c2973f962c4f09fd,63
aac,c1d63906-f64a-4cd1-9873-9f3a9f98883c,2.1-beta2,v2.1_beta2-1-ge3940c0,2d9f1f26377add8aeb1075a9c2973f962c4f09fd,58
aac,63dd7ef6-6d6e-44d5-a4d9-190e49223077,2.1-beta2,v2.1_beta2-1-ge3940c0,2d9f1f26377add8aeb1075a9c2973f962c4f09fd,53
aac,15127932-c879-466e-b0f8-a1c5022d16e7,2.1-beta2,v2.1_beta2-1-ge3940c0,2d9f1f26377add8aeb1075a9c2973f962c4f09fd,52


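Thus, we can filter on Essentia version `2.1-beta2` (git SHA `v2.1_beta2-1-ge3940c0`, build SHA `2d9f1f26377add8aeb1075a9c2973f962c4f09fd`) with codec `aac` to control the metadata as much as possible while still retaining a usable number of samples. Six of the ten recordings have more than 50 submissions under this configuration.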

In [172]:
# Build the row mask one condition at a time: recording is one of the
# selected groups, and codec / Essentia version / git SHA / build SHA all
# match the single configuration chosen above
in_selected_recordings = filtered.index.get_level_values(level=0).isin(groups_to_analyze['mbid'])
is_aac = filtered['codec'] == 'aac'
is_beta2 = filtered['essentia_low'] == '2.1-beta2'
is_same_git_sha = filtered['essentia_git_sha_low'] == 'v2.1_beta2-1-ge3940c0'
is_same_build_sha = filtered['essentia_build_sha_low'] == '2d9f1f26377add8aeb1075a9c2973f962c4f09fd'

filt = in_selected_recordings & is_aac & is_beta2 & is_same_git_sha & is_same_build_sha

# Per-recording variance of the classifier columns on the controlled subset
controlled_rows = filtered[filt]
controlled = controlled_rows[cols].groupby('mbid').var()
display(controlled)

Unnamed: 0_level_0,"(danceability, danceable)","(mood_acoustic, acoustic)","(mood_aggressive, aggressive)","(mood_electronic, electronic)","(mood_happy, happy)","(mood_party, party)","(mood_relaxed, relaxed)","(mood_sad, sad)"
mbid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
00c47ea6-3a10-4a32-b1f1-990ac756c6a0,0.052516,0.001594,0.041407,0.046502,0.002352,0.038993,0.049583,0.001583
15127932-c879-466e-b0f8-a1c5022d16e7,0.036074,0.003263,0.020883,0.014707,0.098657,0.014955,0.023904,0.007468
63dd7ef6-6d6e-44d5-a4d9-190e49223077,0.070704,0.021278,0.079049,0.065768,0.01748,0.061032,0.076256,0.016546
b2b50082-0bd1-4702-9a95-3499a4e5781b,0.017601,0.005771,0.022203,0.009202,0.038446,0.012613,0.045961,0.003347
c1d63906-f64a-4cd1-9873-9f3a9f98883c,0.060338,0.033907,0.07653,0.057413,0.03119,0.041066,0.059357,0.027474
ee898790-133f-445a-874f-d996abd843af,0.020848,0.04634,0.040026,0.023046,0.024237,0.029127,0.021352,0.009983
