In [293]:
# Need only first time
#conda install -c conda-forge python-dotenv
#conda install -c conda-forge pydot

In [294]:
%matplotlib inline

import os

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import datetime
import librosa
import librosa.display
import plotly
import plotly.express as px
import plotly.io as pio
  
import sys
import pathlib
# Add ./fma_metadata/ to the module search path (presumably where the
# `utils` module imported below lives — TODO confirm).
sys.path.insert(1, './fma_metadata/')
import utils
import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including pandas copy/deprecation warnings — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Default size for matplotlib figures.
plt.rcParams['figure.figsize'] = (17, 5)

In [295]:
# Load metadata and features.
# The metadata folder is configured in one place instead of being repeated in
# every call. Set the FMA_METADATA_DIR environment variable to point at another
# copy of the dataset; the default is the location used originally.
DATA_DIR = os.environ.get(
    'FMA_METADATA_DIR',
    '/Users/gaetanoantonicchio/Desktop/dataset_DM2/fma_metadata',
)

genres = utils.load(os.path.join(DATA_DIR, 'genres.csv'))
features = utils.load(os.path.join(DATA_DIR, 'features.csv'))
echonest = utils.load(os.path.join(DATA_DIR, 'echonest.csv'))
tracks = utils.load(os.path.join(DATA_DIR, 'tracks.csv'))

# Sanity checks: features and tracks must share exactly the same index, and
# every echonest entry must refer to a known track.
np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()

tracks.shape, genres.shape, features.shape, echonest.shape

((106574, 52), (163, 4), (106574, 518), (13129, 249))

# ** Tracks **

In [296]:
# Percentage of non-missing values for each column of the 'track' partition,
# from the most to the least complete column.
print('Available data - track subset - Tracks.csv (%)')
(
    tracks['track']
    .notnull()
    .mean()
    .mul(100)
    .round(2)
    .sort_values(ascending=False)
)

Available data - track subset - Tracks.csv (%)


bit_rate         100.00
genres           100.00
tags             100.00
number           100.00
listens          100.00
interest         100.00
comments         100.00
genres_all       100.00
title            100.00
favorites        100.00
duration         100.00
date_created     100.00
license           99.92
genre_top         46.54
language_code     14.10
date_recorded      5.78
composer           3.44
information        2.20
publisher          1.19
lyricist           0.29
dtype: float64

In [297]:
# Percentage of non-missing values for each column of the 'album' partition,
# from the most to the least complete column.
print('Available data - album subset - Tracks.csv (%)')
(
    tracks['album']
    .notnull()
    .mean()
    .mul(100)
    .round(2)
    .sort_values(ascending=False)
)

Available data - album subset - Tracks.csv (%)


comments         100.00
favorites        100.00
id               100.00
listens          100.00
tags             100.00
tracks           100.00
title             99.04
date_created      96.69
type              93.89
information       78.02
date_released     65.96
producer          16.95
engineer          14.35
dtype: float64

In [298]:
# Percentage of non-missing values for each column of the 'artist' partition,
# from the most to the least complete column.
print('Available data - artist subset - Tracks.csv (%)')
(
    tracks['artist']
    .notnull()
    .mean()
    .mul(100)
    .round(2)
    .sort_values(ascending=False)
)

Available data - artist subset - Tracks.csv (%)


name                 100.00
comments             100.00
favorites            100.00
id                   100.00
tags                 100.00
date_created          99.20
website               74.37
bio                   66.77
location              65.88
members               43.96
latitude              41.80
longitude             41.80
active_year_begin     21.31
associated_labels     13.39
related_projects      12.34
wikipedia_page         5.24
active_year_end        5.04
dtype: float64

In [299]:
# Summary statistics for the 'track' partition of tracks.
tracks['track'].describe()

Unnamed: 0,bit_rate,comments,duration,favorites,interest,listens,number
count,106574.0,106574.0,106574.0,106574.0,106574.0,106574.0,106574.0
mean,263274.695048,0.031621,277.8491,3.182521,3541.31,2329.353548,8.260945
std,67623.443584,0.321993,305.518553,13.51382,19017.43,8028.070647,15.243271
min,-1.0,0.0,0.0,0.0,2.0,0.0,0.0
25%,192000.0,0.0,149.0,0.0,599.0,292.0,2.0
50%,299914.0,0.0,216.0,1.0,1314.0,764.0,5.0
75%,320000.0,0.0,305.0,3.0,3059.0,2018.0,9.0
max,448000.0,37.0,18350.0,1482.0,3293557.0,543252.0,255.0


In [300]:
# Display the 'track' partition (the notebook renders a truncated view).
# NOTE(review): .head() or .sample() would keep the stored output smaller.
tracks['track']

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2,256000,0,,2008-11-26 01:48:12,2008-11-26,168,2,Hip-Hop,[21],[21],,4656,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1293,,3,,[],Food
3,256000,0,,2008-11-26 01:48:14,2008-11-26,237,1,Hip-Hop,[21],[21],,1470,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,514,,4,,[],Electric Ave
5,256000,0,,2008-11-26 01:48:20,2008-11-26,206,6,Hip-Hop,[21],[21],,1933,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1151,,6,,[],This World
10,192000,0,Kurt Vile,2008-11-25 17:49:06,2008-11-26,161,178,Pop,[10],[10],,54881,en,Attribution-NonCommercial-NoDerivatives (aka M...,50135,,1,,[],Freeway
20,256000,0,,2008-11-26 01:48:56,2008-01-01,311,0,,"[76, 103]","[17, 10, 76, 103]",,978,en,Attribution-NonCommercial-NoDerivatives (aka M...,361,,3,,[],Spiritual Level
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155316,320000,0,,2017-03-30 15:23:34,NaT,162,1,Rock,[25],"[25, 12]",,122,,Creative Commons Attribution-NonCommercial-NoD...,102,,3,,[],The Auger
155317,320000,0,,2017-03-30 15:23:36,NaT,217,1,Rock,[25],"[25, 12]",,194,,Creative Commons Attribution-NonCommercial-NoD...,165,,4,,[],Let's Skin Ruby
155318,320000,0,,2017-03-30 15:23:37,NaT,404,2,Rock,[25],"[25, 12]",,214,,Creative Commons Attribution-NonCommercial-NoD...,168,,6,,[],My House Smells Like Kim Deal/Pulp
155319,320000,0,,2017-03-30 15:23:39,NaT,146,0,Rock,[25],"[25, 12]",,336,,Creative Commons Attribution-NonCommercial-NoD...,294,,5,,[],The Man With Two Mouths


In [301]:
# Summary statistics for the 'album' partition of tracks.
tracks['album'].describe()

Unnamed: 0,comments,favorites,id,listens,tracks
count,106574.0,106574.0,106574.0,106574.0,106574.0
mean,0.394946,1.286927,12826.933914,32120.31,19.721452
std,2.268915,3.133035,6290.261805,147853.2,39.943673
min,-1.0,-1.0,-1.0,-1.0,-1.0
25%,0.0,0.0,7793.0,3361.0,7.0
50%,0.0,0.0,13374.0,8982.0,11.0
75%,0.0,1.0,18203.0,23635.0,17.0
max,53.0,61.0,22940.0,3564243.0,652.0


In [302]:
# Summary statistics for the 'artist' partition of tracks.
tracks['artist'].describe()

Unnamed: 0,comments,favorites,id,latitude,longitude
count,106574.0,106574.0,106574.0,44544.0,44544.0
mean,1.894702,30.041915,12036.770404,39.901626,-38.668642
std,6.297679,100.511408,6881.420867,18.24086,65.23722
min,-1.0,-1.0,1.0,-45.87876,-157.526855
25%,0.0,1.0,6443.0,39.271398,-79.997459
50%,0.0,5.0,12029.5,41.387917,-73.554431
75%,1.0,16.0,18011.0,48.85693,4.35171
max,79.0,963.0,24357.0,67.286005,175.277


In [303]:
# Preview the first rows of each top-level partition of tracks.csv.
for partition in ('track', 'album', 'artist', 'set'):
    ipd.display(tracks[partition].head())

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2,256000,0,,2008-11-26 01:48:12,2008-11-26,168,2,Hip-Hop,[21],[21],,4656,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1293,,3,,[],Food
3,256000,0,,2008-11-26 01:48:14,2008-11-26,237,1,Hip-Hop,[21],[21],,1470,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,514,,4,,[],Electric Ave
5,256000,0,,2008-11-26 01:48:20,2008-11-26,206,6,Hip-Hop,[21],[21],,1933,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1151,,6,,[],This World
10,192000,0,Kurt Vile,2008-11-25 17:49:06,2008-11-26,161,178,Pop,[10],[10],,54881,en,Attribution-NonCommercial-NoDerivatives (aka M...,50135,,1,,[],Freeway
20,256000,0,,2008-11-26 01:48:56,2008-01-01,311,0,,"[76, 103]","[17, 10, 76, 103]",,978,en,Attribution-NonCommercial-NoDerivatives (aka M...,361,,3,,[],Spiritual Level


Unnamed: 0_level_0,comments,date_created,date_released,engineer,favorites,id,information,listens,producer,tags,title,tracks,type
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2,0,2008-11-26 01:44:45,2009-01-05,,4,1,<p></p>,6073,,[],AWOL - A Way Of Life,7,Album
3,0,2008-11-26 01:44:45,2009-01-05,,4,1,<p></p>,6073,,[],AWOL - A Way Of Life,7,Album
5,0,2008-11-26 01:44:45,2009-01-05,,4,1,<p></p>,6073,,[],AWOL - A Way Of Life,7,Album
10,0,2008-11-26 01:45:08,2008-02-06,,4,6,,47632,,[],Constant Hitmaker,2,Album
20,0,2008-11-26 01:45:05,2009-01-06,,2,4,"<p> ""spiritual songs"" from Nicky Cook</p>",2710,,[],Niris,13,Album


Unnamed: 0_level_0,active_year_begin,active_year_end,associated_labels,bio,comments,date_created,favorites,id,latitude,location,longitude,members,name,related_projects,tags,website,wikipedia_page
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2,2006-01-01,NaT,,"<p>A Way Of Life, A Collective of Hip-Hop from...",0,2008-11-26 01:42:32,9,1,40.058324,New Jersey,-74.405661,"Sajje Morocco,Brownbum,ZawidaGod,Custodian of ...",AWOL,The list of past projects is 2 long but every1...,[awol],http://www.AzillionRecords.blogspot.com,
3,2006-01-01,NaT,,"<p>A Way Of Life, A Collective of Hip-Hop from...",0,2008-11-26 01:42:32,9,1,40.058324,New Jersey,-74.405661,"Sajje Morocco,Brownbum,ZawidaGod,Custodian of ...",AWOL,The list of past projects is 2 long but every1...,[awol],http://www.AzillionRecords.blogspot.com,
5,2006-01-01,NaT,,"<p>A Way Of Life, A Collective of Hip-Hop from...",0,2008-11-26 01:42:32,9,1,40.058324,New Jersey,-74.405661,"Sajje Morocco,Brownbum,ZawidaGod,Custodian of ...",AWOL,The list of past projects is 2 long but every1...,[awol],http://www.AzillionRecords.blogspot.com,
10,NaT,NaT,"Mexican Summer, Richie Records, Woodsist, Skul...","<p><span style=""font-family:Verdana, Geneva, A...",3,2008-11-26 01:42:55,74,6,,,,"Kurt Vile, the Violators",Kurt Vile,,"[philly, kurt vile]",http://kurtvile.com,
20,1990-01-01,2011-01-01,,<p>Songs written by: Nicky Cook</p>\n<p>VOCALS...,2,2008-11-26 01:42:52,10,4,51.895927,Colchester England,0.891874,Nicky Cook\n,Nicky Cook,,"[instrumentals, experimental pop, post punk, e...",,


Unnamed: 0_level_0,split,subset
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2,training,small
3,training,medium
5,training,small
10,training,small
20,training,large


##  Data cleaning & preparation - tracks.csv
1. all unnecessary rows were removed (after they've been carefully analysed)
2. <b>tracks_df</b> is the base dataset for tracks

In [304]:
# Replace ('track', 'date_created') with its year in ('track', 'year_created').
# The guard makes the cell safe to re-run: once date_created has been dropped,
# a second execution is a no-op instead of raising a KeyError.
date_created_col = ('track', 'date_created')
if date_created_col in tracks.columns:
    # extract year from date_created --> to new col: year_created in track
    tracks['track', 'year_created'] = pd.DatetimeIndex(tracks[date_created_col]).year
    # remove col date_created
    tracks.drop(date_created_col, axis=1, inplace=True)

In [305]:
# List the columns of the 'track' partition.
tracks['track'].columns

Index(['bit_rate', 'comments', 'composer', 'date_recorded', 'duration',
       'favorites', 'genre_top', 'genres', 'genres_all', 'information',
       'interest', 'language_code', 'license', 'listens', 'lyricist', 'number',
       'publisher', 'tags', 'title', 'year_created'],
      dtype='object')

In [306]:
# List the columns of the 'artist' partition.
tracks['artist'].columns

Index(['active_year_begin', 'active_year_end', 'associated_labels', 'bio',
       'comments', 'date_created', 'favorites', 'id', 'latitude', 'location',
       'longitude', 'members', 'name', 'related_projects', 'tags', 'website',
       'wikipedia_page'],
      dtype='object')

In [307]:
# List the columns of the 'album' partition.
tracks['album'].columns

Index(['comments', 'date_created', 'date_released', 'engineer', 'favorites',
       'id', 'information', 'listens', 'producer', 'tags', 'title', 'tracks',
       'type'],
      dtype='object')

In [308]:
# Build the base dataset from a filtered set of (partition, column) pairs.
selected_columns = [
    ('artist', 'name'),
    ('track', 'title'), ('track', 'year_created'), ('track', 'duration'),
    ('track', 'listens'), ('track', 'favorites'), ('track', 'bit_rate'),
    ('track', 'interest'), ('track', 'license'),
    ('track', 'genre_top'), ('track', 'genres_all'),
    ('album', 'title'), ('album', 'tracks'), ('album', 'listens'),
    ('set', 'split'), ('set', 'subset'),
]
# .copy() makes tracks_df an independent frame, so the cleaning steps applied
# to it later do not operate on a selection derived from `tracks`.
tracks_df = tracks[selected_columns].copy()
# display final dataset
tracks_df.head()

Unnamed: 0_level_0,artist,track,track,track,track,track,track,track,track,track,track,album,album,album,set,set
Unnamed: 0_level_1,name,title,year_created,duration,listens,favorites,bit_rate,interest,license,genre_top,genres_all,title,tracks,listens,split,subset
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
2,AWOL,Food,2008,168,1293,2,256000,4656,Attribution-NonCommercial-ShareAlike 3.0 Inter...,Hip-Hop,[21],AWOL - A Way Of Life,7,6073,training,small
3,AWOL,Electric Ave,2008,237,514,1,256000,1470,Attribution-NonCommercial-ShareAlike 3.0 Inter...,Hip-Hop,[21],AWOL - A Way Of Life,7,6073,training,medium
5,AWOL,This World,2008,206,1151,6,256000,1933,Attribution-NonCommercial-ShareAlike 3.0 Inter...,Hip-Hop,[21],AWOL - A Way Of Life,7,6073,training,small
10,Kurt Vile,Freeway,2008,161,50135,178,192000,54881,Attribution-NonCommercial-NoDerivatives (aka M...,Pop,[10],Constant Hitmaker,2,47632,training,small
20,Nicky Cook,Spiritual Level,2008,311,361,0,256000,978,Attribution-NonCommercial-NoDerivatives (aka M...,,"[17, 10, 76, 103]",Niris,13,2710,training,large


In [309]:
# Count the missing values in each column of the tracks_df DataFrame.
tracks_df.isnull().sum()

artist  name                0
track   title               1
        year_created        0
        duration            0
        listens             0
        favorites           0
        bit_rate            0
        interest            0
        license            87
        genre_top       56976
        genres_all          0
album   title            1025
        tracks              0
        listens             0
set     split               0
        subset              0
dtype: int64

In [310]:
# Drop the rows with a missing album title (it is not possible to retrieve it)
# and the single row with a missing track title.
# A boolean mask plus reassignment replaces the two in-place drops: it is a
# single pass, does not mutate a frame derived from `tracks`, and re-running
# the cell gives the same result.
has_titles = tracks_df['album', 'title'].notnull() & tracks_df['track', 'title'].notnull()
tracks_df = tracks_df.loc[has_titles].copy()

In [311]:
# Bar chart of listens by year of track creation.
# The title previously read "tracks per top-genre", which did not match the
# plotted columns (x = year_created, y = listens).
barchart = px.bar(
    data_frame=tracks_df['track'],
    x='year_created',
    y='listens',
    color='year_created',
    opacity=0.9,
    orientation='v',
    barmode='relative',
    title="listens per year of creation")
barchart

# <span style="color:red">TODO:</span>  REPLACE MISSING VALUES IN GENRES_ALL AND DECIDE IF IT'S WORTH IT TO KEEP LICENSE 

## ** Genres ** 

- Information about features

<b>top_level</b>: assigns a value based on the genre hierarchy. The lower the number, the older the genre (hence the closer to the top of the hierarchy). A genre whose top_level differs from its own genre_id, e.g. [genre_id: 1; title: Avant-Garde; top_level: 38], is a derivative genre.

<b>parent</b>: the id of the genre a genre directly derives from. Genres at the top of the hierarchical tree have parent = 0, while a derivative genre has parent equal to the genre_id of the genre it derives from (for a direct child of a top genre this is also its top_level).


In [312]:
# Preview the first rows of the genres table.
genres.head()

Unnamed: 0_level_0,#tracks,parent,title,top_level
genre_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,8693,38,Avant-Garde,38
2,5271,0,International,2
3,1752,0,Blues,3
4,4126,0,Jazz,4
5,4106,0,Classical,5


In [313]:
# (number of rows, number of columns) of the genres table.
genres.shape

(163, 4)

In [314]:
# Count the missing values in each column of genres.
genres.isnull().sum()

#tracks      0
parent       0
title        0
top_level    0
dtype: int64

In [315]:
# Distinct values found in the top_level column.
genres['top_level'].unique()

array([  38,    2,    3,    4,    5,   20,    8,    9,   10,   14,   12,
         13,   15,   17, 1235,   21])

In [316]:
# Distinct values found in the parent column (0 is used by the genres that
# the grouping cell treats as roots).
genres['parent'].unique()

array([  38,    0,   20,   14,    6, 1235,   12,   17,    4,   15,   65,
          2,   45,    9,   25,   10,   92,   21,   53,   31,   26,   13,
         86,    5,  297,  109,   19,  182,  181,   85,  468,  651,   46,
        130,   18,    3,   79,   16,  102,  763])

In [317]:
# Grouping genres: collect the titles and top_level ids of the root genres,
# i.e. the rows whose parent is 0.
is_root = genres['parent'] == 0
rootGen_lst = genres.loc[is_root, 'title'].tolist()
rootTop_lev = genres.loc[is_root, 'top_level'].tolist()
# Filled later with {top_level id: root genre title}.
root_gen = dict()

def GroupGenres(gen_lst, lev_lst, dict_out):
    """Store ``level -> genre`` pairs in ``dict_out`` and return it.

    ``gen_lst`` and ``lev_lst`` are read in parallel; pairing stops at the
    end of the shorter one. ``dict_out`` is modified in place, and a level
    that occurs more than once keeps the last genre paired with it.
    """
    dict_out.update(zip(lev_lst, gen_lst))
    return dict_out

def GenTopGenres(x, top_titles=None):
    """Add a ``title_top`` column holding the root-genre title of each row.

    Parameters
    ----------
    x : DataFrame with a ``top_level`` column. It is modified in place.
    top_titles : optional mapping ``top_level id -> root genre title``.
        When omitted, the notebook-level ``root_gen`` dictionary is used,
        which keeps the original one-argument call working.

    Rows whose ``top_level`` is not a key of the mapping get a missing value.
    Returns ``None``.
    """
    if top_titles is None:
        top_titles = root_gen
    # One vectorised lookup replaces the nested loops, whose inner loop walked
    # over the column names and repeated the same assignment for each of them.
    x['title_top'] = x['top_level'].map(top_titles)
        
# Fill root_gen with {top_level id: root genre title} and print the mapping.
print(GroupGenres(rootGen_lst, rootTop_lev,root_gen))
# Add the 'title_top' column to genres (the frame is modified in place).
GenTopGenres(genres)
    

{2: 'International', 3: 'Blues', 4: 'Jazz', 5: 'Classical', 8: 'Old-Time / Historic', 9: 'Country', 10: 'Pop', 12: 'Rock', 13: 'Easy Listening', 14: 'Soul-RnB', 15: 'Electronic', 17: 'Folk', 20: 'Spoken', 21: 'Hip-Hop', 38: 'Experimental', 1235: 'Instrumental'}


In [318]:
# Preview genres again to check the new title_top column.
genres.head()

Unnamed: 0_level_0,#tracks,parent,title,top_level,title_top
genre_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,8693,38,Avant-Garde,38,Experimental
2,5271,0,International,2,International
3,1752,0,Blues,3,Blues
4,4126,0,Jazz,4,Jazz
5,4106,0,Classical,5,Classical


In [319]:
# Sunburst chart with the root genre (title_top) as the inner ring and each
# genre title as the outer ring, coloured by root genre.
fig = px.sunburst(genres, path=['title_top', 'title'], color='title_top', title='Genres parent/child') 
fig.show()

In [354]:
# Plotly bar chart of the '#tracks' column, grouped and coloured by root
# genre (title_top).
barchart = px.bar(
    genres,
    x='title_top',
    y='#tracks',
    color='title_top',
    title="tracks per top-genre",
    barmode='relative',
    orientation='v',
    opacity=0.9,
)
barchart

In [321]:
# Plotly bar chart of the '#tracks' column for every individual genre title.
barchart = px.bar(
    genres,
    x='title',
    y='#tracks',
    color='title',
    title="tracks per genre",
    barmode='relative',
    orientation='v',
    opacity=0.9,
)
barchart

# ** Features **

<b>UNDERSTANDING FEATURES FOR SIGNAL RECOGNITION:</b>

1. <b>zcr</b>:  ZERO-CROSSING-RATE is the rate at which a signal changes from positive to zero to negative or from negative to zero to positive --> useful for classifying percussive sounds.

2. <b>tonnetz</b>:  is a conceptual lattice diagram representing tonal space; Tonnetz can be used to show traditional harmonic relationships in European classical music;

3. <b>chroma_cens</b>:  relates to the twelve different pitch classes. Chroma features are a powerful tool for analyzing music whose pitches can be meaningfully categorized --> MAIN CHARACTERISTIC: they capture harmonic and melodic characteristics of music, while being robust to changes in timbre and instrumentation;

4. <b>spectral_bandwidth</b>: is the wavelength interval over which the magnitude of all spectral components is equal to or greater than a specified fraction of the magnitude of the component having the maximum value;

5.  <b>spectral_centroid</b>: is a measure used in digital signal processing to characterise a spectrum. It indicates where the center of mass of the spectrum is located. Perceptually, it has a robust connection with the impression of brightness of a sound;

6. <b>spectral roll-off</b>: The roll-off frequency is defined as the frequency under which some percentage (cutoff) of the total energy of the spectrum is contained. The roll-off frequency can be used to distinguish between harmonic (below roll-off) and noisy sounds (above roll-off);

7. <b>spectral_contrast </b>: is defined as the decibel difference between peaks and valleys in the spectrum.

In [322]:
print('{1} features for {0} tracks'.format(*features.shape))
# Feature groups to preview. The two nested lists are intentional: the
# features they name are selected together and shown in a single table.
columns = [
    'mfcc', 'chroma_cens', 'tonnetz', 'spectral_contrast',
    ['spectral_centroid', 'spectral_bandwidth', 'spectral_rolloff'],
    ['rmse', 'zcr'],
]
for column in columns:
    preview = features[column].head()
    ipd.display(preview.style.format('{:.2f}'))

518 features for 106574 tracks


statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,median,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,min,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std
number,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,16,17,18,19,20
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2,Unnamed: 83_level_2,Unnamed: 84_level_2,Unnamed: 85_level_2,Unnamed: 86_level_2,Unnamed: 87_level_2,Unnamed: 88_level_2,Unnamed: 89_level_2,Unnamed: 90_level_2,Unnamed: 91_level_2,Unnamed: 92_level_2,Unnamed: 93_level_2,Unnamed: 94_level_2,Unnamed: 95_level_2,Unnamed: 96_level_2,Unnamed: 97_level_2,Unnamed: 98_level_2,Unnamed: 99_level_2,Unnamed: 
100_level_2,Unnamed: 101_level_2,Unnamed: 102_level_2,Unnamed: 103_level_2,Unnamed: 104_level_2,Unnamed: 105_level_2,Unnamed: 106_level_2,Unnamed: 107_level_2,Unnamed: 108_level_2,Unnamed: 109_level_2,Unnamed: 110_level_2,Unnamed: 111_level_2,Unnamed: 112_level_2,Unnamed: 113_level_2,Unnamed: 114_level_2,Unnamed: 115_level_2,Unnamed: 116_level_2,Unnamed: 117_level_2,Unnamed: 118_level_2,Unnamed: 119_level_2,Unnamed: 120_level_2,Unnamed: 121_level_2,Unnamed: 122_level_2,Unnamed: 123_level_2,Unnamed: 124_level_2,Unnamed: 125_level_2,Unnamed: 126_level_2,Unnamed: 127_level_2,Unnamed: 128_level_2,Unnamed: 129_level_2,Unnamed: 130_level_2,Unnamed: 131_level_2,Unnamed: 132_level_2,Unnamed: 133_level_2,Unnamed: 134_level_2,Unnamed: 135_level_2,Unnamed: 136_level_2,Unnamed: 137_level_2,Unnamed: 138_level_2,Unnamed: 139_level_2,Unnamed: 140_level_2
2,3.86,1.54,0.0,0.33,0.12,-0.34,-0.26,0.15,0.41,-0.16,-0.03,0.43,-0.23,-0.3,-0.19,-0.05,-0.15,-0.0,0.08,0.0,28.66,215.54,52.42,103.29,54.6,85.16,37.84,58.17,30.03,39.13,27.74,37.24,34.15,33.54,30.84,28.61,32.68,22.62,27.04,21.43,-163.77,116.7,-41.75,29.14,-15.05,18.88,-8.92,12.0,-4.25,1.36,-2.68,-0.79,-6.92,-3.66,1.47,0.2,4.0,-2.11,0.12,-5.79,-143.59,124.86,-43.52,28.89,-13.5,19.18,-7.83,11.58,-3.64,1.09,-2.27,-0.95,-6.49,-3.21,1.11,0.03,3.86,-1.7,0.0,-5.59,-504.89,-0.0,-115.32,-51.57,-97.88,-41.52,-61.75,-39.68,-54.22,-42.56,-49.2,-35.53,-38.51,-33.41,-31.21,-32.89,-28.14,-32.31,-27.95,-34.33,-1.75,-1.19,0.32,0.04,-0.26,0.03,-0.27,0.07,-0.17,-0.1,-0.16,0.26,-0.1,0.03,0.02,-0.1,0.02,-0.17,0.07,-0.15,97.81,38.57,22.58,20.77,19.87,20.3,14.63,12.18,9.4,10.74,10.06,8.6,9.28,9.25,8.52,8.56,7.65,7.25,7.08,7.39
3,4.3,1.4,0.11,-0.21,0.03,-0.02,0.15,0.05,0.03,-0.06,0.51,0.37,0.21,0.1,0.48,0.27,0.11,0.09,0.31,0.06,29.38,207.7,76.74,137.25,53.94,105.26,55.66,59.43,36.57,40.32,26.74,49.43,25.32,20.49,37.79,27.83,32.36,29.4,30.73,31.93,-159.0,120.16,-33.23,47.34,-6.25,31.41,-5.26,11.62,-1.6,5.13,-3.42,6.95,-4.18,-3.53,0.27,-2.27,1.09,-2.34,0.47,-1.55,-140.04,128.24,-33.95,46.59,-6.29,33.55,-5.44,12.08,-1.31,5.82,-2.97,5.82,-4.06,-2.81,-0.63,-1.77,1.09,-2.19,0.63,-1.28,-546.27,-18.52,-85.01,-12.55,-87.05,-26.99,-61.85,-33.42,-47.01,-31.35,-46.1,-24.69,-35.11,-34.73,-34.03,-35.29,-27.16,-28.57,-29.91,-39.94,-1.78,-1.17,0.3,-0.03,0.05,-0.01,0.18,-0.11,0.08,-0.29,-0.27,0.49,-0.08,-0.32,0.58,-0.13,-0.22,0.09,-0.3,0.04,111.69,41.19,19.41,22.03,19.33,19.18,12.42,10.26,9.39,10.17,8.77,10.03,6.98,7.65,9.6,7.22,8.4,7.29,7.42,8.78
5,2.62,2.42,0.44,-0.78,-0.77,-0.72,0.09,0.15,0.26,-0.61,0.1,-0.25,0.16,0.64,0.19,0.29,-0.07,0.36,0.61,0.13,-40.5,218.97,50.37,112.31,51.51,66.55,29.27,57.99,48.33,43.88,27.2,32.23,37.52,35.24,45.28,33.54,31.72,30.78,36.51,24.97,-205.44,132.22,-16.09,41.51,-7.64,16.94,-5.65,9.57,0.5,8.67,-8.27,0.59,-0.34,2.38,7.9,1.95,7.44,-1.74,0.28,-5.49,-181.02,138.25,-14.51,43.08,-8.0,17.44,-3.93,9.79,0.27,8.91,-7.98,0.26,-0.11,2.25,7.51,1.87,7.34,-1.92,0.0,-5.6,-528.7,-62.28,-87.21,-24.32,-74.06,-30.45,-59.32,-33.44,-38.41,-26.08,-46.7,-35.25,-33.57,-34.8,-22.94,-33.81,-20.74,-29.76,-37.07,-30.42,-1.49,-1.26,-0.36,-0.11,0.02,0.04,-0.51,-0.02,-0.05,-0.09,-0.17,-0.03,0.11,-0.08,0.25,-0.04,0.06,0.3,0.21,0.12,95.05,39.37,18.87,24.32,23.16,17.16,13.05,10.91,9.78,11.81,8.86,9.58,8.9,8.14,8.2,7.78,7.13,7.54,8.45,7.33
10,5.08,1.16,2.1,1.37,-0.2,-0.35,-0.53,0.56,0.28,-0.15,-0.05,-0.19,0.02,0.11,0.18,-0.12,-0.03,-0.15,0.11,0.04,20.2,235.2,60.41,78.47,52.21,58.24,21.6,42.96,27.95,36.47,24.78,36.5,17.19,27.24,22.88,28.58,26.52,22.58,21.73,24.47,-135.86,157.04,-53.45,17.2,6.87,13.93,-11.75,8.36,-5.13,0.23,-5.42,1.68,-6.22,1.84,-4.1,0.78,-0.56,-1.02,-3.81,-0.68,-113.09,162.58,-59.37,18.4,5.17,15.09,-12.18,9.03,-4.42,0.4,-5.51,1.44,-5.89,1.79,-3.71,0.43,-0.35,-0.94,-3.77,-0.47,-537.59,0.0,-130.03,-50.58,-45.11,-45.51,-44.85,-43.02,-44.33,-40.17,-33.5,-27.47,-35.83,-27.61,-31.05,-29.61,-24.42,-27.49,-31.34,-25.62,-2.15,-0.95,1.33,-0.56,0.2,-0.33,0.03,-0.52,-0.32,-0.14,0.12,0.03,-0.17,-0.05,-0.26,0.08,0.0,0.05,-0.05,-0.08,102.74,44.41,29.07,14.01,15.49,16.69,10.71,11.68,9.72,11.37,8.29,7.99,7.08,6.97,7.07,7.27,7.05,6.93,6.43,6.19
20,11.88,4.09,0.0,1.52,0.18,0.34,0.37,0.07,-0.02,0.03,0.25,0.13,0.08,0.04,-0.05,0.06,0.32,0.35,0.69,0.43,-6.42,177.52,77.09,88.49,63.75,58.96,42.6,43.25,26.63,39.85,31.51,32.32,31.28,32.22,24.53,27.89,25.16,32.53,37.03,38.72,-135.14,114.81,12.35,19.76,18.67,19.64,3.57,12.12,-2.29,8.84,-0.81,4.08,0.21,3.88,-0.24,0.39,-0.57,2.78,2.43,3.03,-129.96,118.39,12.15,17.8,19.3,19.55,3.73,12.41,-1.77,8.9,-1.03,3.96,0.17,3.82,-0.12,0.65,-0.37,2.52,1.84,2.86,-484.6,-6.12,-53.16,-18.94,-37.71,-19.12,-33.69,-18.64,-35.9,-23.49,-31.45,-21.81,-25.53,-24.74,-27.98,-31.9,-29.1,-25.42,-22.94,-29.73,-2.4,-1.43,-0.04,0.92,-0.31,0.1,-0.09,-0.12,-0.24,-0.07,0.22,0.1,0.02,-0.01,-0.01,-0.15,-0.13,0.15,0.49,0.16,54.85,22.19,15.86,13.82,12.82,9.33,8.71,8.33,8.36,7.88,7.71,6.5,6.85,6.93,7.02,6.98,6.81,7.52,7.1,7.03


statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,max,max,max,max,max,max,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,median,median,median,median,median,median,median,median,median,median,median,median,min,min,min,min,min,min,min,min,min,min,min,min,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,skew,std,std,std,std,std,std,std,std,std,std,std,std
number,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12,01,02,03,04,05,06,07,08,09,10,11,12
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2,Unnamed: 83_level_2,Unnamed: 84_level_2
2,7.18,5.23,0.25,1.35,1.48,0.53,1.48,2.69,0.87,1.34,1.35,1.24,0.69,0.57,0.6,0.63,0.57,0.44,0.49,0.5,0.57,0.58,0.62,0.59,0.47,0.37,0.24,0.23,0.22,0.22,0.23,0.25,0.2,0.18,0.2,0.32,0.48,0.39,0.25,0.24,0.23,0.23,0.23,0.25,0.2,0.17,0.2,0.31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.0,-1.81,-0.35,-0.44,-0.57,-0.44,-0.15,-0.66,0.1,0.57,0.36,-0.44,0.11,0.09,0.09,0.08,0.07,0.08,0.08,0.07,0.08,0.09,0.09,0.1
3,1.89,0.76,0.35,2.3,1.65,0.07,1.37,1.05,0.11,0.62,1.04,1.29,0.68,0.58,0.58,0.58,0.45,0.46,0.54,0.66,0.51,0.53,0.6,0.55,0.23,0.23,0.23,0.22,0.22,0.24,0.37,0.42,0.31,0.24,0.26,0.23,0.23,0.23,0.21,0.2,0.23,0.26,0.39,0.44,0.31,0.24,0.26,0.23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.64,0.49,0.79,1.27,-0.94,-0.89,-1.09,-0.86,-0.46,0.39,0.44,0.37,0.08,0.08,0.1,0.09,0.08,0.1,0.08,0.1,0.09,0.08,0.08,0.07
5,0.53,-0.08,-0.28,0.69,1.94,0.88,-0.92,-0.93,0.67,1.04,0.27,1.13,0.61,0.65,0.49,0.45,0.47,0.45,0.5,0.56,0.67,0.61,0.55,0.6,0.26,0.3,0.25,0.22,0.25,0.24,0.28,0.29,0.35,0.29,0.25,0.22,0.26,0.29,0.25,0.22,0.25,0.24,0.28,0.29,0.35,0.28,0.24,0.21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.26,0.05,-0.04,-0.17,-0.72,-0.58,-0.27,-0.07,-0.25,0.42,0.11,0.55,0.09,0.13,0.1,0.07,0.07,0.08,0.13,0.13,0.11,0.1,0.09,0.09
10,3.7,-0.29,2.2,-0.23,1.37,1.0,1.77,1.6,0.52,1.98,4.33,1.3,0.46,0.54,0.45,0.65,0.59,0.51,0.65,0.52,0.51,0.48,0.64,0.64,0.23,0.29,0.24,0.23,0.19,0.29,0.41,0.35,0.27,0.24,0.27,0.24,0.23,0.28,0.23,0.23,0.21,0.28,0.44,0.36,0.27,0.24,0.25,0.23,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,-0.29,0.0,0.36,0.21,-0.09,-0.06,-0.79,-0.89,-0.52,-0.49,1.08,0.55,0.05,0.1,0.06,0.12,0.08,0.07,0.08,0.07,0.1,0.08,0.07,0.09
20,-0.19,-0.2,0.2,0.26,0.78,0.08,-0.29,-0.82,0.04,-0.8,-0.99,-0.43,0.65,0.68,0.67,0.6,0.65,0.7,0.66,0.69,0.64,0.67,0.69,0.68,0.2,0.25,0.26,0.19,0.18,0.24,0.28,0.29,0.25,0.29,0.3,0.24,0.2,0.22,0.23,0.19,0.16,0.24,0.26,0.27,0.24,0.27,0.29,0.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.46,0.62,0.78,0.56,0.95,0.36,0.28,0.23,0.3,0.17,0.14,0.08,0.13,0.16,0.15,0.11,0.13,0.13,0.15,0.17,0.11,0.16,0.18,0.14


statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,median,median,median,median,median,median,min,min,min,min,min,min,skew,skew,skew,skew,skew,skew,std,std,std,std,std,std
number,01,02,03,04,05,06,01,02,03,04,05,06,01,02,03,04,05,06,01,02,03,04,05,06,01,02,03,04,05,06,01,02,03,04,05,06,01,02,03,04,05,06
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2
2,2.3,0.98,1.03,1.67,0.83,8.46,0.1,0.16,0.21,0.32,0.06,0.07,-0.0,0.02,0.01,0.07,0.01,0.02,-0.0,0.02,0.01,0.07,0.01,0.02,-0.06,-0.09,-0.19,-0.14,-0.05,-0.09,0.75,0.26,0.2,0.59,-0.18,-1.42,0.02,0.03,0.04,0.05,0.01,0.01
3,2.0,1.38,0.87,2.02,0.43,0.48,0.18,0.11,0.27,0.21,0.07,0.07,0.0,0.01,0.05,-0.03,0.02,-0.0,-0.0,0.01,0.05,-0.02,0.02,-0.0,-0.1,-0.08,-0.16,-0.3,-0.02,-0.06,0.27,-0.13,0.17,-0.99,0.57,0.56,0.03,0.02,0.05,0.06,0.01,0.02
5,10.77,0.92,-0.19,0.53,0.15,7.7,0.25,0.09,0.19,0.18,0.07,0.08,-0.01,-0.02,-0.03,0.02,0.0,-0.0,-0.01,-0.02,-0.02,0.02,0.0,-0.0,-0.13,-0.13,-0.36,-0.17,-0.04,-0.15,1.21,0.22,-0.42,-0.01,-0.2,-0.93,0.03,0.02,0.08,0.04,0.01,0.01
10,0.5,2.95,0.09,3.0,4.28,0.35,0.06,0.1,0.32,0.19,0.12,0.06,-0.02,-0.02,-0.0,-0.07,0.01,0.01,-0.02,-0.02,-0.01,-0.07,0.01,0.01,-0.11,-0.19,-0.27,-0.34,-0.05,-0.03,-0.14,-0.28,0.02,-1.09,1.16,0.25,0.02,0.03,0.09,0.07,0.02,0.01
20,1.11,4.17,0.25,0.16,0.52,0.46,0.17,0.19,0.35,0.29,0.1,0.07,0.01,0.01,-0.02,-0.08,0.01,-0.01,0.01,0.01,-0.03,-0.07,0.01,-0.01,-0.15,-0.21,-0.34,-0.39,-0.08,-0.09,0.19,-0.22,0.08,0.04,0.23,-0.21,0.03,0.04,0.11,0.1,0.02,0.02


statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,max,max,max,max,max,max,max,mean,mean,mean,mean,mean,mean,mean,median,median,median,median,median,median,median,min,min,min,min,min,min,min,skew,skew,skew,skew,skew,skew,skew,std,std,std,std,std,std,std
number,01,02,03,04,05,06,07,01,02,03,04,05,06,07,01,02,03,04,05,06,07,01,02,03,04,05,06,07,01,02,03,04,05,06,07,01,02,03,04,05,06,07,01,02,03,04,05,06,07
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2
2,2.27,0.45,0.76,0.08,0.01,7.25,3.26,50.83,41.39,39.33,31.51,33.35,47.27,54.69,18.01,15.36,17.13,17.16,18.09,17.62,38.27,17.58,15.03,16.84,16.99,17.82,17.26,39.83,2.67,2.3,3.39,3.75,5.48,9.66,10.45,0.89,0.49,0.61,0.37,0.43,1.69,-1.57,4.54,4.32,3.94,3.14,3.31,3.1,7.62
3,3.21,0.24,-0.01,-0.28,0.25,1.28,3.73,59.7,40.6,42.14,31.47,38.78,37.24,58.2,15.73,15.05,17.37,17.21,18.18,18.9,39.2,15.31,14.71,17.17,17.2,17.86,18.7,40.4,0.39,0.65,1.84,1.84,1.84,1.84,1.84,1.0,0.47,0.33,0.1,0.47,0.39,-1.46,4.43,4.52,4.63,3.75,4.09,3.35,7.61
5,1.48,0.36,0.46,0.08,-0.11,0.77,0.93,50.93,38.54,39.02,33.71,32.75,38.79,51.9,17.1,15.97,18.65,16.97,17.29,19.26,36.41,16.49,15.86,18.53,16.88,16.94,19.11,39.1,3.42,2.31,3.42,7.14,9.32,7.23,8.57,0.82,0.2,0.21,0.25,0.48,0.37,-1.27,4.94,4.38,4.26,3.19,3.1,3.09,8.49
10,2.24,0.74,1.61,0.46,-0.14,1.7,-0.4,58.62,37.43,45.41,28.71,33.74,46.3,52.13,19.18,14.28,15.51,16.54,20.32,18.97,34.89,18.23,13.76,15.09,16.4,20.16,17.75,36.59,4.17,4.42,6.24,3.98,10.01,9.76,11.21,1.11,0.73,0.85,0.37,0.19,1.26,-0.59,5.56,4.01,3.69,2.65,3.46,4.69,8.4
20,2.26,0.32,0.63,0.1,0.37,0.7,11.45,46.74,42.15,39.38,33.53,36.35,28.78,48.1,16.04,16.16,17.7,18.38,18.42,16.48,19.0,15.57,15.87,17.36,18.23,18.13,16.39,17.97,4.46,2.98,4.15,5.92,8.2,10.09,11.15,0.96,0.43,0.55,0.28,0.49,0.38,2.97,4.24,4.32,3.92,3.17,3.11,1.76,4.1


feature,spectral_centroid,spectral_centroid,spectral_centroid,spectral_centroid,spectral_centroid,spectral_centroid,spectral_centroid,spectral_bandwidth,spectral_bandwidth,spectral_bandwidth,spectral_bandwidth,spectral_bandwidth,spectral_bandwidth,spectral_bandwidth,spectral_rolloff,spectral_rolloff,spectral_rolloff,spectral_rolloff,spectral_rolloff,spectral_rolloff,spectral_rolloff
statistics,kurtosis,max,mean,median,min,skew,std,kurtosis,max,mean,median,min,skew,std,kurtosis,max,mean,median,min,skew,std
number,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01
track_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2,2.41,5514.05,1639.58,1503.5,0.0,1.08,719.77,3.87,3451.11,1607.47,1618.85,0.0,-0.88,436.81,0.84,9410.01,3267.8,3143.85,0.0,0.35,1300.73
3,3.52,6288.43,1763.01,1517.99,0.0,1.65,972.76,2.38,3469.18,1736.96,1686.77,0.0,0.46,486.66,2.38,10002.17,3514.62,3413.01,0.0,1.12,1650.36
5,1.32,5648.61,1292.96,1186.51,0.0,0.94,665.32,0.9,3492.74,1512.92,1591.52,0.0,-0.66,474.41,-0.24,9442.31,2773.93,2863.92,0.0,0.27,1323.47
10,9.73,5739.39,1360.03,1180.97,0.0,2.52,668.7,0.44,3962.7,1420.26,1301.81,0.0,0.88,604.89,3.62,10056.01,2603.49,2002.59,0.0,1.8,1524.4
20,2.18,5540.21,1732.97,1640.78,123.61,0.96,481.93,1.69,3556.88,2489.02,2467.1,677.7,-0.14,339.7,-0.74,9496.14,4201.35,4166.67,75.37,0.16,1495.3


feature,rmse,rmse,rmse,rmse,rmse,rmse,rmse,zcr,zcr,zcr,zcr,zcr,zcr,zcr
statistics,kurtosis,max,mean,median,min,skew,std,kurtosis,max,mean,median,min,skew,std
number,01,01,01,01,01,01,01,01,01,01,01,01,01,01
track_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3
2,2.5,14.75,3.19,2.65,0.0,1.57,2.54,5.76,0.46,0.09,0.07,0.0,2.09,0.06
3,-0.64,9.1,3.61,3.71,0.0,0.02,1.95,2.82,0.47,0.08,0.06,0.0,1.72,0.07
5,0.0,11.03,3.25,2.41,0.0,1.03,2.59,6.81,0.38,0.05,0.04,0.0,2.19,0.04
10,1.77,12.32,3.89,3.76,0.0,0.83,2.0,21.43,0.45,0.08,0.07,0.0,3.54,0.04
20,1.24,16.18,4.6,4.37,0.0,0.8,2.18,16.67,0.47,0.05,0.04,0.0,3.19,0.03


In [323]:
# Optional (disabled): build a profiling report of `features` and write it to
# an HTML file; the trailing loop would print every column label of `features`.
# NOTE(review): ProfileReport must be imported before re-enabling this cell
# (presumably from pandas-profiling) — confirm.
#featrep = ProfileReport(features)
#featrep.to_file(output_file='profiling/features-profiling.html')
#for col in features.columns: 
#    print(col)

## ECHONEST 

This dataset contains metrics about echonest songs. 

<b>Features</b>: acousticness, danceability, energy, instrumentalness, liveness, speechiness, tempo, valence

<b>echo_metrics</b> will be used for classification purposes. The label "genre_top" will be taken from tracks.csv and added to this dataset. 
All features are rescaled to the [0, 1] range with min-max normalization (the same transformation a MinMaxScaler applies).

In [324]:
# Pull the ('echonest', 'audio_features') column group out of the Echonest
# table as an independent copy, so later edits do not touch `echonest`.
echo_metrics = echonest[('echonest', 'audio_features')].copy()
# Preview the first rows
echo_metrics.head()

Unnamed: 0_level_0,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.416675,0.675894,0.634476,0.010628,0.177647,0.15931,165.922,0.576661
3,0.374408,0.528643,0.817461,0.001851,0.10588,0.461818,126.957,0.26924
5,0.043567,0.745566,0.70147,0.000697,0.373143,0.124595,100.26,0.621661
10,0.95167,0.658179,0.924525,0.965427,0.115474,0.032985,111.562,0.96359
134,0.452217,0.513238,0.56041,0.019443,0.096567,0.525519,114.29,0.894072


In [332]:
# Min-max normalization: subtract each column's minimum and divide by the
# column's (max - min) span.
echo_metrics = (
    echo_metrics
    .sub(echo_metrics.min())
    .div(echo_metrics.max() - echo_metrics.min())
)
echo_metrics.head()

Unnamed: 0_level_0,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,0.418434,0.680869,0.634492,0.010649,0.159523,0.145135,0.642706,0.576663
3,0.375988,0.520349,0.817487,0.001855,0.084377,0.465638,0.479206,0.269236
5,0.04375,0.756819,0.701489,0.000698,0.364224,0.108355,0.367184,0.621664
10,0.955687,0.661557,0.924557,0.967346,0.094422,0.011296,0.414608,0.963599
134,0.454126,0.503556,0.560421,0.019481,0.074625,0.533129,0.426055,0.89408


In [333]:
# Attach the `genre_top` label from the `track` column group of tracks,
# matching rows on `track_id`.
echo_metrics = pd.merge(
    echo_metrics,
    tracks["track"][["genre_top"]],
    on="track_id",
)
# Preview the merged frame
echo_metrics.head()

Unnamed: 0_level_0,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence,genre_top
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2,0.418434,0.680869,0.634492,0.010649,0.159523,0.145135,0.642706,0.576663,Hip-Hop
3,0.375988,0.520349,0.817487,0.001855,0.084377,0.465638,0.479206,0.269236,Hip-Hop
5,0.04375,0.756819,0.701489,0.000698,0.364224,0.108355,0.367184,0.621664,Hip-Hop
10,0.955687,0.661557,0.924557,0.967346,0.094422,0.011296,0.414608,0.963599,Pop
134,0.454126,0.503556,0.560421,0.019481,0.074625,0.533129,0.426055,0.89408,Hip-Hop


In [334]:
# Count the missing entries in each column
echo_metrics.isna().sum()

acousticness           0
danceability           0
energy                 0
instrumentalness       0
liveness               0
speechiness            0
tempo                  0
valence                0
genre_top           3774
dtype: int64

In [335]:
# Keep only fully populated rows: any row holding at least one NaN is dropped
echo_metrics = echo_metrics.dropna(axis="index", how="any")

In [336]:
# Frame dimensions as (rows, columns) after removing the rows with NaN values
echo_metrics.shape

(9355, 9)

In [337]:
# Create a correlation matrix of the audio features.
# `echo_metrics` also carries the non-numeric `genre_top` label; recent pandas
# versions raise on non-numeric columns in `corr()` instead of dropping them,
# so restrict the frame to numeric columns explicitly.
corr_metrics = echo_metrics.select_dtypes(include="number").corr()
# Render the matrix with a colour gradient for readability
corr_metrics.style.background_gradient()

Unnamed: 0,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence
acousticness,1.0,-0.178454,-0.46892,0.126698,0.060947,0.021417,-0.10559,-0.070763
danceability,-0.178454,1.0,0.015977,-0.123452,-0.144263,0.188797,-0.105448,0.443607
energy,-0.46892,0.015977,1.0,-0.003615,0.026049,-0.001357,0.230706,0.186517
instrumentalness,0.126698,-0.123452,-0.003615,1.0,-0.055617,-0.243555,0.025368,-0.144161
liveness,0.060947,-0.144263,0.026049,-0.055617,1.0,0.070916,-0.014862,-0.023683
speechiness,0.021417,0.188797,-0.001357,-0.243555,0.070916,1.0,0.035855,0.113319
tempo,-0.10559,-0.105448,0.230706,0.025368,-0.014862,0.035855,1.0,0.124182
valence,-0.070763,0.443607,0.186517,-0.144161,-0.023683,0.113319,0.124182,1.0


As the summary statistics below show, features can take values on very different scales: some are very small while others are very large. In that case, the average values of the features are not directly comparable. Variables measured on different scales do not contribute equally to the analysis and can introduce bias, so we need to apply feature scaling before comparing data points.

In [338]:
# Per-feature summary statistics (count, mean, std, min, quartiles, max)
echo_metrics.describe()

Unnamed: 0,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence
count,9355.0,9355.0,9355.0,9355.0,9355.0,9355.0,9355.0,9355.0
mean,0.536793,0.455889,0.539622,0.655731,0.174229,0.08307,0.463567,0.433223
std,0.387842,0.208042,0.283062,0.35793,0.169531,0.145945,0.14811,0.275766
min,0.0,0.0,0.0,0.0,0.0,0.000964,0.0,0.0
25%,0.106267,0.297652,0.318189,0.386345,0.080631,0.015893,0.34809,0.191425
50%,0.602851,0.445172,0.550833,0.848664,0.099382,0.029044,0.450929,0.408457
75%,0.934738,0.608557,0.786196,0.924762,0.204141,0.068721,0.561292,0.656774
max,1.0,0.99808,1.0,1.0,0.991245,1.0,0.995749,1.0


In [184]:
# Mean of every described (numeric) column over the Hip-Hop tracks, as a list
hiphop_metr = (
    echo_metrics
    .loc[echo_metrics['genre_top'] == 'Hip-Hop']
    .describe()
    .loc['mean']
    .tolist()
)
# Discard the entry at position 6.
# NOTE(review): with the column order shown earlier, position 6 is `tempo`,
# yet the saved output still lists 8 values — this cell was presumably last
# run against a frame with an extra numeric column; confirm the intent.
del hiphop_metr[6]
hiphop_metr

[0.4126485444160446,
 0.6203852793732959,
 0.5621141994863733,
 0.3502692678023077,
 0.19019850873780225,
 0.2547050767587912,
 0.589433497066044,
 0.446171577236532]

In [196]:
# Distinct top-level genre labels present in the frame, as a plain list
echo_metrics.loc[:, 'genre_top'].unique().tolist()

['Hip-Hop',
 'Pop',
 'Folk',
 'Jazz',
 'Rock',
 'Electronic',
 'International',
 'Blues',
 'Classical',
 'Old-Time / Historic',
 'Instrumental',
 'Experimental']

In [356]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Draw one filled radar (polar line) chart per top-level genre, showing the
# mean of every numeric audio feature over the tracks of that genre.
genr_lst = echo_metrics['genre_top'].unique().tolist()
colors = ["red", "green", "blue", "goldenrod", "magenta",'orange','purple','black','lightblue','coral','darkgreen','brown']

for idx, genre in enumerate(genr_lst):
    # Cycle through the palette so no genre is silently skipped when there
    # are more genres than colours.
    color = colors[idx % len(colors)]

    # Mean of each numeric feature for this genre (the `genre_top` label
    # itself is non-numeric and therefore excluded).
    genre_means = (
        echo_metrics
        .loc[echo_metrics['genre_top'] == genre]
        .select_dtypes(include='number')
        .mean()
    )
    metr = genre_means.tolist()
    # Take the axis labels from the same Series as the values so that every
    # label is guaranteed to match its value. The previous hard-coded list had
    # 'tempo' and 'speechiness' swapped relative to the column order, which
    # mislabelled those two axes.
    theta = genre_means.index.tolist()

    # r/theta are given as explicit lists; the frame argument is kept as in
    # the original call.
    fig = px.line_polar(echo_metrics, r=metr, theta=theta, line_close=True, title=genre, color_discrete_sequence=[color])
    fig.update_traces(fill='toself')
    # Centre the genre title above the chart
    fig.update_layout(
        title={
            'text': genre,
            'y': 0.9,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
        }
    )

    fig.show()