In [1]:
import librosa
import pandas as pd
import numpy as np
import os
import pathlib
import csv
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import warnings
warnings.filterwarnings('ignore')
import utils
import fma

In [2]:
def build_dataset(tracks, electronic, experimental, folk, hiphop, instrumental, international, pop, rock):
    # Read in genre counts
    genre_counts = {"Electronic": electronic, 
                "Experimental": experimental,
                "Folk": folk,
                "Hip-Hop": hiphop,
                "Instrumental": instrumental,
                "International": international,
                "Pop": pop,
                "Rock": rock
               }
    
    # Build list of tracks to delete
    cnt = np.arange(8000)
    indeces = tracks.index
    delete_indeces = []
    for i in cnt:
        track = tracks.iloc[i, :]
        genre = track["genre_top"]
        if genre_counts[genre] > 0:
            genre_counts[genre] = genre_counts[genre] - 1
        else:
            delete_indeces.append(i)
            
    # Build trimmed copy
    indeces = indeces.delete(delete_indeces)
    new_tracks = tracks.loc[indeces, :]
    genre_check(new_tracks)
    return new_tracks

In [7]:
def extract_features(tracks, features, chroma, spectral_centroid, spectral_bandwith, spectral_rolloff, zero_crossing_rate, mfcc):
    mask = np.array([chroma, spectral_centroid, spectral_bandwith, spectral_rolloff, zero_crossing_rate], dtype=bool)

    if mfcc:
        mfcc_mask = np.ones(20, dtype=bool)
    else:
        mfcc_mask = np.zeroes(20, dtype=bool)
        
    mask = np.concatenate((mask, mfcc_mask))
    new_features = features.loc[tracks.index, mask]
    return new_features

In [27]:
new_features = extract_features(new_tracks, features, True, True, True, True, True, True)

KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Int64Index([  8056,   8208,   8256,   8259,   8261,\n            ...\n            129407, 129816, 129874, 129875, 129876],\n           dtype='int64', name='track_id', length=2267). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"

In [3]:
def genre_check(data):
    genres = ["Electronic", "Experimental", "Folk", "Hip-Hop", "Instrumental", "International", "Pop", "Rock"]
    count = len(data)
    print(f"Total: {count}")
    
    for i in genres:
        count = len(data.loc[data["genre_top"] == i, :])
        print(f"{i}: {count}")

In [29]:
# Load track metadata
full = fma.load('fma/tracks.csv')
small = full[full['set', 'subset'] <= 'small']
artists = small['artist']
tracks = small['track']
tracks = tracks.loc[]
indeces = tracks.index

In [None]:
tracks.loc[]

In [30]:
features = pd.read_csv('fma/fma_small_features.csv')
features.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,000002.mp3,0.396907,0.145195,3037.644725,2690.110827,6214.647549,0.163695,-67.384201,65.155075,-10.669196,...,3.407173,-3.01447,8.196856,-3.306396,1.253358,-8.481487,-2.040749,-2.946624,0.094076,Hip-Hop
1,000005.mp3,0.424453,0.14883,2414.48564,2598.615712,5437.108789,0.100085,-106.55027,87.232758,12.141687,...,8.863916,3.438966,7.30151,0.219637,0.931891,-4.12328,-0.581884,-1.606464,1.342016,Hip-Hop
2,000010.mp3,0.397668,0.188251,2349.559292,2051.924534,4315.856035,0.148674,-17.238174,94.442032,-48.614548,...,2.23994,-6.41643,2.677109,-5.104513,-0.911099,-3.207996,3.043154,-1.036511,3.882601,Pop
3,000140.mp3,0.295099,0.069218,1746.32228,2484.493384,3863.175065,0.044322,-278.575378,101.530739,36.427238,...,-1.6147,-6.83642,2.879915,-8.271725,1.239001,-5.205992,-0.754437,-6.319377,-1.172379,Folk
4,000141.mp3,0.262042,0.101358,1654.237111,2036.500245,3315.637916,0.061836,-208.56926,116.633934,-1.723711,...,-3.396803,-4.81087,3.130367,-2.126665,5.053833,-0.17009,3.087436,-8.665928,-1.098168,Folk


In [32]:
#features.index = tracks.index
len(features.index)

7997

In [23]:
w = interactive(build_dataset, {'manual': True}, tracks=fixed(tracks),
                electronic=widgets.IntSlider(min=0, max=1000),
                experimental=widgets.IntSlider(min=0, max=1000),
                folk=widgets.IntSlider(min=0, max=1000),
                hiphop=widgets.IntSlider(min=0, max=1000),
                instrumental=widgets.IntSlider(min=0, max=1000),
                international=widgets.IntSlider(min=0, max=1000),
                pop=widgets.IntSlider(min=0, max=1000),
                rock=widgets.IntSlider(min=0, max=1000));
w.children[8].description="Build dataset"
display(w)

interactive(children=(IntSlider(value=0, description='electronic', max=1000), IntSlider(value=0, description='…

In [26]:
new_tracks = w.result
new_tracks

Unnamed: 0_level_0,bit_rate,comments,composer,date_created,date_recorded,duration,favorites,genre_top,genres,genres_all,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
10,192000,0,Kurt Vile,2008-11-25 17:49:06,2008-11-26,161,178,Pop,[10],[10],,54881,en,Attribution-NonCommercial-NoDerivatives (aka M...,50135,,1,,[],Freeway
140,128000,0,,2008-11-26 01:44:07,2008-11-26,253,5,Folk,[17],[17],,1593,en,Attribution-Noncommercial-No Derivative Works ...,1299,,2,,[],Queen Of The Wires
141,128000,0,,2008-11-26 01:44:10,2008-11-26,182,1,Folk,[17],[17],,839,en,Attribution-Noncommercial-No Derivative Works ...,725,,4,,[],Ohio
148,256000,0,,2008-11-26 01:44:28,2002-08-01,138,1,Experimental,[1],"[1, 38]",,2437,en,Attribution-Noncommercial-Share Alike 3.0 Unit...,1831,,2,,[],Blackout 2
182,256000,0,,2008-11-26 01:46:34,2008-11-26,228,19,Rock,[27],"[27, 12]",,5990,en,Attribution-Noncommercial-No Derivative Works ...,5547,,3,,[],Jules Lost His Jewels
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129407,192000,0,,2015-12-16 13:49:13,NaT,440,5,International,[602],"[602, 2, 79]",,10159,,Creative Commons Attribution-NonCommercial-NoD...,8008,,8,,[],Mandingo Tribe
129816,320000,0,,2015-12-29 16:56:06,NaT,222,12,International,[77],"[2, 77]",,27950,,Creative Commons Attribution-NonCommercial-NoD...,17616,,1,,[],Fam Martinia Dou
129874,320000,0,,2015-12-31 09:00:12,NaT,274,0,International,[79],"[2, 79]",,765,,Creative Commons Attribution-NonCommercial-NoD...,572,,1,,[],I
129875,320000,0,,2015-12-31 09:00:14,NaT,436,1,International,[79],"[2, 79]",,623,,Creative Commons Attribution-NonCommercial-NoD...,439,,2,,[],Enchant Down Babylon


In [15]:
w.children

(IntSlider(value=0, description='electronic', max=1000),
 IntSlider(value=0, description='experimental', max=1000),
 IntSlider(value=0, description='folk', max=1000),
 IntSlider(value=0, description='hiphop', max=1000),
 IntSlider(value=0, description='instrumental', max=1000),
 IntSlider(value=0, description='international', max=1000),
 IntSlider(value=0, description='pop', max=1000),
 IntSlider(value=0, description='rock', max=1000),
 Button(description='Run Interact', style=ButtonStyle()),
 Output())

In [8]:
z = interactive(build_dataset, {'manual': True}, tracks=fixed(tracks),
                electronic=widgets.IntSlider(min=0, max=1000),
                experimental=widgets.IntSlider(min=0, max=1000),
                folk=widgets.IntSlider(min=0, max=1000),
                hiphop=widgets.IntSlider(min=0, max=1000),
                instrumental=widgets.IntSlider(min=0, max=1000),
                international=widgets.IntSlider(min=0, max=1000),
                pop=widgets.IntSlider(min=0, max=1000),
                rock=widgets.IntSlider(min=0, max=1000));
w.children[8].description="Build dataset"
display(w)

interactive(children=(IntSlider(value=0, description='electronic', max=1000), IntSlider(value=0, description='…