In [1]:
# Import necessary packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import sklearn

# Import Librosa for processing audio data
import librosa
import librosa.display
import IPython.display as ipd
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Create a shortcut direction for accessing data easily
general_path = '/Users/user/Documents/Đồ án tốt nghiệp/Music Data/fma_metadata'


In [3]:
# Import data for model
tracks = pd.read_csv(f'{general_path}/tracks.csv', header=2)
features = pd.read_csv(f'{general_path}/features.csv')
raw_tracks = pd.read_csv(f'{general_path}/raw_tracks.csv')


## Preprocessing track data

In [4]:
# Filter out data which are in medium track dataset
data = tracks.copy()
data = data[data['Unnamed: 32'] == 'medium']
data

Unnamed: 0,track_id,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52
1,3,0,2008-11-26 01:44:45,2009-01-05 00:00:00,,4,1,<p></p>,6073,,...,,1470,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,514,,4,,[],Electric Ave
9,134,0,2008-11-26 01:44:45,2009-01-05 00:00:00,,4,1,<p></p>,6073,,...,,1126,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,943,,5,,[],Street Music
11,136,1,2008-11-26 01:49:19,2009-01-07 00:00:00,,0,58,<p>A couple of unreleased mp3s from the fellas...,3331,,...,,1948,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1498,,0,,[],Peel Back The Mountain Sky
14,139,0,2008-11-26 01:49:57,2009-01-16 00:00:00,,1,60,"<p>A full ensamble of strings, drums, electron...",1304,,...,,702,en,Attribution-Noncommercial-No Derivative Works ...,582,,2,,[],CandyAss
54,181,0,2008-11-26 01:52:15,2007-04-13 00:00:00,,0,79,<p>This Human Ear Music reissue compiles a “Be...,1339,,...,,1736,en,Attribution-Noncommercial-No Derivative Works ...,1339,,1,,[],Gopacapulco
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106551,155297,0,2017-03-30 09:50:32,2017-03-30 00:00:00,,1,22935,<p>The stars will show you the way in this unp...,6196,,...,,1463,,Attribution-NonCommercial,1049,,9,,[],Nebula Reborn
106552,155298,0,2017-03-30 10:45:13,,,0,22936,,2454,,...,,706,,Attribution,590,,2,,[],An Idiot Abroad
106560,155306,0,2017-03-30 10:45:13,,,0,22936,,2454,,...,,497,,Attribution,435,,1,,[],Tiny Man
106561,155307,0,2017-03-30 12:51:05,2017-03-27 00:00:00,Scott Williams,0,22937,<p>Scott welcomes Russian revolutionary avant ...,1206,Scott Williams,...,,630,,Creative Commons Attribution-NonCommercial-NoD...,571,,1,,[],Kolka


In [6]:
# Drop unnecessary columns and change column names in track dataset
data1 = data.drop(data.iloc[:,1:-1], axis=1)
data1.rename(columns={'Unnamed: 52':'song'}, inplace=True)
data1

Unnamed: 0,track_id,song
1,3,Electric Ave
9,134,Street Music
11,136,Peel Back The Mountain Sky
14,139,CandyAss
54,181,Gopacapulco
...,...,...
106551,155297,Nebula Reborn
106552,155298,An Idiot Abroad
106560,155306,Tiny Man
106561,155307,Kolka


In [327]:
# Make a copied dataset and filter out necessary columns of raw_track dataset
cat_tracks = raw_tracks.copy()
cat_tracks = cat_tracks.loc[:, ['track_id','track_genres']]
cat_tracks

Unnamed: 0,track_id,track_genres
0,2,"[{'genre_id': '21', 'genre_title': 'Hip-Hop', ..."
1,3,"[{'genre_id': '21', 'genre_title': 'Hip-Hop', ..."
2,5,"[{'genre_id': '21', 'genre_title': 'Hip-Hop', ..."
3,10,"[{'genre_id': '10', 'genre_title': 'Pop', 'gen..."
4,20,"[{'genre_id': '76', 'genre_title': 'Experiment..."
...,...,...
109722,155316,"[{'genre_id': '25', 'genre_title': 'Punk', 'ge..."
109723,155317,"[{'genre_id': '25', 'genre_title': 'Punk', 'ge..."
109724,155318,"[{'genre_id': '25', 'genre_title': 'Punk', 'ge..."
109725,155319,"[{'genre_id': '25', 'genre_title': 'Punk', 'ge..."


In [328]:
# Inner join track and raw_track datasets into one with track_id is interception
genre_df = data1.merge(cat_tracks, how='inner', on='track_id')
genre_df

Unnamed: 0,track_id,song,track_genres
0,3,Electric Ave,"[{'genre_id': '21', 'genre_title': 'Hip-Hop', ..."
1,134,Street Music,"[{'genre_id': '21', 'genre_title': 'Hip-Hop', ..."
2,136,Peel Back The Mountain Sky,"[{'genre_id': '45', 'genre_title': 'Loud-Rock'..."
3,139,CandyAss,"[{'genre_id': '17', 'genre_title': 'Folk', 'ge..."
4,181,Gopacapulco,"[{'genre_id': '27', 'genre_title': 'Lo-Fi', 'g..."
...,...,...,...
16995,155297,Nebula Reborn,"[{'genre_id': '18', 'genre_title': 'Soundtrack..."
16996,155298,An Idiot Abroad,"[{'genre_id': '17', 'genre_title': 'Folk', 'ge..."
16997,155306,Tiny Man,"[{'genre_id': '17', 'genre_title': 'Folk', 'ge..."
16998,155307,Kolka,"[{'genre_id': '1', 'genre_title': 'Avant-Garde..."


In [None]:
# Write a function for taking the main genre for each song
def get_genre(df):
    df = df.copy()
    df_cat = pd.DataFrame(columns=['genres'])
    for i in range(len(df['track_genres'])):
        row = df['track_genres'][i]
        row_split = row.split(':')
        loc_row = row_split[2]
        loc_row = loc_row.split(',')
        loc_row_1 = loc_row[0]
        loc_row_2 = loc_row_1.replace(' ', '')
        loc_row_3 = loc_row_2.replace("'", '')
        df_cat = df_cat.append({'genres': loc_row_3}, ignore_index=True)
    
    return df_cat

In [329]:
# Apply the function and take the result
genre = get_genre(genre_df)
genre

Unnamed: 0,genres
0,Hip-Hop
1,Hip-Hop
2,Loud-Rock
3,Folk
4,Lo-Fi
...,...
16995,Soundtrack
16996,Folk
16997,Folk
16998,Avant-Garde


In [331]:
# Drop the track_genres column and replace the genres column for the dataset
genre_df = genre_df.drop('track_genres', axis=1)
genre_df = pd.concat([genre_df, genre], axis=1)
genre_df

Unnamed: 0,track_id,song,genres
0,3,Electric Ave,Hip-Hop
1,134,Street Music,Hip-Hop
2,136,Peel Back The Mountain Sky,Loud-Rock
3,139,CandyAss,Folk
4,181,Gopacapulco,Lo-Fi
...,...,...,...
16995,155297,Nebula Reborn,Soundtrack
16996,155298,An Idiot Abroad,Folk
16997,155306,Tiny Man,Folk
16998,155307,Kolka,Avant-Garde


## Preprocessing feature data

In [332]:
# Show out the first rows of feature dataset
features.head()

Unnamed: 0,feature,chroma_cens,chroma_cens.1,chroma_cens.2,chroma_cens.3,chroma_cens.4,chroma_cens.5,chroma_cens.6,chroma_cens.7,chroma_cens.8,...,tonnetz.39,tonnetz.40,tonnetz.41,zcr,zcr.1,zcr.2,zcr.3,zcr.4,zcr.5,zcr.6
0,statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,kurtosis,max,mean,median,min,skew,std
1,number,01,02,03,04,05,06,07,08,09,...,04,05,06,01,01,01,01,01,01,01
2,track_id,,,,,,,,,,...,,,,,,,,,,
3,2,7.1806526184e+00,5.2303090096e+00,2.4932080507e-01,1.3476201296e+00,1.4824777842e+00,5.3137123585e-01,1.4815930128e+00,2.6914546490e+00,8.6686819792e-01,...,5.4125156254e-02,1.2225749902e-02,1.2110591866e-02,5.7588901520e+00,4.5947265625e-01,8.5629448295e-02,7.1289062500e-02,0.0000000000e+00,2.0898721218e+00,6.1448108405e-02
4,3,1.8889633417e+00,7.6053929329e-01,3.4529656172e-01,2.2952005863e+00,1.6540306807e+00,6.7592434585e-02,1.3668476343e+00,1.0540937185e+00,1.0810308903e-01,...,6.3831120729e-02,1.4211839065e-02,1.7740072682e-02,2.8246941566e+00,4.6630859375e-01,8.4578499198e-02,6.3964843750e-02,0.0000000000e+00,1.7167237997e+00,6.9330163300e-02


In [335]:
# Make a list of column name and a copied feature dataset
feature_columns = features.columns[1:].tolist()
feature_df = features.copy()

# Change the column position from row 2 to column
header_row = 2
feature_df.columns = feature_df.iloc[header_row]

# Drop first 3 rows in feature dataset
feature_df.drop([0,1,2], axis=0, inplace=True)
feature_df

2,track_id,NaN,NaN.1,NaN.2,NaN.3,NaN.4,NaN.5,NaN.6,NaN.7,NaN.8,...,NaN.9,NaN.10,NaN.11,NaN.12,NaN.13,NaN.14,NaN.15,NaN.16,NaN.17,NaN.18
3,2,7.1806526184e+00,5.2303090096e+00,2.4932080507e-01,1.3476201296e+00,1.4824777842e+00,5.3137123585e-01,1.4815930128e+00,2.6914546490e+00,8.6686819792e-01,...,5.4125156254e-02,1.2225749902e-02,1.2110591866e-02,5.7588901520e+00,4.5947265625e-01,8.5629448295e-02,7.1289062500e-02,0.0000000000e+00,2.0898721218e+00,6.1448108405e-02
4,3,1.8889633417e+00,7.6053929329e-01,3.4529656172e-01,2.2952005863e+00,1.6540306807e+00,6.7592434585e-02,1.3668476343e+00,1.0540937185e+00,1.0810308903e-01,...,6.3831120729e-02,1.4211839065e-02,1.7740072682e-02,2.8246941566e+00,4.6630859375e-01,8.4578499198e-02,6.3964843750e-02,0.0000000000e+00,1.7167237997e+00,6.9330163300e-02
5,5,5.2756297588e-01,-7.7654317021e-02,-2.7961030602e-01,6.8588310480e-01,1.9375696182e+00,8.8083887100e-01,-9.2319184542e-01,-9.2723226547e-01,6.6661673784e-01,...,4.0730185807e-02,1.2690781616e-02,1.4759079553e-02,6.8084154129e+00,3.7500000000e-01,5.3114086390e-02,4.1503906250e-02,0.0000000000e+00,2.1933031082e+00,4.4860601425e-02
6,10,3.7022454739e+00,-2.9119303823e-01,2.1967420578e+00,-2.3444947600e-01,1.3673638105e+00,9.9841135740e-01,1.7706941366e+00,1.6045658588e+00,5.2121698856e-01,...,7.4357867241e-02,1.7951935530e-02,1.3921394013e-02,2.1434211731e+01,4.5214843750e-01,7.7514506876e-02,7.1777343750e-02,0.0000000000e+00,3.5423245430e+00,4.0800448507e-02
7,20,-1.9383698702e-01,-1.9852678478e-01,2.0154602826e-01,2.5855624676e-01,7.7520370483e-01,8.4794059396e-02,-2.8929358721e-01,-8.1641042233e-01,4.3850939721e-02,...,9.5002755523e-02,2.2492416203e-02,2.1355332807e-02,1.6669036865e+01,4.6972656250e-01,4.7224905342e-02,4.0039062500e-02,9.7656250000e-04,3.1898307800e+00,3.0992921442e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106572,155316,-0.490129,0.463834,2.32197,-0.0843522,1.66291,2.11519,-0.237794,5.69544,0.830353,...,0.12841,0.0225471,0.0198161,4.44826,0.172852,0.0287726,0.0283203,0.00390625,0.955388,0.0123847
106573,155317,-0.461559,-0.229601,-0.496632,-0.422033,0.130612,-0.263825,-0.628103,-0.0826865,-0.229483,...,0.132964,0.0235476,0.0265269,3.27061,0.196289,0.031116,0.027832,0.00244141,1.28306,0.0190591
106574,155318,0.552473,-0.110498,-0.532014,0.263131,-0.224011,-0.530972,1.71353,1.41844,1.3252,...,0.108324,0.0175401,0.0204714,2.35673,0.212891,0.0384498,0.0371094,0.00341797,0.828569,0.0179043
106575,155319,-0.176901,0.187208,-0.0506639,0.368843,0.0660052,-0.857354,-0.78086,0.626281,-0.630938,...,0.0883108,0.0183284,0.017936,6.1886,0.16748,0.04148,0.0380859,0.00488281,1.81874,0.0201334


In [337]:
# Merge feature dataset into song dataset
merge_df = genre_df.merge(feature_df, how='inner', on='track_id')
merge_df

Unnamed: 0,track_id,song,genres,NaN,NaN.1,NaN.2,NaN.3,NaN.4,NaN.5,NaN.6,...,NaN.7,NaN.8,NaN.9,NaN.10,NaN.11,NaN.12,NaN.13,NaN.14,NaN.15,NaN.16
0,1298,Weekend Warrior,Rock,0.120188,0.759824,0.90748,0.687974,-0.25387,-0.0629012,0.454025,...,0.0873343,0.0164366,0.0178857,-0.0897113,0.15332,0.0645135,0.0634766,0,-0.024148,0.0259038
1,1311,Feeling so good?,Singer-Songwriter,-1.00078,-1.05428,0.3269,-0.234172,5.78894,4.25101,-1.22807,...,0.1115,0.0252764,0.0229601,38.6622,0.458008,0.0321172,0.0239258,0,5.48004,0.0358097
2,1339,Emotional Accident,Electronic,-0.472263,-0.695443,-0.397396,-0.980368,0.0919193,-0.917704,-1.18374,...,0.112962,0.0232879,0.0260025,123.292,0.700684,0.0392488,0.0327148,0,10.0792,0.0428257
3,1354,Fools Want Noise,Rock,1.58101,0.0119988,0.999875,-0.0735124,-0.291198,-0.258575,0.399998,...,0.0585838,0.0140475,0.014702,27.9508,0.540527,0.0801584,0.0771484,0,3.44967,0.0350803
4,1356,Elevator Shaft,Rock,2.13244,0.858505,1.35707,1.79397,0.129197,0.214749,0.249027,...,0.0767455,0.0142849,0.0208437,-0.470869,0.171387,0.0693071,0.0649414,0,0.565537,0.0350647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16786,155297,Nebula Reborn,Soundtrack,-0.00766765,0.104604,-0.511487,0.857308,0.266202,-0.355186,-0.27364,...,0.141844,0.0240803,0.0310199,70.6048,0.850586,0.0340998,0.0209961,0.00195312,8.38092,0.0888278
16787,155298,An Idiot Abroad,Folk,-0.956019,-0.96121,-0.447053,-0.689531,-0.633071,-0.430116,-0.0311466,...,0.177286,0.028411,0.0256306,45.4458,0.303711,0.0240583,0.0195312,0,5.78619,0.0234283
16788,155306,Tiny Man,Folk,0.644815,-0.879404,-1.14923,-0.655986,-0.848881,-0.67224,0.193149,...,0.125615,0.0271096,0.0288151,17.0762,0.24707,0.0315423,0.0258789,0.00488281,3.17908,0.0220229
16789,155307,Kolka,Avant-Garde,-0.446937,1.235,-0.250855,-0.612224,-1.19739,-1.11823,0.71652,...,0.0998937,0.0191405,0.0330451,6.45891,0.344727,0.0691994,0.0634766,0.00195312,1.99608,0.0430976


In [339]:
# Filter out features of dataset
feature_columns = merge_df.iloc[,3:]

# Tag the old column names into the this new feature dataset
feature_medium.columns = feature_columns
feature_medium

Unnamed: 0,chroma_cens,chroma_cens.1,chroma_cens.2,chroma_cens.3,chroma_cens.4,chroma_cens.5,chroma_cens.6,chroma_cens.7,chroma_cens.8,chroma_cens.9,...,tonnetz.39,tonnetz.40,tonnetz.41,zcr,zcr.1,zcr.2,zcr.3,zcr.4,zcr.5,zcr.6
0,0.120188,0.759824,0.90748,0.687974,-0.25387,-0.0629012,0.454025,-0.446084,-0.367723,-0.359985,...,0.0873343,0.0164366,0.0178857,-0.0897113,0.15332,0.0645135,0.0634766,0,-0.024148,0.0259038
1,-1.00078,-1.05428,0.3269,-0.234172,5.78894,4.25101,-1.22807,-1.31655,-1.20199,-0.687852,...,0.1115,0.0252764,0.0229601,38.6622,0.458008,0.0321172,0.0239258,0,5.48004,0.0358097
2,-0.472263,-0.695443,-0.397396,-0.980368,0.0919193,-0.917704,-1.18374,1.16191,-1.15944,-1.03021,...,0.112962,0.0232879,0.0260025,123.292,0.700684,0.0392488,0.0327148,0,10.0792,0.0428257
3,1.58101,0.0119988,0.999875,-0.0735124,-0.291198,-0.258575,0.399998,0.821183,0.790184,3.04398,...,0.0585838,0.0140475,0.014702,27.9508,0.540527,0.0801584,0.0771484,0,3.44967,0.0350803
4,2.13244,0.858505,1.35707,1.79397,0.129197,0.214749,0.249027,2.18768,0.963136,0.233296,...,0.0767455,0.0142849,0.0208437,-0.470869,0.171387,0.0693071,0.0649414,0,0.565537,0.0350647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16786,-0.00766765,0.104604,-0.511487,0.857308,0.266202,-0.355186,-0.27364,-0.324727,-0.106368,-1.14616,...,0.141844,0.0240803,0.0310199,70.6048,0.850586,0.0340998,0.0209961,0.00195312,8.38092,0.0888278
16787,-0.956019,-0.96121,-0.447053,-0.689531,-0.633071,-0.430116,-0.0311466,-0.758652,-1.09345,0.0836936,...,0.177286,0.028411,0.0256306,45.4458,0.303711,0.0240583,0.0195312,0,5.78619,0.0234283
16788,0.644815,-0.879404,-1.14923,-0.655986,-0.848881,-0.67224,0.193149,0.0691027,-0.179058,-0.713318,...,0.125615,0.0271096,0.0288151,17.0762,0.24707,0.0315423,0.0258789,0.00488281,3.17908,0.0220229
16789,-0.446937,1.235,-0.250855,-0.612224,-1.19739,-1.11823,0.71652,-0.878211,-0.637801,0.188355,...,0.0998937,0.0191405,0.0330451,6.45891,0.344727,0.0691994,0.0634766,0.00195312,1.99608,0.0430976


In [340]:
# Function to change column names to the right format
def change_name(name):
    list_name = []
    
    # Filter out similar feature names
    for i in range(len(feature_columns)):
        if name in feature_columns[i]:
            list_name.append(feature_columns[i])
            
    # Change names
    number_variable = len(list_name) / 7
    new_name = []
    n = 0
    n1 = 0
    n2 = 0
    n3 = 0
    n4 = 0
    n5 = 0
    
    # If these feature has a few stuff
    if len(list_name) < 8:
        for i in range(len(list_name)):
            if i == 0:
                new_name.append('{}_{}'.format(name, 'kurtosis'))
            elif i == 1:
                new_name.append('{}_{}'.format(name, 'max'))
            elif i == 2:
                new_name.append('{}_{}'.format(name, 'mean'))
            elif i == 3:
                new_name.append('{}_{}'.format(name, 'median'))
            elif i == 4:
                new_name.append('{}_{}'.format(name, 'min'))
            elif i == 5:
                new_name.append('{}_{}'.format(name, 'skew'))
            elif i == 6:
                new_name.append('{}_{}'.format(name, 'std'))
    
    # If these feature has many stuff
    else:
        for j in range(len(list_name)):
            if len(new_name) < number_variable:
                new_name.append('{}_{}_{}'.format(name, j, 'kurtosis'))
            elif len(new_name) < number_variable*2:
                new_name.append('{}_{}_{}'.format(name, j - (j-n), 'max'))
                n += 1
            elif len(new_name) < number_variable*3:
                new_name.append('{}_{}_{}'.format(name, j - (j-n1), 'mean'))
                n1 += 1
            elif len(new_name) < number_variable*4:
                new_name.append('{}_{}_{}'.format(name, j - (j-n2), 'median'))
                n2 += 1
            elif len(new_name) < number_variable*5:
                new_name.append('{}_{}_{}'.format(name, j - (j-n3), 'min'))
                n3 += 1
            elif len(new_name) < number_variable*6:
                new_name.append('{}_{}_{}'.format(name, j - (j-n4), 'skew'))
                n4 += 1
            elif len(new_name) < number_variable*7:
                new_name.append('{}_{}_{}'.format(name, j - (j-n5), 'std'))
                n5 += 1
    
    return new_name
    

In [341]:
# Apply function to get new names for columns
chroma_cens = change_name('chroma_cens')
chroma_cqt = change_name('chroma_cqt')
chroma_stft = change_name('chroma_stft')
mfcc = change_name('mfcc')
rmse = change_name('rmse')
spectral_bandwidth = change_name('spectral_bandwidth')
spectral_centroid = change_name('spectral_centroid')
spectral_contrast = change_name('spectral_contrast')
spectral_rolloff = change_name('spectral_rolloff')
tonnetz = change_name('tonnetz')
zcr = change_name('zcr')


In [342]:
# Combine all new column names for feature dataset
completed_column_name = chroma_cens + chroma_cqt + chroma_stft + mfcc + rmse + spectral_bandwidth + spectral_centroid + spectral_contrast + spectral_rolloff + tonnetz + zcr

In [344]:
# Change recent column name into a new column name and combine into origin feature dataset
feature_medium.columns = completed_column_name
completed_df = pd.concat([merge_df.iloc[:,:3],feature_medium], axis=1)
completed_df

Unnamed: 0,track_id,song,genres,chroma_cens_0_kurtosis,chroma_cens_1_kurtosis,chroma_cens_2_kurtosis,chroma_cens_3_kurtosis,chroma_cens_4_kurtosis,chroma_cens_5_kurtosis,chroma_cens_6_kurtosis,...,tonnetz_3_std,tonnetz_4_std,tonnetz_5_std,zcr_kurtosis,zcr_max,zcr_mean,zcr_median,zcr_min,zcr_skew,zcr_std
0,1298,Weekend Warrior,Rock,0.120188,0.759824,0.90748,0.687974,-0.25387,-0.0629012,0.454025,...,0.0873343,0.0164366,0.0178857,-0.0897113,0.15332,0.0645135,0.0634766,0,-0.024148,0.0259038
1,1311,Feeling so good?,Singer-Songwriter,-1.00078,-1.05428,0.3269,-0.234172,5.78894,4.25101,-1.22807,...,0.1115,0.0252764,0.0229601,38.6622,0.458008,0.0321172,0.0239258,0,5.48004,0.0358097
2,1339,Emotional Accident,Electronic,-0.472263,-0.695443,-0.397396,-0.980368,0.0919193,-0.917704,-1.18374,...,0.112962,0.0232879,0.0260025,123.292,0.700684,0.0392488,0.0327148,0,10.0792,0.0428257
3,1354,Fools Want Noise,Rock,1.58101,0.0119988,0.999875,-0.0735124,-0.291198,-0.258575,0.399998,...,0.0585838,0.0140475,0.014702,27.9508,0.540527,0.0801584,0.0771484,0,3.44967,0.0350803
4,1356,Elevator Shaft,Rock,2.13244,0.858505,1.35707,1.79397,0.129197,0.214749,0.249027,...,0.0767455,0.0142849,0.0208437,-0.470869,0.171387,0.0693071,0.0649414,0,0.565537,0.0350647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16786,155297,Nebula Reborn,Soundtrack,-0.00766765,0.104604,-0.511487,0.857308,0.266202,-0.355186,-0.27364,...,0.141844,0.0240803,0.0310199,70.6048,0.850586,0.0340998,0.0209961,0.00195312,8.38092,0.0888278
16787,155298,An Idiot Abroad,Folk,-0.956019,-0.96121,-0.447053,-0.689531,-0.633071,-0.430116,-0.0311466,...,0.177286,0.028411,0.0256306,45.4458,0.303711,0.0240583,0.0195312,0,5.78619,0.0234283
16788,155306,Tiny Man,Folk,0.644815,-0.879404,-1.14923,-0.655986,-0.848881,-0.67224,0.193149,...,0.125615,0.0271096,0.0288151,17.0762,0.24707,0.0315423,0.0258789,0.00488281,3.17908,0.0220229
16789,155307,Kolka,Avant-Garde,-0.446937,1.235,-0.250855,-0.612224,-1.19739,-1.11823,0.71652,...,0.0998937,0.0191405,0.0330451,6.45891,0.344727,0.0691994,0.0634766,0.00195312,1.99608,0.0430976


In [345]:
# Save FMA dataframe into a csv file
completed_df.to_csv('FMA_dataset.csv', index=False)