In [72]:
import pandas as pd
import numpy as np
import librosa
import librosa.display
from python_speech_features import sigproc
from scipy.fftpack import dct
import os
from pymongo import MongoClient

In [73]:
# function to read audio files
def read_fma_audio(path):
    audio_files= []
    for file in os.listdir(path):
        if file.endswith(".mp3") or file.endswith(".wav"):
            file_path = os.path.join(path, file)
            try:  
                # read audio file with no sampling rate
                data, sampling_rate= librosa.load(file_path, sr=None)
                duration= librosa.get_duration(y=data, sr=sampling_rate)
                # append information to the list
                audio_files.append({
                    'file_path': file_path,
                    'duration': duration,
                    'format': file.split('.')[-1]
                })
            except Exception as e:
                print(f"Failed to process {file_path}: {e}")
    return audio_files

In [74]:
dataset_path= 'sampled_audio_1gb'
audios= read_fma_audio(dataset_path)

In [75]:
audio_df= pd.DataFrame(audios)
audio_df.head()

Unnamed: 0,file_path,duration,format
0,sampled_audio_1gb/001122.mp3,30.002698,mp3
1,sampled_audio_1gb/001265.mp3,30.002698,mp3
2,sampled_audio_1gb/000498.mp3,29.976576,mp3
3,sampled_audio_1gb/000909.mp3,29.976576,mp3
4,sampled_audio_1gb/000906.mp3,29.976576,mp3


## MetaData ##

In [76]:
# file paths
tracks_file= 'fma_metadata/tracks.csv'
genres_file= 'fma_metadata/genres.csv'
features_file= 'fma_metadata/features.csv'
echonest_file= 'fma_metadata/echonest.csv'
# load csv files
tracks_df= pd.read_csv(tracks_file, index_col= 0, header= [0, 1])
genres_df= pd.read_csv(genres_file)
features_df= pd.read_csv(features_file, index_col= 0, header= [0, 1, 2])
echonest_df= pd.read_csv(echonest_file, index_col= 0, header= [0, 1, 2])


In [77]:
tracks_df.head()

Unnamed: 0_level_0,album,album,album,album,album,album,album,album,album,album,...,track,track,track,track,track,track,track,track,track,track
Unnamed: 0_level_1,comments,date_created,date_released,engineer,favorites,id,information,listens,producer,tags,...,information,interest,language_code,license,listens,lyricist,number,publisher,tags,title
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,0,2008-11-26 01:44:45,2009-01-05 00:00:00,,4,1,<p></p>,6073,,[],...,,4656,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1293,,3,,[],Food
3,0,2008-11-26 01:44:45,2009-01-05 00:00:00,,4,1,<p></p>,6073,,[],...,,1470,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,514,,4,,[],Electric Ave
5,0,2008-11-26 01:44:45,2009-01-05 00:00:00,,4,1,<p></p>,6073,,[],...,,1933,en,Attribution-NonCommercial-ShareAlike 3.0 Inter...,1151,,6,,[],This World
10,0,2008-11-26 01:45:08,2008-02-06 00:00:00,,4,6,,47632,,[],...,,54881,en,Attribution-NonCommercial-NoDerivatives (aka M...,50135,,1,,[],Freeway
20,0,2008-11-26 01:45:05,2009-01-06 00:00:00,,2,4,"<p> ""spiritual songs"" from Nicky Cook</p>",2710,,[],...,,978,en,Attribution-NonCommercial-NoDerivatives (aka M...,361,,3,,[],Spiritual Level


In [78]:
genres_df.head()

Unnamed: 0,genre_id,#tracks,parent,title,top_level
0,1,8693,38,Avant-Garde,38
1,2,5271,0,International,2
2,3,1752,0,Blues,3
3,4,4126,0,Jazz,4
4,5,4106,0,Classical,5


In [79]:
features_df.head()  

feature,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,...,tonnetz,tonnetz,tonnetz,zcr,zcr,zcr,zcr,zcr,zcr,zcr
statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,kurtosis,max,mean,median,min,skew,std
number,01,02,03,04,05,06,07,08,09,10,...,04,05,06,01,01,01,01,01,01,01
track_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2,7.180653,5.230309,0.249321,1.34762,1.482478,0.531371,1.481593,2.691455,0.866868,1.341231,...,0.054125,0.012226,0.012111,5.75889,0.459473,0.085629,0.071289,0.0,2.089872,0.061448
3,1.888963,0.760539,0.345297,2.295201,1.654031,0.067592,1.366848,1.054094,0.108103,0.619185,...,0.063831,0.014212,0.01774,2.824694,0.466309,0.084578,0.063965,0.0,1.716724,0.06933
5,0.527563,-0.077654,-0.27961,0.685883,1.93757,0.880839,-0.923192,-0.927232,0.666617,1.038546,...,0.04073,0.012691,0.014759,6.808415,0.375,0.053114,0.041504,0.0,2.193303,0.044861
10,3.702245,-0.291193,2.196742,-0.234449,1.367364,0.998411,1.770694,1.604566,0.521217,1.982386,...,0.074358,0.017952,0.013921,21.434212,0.452148,0.077515,0.071777,0.0,3.542325,0.0408
20,-0.193837,-0.198527,0.201546,0.258556,0.775204,0.084794,-0.289294,-0.81641,0.043851,-0.804761,...,0.095003,0.022492,0.021355,16.669037,0.469727,0.047225,0.040039,0.000977,3.189831,0.030993


In [80]:
echonest_df.head()

Unnamed: 0_level_0,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest,echonest
Unnamed: 0_level_1,audio_features,audio_features,audio_features,audio_features,audio_features,audio_features,audio_features,audio_features,metadata,metadata,...,temporal_features,temporal_features,temporal_features,temporal_features,temporal_features,temporal_features,temporal_features,temporal_features,temporal_features,temporal_features
Unnamed: 0_level_2,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence,album_date,album_name,...,214,215,216,217,218,219,220,221,222,223
track_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2,0.416675,0.675894,0.634476,0.010628,0.177647,0.15931,165.922,0.576661,,,...,-1.992303,6.805694,0.23307,0.19288,0.027455,0.06408,3.67696,3.61288,13.31669,262.929749
3,0.374408,0.528643,0.817461,0.001851,0.10588,0.461818,126.957,0.26924,,,...,-1.582331,8.889308,0.258464,0.220905,0.081368,0.06413,6.08277,6.01864,16.673548,325.581085
5,0.043567,0.745566,0.70147,0.000697,0.373143,0.124595,100.26,0.621661,,,...,-2.288358,11.527109,0.256821,0.23782,0.060122,0.06014,5.92649,5.86635,16.013849,356.755737
10,0.95167,0.658179,0.924525,0.965427,0.115474,0.032985,111.562,0.96359,2008-03-11,Constant Hitmaker,...,-3.662988,21.508228,0.283352,0.26707,0.125704,0.08082,8.41401,8.33319,21.317064,483.403809
134,0.452217,0.513238,0.56041,0.019443,0.096567,0.525519,114.29,0.894072,,,...,-1.452696,2.356398,0.234686,0.19955,0.149332,0.0644,11.26707,11.20267,26.45418,751.147705


In [81]:
# extracting features from 'track' category
tracks_information= tracks_df[[
    ('track', 'title'),
    ('track', 'genre_top'),
    ('track', 'listens'), 
    ('track', 'favorites'),
    ('album', 'title'),
    ('album', 'listens'),
    ('artist', 'name')
]]
tracks_information.reset_index(inplace= True)
tracks_information.head()

Unnamed: 0_level_0,track_id,track,track,track,track,album,album,artist
Unnamed: 0_level_1,Unnamed: 1_level_1,title,genre_top,listens,favorites,title,listens,name
0,2,Food,Hip-Hop,1293,2,AWOL - A Way Of Life,6073,AWOL
1,3,Electric Ave,Hip-Hop,514,1,AWOL - A Way Of Life,6073,AWOL
2,5,This World,Hip-Hop,1151,6,AWOL - A Way Of Life,6073,AWOL
3,10,Freeway,Pop,50135,178,Constant Hitmaker,47632,Kurt Vile
4,20,Spiritual Level,,361,0,Niris,2710,Nicky Cook


In [82]:
tracks_information['trackID']= tracks_information['track_id'].apply(lambda x: f'{x:06d}')
audio_df['extracted_track_id']= audio_df['file_path'].apply(lambda x: os.path.basename(x).split('.')[0])
tracks_information.columns= ['_'.join(col).strip() if col[1] else col[0] for col in tracks_information.columns.values]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tracks_information['trackID']= tracks_information['track_id'].apply(lambda x: f'{x:06d}')


In [83]:
tracks_information.head()

Unnamed: 0,track_id,track_title,track_genre_top,track_listens,track_favorites,album_title,album_listens,artist_name,trackID
0,2,Food,Hip-Hop,1293,2,AWOL - A Way Of Life,6073,AWOL,2
1,3,Electric Ave,Hip-Hop,514,1,AWOL - A Way Of Life,6073,AWOL,3
2,5,This World,Hip-Hop,1151,6,AWOL - A Way Of Life,6073,AWOL,5
3,10,Freeway,Pop,50135,178,Constant Hitmaker,47632,Kurt Vile,10
4,20,Spiritual Level,,361,0,Niris,2710,Nicky Cook,20


In [84]:
tracks_information.columns

Index(['track_id', 'track_title', 'track_genre_top', 'track_listens',
       'track_favorites', 'album_title', 'album_listens', 'artist_name',
       'trackID'],
      dtype='object')

In [85]:
# merge dataframes based on IDs
music_features= pd.merge(tracks_information, audio_df, left_on= 'trackID', right_on= 'extracted_track_id', how= 'left')

In [86]:
music_features.head()

Unnamed: 0,track_id,track_title,track_genre_top,track_listens,track_favorites,album_title,album_listens,artist_name,trackID,file_path,duration,format,extracted_track_id
0,2,Food,Hip-Hop,1293,2,AWOL - A Way Of Life,6073,AWOL,2,sampled_audio_1gb/000002.mp3,29.976576,mp3,2
1,3,Electric Ave,Hip-Hop,514,1,AWOL - A Way Of Life,6073,AWOL,3,sampled_audio_1gb/000003.mp3,30.002698,mp3,3
2,5,This World,Hip-Hop,1151,6,AWOL - A Way Of Life,6073,AWOL,5,sampled_audio_1gb/000005.mp3,30.002698,mp3,5
3,10,Freeway,Pop,50135,178,Constant Hitmaker,47632,Kurt Vile,10,sampled_audio_1gb/000010.mp3,29.976576,mp3,10
4,20,Spiritual Level,,361,0,Niris,2710,Nicky Cook,20,sampled_audio_1gb/000020.mp3,29.976576,mp3,20


In [87]:
music_features_filtered= music_features.dropna(subset= ['file_path'])
music_features_filtered.drop(columns= ['format', 'duration', 'extracted_track_id'], inplace= True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  music_features_filtered.drop(columns= ['format', 'duration', 'extracted_track_id'], inplace= True)


In [88]:
music_features_filtered.head()

Unnamed: 0,track_id,track_title,track_genre_top,track_listens,track_favorites,album_title,album_listens,artist_name,trackID,file_path
0,2,Food,Hip-Hop,1293,2,AWOL - A Way Of Life,6073,AWOL,2,sampled_audio_1gb/000002.mp3
1,3,Electric Ave,Hip-Hop,514,1,AWOL - A Way Of Life,6073,AWOL,3,sampled_audio_1gb/000003.mp3
2,5,This World,Hip-Hop,1151,6,AWOL - A Way Of Life,6073,AWOL,5,sampled_audio_1gb/000005.mp3
3,10,Freeway,Pop,50135,178,Constant Hitmaker,47632,Kurt Vile,10,sampled_audio_1gb/000010.mp3
4,20,Spiritual Level,,361,0,Niris,2710,Nicky Cook,20,sampled_audio_1gb/000020.mp3


## Feature Extraction ##

In [89]:
def extract_features(file_path):
    try:
        y, sr= librosa.load(file_path, sr =None)
        mfccs= librosa.feature.mfcc(y= y, sr= sr, n_mfcc= 13)
        mfccs_mean= np.mean(mfccs, axis= 1)
        mfccs_std= np.std(mfccs, axis= 1)
        return mfccs_mean, mfccs_std
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None, None

mfcc_data= music_features_filtered['file_path'].apply(extract_features)

In [90]:
music_features_filtered['mfcc_features']= mfcc_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  music_features_filtered['mfcc_features']= mfcc_data


In [91]:
def calculate_nfft(sample_rate, window_len):
    # sample rate of the signal
    # window length in seconds
    window_len_samples= window_len * sample_rate
    nfft= 1
    while nfft < window_len_samples:
        # calculates nfft as a power of 2
        nfft *= 2
    return nfft
# this function converts Hertz to Mels
def hz2mel(hz):
    return 2595 * np.log10(1+hz/700.)
# this function converts Mels to Hertz
def mel2hz(mel):
    return 700*(10**(mel/2595.0)-1)
# this function computes the mel filterbanks
def get_filterbanks(nfilter= 20, nfft= 512, sample_rate= 16000,low_frequency= 0, high_frequency= None):
    # if high frequency is not provided, it is set to half the sample rate
    high_frequency= high_frequency or sample_rate / 2
    assert high_frequency <= sample_rate / 2, "high_freqeuncy is greater than samplerate/2"
    # compute points in mels
    low_mel= hz2mel(low_frequency)
    high_mel= hz2mel(high_frequency)
    mel_points= np.linspace(low_mel, high_mel, nfilter + 2)
    bin_num= np.floor((nfft + 1) * mel2hz(mel_points) / sample_rate)
    # filterbank matrix to store the filterbanks
    fbank= np.zeros([nfilter, nfft//2+1])
    for j in range(0, nfilter):
        for i in range(int(bin_num[j]), int(bin_num[j+1])):
            fbank[j,i]= (i - bin_num[j]) / (bin_num[j+1]-bin_num[j])
        for i in range(int(bin_num[j+1]), int(bin_num[j+2])):
            fbank[j,i]= (bin_num[j+2]-i) / (bin_num[j+2]-bin_num[j+1])
    return fbank
# computation of mel-filterbank energy features from an audio signal
def fbank(signal, sample_rate= 16000, window_len= 0.025, hop_step= 0.01, nfilter= 26, nfft= 512, low_frequency= 0, high_frequency= None, pre_emphasis= 0.97, window_func= lambda x: np.ones((x,))):
    high_frequency= high_frequency or sample_rate / 2
    signal= sigproc.preemphasis(signal, pre_emphasis)
    frames= sigproc.framesig(signal, window_len * sample_rate, hop_step * sample_rate, window_func)
    pspec= sigproc.powspec(frames, nfft)
    energy= np.sum(pspec, 1)  # total energy in each frame
    energy= np.where(energy == 0, np.finfo(float).eps, energy)
    fb= get_filterbanks(nfilter, nfft, sample_rate, low_frequency, high_frequency)
    feat= np.dot(pspec, fb.T)
    feat= np.where(feat == 0, np.finfo(float).eps, feat)
    feat= feat[:, :13]  
    return feat, energy

def logfbank(signal, sample_rate= 16000, window_len= 0.025, hop_step= 0.01, nfilter= 26, nfft= 512, low_frequency= 0, high_frequency= None, pre_emphasis =0.97, window_func= lambda x:np.ones((x,))):
    feat,energy= fbank(signal, sample_rate, window_len, hop_step, nfilter, nfft, low_frequency, high_frequency, pre_emphasis, window_func)
    return np.log(feat)


In [92]:
def mfcc(signal, sample_rate= 16000, window_len= 0.025, hop_step= 0.01, num_ceptra= 13, nfilter= 26, nfft= None, low_frequency= 0, high_frequency= None, preemphasis= 0.97, ceplifter=22, appendEnergy= True, window_func= lambda x: np.ones((x,))):
    """"  
    signal; audio signal from which to compute features. 
    sample_rate; sample rate in HZ of the signal.
    window_len; length of the analysis window in seconds.
    hop_step; steps between windows in seconds.
    num_ceptra; the number of cepstrum to return, anything above 26 is ignored by default
    nfilter; the number of filters in the mel filterbank.
    nfft; FFT size. we will use nfft function to cater for drops in sample data.
    low_frequency; lowest edge of mel filters(0).
    high_frequency; highest edge of mel filter(sr/2).
    preemphasis; pre-emphasis filter
    ceplifter; apply a lifting coefficient to final cepstral coefficients.
    appendEnergy; if set true, ceptral coeff is replaced by log of each frame.
    window_func; the analysis window to apply to each frame.
    
    """
    if nfft is None:
        nfft = calculate_nfft(sample_rate, window_len)
        feat,energy = fbank(signal,sample_rate,window_len,hop_step,nfilter,nfft,low_frequency,high_frequency,preemphasis,window_func)
        feat = np.log(feat)
        feat = dct(feat, type=2, axis=1, norm='ortho')[:,:num_ceptra]
        if appendEnergy: feat[:,0] = np.log(energy) # replace first cepstral coefficient with log of frame energy
    return feat
        

In [93]:
# music_features_filtered['mfcc_features']= music_features_filtered['file_path'].apply(lambda x: mfcc(*librosa.load(x, sr= None)))

In [94]:
music_features_filtered.dropna(inplace= True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  music_features_filtered.dropna(inplace= True)


In [95]:
music_features_filtered.head()

Unnamed: 0,track_id,track_title,track_genre_top,track_listens,track_favorites,album_title,album_listens,artist_name,trackID,file_path,mfcc_features
0,2,Food,Hip-Hop,1293,2,AWOL - A Way Of Life,6073,AWOL,2,sampled_audio_1gb/000002.mp3,"([-122.713936, 117.76009, -42.334175, 38.061, ..."
1,3,Electric Ave,Hip-Hop,514,1,AWOL - A Way Of Life,6073,AWOL,3,sampled_audio_1gb/000003.mp3,"([-151.68916, 125.360504, -37.11494, 47.89737,..."
2,5,This World,Hip-Hop,1151,6,AWOL - A Way Of Life,6073,AWOL,5,sampled_audio_1gb/000005.mp3,"([-162.23244, 131.43599, -18.426775, 51.63137,..."
3,10,Freeway,Pop,50135,178,Constant Hitmaker,47632,Kurt Vile,10,sampled_audio_1gb/000010.mp3,"([-75.63154, 154.53761, -63.849487, 22.506426,..."
9,134,Street Music,Hip-Hop,943,3,AWOL - A Way Of Life,6073,AWOL,134,sampled_audio_1gb/000134.mp3,"([-140.2245, 130.66966, -31.178131, 46.793636,..."


### Storing in MongoDB ###

In [96]:
# # converting feature vectors to list
# def convert_array(row):
#     for column, value in row.items():
#         if isinstance(value, np.ndarray):
#             row[column]= value.tolist()
#     return row
# music_features_filtered= music_features_filtered.apply(convert_array, axis= 1).to_dict('records')

In [97]:
print(music_features_filtered['mfcc_features'].head())


0    ([-122.713936, 117.76009, -42.334175, 38.061, ...
1    ([-151.68916, 125.360504, -37.11494, 47.89737,...
2    ([-162.23244, 131.43599, -18.426775, 51.63137,...
3    ([-75.63154, 154.53761, -63.849487, 22.506426,...
9    ([-140.2245, 130.66966, -31.178131, 46.793636,...
Name: mfcc_features, dtype: object


In [98]:
music_features_filtered['mfcc_features']= music_features_filtered['mfcc_features'].apply(lambda x: x[0].tolist() if x is not None else None)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  music_features_filtered['mfcc_features']= music_features_filtered['mfcc_features'].apply(lambda x: x[0].tolist() if x is not None else None)


In [102]:
# connecting to MongoDB
client= MongoClient('mongodb://localhost:27017')
mongo_database= client['MusicModel']
collection_name= mongo_database['audio_features']
# inserting data into MongoDB
collection_name.insert_many(music_features_filtered.to_dict('records'))
num_records= collection_name.count_documents({})
print(f"Inserted {num_records} documents into {mongo_database.name} database.")

Inserted 653 documents into MusicModel database.
