In [1]:
import pandas as pd
import numpy as np
import sox
import librosa
import os

# Read the test / train / validation label splits (in that order) and stack
# them into a single table. The index is not reset by the concat, so every
# split keeps its own row numbering inside labels_df.
test_labels, train_labels, validate_labels = map(
    pd.read_csv,
    (
        "deezer_mood_detection_dataset-master/test.csv",
        "deezer_mood_detection_dataset-master/train.csv",
        "deezer_mood_detection_dataset-master/validation.csv",
    ),
)

labels = [test_labels, train_labels, validate_labels]
labels_df = pd.concat(labels)

# Preview the first rows of the combined table.
labels_df.head()

Unnamed: 0,dzr_sng_id,MSD_sng_id,MSD_track_id,valence,arousal,artist_name,track_name
0,532171,SOSDIDR12A8C130E5D,TRGVJCD128F42623E3,-0.420759,0.754644,Fiona Apple,Fast As You Can
1,532216,SOPMTTO12AB0183DB9,TRNXMAY128F931FAC6,-0.46533,0.011006,Fiona Apple,I Know
2,532284,SOQCEJU12A8C13220B,TRYMZQJ128F426A0E7,0.373325,-0.923151,Xzibit,Get Your Walk On
3,532989,SOLEFHR12D021B2A57,TRZHHIL128F4264EE0,0.894528,-0.390774,Fiona Apple,Extraordinary Machine
4,533164,SOFDPOX12A8C13780D,TRJEBIT128F933A400,-1.6369,-0.459145,Patty Loveless,How Can I Help You Say Goodbye


In [60]:
# Build local_dataset: the label rows whose track has an mp3 in deezer_mp3/.
# Rows are emitted in os.listdir order (one group of rows per matching file),
# with the original column order and a fresh 0..n-1 index.
labeled_ids = labels_df['dzr_sng_id'].tolist()
# Set membership is O(1); testing against the list rescanned it for every file.
labeled_id_set = set(labeled_ids)

# File names are expected to be "<dzr_sng_id>.mp3"; collect the ids that have labels.
present_ids = []
for filename in os.listdir("deezer_mp3"):
    if not filename.endswith(".mp3"):
        continue
    dzr_id = int(filename[:-4])
    if dzr_id in labeled_id_set:
        present_ids.append(dzr_id)

# Select every matching row with one indexed lookup instead of re-concatenating
# the growing frame once per file. drop=False keeps 'dzr_sng_id' as a regular
# column in its original position; the temporary index is discarded afterwards.
local_dataset = (
    labels_df.set_index('dzr_sng_id', drop=False)
    .loc[present_ids]
    .reset_index(drop=True)
)

local_dataset.head()

Unnamed: 0,dzr_sng_id,MSD_sng_id,MSD_track_id,valence,arousal,artist_name,track_name
0,2509072,SOONWRV12A8C138371,TRAUFCU128F42882B7,1.110105,-0.31472,Jack Johnson and Friends / Matt Costa,Fall Line
1,61064523,SOVLGTO12A8C1348EE,TRGSBHX128F427327F,-1.700572,1.842444,System of a Down,Prison Song
2,2181967,SOMEQOG12AB0183DEF,TRLLOEU12903CA775E,1.011867,0.616364,Bunny Wailer,This Train
3,784836,SOSTWHQ12A6D4FA381,TROPKDQ128F148B89A,0.373325,-0.923151,All Saints,Pure Shores (Cosmos Remix)
4,77786769,SOGZJZU12AF72A0D45,TROASXG128F932E1CE,0.678952,-2.333604,Cassandra Wilson,I Can't Stand the Rain


In [63]:
# Persist the labels of the locally available tracks. to_csv is called with its
# defaults, so the row index is written as an additional leading column next to
# the existing "Unnamed: 0" column.
local_dataset.to_csv('local_mp3_labels.csv')

In [9]:
def melspectrogram( data, n_fft = 1024, hop_length = 1024, win_length = 1024, sr = 44100, n_mels = 40 ):
    """Compute a log-scaled mel spectrogram of an audio signal.

    Parameters
    ----------
    data : audio samples handed straight to ``librosa.stft``.
    n_fft, hop_length, win_length : STFT parameters forwarded to ``librosa.stft``.
    sr : sample rate used to build the mel filter bank.
    n_mels : number of mel bands (rows of the result).

    Returns
    -------
    Array with ``n_mels`` rows and one column per STFT frame, holding
    ``10 * log10(mel_filter_bank @ |STFT| + eps)``.
    """
    S = librosa.stft(data, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    # sr is passed by keyword: recent librosa releases made the arguments of
    # filters.mel keyword-only, so the positional form raises TypeError there.
    mel_basis = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
    # Project the magnitude spectrum onto the mel bands.
    mel_S = mel_basis @ np.abs(S)
    # float32 eps keeps log10 finite for bands with zero energy.
    mel_S = 10 * np.log10( mel_S + np.finfo(np.float32).eps)

    return mel_S

In [88]:
# Build the feature/label lists for the Deezer tracks: one fixed-width
# mel spectrogram and one (valence, arousal) pair per labeled mp3, both in
# os.listdir order.
converter = sox.Transformer()

# Every spectrogram is zero-padded or truncated to this many STFT frames.
TARGET_FRAMES = 1292

# Index the labels once instead of scanning local_dataset twice per file.
# Values are stored as plain float64 scalars so that np.array(labels) has shape
# (n_tracks, 2); wrapping a one-row Series in np.float64 produced 1-element
# arrays and therefore a stray trailing axis. If an id occurs more than once,
# the last row wins.
label_lookup = {
    int(song_id): (np.float64(valence), np.float64(arousal))
    for song_id, valence, arousal in zip(
        local_dataset['dzr_sng_id'],
        local_dataset['valence'],
        local_dataset['arousal'],
    )
}

labels = []
melspectrograms = []
skipped_files = []  # mp3 files without a label row in local_dataset

for filename in os.listdir("deezer_mp3"):
    if not filename.endswith(".mp3"):
        continue

    # File names are expected to be "<dzr_sng_id>.mp3".
    dzr_id = int( filename[:-4])
    if dzr_id not in label_lookup:
        # An unlabeled track would contribute an empty label and make the
        # label list ragged, so it is left out of both lists.
        skipped_files.append( filename )
        continue
    valence, arousal = label_lookup[dzr_id]

    audio_data = converter.build_array( input_filepath = 'deezer_mp3/' + filename )
    if audio_data.ndim > 1:
        # Multi-channel audio: average the channels down to mono.
        audio_data = np.mean( audio_data, axis = 1 )
    else:
        # Already one-dimensional: only convert to float, as the mean above would.
        audio_data = audio_data.astype( np.float64 )

    mel_spec = melspectrogram( data = audio_data )

    # Force a fixed number of frames: zero-pad short clips on the right,
    # truncate long ones.
    n_frames = mel_spec.shape[1]
    if n_frames < TARGET_FRAMES:
        mel_spec = np.pad( mel_spec, ((0, 0), (0, TARGET_FRAMES - n_frames)), mode = 'constant' )
    else:
        mel_spec = mel_spec[: , :TARGET_FRAMES]

    labels.append( (valence, arousal) )
    melspectrograms.append( mel_spec )

if skipped_files:
    print( "Skipped", len(skipped_files), "mp3 file(s) without labels" )


In [19]:
import os
import pickle

# Gather the names of all mp3 files in the directory listing order.
filenames = [entry for entry in os.listdir("deezer_mp3") if entry.endswith(".mp3")]

# Store the list on disk ...
with open('mp3_names.pkl', 'wb') as fp:
    pickle.dump(filenames, fp)

# ... and read it straight back to confirm the round trip.
with open('mp3_names.pkl', 'rb') as fp:
    itemlist = pickle.load(fp)

# Number of file names recovered from the pickle.
len(itemlist)

14317

In [91]:
# Sanity check: number of spectrograms collected by the extraction loop.
len(melspectrograms)

14317

In [94]:
# Sanity check: the label list must convert to a plain numeric array before it
# is written to HDF5 (an object dtype here would point at ragged entries).
np.array(labels).dtype

dtype('float64')

In [95]:
# Sanity check: dtype of the stacked spectrograms. Note that this builds the
# full stacked array in memory just to read its dtype.
np.array(melspectrograms).dtype

dtype('float64')

In [97]:
import h5py

# Write the features ('X') and labels ('Y') to MER_Dataset.h5, overwriting any
# existing file. The context manager closes the file even when the array
# conversion or a write raises, so no half-open handle is left behind.
with h5py.File('MER_Dataset.h5', 'w') as hf:
    hf.create_dataset('X', data = np.array(melspectrograms) )
    hf.create_dataset('Y', data = np.array(labels) )

In [98]:
# Reopen the dataset read-only for inspection. The handle is kept open for the
# following cells; no matching close() appears in this part of the notebook.
h5f = h5py.File('MER_Dataset.h5','r')

In [106]:
# Read the shape from the dataset metadata; wrapping the dataset in np.array
# would load every spectrogram into memory just to look at its shape.
h5f['X'].shape

(14317, 40, 1292)

In [None]:
# Read the shape from the dataset metadata instead of loading the labels into
# memory first.
h5f['Y'].shape

In [1]:
import pandas as pd
import numpy as np
import sox
import librosa
import os

In [8]:
# Load the static per-song annotations (mean and standard deviation of arousal
# and valence, keyed by song_id) and display the table.
va_labels = pd.read_csv("45s_Labels/static_annotations.csv")
va_labels

Unnamed: 0,song_id,mean_arousal,std_arousal,mean_valence,std_valence
0,2,3.1000,0.99443,3.0000,0.66667
1,3,3.5000,1.84090,3.3000,1.70290
2,4,5.7000,1.49440,5.5000,1.71590
3,5,4.4000,2.11870,5.3000,1.94650
4,7,5.8000,1.54920,6.4000,1.77640
...,...,...,...,...,...
739,995,4.7000,1.33750,2.4000,0.96609
740,996,6.0000,1.69970,6.7000,1.76700
741,997,6.4000,2.22110,6.7000,1.76700
742,999,4.1000,2.42440,5.9000,1.66330


In [15]:
# Build the feature/label lists for the 45-second clips: one fixed-length,
# time-major mel spectrogram and one (valence, arousal) pair per labeled mp3,
# both in os.listdir order.
# NOTE(review): melspectrogram is defined in an earlier cell, and the execution
# counter restarts before this section -- confirm it exists in this kernel session.
converter = sox.Transformer()

# Every spectrogram is zero-padded or truncated to this many STFT frames.
TARGET_FRAMES = 1292

# Index the annotations once instead of scanning va_labels twice per file.
# Values are stored as plain float32 scalars so that np.array(labels) has shape
# (n_clips, 2); wrapping a one-row Series in np.float32 produced 1-element
# arrays and therefore a stray trailing axis. If an id occurs more than once,
# the last row wins.
label_lookup = {
    int(song_id): (np.float32(valence), np.float32(arousal))
    for song_id, valence, arousal in zip(
        va_labels['song_id'],
        va_labels['mean_valence'],
        va_labels['mean_arousal'],
    )
}

labels = []
melspectrograms = []
skipped_files = []  # mp3 files without an annotation row in va_labels

for filename in os.listdir("45s_mp3"):
    if not filename.endswith(".mp3"):
        continue

    # File names are expected to be "<song_id>.mp3".
    sid = int( filename[:-4])
    if sid not in label_lookup:
        # A clip without annotations would contribute an empty label and make
        # the label list ragged, so it is left out of both lists.
        skipped_files.append( filename )
        continue
    valence, arousal = label_lookup[sid]

    audio_data = np.float32( converter.build_array( input_filepath = '45s_mp3/' + filename ) )
    if audio_data.ndim > 1:
        # Multi-channel audio: average the channels down to mono before the STFT.
        audio_data = audio_data.mean( axis = 1 )

    mel_spec = melspectrogram( data = audio_data )

    # Force a fixed number of frames: zero-pad short clips on the right,
    # truncate long ones. np.pad keeps the spectrogram's own dtype, so padded
    # and truncated clips end up with the same dtype.
    n_frames = mel_spec.shape[1]
    if n_frames < TARGET_FRAMES:
        mel_spec = np.pad( mel_spec, ((0, 0), (0, TARGET_FRAMES - n_frames)), mode = 'constant' )
    else:
        mel_spec = mel_spec[: , :TARGET_FRAMES]
    # Store time-major: one row per frame, one column per mel band.
    mel_spec = mel_spec.T

    labels.append( (valence, arousal) )
    melspectrograms.append( mel_spec )

if skipped_files:
    print( "Skipped", len(skipped_files), "mp3 file(s) without labels" )


KeyboardInterrupt: 