In [None]:
import pandas as pd
import numpy as np
import sox
import librosa
import os

# Read the three annotation splits of the Deezer mood dataset and stack them
# into one frame (row indices from each split are kept as-is by pd.concat).
split_paths = (
    "deezer_mood_detection_dataset-master/test.csv",
    "deezer_mood_detection_dataset-master/train.csv",
    "deezer_mood_detection_dataset-master/validation.csv",
)
test_labels, train_labels, validate_labels = map(pd.read_csv, split_paths)

labels = [test_labels, train_labels, validate_labels]
labels_df = pd.concat(labels)

labels_df.head()

In [None]:
# Example single-track lookup:
# local_dataset = labels_df.loc[labels_df['dzr_sng_id'] == 532171]

# Keep only the annotation rows whose track has an mp3 in deezer_mp3/.
# Matching rows are gathered in a list and concatenated once at the end:
# calling pd.concat inside the loop re-copies the whole frame for every file.
labeled_ids = labels_df['dzr_sng_id'].tolist()
labeled_id_set = set(labeled_ids)  # O(1) membership tests instead of a list scan
matched_rows = []

for filename in os.listdir("deezer_mp3"):
    if filename.endswith(".mp3"):
        # File names are expected to look like "<dzr_sng_id>.mp3".
        dzr_id = int(filename[:-4])
        if dzr_id in labeled_id_set:
            matched_rows.append(labels_df.loc[labels_df['dzr_sng_id'] == dzr_id])

# pd.concat raises on an empty list, so fall back to an empty frame when no
# local mp3 has an annotation.
if matched_rows:
    local_dataset = pd.concat(matched_rows, ignore_index=True)
else:
    local_dataset = pd.DataFrame()

local_dataset.head()

In [None]:
# Persist the filtered annotations. With to_csv's defaults the DataFrame index
# is written as the first (unnamed) column of the file.
local_dataset.to_csv('local_mp3_labels.csv')

In [None]:
def melspectrogram( data, n_fft = 1024, hop_length = 1024, win_length = 1024, sr = 44100, n_mels = 40 ):
    """Return a log-scaled mel spectrogram of an audio signal.

    Parameters
    ----------
    data : np.ndarray
        Audio samples, passed unchanged to ``librosa.stft``.
    n_fft, hop_length, win_length : int
        STFT parameters forwarded to ``librosa.stft``.
    sr : int
        Sampling rate used to build the mel filter bank. The signal itself is
        not resampled here, so ``data`` is assumed to already be at this rate.
    n_mels : int
        Number of mel bands in the filter bank.

    Returns
    -------
    np.ndarray
        ``10 * log10(mel_basis @ |STFT| + eps)``. For 1-D input this is
        expected to have shape ``(n_mels, n_frames)``.
    """
    S = librosa.stft(data, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    # `sr` is keyword-only in librosa >= 0.10; the keyword form also works on
    # older releases, whereas the positional form raises a TypeError on new ones.
    mel_basis = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
    # Project the magnitude spectrum (not power) onto the mel bands.
    mel_S = mel_basis @ np.abs(S)
    # float32 eps keeps log10 finite for silent bins.
    mel_S = 10 * np.log10( mel_S + np.finfo(np.float32).eps) 
    
    return mel_S

In [None]:
# Build the (mel spectrogram, (valence, arousal)) pairs for every annotated
# mp3 in deezer_mp3/.
converter = sox.Transformer()

# Every spectrogram is padded/cropped to this many STFT frames so the results
# can be stacked into one array (about 30 s at hop 1024 if the audio is
# 44.1 kHz -- NOTE(review): build_array's sample rate is not set here; confirm).
target_frames = 1292

labels = []
melspectrograms = []

for filename in os.listdir("deezer_mp3"):
    if not filename.endswith(".mp3"):
        continue

    # File names are expected to look like "<dzr_sng_id>.mp3".
    dzr_id = int(filename[:-4])

    # Look the track up once. Files without an annotation are skipped so that
    # `labels` never receives an empty (ragged) entry and stays aligned with
    # `melspectrograms`.
    track_rows = local_dataset.loc[local_dataset['dzr_sng_id'] == dzr_id]
    if track_rows.empty:
        continue
    valence = np.float64(track_rows['valence'].iloc[0])
    arousal = np.float64(track_rows['arousal'].iloc[0])

    audio_data = converter.build_array(input_filepath='deezer_mp3/' + filename)
    if audio_data.ndim > 1:
        # Multi-channel audio: average the channels down to mono.
        audio_data = np.mean(audio_data, axis=1)
    else:
        # Mono audio is already 1-D (np.mean(axis=1) would raise); just make
        # it floating point like the averaged case.
        audio_data = audio_data.astype(np.float64)

    mel_spec = melspectrogram(data=audio_data)
    n_frames = mel_spec.shape[1]
    if n_frames < target_frames:
        # Right-pad short clips with zeros along the time axis.
        padding = np.zeros((mel_spec.shape[0], target_frames - n_frames))
        mel_spec = np.append(mel_spec, padding, axis=1)
    else:
        mel_spec = mel_spec[:, :target_frames]

    # Append both together so X and Y always have the same length.
    labels.append((valence, arousal))
    melspectrograms.append(mel_spec)


In [None]:
import os
import pickle

# Names of every mp3 found in deezer_mp3/, in os.listdir order.
filenames = [name for name in os.listdir("deezer_mp3") if name.endswith(".mp3")]

# Save the list, then read it straight back to confirm the round trip.
with open('mp3_names.pkl', 'wb') as fp:
    pickle.dump(filenames, fp)

with open('mp3_names.pkl', 'rb') as fp:
    itemlist = pickle.load(fp)

len(itemlist)

In [None]:
# Sanity check: number of spectrograms collected by the extraction loop.
len(melspectrograms)

In [None]:
# dtype of the stacked (valence, arousal) labels. NOTE(review): an object
# dtype here would suggest non-uniform label entries -- check before saving.
np.array(labels).dtype

In [None]:
# dtype of the stacked spectrograms (this builds the full array just to inspect it).
np.array(melspectrograms).dtype

In [None]:
import h5py

# Convert to arrays before opening the file: mode 'w' truncates any existing
# MER_Dataset.h5, so a failed conversion should not destroy a previous export.
X_data = np.array(melspectrograms)
Y_data = np.array(labels)

# The context manager closes the file even if create_dataset raises.
with h5py.File('MER_Dataset.h5', 'w') as hf:
    hf.create_dataset('X', data=X_data)  # spectrograms
    hf.create_dataset('Y', data=Y_data)  # (valence, arousal) pairs

In [None]:
# Re-open the exported file read-only to inspect its contents.
# NOTE(review): this handle is never closed in the visible cells.
h5f = h5py.File('MER_Dataset.h5','r')

In [None]:
# Shape of the stored 'X' dataset (loads it fully into memory to check).
np.array( h5f['X'] ).shape

In [None]:
# Shape of the stored 'Y' dataset (loads it fully into memory to check).
np.array( h5f['Y'] ).shape

In [4]:
import pandas as pd
import numpy as np
import sox
import librosa
import os

In [5]:
def melspectrogram( data, n_fft = 1024, hop_length = 1024, win_length = 1024, sr = 44100, n_mels = 40 ):
    """Return a log-scaled mel spectrogram of an audio signal.

    Parameters
    ----------
    data : np.ndarray
        Audio samples, passed unchanged to ``librosa.stft``.
    n_fft, hop_length, win_length : int
        STFT parameters forwarded to ``librosa.stft``.
    sr : int
        Sampling rate used to build the mel filter bank. The signal itself is
        not resampled here, so ``data`` is assumed to already be at this rate.
    n_mels : int
        Number of mel bands in the filter bank.

    Returns
    -------
    np.ndarray
        ``10 * log10(mel_basis @ |STFT| + eps)``. For 1-D input this is
        expected to have shape ``(n_mels, n_frames)``.
    """
    S = librosa.stft(data, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    # `sr` is keyword-only in librosa >= 0.10; the keyword form also works on
    # older releases, whereas the positional form raises a TypeError on new ones.
    mel_basis = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
    # Project the magnitude spectrum (not power) onto the mel bands.
    mel_S = mel_basis @ np.abs(S)
    # float32 eps keeps log10 finite for silent bins.
    mel_S = 10 * np.log10( mel_S + np.finfo(np.float32).eps) 
    
    return mel_S

In [6]:
# Load the per-song annotations for the 45 s clips. The extraction loop below
# reads the 'song_id', 'mean_valence' and 'mean_arousal' columns from it.
local_dataset = pd.read_csv("45s_Labels/static_annotations.csv")

In [7]:
# Build the (mel spectrogram, (valence, arousal)) pairs for every annotated
# mp3 in 45s_mp3/. Each spectrogram is stored transposed, i.e. time-major.
converter = sox.Transformer()

# Every spectrogram is padded/cropped to this many STFT frames so the results
# can be stacked into one array.
target_frames = 1292

labels = []
melspectrograms = []

for filename in os.listdir("45s_mp3"):
    if not filename.endswith(".mp3"):
        continue

    # File names are expected to look like "<song_id>.mp3".
    sid = int(filename[:-4])

    try:
        # Look the song up once; clips without an annotation are skipped.
        song_rows = local_dataset.loc[local_dataset['song_id'] == sid]
        if song_rows.empty:
            print("Skipping {}: no annotation for song_id {}".format(filename, sid))
            continue
        valence = song_rows['mean_valence'].values[0]
        arousal = song_rows['mean_arousal'].values[0]

        # NOTE(review): unlike the deezer loop, channels are not averaged
        # here, so this presumably expects mono clips -- confirm.
        audio_data = np.float32(converter.build_array(input_filepath='45s_mp3/' + filename))
        mel_spec = melspectrogram(data=audio_data)
        n_frames = mel_spec.shape[1]
        if n_frames < target_frames:
            # Right-pad short clips with zeros along the time axis.
            padding = np.zeros((mel_spec.shape[0], target_frames - n_frames))
            mel_spec = np.append(mel_spec, padding, axis=1)
        else:
            mel_spec = mel_spec[:, :target_frames]

        mel_spec = mel_spec.T
    except Exception as exc:
        # Still best-effort (one bad clip must not abort the run), but the
        # failure is reported instead of being silently swallowed.
        print("Skipping {}: {!r}".format(filename, exc))
        continue

    # Append only after everything succeeded, and append both together: adding
    # the label before the audio was processed could leave `labels` one entry
    # longer than `melspectrograms` and shift every later pair.
    labels.append((valence, arousal))
    melspectrograms.append(mel_spec)


In [8]:
# Sanity check: number of spectrograms extracted; should match len(labels).
len(melspectrograms)

744

In [9]:
# Sanity check: number of labels collected; should match len(melspectrograms).
len(labels)

744

In [10]:
import h5py

# Convert to arrays before opening the file: mode 'w' truncates any existing
# MER_Dataset_45s.h5, so a failed conversion should not destroy a previous export.
X_data = np.array(melspectrograms)
Y_data = np.array(labels)

# The context manager closes the file even if create_dataset raises.
with h5py.File('MER_Dataset_45s.h5', 'w') as hf:
    hf.create_dataset('X', data=X_data)  # time-major spectrograms
    hf.create_dataset('Y', data=Y_data)  # (valence, arousal) pairs