In [1]:
import numpy as np
import pandas as pd 
import librosa
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [5]:
sound_file_paths = ['fma_small/001/001482.mp3', 'fma_small/001/001443.mp3', 'fma_small/001/001197.mp3', 'fma_small/000/000002.mp3', 'fma_small/011/011638.mp3', 'fma_small/011/011672.mp3', 'fma_small/010/010809.mp3', 'fma_small/010/010677.mp3']
                     
sound_names =['Electronic', 'Experimental', 'Folk', 'Hip-Hop', 'Instrumental', 'International', 'Pop', 'Rock']

def load_sound_files(file_paths):
    raw_sounds = []
    for fp in file_paths:
        X,sr = librosa.load(fp, duration=30)
        X = librosa.util.fix_length(X, 661500)
        raw_sounds.append(X)
    return raw_sounds

In [6]:
raw_sounds = load_sound_files(sound_file_paths)
#raw_sounds = pad_sequences(np.asarray(raw_sounds), maxlen=661500, dtype='float32', padding='pre', truncating='post')

MEL NORMALIZATION

In [8]:
def ret_mel(raw_sounds):
    mel = []
    for i in raw_sounds:
        mel.append(librosa.feature.melspectrogram(y=i, sr=22050))
    return mel


In [10]:
mel = ret_mel(raw_sounds)
mel = np.asarray(mel)


In [11]:
mel.shape
print(np.max(mel), np.min(mel))

6302.492540431119 2.8640542053326805e-10


In [52]:
mel_mean = np.mean(mel)
mel_std = np.std(mel)

In [53]:
mel_norm = (mel - mel_mean)/mel_std

In [54]:
print(np.max(mel_norm), np.min(mel_norm))

70.583889831613 -0.12268644952447269


In [55]:
np.var(mel_norm)

1.0000000000000009

In [80]:
csv = pd.read_csv('valid_set.csv')
files = csv['path'].values

In [75]:
from tqdm import tqdm

In [84]:
def load_pad_mel_find_mean_and_std(file_paths):
    mel_global = []
    for fp in tqdm(file_paths):
        x,_=librosa.load(fp)
        x = librosa.util.fix_length(x, 661500)
        mel_global.append(librosa.feature.melspectrogram(y=x, sr=22050))
        
    return mel_global
        
mel_global = load_pad_mel_find_mean_and_std(files)
        
        
        
    

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 400/400 [07:30<00:00,  1.13s/it]


In [85]:
mel_mean = np.mean(mel_global)
mel_std = np.std(mel_global)

In [86]:
mel_norm = (mel_global - mel_mean)/mel_std


In [92]:
print(np.mean(mel_norm))

-1.5645895312047213e-16


In [89]:
import json

meta = {'mel_mean':mel_mean, 'mel_std':mel_std}

with open('meta.json', 'w') as out:
    json.dump(meta, out)

In [63]:
x = librosa.util.fix_length(x, 1)
len(x)

1

CHROMA NORMALIZATION

In [11]:
def ret_chroma(raw_sounds):
    chroma = []
    for i in raw_sounds:
        chroma.append(librosa.feature.chroma_stft(y=i, sr=22050))
    return chroma

In [12]:
chroma = ret_chroma(raw_sounds)

In [16]:
chroma=np.asarray(chroma)

In [27]:
def load_pad_chroma_find_mean_and_std(file_paths):
    chroma_global = []
    for fp in tqdm(file_paths):
        x,_=librosa.load(fp)
        x = librosa.util.fix_length(x, 661500)
        chroma_global.append(librosa.feature.chroma_stft(y=x, sr=22050))
        
    return chroma_global

In [28]:
from tqdm import tqdm
chroma_global = load_pad_chroma_find_mean_and_std(pd.read_csv('valid_set.csv')['path'].values)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400/400 [08:12<00:00,  1.23s/it]


In [30]:
print(np.min(chroma_global))

0.0


MFCC

In [31]:
def ret_mfcc(raw_sounds):
    mfcc = []
    for i in raw_sounds:
        mfcc.append(librosa.feature.mfcc(y=i, sr=22050))
    return mfcc

mfcc = ret_mfcc(raw_sounds)
mfcc = np.asarray(mfcc)

In [34]:
def load_pad_mfcc_find_mean_and_std(file_paths):
    mfcc_global = []
    for fp in tqdm(file_paths):
        x,_=librosa.load(fp)
        x = librosa.util.fix_length(x, 661500)
        mfcc_global.append(librosa.feature.mfcc(y=x, sr=22050))
        
    return mfcc_global

In [35]:
mfcc_global = load_pad_mfcc_find_mean_and_std(pd.read_csv('valid_set.csv')['path'].values)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400/400 [07:15<00:00,  1.09s/it]


In [37]:
print(np.mean(mfcc_global), np.std(mfcc_global))

-0.2543133366799549 50.13435542236795


Spectral Contrast

In [38]:
def ret_spec(raw_sounds):
    spec = []
    for i in raw_sounds:
        spec.append(librosa.feature.spectral_contrast(y=i, sr=22050))
    return spec

spec = ret_spec(raw_sounds)
spec = np.asarray(spec)

In [41]:
def load_pad_spec_find_mean_and_std(file_paths):
    spec_global = []
    for fp in tqdm(file_paths):
        x,_=librosa.load(fp)
        x = librosa.util.fix_length(x, 661500)
        spec_global.append(librosa.feature.spectral_contrast(y=x, sr=22050))
        
    return mfcc_global

In [42]:
spec_global = load_pad_spec_find_mean_and_std(pd.read_csv('valid_set.csv')['path'].values)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400/400 [07:28<00:00,  1.12s/it]


In [44]:
np.min(spec_global)

-595.0595575829223

In [45]:
np.max(spec_global)

315.91045594952527

Tonnetz

In [46]:
def ret_tonn(raw_sounds):
    tonn = []
    for i in raw_sounds:
        tonn.append(librosa.feature.tonnetz(y=i, sr=22050))
    return tonn

tonn = ret_tonn(raw_sounds)
tonn = np.asarray(tonn)

In [47]:
def load_pad_tonn_find_mean_and_std(file_paths):
    tonn_global = []
    for fp in tqdm(file_paths):
        x,_=librosa.load(fp)
        x = librosa.util.fix_length(x, 661500)
        tonn_global.append(librosa.feature.tonnetz(y=x, sr=22050))
        
    return tonn_global

tonn_global = load_pad_tonn_find_mean_and_std(pd.read_csv('valid_set.csv')['path'].values)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400/400 [09:49<00:00,  1.47s/it]


In [54]:
np.max(tonn_global) + np.min(tonn_global)*-1

1.0052154677337766

In [52]:
 np.min(tonn_global)

-0.44475080330173306

In [2]:
import json 

In [3]:
data = open('metadata/mel_data.json').read()
data = json.loads(data)

In [4]:
print(data)

{'mel_max': 18517.816701982778, 'mel_min': 0.0, 'mel_mean': 9.98822179823947, 'mel_std': 93.36492545254515}


In [12]:
mel_norm = (mel - data['mel_min'])/(data['mel_max'] - data['mel_min'])

In [18]:
metadata = {}
for feature in ['mel', 'chroma', 'mfcc', 'spec', 'tonn']:
    metadata[feature] = json.loads(open('metadata/'+feature+'_data.json').read())

In [19]:
print(metadata)

{'mel': {'mel_max': 18517.816701982778, 'mel_min': 0.0, 'mel_mean': 9.98822179823947, 'mel_std': 93.36492545254515}, 'chroma': {'chroma_max': 1.0, 'chroma_min': 0.0, 'chroma_mean': 0.377188293279868, 'chroma_std': 0.30716744338370644}, 'mfcc': {'mfcc_max': 303.49173728261627, 'mfcc_min': -1131.370849898476, 'mfcc_mean': -0.4252381814461332, 'mfcc_std': 51.306895577820946}, 'spec': {'spec_max': 95.36324960456079, 'spec_min': 0.0, 'spec_mean': 21.54160757401616, 'spec_std': 7.535548742967263}, 'tonn': {'tonn_max': 0.6656872958911089, 'tonn_min': -0.4680074320900417, 'tonn_mean': 0.0037642607551096213, 'tonn_std': 0.07690120648976552}}


In [20]:
with open('metadata.json','w') as f:
    json.dump(metadata, f)

In [21]:
metadata['mel']['mel_mean']

9.98822179823947