In [18]:
# Load 5 seconds of a file, starting 15 seconds in
import numpy as np
import librosa
import pandas as pd

import pathlib
path = pathlib.Path("Feature_Extraction.ipynb").parent.resolve()

filedir = str(path.parent) + '/Data/genres_original/blues/blues.00000.wav'

def extract_one_feature(y,sr):
    #________ chroma_stft _______
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_stft_mean = np.mean(chroma_stft)
    chroma_stft_var = np.var(chroma_stft)
    #______rms _____
    rms =  librosa.feature.rms(y=y)
    rms_mean = np.mean(rms)
    rms_var = np.var(rms)
    #______spectral_centroid _____
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_centroid_mean = np.mean(spectral_centroid)
    spectral_centroid_var = np.var(spectral_centroid)
    #______spectral_bandwidth______
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spectral_bandwidth_mean = np.mean(spectral_bandwidth)
    spectral_bandwidth_var = np.var(spectral_bandwidth)
    #_____rolloff_______
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    rolloff_mean = np.mean(rolloff)
    rolloff_var = np.var(rolloff)
    #_____zero_crossing_rate______
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y)
    zero_crossing_rate_mean = np.mean(zero_crossing_rate)
    zero_crossing_rate_var = np.var(zero_crossing_rate)
    #_____harmony_____
    harmony = librosa.effects.harmonic(y)
    harmony_mean = np.mean(harmony)
    harmony_var = np.var(harmony)
    #_____perceptr____
    C = np.abs(librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('A1')))
    freqs = librosa.cqt_frequencies(C.shape[0], fmin=librosa.note_to_hz('A1'))
    perceptr = librosa.perceptual_weighting(C**2, freqs, ref=np.max)
    perceptr_mean = np.mean(perceptr)
    perceptr_var = np.var(perceptr)
    #_____tempo______
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)[0]
    #_____mfcc______
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    mdict = {"chroma_stft_mean" : chroma_stft_mean,
            "chroma_stft_var":chroma_stft_var,
            "rms_mean": rms_mean,
            "rms_var":rms_var,
            "spectral_centroid_mean": spectral_centroid_mean,
            "spectral_centroid_var" : spectral_centroid_var,
            "spectral_bandwidth_mean":spectral_bandwidth_mean,
            "spectral_bandwidth_var": spectral_bandwidth_var, 
            "rolloff_mean" : rolloff_mean,
            "rolloff_var": rolloff_var ,
            "zero_crossing_rate_mean" : zero_crossing_rate_mean,
            "zero_crossing_rate_var":zero_crossing_rate_var,
            "harmony_mean":harmony_mean,
            "harmony_var":harmony_var,
            "perceptr_mean":perceptr_mean,
            "perceptr_var":perceptr_var,
            "tempo":tempo,}
    
    for index, a in enumerate(mfcc, start = 1):
        mdict["mfcc"+str(index)+"_mean"] = np.mean(a)
        mdict["mfcc"+str(index)+"_var"] = np.var(a)

    return mdict

def extrackt(filedir, filename, genre):
    offset = 0
    duration=3
    go = True
    i=0
    csv = []
    length = 0
    while(go):
        y, sr = librosa.load(filedir, offset=offset, duration=duration)
        if length != len(y) and length !=0:
            i=0
            break
        length = len(y)
        offset += duration
        mdict_ = extract_one_feature(y,sr)
        mdict = {}
        mdict["name"] = filename
        mdict["name_v"] = filename+"."+str(i)
        mdict["filedir"] = filedir
        mdict["genre"] = genre
        mdict["length"] = length

        mdict.update(mdict_)
        csv.append(mdict)
        i+=1
    return csv
    
df = pd.DataFrame(extrackt(filedir, "blues.0000", "blues"))

In [17]:
df

Unnamed: 0,name,name_v,filedir,genre,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
0,blues.0000,blues.0000.0,/Users/amineelblidi/Documents/Data-Science-II/...,blues,66150,0.335555,0.090997,0.130189,0.003559,1773.358004,...,-2.881668,39.547073,-3.230046,36.606857,0.696385,37.766132,-5.035945,33.668549,-0.239585,43.818882
1,blues.0000,blues.0000.1,/Users/amineelblidi/Documents/Data-Science-II/...,blues,66150,0.343523,0.086782,0.112119,0.001491,1817.244034,...,4.050664,64.819778,-6.025472,40.548805,0.127131,51.048935,-2.808956,97.221497,5.771882,60.360352
2,blues.0000,blues.0000.2,/Users/amineelblidi/Documents/Data-Science-II/...,blues,66150,0.347746,0.092495,0.130895,0.004552,1790.722357,...,4.730752,68.306786,-1.714475,28.136944,2.329553,47.211426,-1.925621,52.922436,2.466996,33.164001
3,blues.0000,blues.0000.3,/Users/amineelblidi/Documents/Data-Science-II/...,blues,66150,0.363863,0.087207,0.131349,0.002338,1660.545231,...,-1.45431,48.543198,-3.786987,28.419542,1.153315,35.682701,-3.501979,50.61034,3.580637,32.325874
4,blues.0000,blues.0000.4,/Users/amineelblidi/Documents/Data-Science-II/...,blues,66150,0.335481,0.088482,0.14237,0.001734,1634.465077,...,2.053745,30.829544,0.635797,44.645561,1.591108,51.415863,-3.364909,26.421085,0.501505,29.109531
5,blues.0000,blues.0000.5,/Users/amineelblidi/Documents/Data-Science-II/...,blues,66150,0.374287,0.088972,0.131466,0.00351,1996.01765,...,2.091273,30.950649,-3.461423,34.216366,-0.450124,39.934879,-3.643696,32.52187,3.015992,28.928495
6,blues.0000,blues.0000.6,/Users/amineelblidi/Documents/Data-Science-II/...,blues,66150,0.380956,0.088845,0.129517,0.003174,1962.139384,...,4.819421,34.264614,-2.104399,25.30571,1.416341,46.991249,-3.212211,38.579281,3.367521,25.76704
7,blues.0000,blues.0000.7,/Users/amineelblidi/Documents/Data-Science-II/...,blues,66150,0.332675,0.091968,0.139707,0.00257,1703.371234,...,3.151013,38.173569,-3.557689,23.974079,-0.106229,26.752272,-1.958288,22.97687,0.247115,41.848137
8,blues.0000,blues.0000.8,/Users/amineelblidi/Documents/Data-Science-II/...,blues,66150,0.348459,0.094078,0.13227,0.002522,1749.64082,...,-0.367742,43.697987,-4.256653,29.800327,2.083615,34.017033,-2.184457,80.908493,3.702477,97.842499
9,blues.0000,blues.0000.9,/Users/amineelblidi/Documents/Data-Science-II/...,blues,66150,0.359891,0.083232,0.114406,0.001845,1767.490801,...,6.581156,44.138424,-3.329914,48.998123,0.094831,41.979404,-7.586964,96.841919,0.77222,40.325226


In [None]:
import os
datadir='data/genres_original'
df1 = ""
for genredir in os.listdir(datadir):
    if os.path.isdir(datadir +"/"+genredir):
        for sound in os.listdir(datadir +"/"+genredir):
            if(sound[-4:] == ".wav" or sound[-4:] == ".mp3" or sound[-4:] == ".wma" or sound[-4:] == ".aac" or sound[-4:] == ".asf" or sound[-5:] == ".opus" or sound[-4:] == ".ogg" or sound[-3:] == ".au"):
                sounddir = datadir+"/"+ genredir+"/"+sound
                name = sound[:-4]
                genre = genredir
                try:
                    df2 = pd.DataFrame(extrackt(sounddir, name, genre))
                    if(type(df1) == str ):
                        df1 = df2
                        continue
                    df1 = [df1, df2]
                    df1 = pd.concat(df1)
                except Exception as e:
                    print(e)
                    print(sounddir)


df1.to_csv("Data/extraction.csv")