In [1]:
import numpy as np
import scipy.signal as ss
import pywt
import itertools as it

In [2]:
def _partition(signal, window_length=661500, jump=22050):
    """
    Params:
    signal: (one-dimensional array)
    window_length: (int) size of the window/frame
    jump: (int) length of distance between windows/frames
    
    Return:
    partitions: (list) list of partitioned windows/frames of length window_length and 
    """
    signal_length = len(signal)
    signal_index = range(signal_length - window_length)
    beg_index = np.array(filter(lambda x: x%jump==0, signal_index))
    end_index = beg_index + window_length
    zipped = zip(beg_index, end_index)
    
    partitions = [signal[i[0]:i[1]] for i in zipped]
    return partitions

def lpfilter(signal,alpha=0.99):
    y = range(len(signal))
    y[0] = 0
    for i in range(1,len(signal)):
        y[i] = ((1-alpha) * signal[i]) + (alpha * y[i-1])
    return np.array(y)

def acf(signal):
    n = len(signal)
    array = np.array([np.dot(signal[:n-i], signal[i:])/float(n) for i in xrange(n)]) 
    return array

In [3]:
#arjay
def auto2bpm(index, ln=10337):
    return int(60.0 * ln / index)

def bpm2auto(bpm, ln=10337):
    return int(60.0 * ln / bpm)

def beat_histogram(signal,sr=22050):
    histogram = []
    partition = _partition(signal)
    hz = [200., 400., 800., 1600., 3200.]
    for sig in partition:
        subbands = []
        #lowpass filter
        B, A = ss.butter(4,200./sr,btype='low')
        subbands.append(np.array(ss.lfilter(B,A,pywt.dwt(sig,'db4')[0])))
        #bandpass filter
        for t in range(len(hz)-1):
            B, A = ss.butter(4,[hz[t]/sr,hz[t+1]/sr],btype='bandpass')
            subbands.append(np.array(ss.lfilter(B,A,pywt.dwt(sig,'db4')[0])))
        #highpass filter
        B, A = ss.butter(4,3200./sr,btype='highpass')
        subbands.append(np.array(ss.lfilter(B,A,pywt.dwt(sig,'db4')[0])))
        #envelope extraction
        for s in range(len(subbands)):
            y = pywt.dwt(subbands[s],'db4')[0]
            y = abs(y) #full wave rectification
            y = lpfilter(y,0.99) #low pass filter
            y = y[::16] #downsampling
            subbands[s] = y - np.mean(y)#mean removal

        x = np.sum(subbands,axis=0)
        
        ac = acf(x) #enhanced autocorrelation

        pk = peak(ac[bpm2auto(200):bpm2auto(40)]) #peak finding

        histogram.append(pk)   
    return [i for row in histogram for i in row]

def peak(array):
    z=np.diff(array)
    ind = []
    for i in range(len(z)-1):
        if z[i+1] < 0 and z[i] > 0:
            ind.append((array[i+1],auto2bpm(i+1+bpm2auto(200))))
    ind_ = []
    pk = []
    for k,j in sorted(ind):
        if j not in ind_:
            ind_.append(j)
            pk.append((k,j))
    return pk[-3:]

def bh_feat(signal):
    bh = beat_histogram(signal)
    y = np.zeros((200))
    for i, j in bh:
        y[j] = y[j] + i

    r_amp = y / np.sum(y)

    a1, a0 = sorted(r_amp)[-2:]
    ra = a1 / a0
    sm = np.sum(y)
    mx = sorted(y)[-2:]
    p2, p1 = [i for i,x in enumerate(y) for j in mx if j == x]
    
    return a0, a1, ra, p1, p2, sm

In [4]:
#joseph
def peaks(signal):
    z=np.diff(signal)
    ind = []
    for i in range(len(z)-1):
        if z[i+1] < 0 and z[i] > 0:
            ind.append(i+1)
    amp = [signal[k] for k in ind[:3]]
    return zip(ind[:3],amp)

def decompose(signal):
    B_low,A_low = ss.cheby1(5, 1,1000/22050, 'low')
    B_hi,A_hi = ss.cheby1(5, 1,1000/22050, 'highpass')
    low_pass = ss.lfilter(B_low,A_low,signal)
    high_pass = ss.lfilter(B_hi,A_hi,signal)
    return low_pass, high_pass

def half_wave_rectify(signal):
    hwr = signal.copy()
    hwr[hwr<0]=0
    return hwr

def envelope_sum(signal):
    low,hi = decompose(signal)
    env_low = lpfilter(half_wave_rectify(low))
    env_hi = lpfilter(half_wave_rectify(hi))
    env_sum = env_low + env_hi
    return env_sum

def SACF(signal):
    low,hi = decompose(signal)
    env_low = lpfilter(half_wave_rectify(low))
    env_hi = lpfilter(half_wave_rectify(hi))
    acf_low = acf(env_low)
    acf_high = acf(env_hi)
    return acf_low + acf_high


def pitch_histogram(peak_tupple):
    p = np.arange(1,2028)
    a = np.zeros(len(p))
    for i,j in peak_tupple:
        a[i] = a[i] + j            
    return np.array(zip(p,a)).astype(int)
            

def unfolded_histogram(pitch_histogram_tuple):
    x = pitch_histogram_tuple.T[0]
    n = map(lambda x: 12*np.log2(x/440)+69,x)
    return np.array(n),pitch_histogram_tuple.T[1]

def folded_histogram(pitch_histogram_tuple):
    x,y = unfolded_histogram(pitch_histogram_tuple)
    c = map(lambda x: x.astype(int) % 12, x)
    h = np.arange(12)
    amplitude_ufh = np.zeros(12)
    for i in range(len(h)):
        for j in range(len(c)):
            if h[i] == c[j]:
                amplitude_ufh[i] += y[j]
    return h,amplitude_ufh

def folded_histogram_max_amplitude(folded_histogram_period, folded_histogram_amplitude):
    index_of_max = np.argmax(folded_histogram_amplitude)
    return folded_histogram_period[index_of_max], folded_histogram_amplitude[index_of_max]

def folded_histogram_amplitude_sum(folded_histogram_amplitude):
    return np.sum(folded_histogram_amplitude)

def unfolded_histogram_period_max_amplitude(unfolded_histogram_period, unfolded_histogram_amplitude):
    index_of_max = np.argmax(unfolded_histogram_amplitude)
    return unfolded_histogram_period[index_of_max]

def folded_histogram_pitch_interval(folded_histogram_period, folded_histogram_amplitude):
    index_max = np.argmax(folded_histogram_amplitude)
    remove_max = folded_histogram_amplitude.copy()
    remove_max[index_max] = 0
    index_max_2 = np.argmax(remove_max)
    return abs(index_max - index_max_2)

def pitch_histogram_features(signal_segment):
    
    windowed_signal = _partition(signal_segment,2028,2028)
    sacf = map(SACF, windowed_signal)
    my_peaks_list = map(peaks, sacf)
   
    peaks_flat = np.array(list(it.chain.from_iterable(my_peaks_list))).astype(int)
    PH = pitch_histogram(peaks_flat)

    q,r = unfolded_histogram(PH)
    u,v = folded_histogram(PH)
    
    ### Feature 1: Period of Max Amplitude of Folded Histogram
    ### Feature 2: Max Amplitude of Max Amplitude of Folded Histogram
    ### Feature 3: Sum of Amplitude of Folded Histogram
    ### Feature 4: Period of Max Amplitude of unfolded Histogram
    ### Feature 5: Pitch Interval of Folded Histogram
    
    F1, F2 = folded_histogram_max_amplitude(u,v)
    F3 = folded_histogram_amplitude_sum(v)
    F4 = unfolded_histogram_period_max_amplitude(q,r)
    F5 = folded_histogram_pitch_interval(u, v)
    
    return [F1, F2, F3, F4, F5]

In [5]:
def segment_music(df,num_secs):
    segments = []
    for i in df.index:
        song = df.ix[i]['signal']
        num_pts = df.ix[i]['sample_rate']*num_secs
        trimmed = np.trim_zeros(song)
        while np.all(trimmed[:10])==False:
            trimmed = trimmed[5:]
        while np.all(trimmed[-10:])==False:
            trimmed = trimmed[:-5]
        seg_beg = trimmed[:num_pts].astype(int)
        seg_mid = trimmed[int(trimmed.shape[0]/2)-(num_pts/2):int(trimmed.shape[0]/2)+(num_pts/2)].astype(int)
        seg_end = trimmed[-num_pts:].astype(int)
        
        segments.append([seg_beg,seg_mid,seg_end])
    return segments

In [1]:
df_sample = pd.read_pickle("df_sample.pkl")

In [7]:
segments = pd.DataFrame(segment_music(df_sample, 30), columns=["beg", "mid", "end"])

In [8]:
df = pd.concat([df_sample, segments], axis = 1)
df

Unnamed: 0,artist,song,signal,sample_rate,channel,beg,mid,end
0,Pink,Are We All We Are,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",44100,2,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, ...","[809, 903, 417, -201, -177, -333, -995, -1688,...","[16448, 17526, 18439, 17593, 13771, 8368, 3233..."
1,Pink,Blow Me One Last Kiss,"[-9, -14, -10, -9, -11, -11, -9, -12, -11, -10...",44100,2,"[-9, -14, -10, -9, -11, -11, -9, -12, -11, -10...","[10569, 5250, 279, 2198, 5924, 3850, -2975, -9...","[4325, 10096, 14250, 9580, 3494, 3670, 6913, 1..."
2,Pink,Try,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",44100,2,"[-2, -1, -1, -2, -1, 1, -1, 1, -1, 1, 1, 0, 0,...","[2739, 1973, 1015, 95, -202, -238, -200, -94, ...","[5988, 3302, 3242, 5465, 6914, 6744, 6358, 825..."
3,Pink,Just Give Me A Reason,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",44100,2,"[-2, -2, -1, 1, 1, -2, -1, -3, -3, -1, -2, -1,...","[-2535, -2845, -3301, -4150, -4906, -5295, -55...","[-9430, -6699, -3657, -452, 2889, 6250, 9652, ..."
4,Pink,True Love,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",44100,2,"[2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 3, ...","[-16230, -12410, -13635, -11647, -9389, -17166...","[18431, 24020, 31532, 32767, 27869, 25101, 263..."


In [9]:
%%time
arjayfeat = pd.DataFrame(map(lambda x: bh_feat(x), df.beg),
                         columns = ["amp1", "amp2", "ratio_amp1_amp2", "per1", "per2", "beat_strength"])

CPU times: user 1min 36s, sys: 3.72 s, total: 1min 40s
Wall time: 1min 40s


In [10]:
arjayfeat

Unnamed: 0,amp1,amp2,ratio_amp1_amp2,per1,per2,beat_strength
0,0.420955,0.206238,0.489929,142,140,612307900.0
1,0.324115,0.303821,0.937384,190,171,2174113000.0
2,0.255653,0.227294,0.889069,195,173,1268834000.0
3,0.255414,0.129069,0.505332,118,89,85247850.0
4,0.440457,0.272051,0.617657,180,120,1986638000.0


In [19]:
from utils import cython_func as cf
import librosa as lb



In [12]:
%%time
seg = "beg"
feature7_15 = pd.DataFrame(map(lambda x: cf.feature_wrapper(x), df[seg]),
                          columns = [seg+"_centroid_mean",
                                     seg+"_centroid_std",
                                     seg+"_rolloff_mean",
                                     seg+"_rolloff_std",
                                     seg+"_flow_mean",
                                     seg+"_flow_std",
                                     seg+"_zrc_mean",
                                     seg+"_zrc_std",
                                     seg+"_lowenergy"])

CPU times: user 15.7 s, sys: 1.53 s, total: 17.2 s
Wall time: 17.2 s


In [14]:
feature7_15

Unnamed: 0,beg_centroid_mean,beg_centroid_std,beg_rolloff_mean,beg_rolloff_std,beg_flow_mean,beg_flow_std,beg_zrc_mean,beg_zrc_std,beg_lowenergy
0,3420.53418,425.522278,102814.46875,17064.160156,1.961744,0.00819,0.078796,0.01739,0.48
1,4750.083496,46.60487,155783.671875,777.187866,2.08656,0.017651,0.086198,0.003308,0.64
2,4119.118164,270.182129,149502.0625,7982.341797,1.977856,0.038451,0.028082,0.00551,0.48
3,3136.13501,138.536499,128966.46875,5410.90918,1.992231,0.020744,0.042894,0.005313,0.48
4,3957.986572,80.74366,132623.0625,895.153076,1.976086,0.011083,0.053811,0.009748,0.36


In [15]:
%%time
feature26_30 = pd.DataFrame(map(lambda x: pitch_histogram_features(x), df[seg]),
                           columns = [seg+"_pitch_strength",
                                      seg+"_per_max_unfolded",
                                      seg+"_amp_max_folded",
                                      seg+"_per_max_folded",
                                      seg+"_intrvl_peaks_folded"])

CPU times: user 32.9 s, sys: 97.4 ms, total: 33 s
Wall time: 33.2 s


In [16]:
feature26_30

Unnamed: 0,beg_pitch_strength,beg_per_max_unfolded,beg_amp_max_folded,beg_per_max_folded,beg_intrvl_peaks_folded
0,5,937038049,7881037874,29.906427,2
1,9,1767416343,11326084948,77.283786,4
2,9,987398783,5623936696,70.792531,7
3,0,86373790,563632577,48.287073,2
4,2,1481931135,10933615919,73.722792,1


In [17]:
def get_mfcc_mean_sd(data_array, n = 5, sr = 44100, n_mfcc = 40, verbose = False):
    mfccs = lb.feature.mfcc(y=data_array, sr=sr, n_mfcc=n_mfcc)
    means = [x.mean() for x in mfccs[:n]]
    sds = [x.std() for x in mfccs[:n]]
    if verbose == True:
        print means
    return means + sds

In [22]:
pd.DataFrame(map(lambda x: get_mfcc_mean_sd(x), df[seg]),
            columns = [seg+"_%dmfcc_"%i+metric for metric in ["mean", "std"] for i in range(1, 6)])

Unnamed: 0,beg_1mfcc_mean,beg_2mfcc_mean,beg_3mfcc_mean,beg_4mfcc_mean,beg_5mfcc_mean,beg_1mfcc_std,beg_2mfcc_std,beg_3mfcc_std,beg_4mfcc_std,beg_5mfcc_std
0,947.628949,129.206842,-44.584359,32.576331,-5.007685,39.236337,34.162564,19.671942,11.9932,13.820254
1,923.05078,100.200408,-7.371712,37.887281,-15.535273,105.982678,33.690376,23.142295,24.951123,13.568349
2,769.908163,135.850141,20.537432,22.819391,3.264879,123.584713,54.667639,26.532977,29.145388,15.521205
3,678.724901,189.531106,4.518027,1.714989,1.612009,80.94065,46.21106,28.271916,24.070823,20.929255
4,858.008574,139.142886,9.914198,34.2491,-16.560637,110.535441,54.486595,45.661594,35.030543,13.985375


In [23]:
def features(df):
    seg_col = ["beg", "mid", "end"]
    segments = pd.DataFrame(segment_music(df, 30), columns= seg_col) #segments are 30 seconds long
    df = pd.concat([df, segments], axis = 1)
    df.drop(["signal", "sample_rate", "channel"], axis = 1, inplace = True)
    
    
    for seg in seg_col:
        #carlo and arjay's features
        feature1_6 = pd.DataFrame(map(lambda x: bh_feat(x), df["%s"%seg]), 
                                  columns = ["%s_amp1"%seg,
                                             "%s_amp2"%seg,
                                             "%s_ratio_amp1_amp2"%seg, 
                                             "%s_per1"%seg, 
                                             "%s_per2"%seg,
                                             "%s_beat_strength" %seg])
        df = pd.concat([df, feature1_6], axis = 1)
        
        #paul's features
        feature7_15 = pd.DataFrame(map(lambda x: cf.feature_wrapper(x), df[seg]),
                                  columns = [seg+"_centroid_mean",
                                             seg+"_centroid_std",
                                             seg+"_rolloff_mean",
                                             seg+"_rolloff_std",
                                             seg+"_flow_mean",
                                             seg+"_flow_std",
                                             seg+"_zrc_mean",
                                             seg+"_zrc_std",
                                             seg+"_lowenergy"])
        df = pd.concat([df, feature7_15], axis = 1)
        
        #pucca and mark's features
        feature16_25 = pd.DataFrame(map(lambda x: get_mfcc_mean_sd(x), df[seg]),
                                    columns = [seg+"_%dmfcc_"%i+metric for metric in ["mean", "std"] for i in range(1, 6)])
        df = pd.concat([df, feature16_25], axis = 1)
        
        #joseph's features
        feature26_30 = pd.DataFrame(map(lambda x: pitch_histogram_features(x), df[seg]),
                                   columns = [seg+"_pitch_strength",
                                              seg+"_per_max_unfolded",
                                              seg+"_amp_max_folded",
                                              seg+"_per_max_folded",
                                              seg+"_intrvl_peaks_folded"])
        df = pd.concat([df, feature26_30], axis = 1)
        
    df.drop(["beg", "mid", "end"],axis = 1, inplace = True)
        
    return df

In [24]:
%%time
s = features(df_sample)

CPU times: user 7min 31s, sys: 20 s, total: 7min 51s
Wall time: 7min 52s


In [26]:
s

Unnamed: 0,artist,song,beg_amp1,beg_amp2,beg_ratio_amp1_amp2,beg_per1,beg_per2,beg_beat_strength,beg_centroid_mean,beg_centroid_std,beg_rolloff_mean,beg_rolloff_std,beg_flow_mean,beg_flow_std,beg_zrc_mean,beg_zrc_std,beg_lowenergy,beg_1mfcc_mean,beg_2mfcc_mean,beg_3mfcc_mean,beg_4mfcc_mean,beg_5mfcc_mean,beg_1mfcc_std,beg_2mfcc_std,beg_3mfcc_std,beg_4mfcc_std,beg_5mfcc_std,beg_pitch_strength,beg_per_max_unfolded,beg_amp_max_folded,beg_per_max_folded,beg_intrvl_peaks_folded,mid_amp1,mid_amp2,mid_ratio_amp1_amp2,mid_per1,mid_per2,mid_beat_strength,mid_centroid_mean,mid_centroid_std,mid_rolloff_mean,mid_rolloff_std,mid_flow_mean,mid_flow_std,mid_zrc_mean,mid_zrc_std,mid_lowenergy,mid_1mfcc_mean,mid_2mfcc_mean,mid_3mfcc_mean,mid_4mfcc_mean,mid_5mfcc_mean,mid_1mfcc_std,mid_2mfcc_std,mid_3mfcc_std,mid_4mfcc_std,mid_5mfcc_std,mid_pitch_strength,mid_per_max_unfolded,mid_amp_max_folded,mid_per_max_folded,mid_intrvl_peaks_folded,end_amp1,end_amp2,end_ratio_amp1_amp2,end_per1,end_per2,end_beat_strength,end_centroid_mean,end_centroid_std,end_rolloff_mean,end_rolloff_std,end_flow_mean,end_flow_std,end_zrc_mean,end_zrc_std,end_lowenergy,end_1mfcc_mean,end_2mfcc_mean,end_3mfcc_mean,end_4mfcc_mean,end_5mfcc_mean,end_1mfcc_std,end_2mfcc_std,end_3mfcc_std,end_4mfcc_std,end_5mfcc_std,end_pitch_strength,end_per_max_unfolded,end_amp_max_folded,end_per_max_folded,end_intrvl_peaks_folded
0,Pink,Are We All We Are,0.420955,0.206238,0.489929,142,140,612307900.0,3420.53418,425.522278,102814.46875,17064.160156,1.961744,0.00819,0.078796,0.01739,0.48,947.628949,129.206842,-44.584359,32.576331,-5.007685,39.236337,34.162564,19.671942,11.9932,13.820254,5,937038049,7881037874,29.906427,2,0.252735,0.238945,0.945439,190,175,2126785000.0,3281.460938,31.136532,105122.070312,985.683838,2.022635,0.006515,0.066544,0.003292,0.64,983.791087,141.755551,-40.198016,38.890756,-9.060866,43.290194,19.080793,15.32348,11.966724,10.80799,0,2115187311,16997263959,75.269052,3,0.321801,0.248914,0.773503,162,142,1419819000.0,3176.164795,12.878837,99428.46875,546.185303,2.012935,0.005958,0.067273,0.001532,0.24,980.909251,129.69861,-45.098505,31.77478,-13.495899,124.073159,32.413439,25.245239,25.013021,12.685112,5,3574753474,25668685094,72.319584,5
1,Pink,Blow Me One Last Kiss,0.324115,0.303821,0.937384,190,171,2174113000.0,4750.083496,46.60487,155783.671875,777.187866,2.08656,0.017651,0.086198,0.003308,0.64,923.05078,100.200408,-7.371712,37.887281,-15.535273,105.982678,33.690376,23.142295,24.951123,13.568349,9,1767416343,11326084948,77.283786,4,0.312065,0.285113,0.913633,171,142,930264300.0,5079.212891,104.317009,160248.0,1792.653442,2.05906,0.005823,0.12422,0.006129,0.28,1046.652978,83.486505,-21.928339,42.901845,-8.241475,28.60417,18.293821,13.714241,14.345909,11.770444,9,3278702186,24261850655,77.259385,5,0.341051,0.24954,0.731678,155,142,983942700.0,5164.964844,17.99785,163520.328125,885.836548,2.06727,0.005854,0.129672,0.000953,0.44,1043.342158,74.435947,-18.703345,42.198358,-7.794019,69.838194,21.633069,11.89051,13.148586,10.241491,4,3767438947,23439042615,76.124157,1
2,Pink,Try,0.255653,0.227294,0.889069,195,173,1268834000.0,4119.118164,270.182129,149502.0625,7982.341797,1.977856,0.038451,0.028082,0.00551,0.48,769.908163,135.850141,20.537432,22.819391,3.264879,123.584713,54.667639,26.532977,29.145388,15.521205,9,987398783,5623936696,70.792531,7,0.400125,0.167713,0.419153,195,156,1274071000.0,5160.400879,51.148285,165435.140625,1119.897095,2.013709,0.030874,0.12039,0.004113,0.32,1015.585848,95.252136,-15.891095,37.550946,-11.189215,35.273834,25.58802,14.665278,10.612989,9.768609,7,1670125732,13027980919,74.253706,5,0.318925,0.120133,0.376683,195,194,310318700.0,2695.501953,697.868896,87473.867188,30633.521484,2.056533,0.036407,0.024259,0.009166,0.52,733.802882,131.263773,25.154482,23.529027,5.177926,128.886715,53.574659,35.573704,19.887502,16.047208,2,765023012,4087868724,64.988682,3
3,Pink,Just Give Me A Reason,0.255414,0.129069,0.505332,118,89,85247850.0,3136.13501,138.536499,128966.46875,5410.90918,1.992231,0.020744,0.042894,0.005313,0.48,678.724901,189.531106,4.518027,1.714989,1.612009,80.94065,46.21106,28.271916,24.070823,20.929255,0,86373790,563632577,48.287073,2,0.412261,0.342882,0.831712,178,118,2198361000.0,2770.746094,105.259956,92713.203125,6330.235352,1.953902,0.017169,0.049012,0.003797,0.64,912.491668,172.16676,-28.776377,38.356146,-6.480639,50.778176,25.475877,20.486537,12.748172,14.695044,9,1291303707,10219780324,80.215227,7,0.307634,0.14362,0.466854,178,118,237038200.0,2247.426514,273.667145,64617.535156,8745.482422,1.895831,0.040412,0.027913,0.005443,0.52,773.684428,163.09863,6.224049,20.875824,13.077282,128.142786,48.643391,40.134386,20.193365,16.236716,2,594232215,4824346144,83.858772,7
4,Pink,True Love,0.440457,0.272051,0.617657,180,120,1986638000.0,3957.986572,80.74366,132623.0625,895.153076,1.976086,0.011083,0.053811,0.009748,0.36,858.008574,139.142886,9.914198,34.2491,-16.560637,110.535441,54.486595,45.661594,35.030543,13.985375,2,1481931135,10933615919,73.722792,1,0.344556,0.139829,0.405825,180,179,1950984000.0,4570.238281,38.105808,148613.140625,1761.585205,1.975823,0.006181,0.090804,0.001698,0.68,993.081961,99.935777,-26.047442,40.755372,-19.149766,48.415278,24.152984,18.186748,13.109115,12.682487,0,3262442736,18937327623,72.2545,11,0.292841,0.150197,0.512897,180,90,1742591000.0,4399.541016,23.00598,145252.59375,236.508438,2.033853,0.006289,0.084807,0.014538,0.32,923.158014,114.912365,-3.642868,25.718454,-19.915106,169.582663,40.599208,43.030166,21.695961,20.79393,0,3387092084,20647041391,72.090711,2


In [2]:
from utils import feature_extraction



In [3]:
%%time
x = feature_extraction.features(df_sample)

CPU times: user 7min 20s, sys: 17.5 s, total: 7min 37s
Wall time: 7min 38s


In [4]:
x

Unnamed: 0,artist,song,beg_amp1,beg_amp2,beg_ratio_amp1_amp2,beg_per1,beg_per2,beg_beat_strength,beg_centroid_mean,beg_centroid_std,beg_rolloff_mean,beg_rolloff_std,beg_flow_mean,beg_flow_std,beg_zrc_mean,beg_zrc_std,beg_lowenergy,beg_1mfcc_mean,beg_2mfcc_mean,beg_3mfcc_mean,beg_4mfcc_mean,beg_5mfcc_mean,beg_1mfcc_std,beg_2mfcc_std,beg_3mfcc_std,beg_4mfcc_std,beg_5mfcc_std,beg_pitch_strength,beg_per_max_unfolded,beg_amp_max_folded,beg_per_max_folded,beg_intrvl_peaks_folded,mid_amp1,mid_amp2,mid_ratio_amp1_amp2,mid_per1,mid_per2,mid_beat_strength,mid_centroid_mean,mid_centroid_std,mid_rolloff_mean,mid_rolloff_std,mid_flow_mean,mid_flow_std,mid_zrc_mean,mid_zrc_std,mid_lowenergy,mid_1mfcc_mean,mid_2mfcc_mean,mid_3mfcc_mean,mid_4mfcc_mean,mid_5mfcc_mean,mid_1mfcc_std,mid_2mfcc_std,mid_3mfcc_std,mid_4mfcc_std,mid_5mfcc_std,mid_pitch_strength,mid_per_max_unfolded,mid_amp_max_folded,mid_per_max_folded,mid_intrvl_peaks_folded,end_amp1,end_amp2,end_ratio_amp1_amp2,end_per1,end_per2,end_beat_strength,end_centroid_mean,end_centroid_std,end_rolloff_mean,end_rolloff_std,end_flow_mean,end_flow_std,end_zrc_mean,end_zrc_std,end_lowenergy,end_1mfcc_mean,end_2mfcc_mean,end_3mfcc_mean,end_4mfcc_mean,end_5mfcc_mean,end_1mfcc_std,end_2mfcc_std,end_3mfcc_std,end_4mfcc_std,end_5mfcc_std,end_pitch_strength,end_per_max_unfolded,end_amp_max_folded,end_per_max_folded,end_intrvl_peaks_folded
0,Pink,Are We All We Are,0.420955,0.206238,0.489929,142,140,612307900.0,3420.53418,425.522278,102814.46875,17064.160156,1.961744,0.00819,0.078796,0.01739,0.48,947.628949,129.206842,-44.584359,32.576331,-5.007685,39.236337,34.162564,19.671942,11.9932,13.820254,5,937038049,7881037874,29.906427,2,0.252735,0.238945,0.945439,190,175,2126785000.0,3281.460938,31.136532,105122.070312,985.683838,2.022635,0.006515,0.066544,0.003292,0.64,983.791087,141.755551,-40.198016,38.890756,-9.060866,43.290194,19.080793,15.32348,11.966724,10.80799,0,2115187311,16997263959,75.269052,3,0.321801,0.248914,0.773503,162,142,1419819000.0,3176.164795,12.878837,99428.46875,546.185303,2.012935,0.005958,0.067273,0.001532,0.24,980.909251,129.69861,-45.098505,31.77478,-13.495899,124.073159,32.413439,25.245239,25.013021,12.685112,5,3574753474,25668685094,72.319584,5
1,Pink,Blow Me One Last Kiss,0.324115,0.303821,0.937384,190,171,2174113000.0,4750.083496,46.60487,155783.671875,777.187866,2.08656,0.017651,0.086198,0.003308,0.64,923.05078,100.200408,-7.371712,37.887281,-15.535273,105.982678,33.690376,23.142295,24.951123,13.568349,9,1767416343,11326084948,77.283786,4,0.312065,0.285113,0.913633,171,142,930264300.0,5079.212891,104.317009,160248.0,1792.653442,2.05906,0.005823,0.12422,0.006129,0.28,1046.652978,83.486505,-21.928339,42.901845,-8.241475,28.60417,18.293821,13.714241,14.345909,11.770444,9,3278702186,24261850655,77.259385,5,0.341051,0.24954,0.731678,155,142,983942700.0,5164.964844,17.99785,163520.328125,885.836548,2.06727,0.005854,0.129672,0.000953,0.44,1043.342158,74.435947,-18.703345,42.198358,-7.794019,69.838194,21.633069,11.89051,13.148586,10.241491,4,3767438947,23439042615,76.124157,1
2,Pink,Try,0.255653,0.227294,0.889069,195,173,1268834000.0,4119.118164,270.182129,149502.0625,7982.341797,1.977856,0.038451,0.028082,0.00551,0.48,769.908163,135.850141,20.537432,22.819391,3.264879,123.584713,54.667639,26.532977,29.145388,15.521205,9,987398783,5623936696,70.792531,7,0.400125,0.167713,0.419153,195,156,1274071000.0,5160.400879,51.148285,165435.140625,1119.897095,2.013709,0.030874,0.12039,0.004113,0.32,1015.585848,95.252136,-15.891095,37.550946,-11.189215,35.273834,25.58802,14.665278,10.612989,9.768609,7,1670125732,13027980919,74.253706,5,0.318925,0.120133,0.376683,195,194,310318700.0,2695.501953,697.868896,87473.867188,30633.521484,2.056533,0.036407,0.024259,0.009166,0.52,733.802882,131.263773,25.154482,23.529027,5.177926,128.886715,53.574659,35.573704,19.887502,16.047208,2,765023012,4087868724,64.988682,3
3,Pink,Just Give Me A Reason,0.255414,0.129069,0.505332,118,89,85247850.0,3136.13501,138.536499,128966.46875,5410.90918,1.992231,0.020744,0.042894,0.005313,0.48,678.724901,189.531106,4.518027,1.714989,1.612009,80.94065,46.21106,28.271916,24.070823,20.929255,0,86373790,563632577,48.287073,2,0.412261,0.342882,0.831712,178,118,2198361000.0,2770.746094,105.259956,92713.203125,6330.235352,1.953902,0.017169,0.049012,0.003797,0.64,912.491668,172.16676,-28.776377,38.356146,-6.480639,50.778176,25.475877,20.486537,12.748172,14.695044,9,1291303707,10219780324,80.215227,7,0.307634,0.14362,0.466854,178,118,237038200.0,2247.426514,273.667145,64617.535156,8745.482422,1.895831,0.040412,0.027913,0.005443,0.52,773.684428,163.09863,6.224049,20.875824,13.077282,128.142786,48.643391,40.134386,20.193365,16.236716,2,594232215,4824346144,83.858772,7
4,Pink,True Love,0.440457,0.272051,0.617657,180,120,1986638000.0,3957.986572,80.74366,132623.0625,895.153076,1.976086,0.011083,0.053811,0.009748,0.36,858.008574,139.142886,9.914198,34.2491,-16.560637,110.535441,54.486595,45.661594,35.030543,13.985375,2,1481931135,10933615919,73.722792,1,0.344556,0.139829,0.405825,180,179,1950984000.0,4570.238281,38.105808,148613.140625,1761.585205,1.975823,0.006181,0.090804,0.001698,0.68,993.081961,99.935777,-26.047442,40.755372,-19.149766,48.415278,24.152984,18.186748,13.109115,12.682487,0,3262442736,18937327623,72.2545,11,0.292841,0.150197,0.512897,180,90,1742591000.0,4399.541016,23.00598,145252.59375,236.508438,2.033853,0.006289,0.084807,0.014538,0.32,923.158014,114.912365,-3.642868,25.718454,-19.915106,169.582663,40.599208,43.030166,21.695961,20.79393,0,3387092084,20647041391,72.090711,2
