In [540]:
import wave
from scipy.fftpack import fft,ifft,fftfreq
import scipy
import  matplotlib.pyplot as plt
import numpy as np
import librosa

# Extract Signals

In [541]:
def read_wav(path):
    """Read a PCM WAV file and return it as a mono signal.

    Parameters
    ----------
    path : str
        Path of the WAV file to read.

    Returns
    -------
    mono_data : numpy.ndarray
        1-D float array with one value per frame: the average of all
        channels of that frame (for a mono file, the samples themselves).
    framerate : int
        Sampling rate stored in the file header, in Hz.

    Raises
    ------
    ValueError
        If the sample width is not 1, 2 or 4 bytes.
    """
    with wave.open(path, 'rb') as f:
        nchannels, sampwidth, framerate, nframes = f.getparams()[:4]
        raw_frames = f.readframes(nframes)

    # WAV data is little-endian; spell the byte order out instead of relying
    # on the platform's native `short`.
    sample_dtypes = {1: np.dtype('u1'), 2: np.dtype('<i2'), 4: np.dtype('<i4')}
    if sampwidth not in sample_dtypes:
        raise ValueError(
            "unsupported sample width: %d byte(s) per sample" % sampwidth)

    samples = np.frombuffer(raw_frames, dtype=sample_dtypes[sampwidth])
    if sampwidth == 1:
        # 8-bit PCM WAV samples are unsigned with 128 as the zero level.
        samples = samples.astype(np.int16) - 128

    # Use the channel count from the header instead of assuming stereo, so
    # mono and multi-channel files are de-interleaved correctly.
    frames = samples.reshape(-1, nchannels)
    # Accumulate in float64 so wide sample types cannot overflow.
    mono_data = frames.sum(axis=1, dtype=np.float64) / nchannels
    return mono_data, framerate

# MFCC

In [542]:
def _circular_delta(features, half_width):
    """Regression delta of `features` along the frame axis (axis 0).

    For every frame t this evaluates

        delta[t] = sum_{k=-K..K} k * features[(t + k) % T] / sum_{k=-K..K} k**2

    with K = half_width and T = number of frames, i.e. neighbours are taken
    circularly at the start and end of the sequence.
    """
    deltas = np.zeros(features.shape, dtype=float)
    offsets = range(-half_width, half_width + 1)
    norm = float(sum(k * k for k in offsets))
    if norm == 0 or len(features) == 0:
        return deltas
    for k in offsets:
        # np.roll(x, -k, axis=0)[t] == x[(t + k) % T]
        deltas += k * np.roll(features, -k, axis=0)
    return deltas / norm


def mfcc_features(path:str, wndSec:float):  # window size (sec)
    """Compute per-frame MFCC + energy features with first and second deltas.

    Parameters
    ----------
    path : str
        Audio file to analyse (loaded with ``librosa.load``).
    wndSec : float
        Analysis window length in seconds; consecutive windows overlap by half.

    Returns
    -------
    numpy.ndarray
        Array of shape (frames, 39): 12 MFCC magnitudes plus 1 RMS value per
        frame, followed by their deltas and delta-deltas (13 columns each).

    Raises
    ------
    ValueError
        If `wndSec` is too short to span at least two samples.
    """
    # librosa.load resamples to its default rate unless told otherwise, so the
    # window length in samples must be derived from the rate of the signal
    # that is actually analysed, not from the rate in the file header.
    y, sr = librosa.load(path)

    # MFCC
    max_freq = 3000
    wndSize = int(wndSec * sr)
    wndSize += wndSize % 2          # force an even window so the hop is exactly half
    if wndSize < 2:
        raise ValueError("wndSec is too small for the sampling rate")
    hop = wndSize // 2

    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=12, n_fft=wndSize, n_mels=24, hop_length=hop, window='hamming', power=2, fmin=0, fmax=max_freq)
    mfcc = abs(mfcc).T              # (frames, 12); coefficient sign is discarded

    # Short-time RMS energy, one value per frame.
    # NOTE(review): the original comment called this "log-scale short-time
    # power", but no logarithm is applied -- confirm which one is intended.
    rms = librosa.feature.rms(y=y, frame_length=wndSize, hop_length=hop)
    rms = rms.reshape(-1, 1)        # (frames, 1)

    # Static features followed by delta and delta-delta. Each delta row is
    # computed from the neighbours of its own frame.
    half_width = 2
    static = np.concatenate((mfcc, rms), axis=1)
    delta1 = _circular_delta(static, half_width)
    delta2 = _circular_delta(delta1, half_width)
    features = np.concatenate((static, delta1, delta2), axis=-1)

    return features    # [frame, 39d mfcc]
    


In [543]:
# Extract the feature matrix for the sample recording with a 0.5-second
# analysis window, then show its shape.
mfcc = mfcc_features('../audio/I swear.wav', 0.5)
print(mfcc.shape)

(622, 39)


In [544]:
def data_fft(data,time,start,end):
    """Single-sided amplitude spectrum of a time slice of ``data[0]``.

    Parameters
    ----------
    data : sequence of sequences
        Only the first row, ``data[0]``, is analysed.
    time : sequence of float
        Time stamp of each sample, indexed like ``data[0]``.
    start, end : float
        Inclusive bounds of the time window to analyse.

    Returns
    -------
    xf : numpy.ndarray
        Frequency axis, computed as bin index divided by ``abs(end - start)``.
    yf : numpy.ndarray
        FFT magnitudes divided by the number of selected samples, restricted
        to the first half of the spectrum.
        Both arrays are empty when no sample falls inside the window.
    """
    time_arr = np.asarray(time)
    # Pick all samples inside [start, end] in one pass instead of growing
    # arrays element by element.
    selected = np.flatnonzero((time_arr >= start) & (time_arr <= end))
    n = len(selected)
    if n == 0:
        # Nothing to transform; an FFT of zero points is undefined.
        return np.array([]), np.array([])

    # np.append onto an empty list promotes to at least float64, matching the
    # precision of accumulating the samples one by one.
    y = np.append([], np.asarray(data[0])[selected])
    yf = abs(fft(y)) / n
    yf = yf[:n // 2]                 # keep the first half of the bins
    xf = np.arange(len(yf)) / abs(end - start)

    return xf,yf