In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io.wavfile as wavfile
import scipy.signal as signal
import scipy.fftpack as fftpack

In [2]:
def buffer(x, n, p=0, opt=None):
    '''Mimic MATLAB routine to generate buffer array

    MATLAB docs here: https://se.mathworks.com/help/signal/ref/buffer.html

    Args
    ----
    x:   signal array
    n:   number of data segments
    p:   number of values to overlap
    opt: initial condition options. default sets the first `p` values
         to zero, while 'nodelay' begins filling the buffer immediately.
    '''
    import numpy

    if p >= n:
        raise ValueError('p ({}) must be less than n ({}).'.format(p,n))

    # Calculate number of columns of buffer array
    cols = int(numpy.ceil(len(x)/(n-p)))

    # Check for opt parameters
    if opt == 'nodelay':
        # Need extra column to handle additional values left
        cols += 1
    elif opt != None:
        raise SystemError('Only `None` (default initial condition) and '
                          '`nodelay` (skip initial condition) have been '
                          'implemented')

    # Create empty buffer array
    b = numpy.zeros((n, cols))

    # Fill buffer by column handling for initial condition and overlap
    j = 0
    for i in range(cols):
        # Set first column to n values from x, move to next iteration
        if i == 0 and opt == 'nodelay':
            b[0:n,i] = x[0:n]
            continue
        # set first values of row to last p values
        elif i != 0 and p != 0:
            b[:p, i] = b[-p:, i-1]
        # If initial condition, set p elements in buffer array to zero
        else:
            b[:p, i] = 0

        # Get stop index positions for x
        k = j + n - p

        # Get stop index position for b, matching number sliced from x
        n_end = p+len(x[j:k])

        # Assign values to buffer array from x
        b[p:n_end,i] = x[j:k]

        # Update start index location for next iteration of x
        j = k

    return b

In [3]:
def melfb(p, n, fs):
    f0 = 700 / fs
    fn2 = np.floor(n/2)

    lr = np.log1p(0.5/f0)/ (p + 1)

    bl = [n * (f0 * (np.exp(i * lr) - 1)) for i in [0, 1, p, p + 1]]

    b1 = np.floor(bl[0]) + 1
    b2 = np.ceil(bl[1])
    b3 = np.floor(bl[2])
    b4 = min(fn2, np.ceil(bl[3])) - 1
    
    b1 = b1.astype(np.int64)
    b2 = b2.astype(np.int64)
    b3 = b3.astype(np.int64)
    b4 = b4.astype(np.int64)
    
    pf = [np.log1p(i/n/f0) /lr for i in range(b1,b4 + 1)]
    fp = [np.floor(i) for i in pf]
    pm = [pf[i] - fp[i] for i in range(len(pf))]

    r = fp[b2 -1:b4] + [fp[i] + 1 for i in range(b3)]
    c = [i+1 for i in range(b2,b4+1)] + [i + 1 for i in range(1,b3+1)]
    v = [2*(1-pm[i-1]) for i in range(b2,b4+1)] + [2*(pm[i]) for i in range(b3)]

    return [r,c,v]


In [4]:
def mfcc(f, fs):
    file_data = wavfile.read(f)
    
    buffered = buffer(file_data[1], 256, 256//3)
    
    windowed = np.multiply(buffered, signal.hamming(fs))
    
    fourier = np.fft.fft(windowed)
    
    ms = melfb(20, fourier.size, fs)
    
    c = fftpack.dct(np.log(ms))
    
    return c[1:]

In [5]:
mfcc('test/s1.wav', 256)

ValueError: operands could not be broadcast together with shapes (256,74) (256,) 

In [68]:
help (signal.hamming)

Help on function hamming in module scipy.signal.windows:

hamming(M, sym=True)
    Return a Hamming window.
    
    The Hamming window is a taper formed by using a raised cosine with
    non-zero endpoints, optimized to minimize the nearest side lobe.
    
    Parameters
    ----------
    M : int
        Number of points in the output window. If zero or less, an empty
        array is returned.
    sym : bool, optional
        When True (default), generates a symmetric window, for use in filter
        design.
        When False, generates a periodic window, for use in spectral analysis.
    
    Returns
    -------
    w : ndarray
        The window, with the maximum value normalized to 1 (though the value 1
        does not appear if `M` is even and `sym` is True).
    
    Notes
    -----
    The Hamming window is defined as
    
    .. math::  w(n) = 0.54 - 0.46 \cos\left(\frac{2\pi{n}}{M-1}\right)
               \qquad 0 \leq n \leq M-1
    
    The Hamming was named for R. W. H