# Multirate sampling demo
This demonstrates decimation and interpolation of audio signals.  The data is captured at a high sampling rate of 44.1 kHz, downsampled by a factor of 8 (reducing the sampling rate to 5.5125 kHz), and then upsampled back to 44.1 kHz.  High frequency information is lost in this process, reducing the quality of the audio signal.

### Preamble
Start by importing the Python libraries that we will require

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as sps
from scipy.io import loadmat
import scipy.linalg as spl
import scipy as sp
import IPython.display as ipd

### Load in the trumpet recording

In [None]:
# Load the MATLAB file; the recording is stored under the 'audiodata' key
x = loadmat('trumpet.mat')
# Flatten whatever shape was stored into a one-dimensional sample vector
audiodata = x['audiodata'].reshape(-1)
# Sampling rate of the recording in Hz
sampling_rate = 44100

### User specified parameters
The following parameters can be specified.

Parameter | Meaning
--------- | -------
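<code>decimation_factor</code>| Integer factor by which the sampling rate is reduced (e.g. 8)
<code>transition_band_proportion</code> | Width of the filter transition band as a proportion of the new (decimated) sampling rate (e.g. 0.02)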

In [None]:
# Integer factor by which the sampling rate is reduced; every
# decimation_factor-th filtered sample is kept when downsampling
decimation_factor = 8
# Proportion by which the filter cut-off is backed off from the new
# Nyquist frequency; it also sets the width of the filter transition band
transition_band_proportion = 0.02

### Function Definitions
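The functions below compute the minimum variance spectral estimate (MVSE) of the input data: <code>Autocorrelation</code> estimates the autocorrelation sequence, and <code>MVSE</code> uses it to form the spectrum.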

In [None]:
def Autocorrelation(x, M):
    """
        Estimate the autocorrelation of x for lags 0..M using FFTs.

        The data is processed in consecutive blocks of M+1 samples, each
        zero-padded to 2M+2 points.  For every block the product of its
        transform with its own conjugate is accumulated, together with
        the product of its conjugate and the transform of the following
        block (with an alternating-sign factor applied).  A single
        inverse FFT of the accumulated sum then yields the correlation
        values, which are normalised by len(x).

        INPUT:
            x - vector to be correlated
            M - maximum correlation lag

        RETURN:
            acf - real vector of M+1 autocorrelation values (lags 0..M)

    """

    block_len = M + 1
    nfft = 2*M + 2
    total = len(x)

    # Alternating [ 1 -1 1 ... ] sequence.  Multiplying a transform by it
    # is equivalent to circularly shifting the block by half the FFT
    # length, which places the following block directly after the
    # current one.
    signs = np.power((-1), np.arange(0, nfft))

    # Accumulator for the frequency-domain products
    accum = np.zeros(nfft)

    # Transform of the first block (zero-padded by the FFT itself)
    current = np.fft.fft(x[0:block_len], nfft)

    block = 0
    while True:
        # Correlation of the current block with itself
        accum = accum + np.multiply(np.conj(current), current)

        block += 1
        start = block * block_len

        # Stop once the next block would begin beyond the data
        if start > total:
            break

        # Transform of the next block.  Slicing truncates at the end of
        # the record, so a short final block is handled automatically.
        following = np.fft.fft(x[start:start + block_len], nfft)

        # Correlation of the current block with the following one
        accum = accum + np.multiply(np.multiply(signs, np.conj(current)),
                                    following)

        # The following block becomes the current block
        current = following

    # Back to the lag domain; only lags 0..M are kept
    lag_domain = np.fft.ifft(accum, nfft)

    return np.divide(lag_domain[0:block_len], total).real

In [None]:
def MVSE(p, fft_size, input_data):
    """
       Compute the minimum variance spectral estimate of a data sequence.

       The (p+1) x (p+1) autocorrelation matrix R is not inverted
       directly.  Writing R = V diag(d) V^T, the quadratic form
       e^H R^-1 e at a given frequency equals sum_i |e^H v_i|^2 / d_i,
       and evaluating e^H v_i over a grid of frequencies is a discrete
       Fourier transform of the eigenvector v_i.  The estimate is
       (p+1) / (e^H R^-1 e), returned in dB.

       INPUT:
           p                 (int): maximum correlation lag
           fft_size          (int): length of transformed output
                                    (NOTE: presumably expected to be at
                                    least p+1, since a shorter FFT
                                    truncates the eigenvectors)
           input_data (array-like): input data sequence

       RETURN:
           Px: the final spectrum in dB (array of length fft_size)
    """

    # Compute the autocorrelation for lags 0..p, and create the
    # symmetric Toeplitz correlation matrix from it
    rxx = Autocorrelation(input_data, p)
    R = spl.toeplitz(rxx)

    # Eigenvalue decomposition.  eigh is used because R is symmetric;
    # the eigenvectors are the columns of v.
    d, v = spl.eigh(R)

    # Invert the eigenvalues, and store as a vector of length p+1.
    # eps avoids a divide by zero.
    U = 1.0 / (np.abs(d) + np.finfo(float).eps)

    # Transform the eigenvectors.  v.T holds one eigenvector per row, so
    # the result has dimensions (p+1) x fft_size.
    V = np.abs(np.fft.fft(v.T, fft_size))**2

    # Combine the transformed eigenvectors with the inverted eigenvalues
    # and normalise by the length of the correlation vector, p+1.  Using
    # p+1 rather than p also keeps the result finite when p == 0.
    Px = 10*np.log10(p + 1) - 10*np.log10(np.dot(V.T, U))

    return Px

### Calculate the new Nyquist frequency, the filter cut-off, and the transition band in terms of the original sampling rate
This step determines the filter cut-off that we will require for the new sampling rate

In [None]:
# Nyquist frequency (Hz) after decimation: half of the reduced sampling rate
new_nyquist = sampling_rate / (2*decimation_factor)
# Filter cut-off (Hz), backed off from the new Nyquist frequency by the
# chosen proportion
cut_off = new_nyquist * (1-transition_band_proportion)
# Transition band width expressed as a fraction of the ORIGINAL sampling
# rate, which is the rate at which the filter operates
transition_band = transition_band_proportion / decimation_factor

### Select a Blackman window
A Blackman window provides sufficiently high stopband attenuation for the level of noise anticipated in the audio file.  The factor of 5.5 is taken from the table of window characteristics (the normalised transition width of a Blackman-windowed filter is approximately 5.5/M) and is used to determine the number of coefficients, M, required in the filter.

In [None]:
# Number of filter coefficients needed for the requested transition band.
# np.ceil returns a float, but a coefficient count must be an integer so
# that it can safely be used for lengths and index arithmetic.
M = int(np.ceil(5.5/transition_band))

### Calculate the ideal filter response and the window coefficients

In [None]:
# ideal filter
n = np.arange(0, M)
# Delay (in samples) of the linear-phase filter: the centre of the response
delay = (M-1) / 2
# Ideal low-pass impulse response sin(2*pi*fc*(n-delay)/fs) / (pi*(n-delay)),
# written with np.sinc (sinc(x) = sin(pi*x)/(pi*x), sinc(0) = 1).  This
# evaluates the centre tap correctly as 2*fc/fs when M is odd, without a
# 0/0 division and without indexing the array with a non-integer delay.
hd = (2*cut_off/sampling_rate) * np.sinc(2*cut_off*(n-delay)/sampling_rate)

# window coefficients (Blackman window)
w = 0.42 - 0.5*np.cos(2*n*np.pi/(M-1)) + 0.08*np.cos(4*n*np.pi/(M-1))

### Generate the filter and apply to the data
The filter is the product of the window and the ideal filter response.

In [None]:
# Windowed filter: element-wise product of the window and the ideal response
h = w * hd

Now apply this to the data, and subsample the output.  Note that this is not an efficient implementation as downsampling is done after computing all of the filtered data.

In [None]:
# Low-pass filter the full-rate recording with the FIR filter h
filtered = sps.lfilter(h, [1], audiodata, axis=0)
# Keep every decimation_factor-th sample, starting with the first
decimated = filtered[::decimation_factor]

### Create players for the original and resampled data
This generates the players that can be used when allowing the user to play back the audio file

In [None]:
# Player for the recording at the original sampling rate
original = ipd.Audio(audiodata.T, rate=sampling_rate, autoplay=False)
# Player for the decimated signal, played at the reduced sampling rate
resampled = ipd.Audio(
    decimated.T,
    rate=sampling_rate/decimation_factor,
    autoplay=False,
)

### MVSE analysis parameters

In [None]:
# Maximum correlation lag passed to MVSE for the full-rate signals
p = 2822
# Number of frequency points in each spectral estimate
fft_size = 8192

### Plot and play the original data
First, analyse the audio data to produce a spectral plot of its frequency content.  Note that this may be slow as the spectral estimate is operating on a lot of data

In [None]:
# Frequency (Hz) of each FFT bin at the original sampling rate
frequency = np.arange(fft_size) * sampling_rate / fft_size
# Spectral estimate of the original recording
Original_Px = MVSE(p, fft_size, audiodata)

Now initiate the player for the original sound file, as well as displaying the spectral estimate.

In [None]:
# Spectrum of the original recording
plt.figure(figsize=(16, 8))
plt.rcParams['font.size'] = 16

plt.plot(frequency, Original_Px)

plt.title('Original input')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Magnitude (dB)')
# Show only the band from 0 Hz up to half the sampling rate
plt.xlim(0, sampling_rate/2)
plt.show()

# The bare expression makes the notebook display the audio player
original

### Plot and play the decimated data
As above, analyse the audio data, and produce a player.  Here the sampling rate is lower than the input by the desired subsampling factor

In [None]:
# Frequency axis (Hz) for the decimated signal: the bins of the original
# axis scaled down by the decimation factor
decimated_frequency = frequency / decimation_factor
# Spectral estimate of the decimated signal, with the maximum correlation
# lag reduced by the decimation factor (rounded up)
Decimated_Px = MVSE(
    int(np.ceil(p/decimation_factor)),
    fft_size,
    decimated,
)

In [None]:
# Spectrum of the decimated signal
plt.figure(figsize=(16, 8))
plt.rcParams['font.size'] = 16

plt.plot(decimated_frequency, Decimated_Px)

plt.title('Decimated input')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Magnitude (dB)')
# Show only the band from 0 Hz up to half the reduced sampling rate
plt.xlim(0, sampling_rate/(2*decimation_factor))
plt.show()

# The bare expression makes the notebook display the audio player
resampled

### Now interpolate back again to demonstrate this process
To demonstrate the effect of downsampling followed by upsampling, we now upsample the data back to the original sampling rate.  This involves first creating a vector with zeros between all of the data samples.

In [None]:
# Zero-stuffing: a vector decimation_factor times longer than the decimated
# signal, with the decimated samples placed at every decimation_factor-th
# position and zeros in between
interpolated = np.zeros(len(decimated) * decimation_factor)
interpolated[::decimation_factor] = decimated

Then a filter is applied.  We need to scale the filter by the upsampling factor in order to preserve the magnitude of the signal.

In [None]:
# Low-pass filter the zero-stuffed signal; the filter gain is scaled by the
# upsampling factor to preserve the magnitude of the signal
interpolated = sps.lfilter(decimation_factor*h, [1], interpolated, axis=0)

### Plot and play interpolated input

In [None]:
# Player for the signal restored to the original sampling rate
upsampled = ipd.Audio(interpolated.T, rate=sampling_rate, autoplay=False)
# Spectral estimate of the interpolated signal
Upsampled_Px = MVSE(p, fft_size, interpolated)

In [None]:
# Spectrum of the interpolated signal
plt.figure(figsize=(16, 8))
plt.rcParams['font.size'] = 16

plt.plot(frequency, Upsampled_Px)

plt.title('Interpolated input')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Magnitude (dB)')
# Show only the band from 0 Hz up to half the sampling rate
plt.xlim(0, sampling_rate/2)
plt.show()

# The bare expression makes the notebook display the audio player
upsampled

### Plot the time series
For interest, the original, decimated and upsampled time series are shown in the plot below.  Note that there are small differences in the upsampled signal.  These are due to the removal of high frequency components in the signal.

In [None]:
# Time window (in seconds) of the recording to display
start_t = 1
end_t = 1.002

plt.figure(figsize = (16, 8))
plt.rcParams.update({'font.size': 16})

# NOTE: plt.stem is called without the use_line_collection argument.  It
# was deprecated in Matplotlib 3.6 and removed in 3.8, where passing it
# raises a TypeError; the line-collection behaviour is the default from
# Matplotlib 3.3 onwards.

# Plot original samples
t1_whole = np.arange(0, (len(audiodata))) / sampling_rate
index1 = np.nonzero((t1_whole >= start_t) & (t1_whole <= end_t))
t1 = t1_whole[index1]
stem1 = audiodata[index1]

(markerLines, stemLines, baseLines) = plt.stem(t1, stem1,
                                               label = 'Original samples')
plt.setp(baseLines, color = 'black', linewidth=1)
markerLines.set_markerfacecolor('none')

# Plot decimated samples
# The time axis is shifted back by the delay of one pass through the
# filter, (len(h)-1)/2 samples at the original rate, so that the samples
# line up with the original signal
t2_whole = ((np.arange(0, (len(decimated))) - (len(h)-1)/(2*decimation_factor)) *
      decimation_factor / sampling_rate)
index2 = np.nonzero((t2_whole >= start_t) & (t2_whole <= end_t))
t2 = t2_whole[index2]
stem2 = decimated[index2]

(markerLines, stemLines, baseLines) = plt.stem(t2, stem2,
                                               label = 'Decimated samples')
plt.setp(baseLines, color = 'black', linewidth=1)
plt.setp(stemLines, color = 'black', linewidth=2)
plt.setp(markerLines, color = 'black', linewidth=2)
markerLines.set_markerfacecolor('none')

# Plot Interpolated samples
# The signal has passed through the filter twice (decimation and
# interpolation), so the time axis is shifted back by len(h)-1 samples
t3_whole = (np.arange(0, (len(interpolated)))-len(h)+1) / sampling_rate
index3 = np.nonzero((t3_whole >= start_t) & (t3_whole <= end_t))
t3 = t3_whole[index3]
stem3 = interpolated[index3]

(markerLines, stemLines, baseLines) = plt.stem(t3, stem3,
                                               label = 'Interpolated samples')
plt.setp(baseLines, color = 'black', linewidth=1)
plt.setp(stemLines, color = 'orange', linewidth=1)
plt.setp(markerLines, color = 'orange', linewidth=1)
markerLines.set_markerfacecolor('none')

plt.xlim([start_t,end_t])
plt.xlabel('Time (s)', fontsize = 16)
plt.ylabel('Amplitude', fontsize = 16)
plt.title('Time domain', fontsize = 16)
# The trailing semicolon suppresses the notebook's text output for the legend
plt.legend(prop={'size': 15});

© The University of Edinburgh: Produced by D. Laurenson, School of Engineering. Initial code conversion by Xing Zixiao.