In [1]:
# IPython directive: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from essentia.standard import Resample, MonoLoader, MonoWriter
import numpy as np
from scipy.io.wavfile import read as wavread, write as wavwrite
from matplotlib import pyplot as plt
import pyaudio
import wave
from scipy import signal
from scipy.interpolate import interp1d
from numpy.fft import fft

%matplotlib inline

ImportError: No module named essentia.standard

In [None]:
# Notebook-wide constants: the FFT size handed to the plotting helper and the
# location of the recording under study.
NFFT = 1024
FILE_NAME = "4.wav"
FILE_PATH = "Data/wave/{}".format(FILE_NAME)

# Load the original speech. wavread yields the sample rate followed by the
# sample array, so unpack them in that order.
fs, speech = wavread(FILE_PATH)

In [None]:
# Take a look at the unaltered speech in the time and frequency domains while it plays.
# Notice the higher frequencies present during fricatives.
# The title is passed positionally *before* nfft: Python rejects a positional
# argument that follows a keyword argument with a SyntaxError. This also matches
# the (signal, title, keyword options) call shape used by the other cells.
observe_signal(speech, "Simple Speech Signal", nfft=NFFT)
play_audio(FILE_PATH)

In [None]:
# Now let's decimate the signal and see what happens.
decimation_factor = 2

# Floor division keeps the new sample rate an integer regardless of whether
# true division is in effect; the WAV writer expects an integer rate.
dec_fs = fs // decimation_factor

# signal.decimate applies an anti-aliasing filter before downsampling, and the
# filtered output can overshoot the 16-bit range. Saturate to the int16 limits
# before casting so that overshoot clips instead of wrapping around.
int16_limits = np.iinfo(np.int16)
decimated_speech = np.clip(
    signal.decimate(speech, decimation_factor),
    int16_limits.min,
    int16_limits.max,
).astype("int16")

# Take a look at the changes.
observe_signal(decimated_speech, "Decimated Speech Signal (M = {})".format(decimation_factor), fs=dec_fs)

# Save off the audio and play it back.
dec_file_name = "decimated_{}".format(FILE_NAME)
wavwrite(dec_file_name, dec_fs, decimated_speech)
play_audio(dec_file_name)

In [None]:
# Now let's resample the speech to 6.4 kHz, the transmission rate used over the public switched telephone network.
# Among other methods, we can perform this resampling by interpolation by a factor of 2 followed by
# decimation by a factor of 5.
# NOTE(review): narrowband PSTN codecs such as G.711 sample at 8 kHz; confirm
# that 6.4 kHz is really the intended target rate.
phone_up = 2
phone_down = 5

# Both resampling stages produce values that can overshoot the 16-bit range;
# saturate before each int16 cast so overshoot clips instead of wrapping around.
int16_limits = np.iinfo(np.int16)
interpolated = np.clip(
    signal.resample(speech, len(speech) * phone_up),
    int16_limits.min,
    int16_limits.max,
).astype("int16")
phone_speech = np.clip(
    signal.decimate(interpolated, phone_down),
    int16_limits.min,
    int16_limits.max,
).astype("int16")

# Derive the output rate from the actual input rate and the 2/5 ratio rather
# than hard-coding it: this is 6400 Hz for a 16 kHz source, and stays correct
# (so saved audio plays at the right speed) for any other source rate.
phone_fs = fs * phone_up // phone_down


In [None]:
# Plot the telephone-rate speech in the time and frequency domains.
phone_title = "Speech Signal Resampled for Telephone Transmission"
observe_signal(phone_speech, phone_title, fs=phone_fs)

# Write the resampled speech to disk, then play the saved file back.
new_file_name = "phone_{}".format(FILE_NAME)
wavwrite(filename=new_file_name, rate=phone_fs, data=phone_speech)
play_audio(new_file_name)

In [None]:
# Observe the difference in samples due to the change in sampling rate, while the duration remains unchanged.
# Force float division: with integer operands, Python 2's `/` floors the result,
# which would truncate both durations to whole seconds.
original_duration = float(len(speech)) / fs
resampled_duration = float(len(phone_speech)) / phone_fs

# Sample count implied by the resampled duration; rounded back to an integer
# because the float product may carry rounding error.
resampled_signal_length = int(round(resampled_duration * phone_fs))

# A single parenthesised argument prints identically under Python 2 and 3.
print("Original Speech: {}s, {} samples @ {} Hz".format(original_duration, len(speech), fs))
print("Resampled Speech: {}s, {} samples @ {} Hz".format(resampled_duration, len(phone_speech), phone_fs))