# Analysis
This notebook is used to analyze sound files and extract spectra, harmonics, formant, etc.

In [19]:
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
import sounddevice as sd
import scipy
%matplotlib qt5

In [None]:
# helpers to play/write/read audio
fs=44100

def play_audio(y):
    sd.play(y,fs)

def write_audio(y, filename):
    sf.write(filename+'.wav',y,fs)

def read_audio(filename):
    y, fs = sf.read(filename +'.wav')
    return y

def graph_signal(y, start=None, end=None):    
    plt.figure()
    x = np.arange(len(y))/fs
    plt.plot(x, y)
    plt.xlim(start, end)

def graph_spectrum(y, distance, title=""):
    Y = scipy.fft.rfft(y)
    plt.figure()
    Y = abs(Y)
    peaks, _ = scipy.signal.find_peaks(Y, distance=distance)
    plt.title(title)
    plt.plot(Y)
    plt.plot(peaks, Y[peaks], "x")

    # also graph multiples of the fundamental frequency
    space = peaks[1]
    a = (np.arange(20) + 1) * space
    plt.plot(a, np.zeros_like(a), '2', color='red')

# convert to mono by taking left channel (arbitrary)
def to_mono(y):
    return y[:,0]


## Visualize Vowel Formants

In [None]:
distance = 300
for v in ['a', 'e', 'i', 'o', 'u']:
    y = read_audio(f"./samples/vowel/{v}")
    y = to_mono(y)
    graph_spectrum(y, distance, title=v)

0.5571287870407104
0.282989501953125
0.4012455940246582
0.3316342830657959
0.4101555347442627


From the graphs visualized above, we can see each vowel has different levels in harmonics. The harmonics die off after the 15th harmonic, so we will just use the first 17 harmonics. A few of the vowels have a noticable second formant around the 14/15th harmonic.

Note that the peaks returned by findpeaks include the 0th harmonic, which we ignore.

In [None]:
harmonic_levels = {}
num_harmonics = 17

for v in ['a', 'e', 'i', 'o', 'u']:
    Y = scipy.fft.rfft(y)
    Y = abs(Y)
    peaks, _ = scipy.signal.find_peaks(Y, distance=distance)
    peaks = peaks[1:(num_harmonics + 1)]
    harmonic_levels[v] = Y[peaks]

print(harmonic_levels)

{'a': array([4.33167806e+03, 2.73146636e+03, 2.07260544e+02, 1.15662739e+02,
       2.13538243e+02, 7.35606420e+01, 1.46386735e+01, 9.77919133e+00,
       7.21990105e+00, 5.32806084e+00, 3.81312747e+00, 4.21666705e+00,
       3.63031098e+00, 1.26496765e+01, 3.29919744e+00, 2.62974010e+00,
       2.28709153e+00]), 'e': array([4.33167806e+03, 2.73146636e+03, 2.07260544e+02, 1.15662739e+02,
       2.13538243e+02, 7.35606420e+01, 1.46386735e+01, 9.77919133e+00,
       7.21990105e+00, 5.32806084e+00, 3.81312747e+00, 4.21666705e+00,
       3.63031098e+00, 1.26496765e+01, 3.29919744e+00, 2.62974010e+00,
       2.28709153e+00]), 'i': array([4.33167806e+03, 2.73146636e+03, 2.07260544e+02, 1.15662739e+02,
       2.13538243e+02, 7.35606420e+01, 1.46386735e+01, 9.77919133e+00,
       7.21990105e+00, 5.32806084e+00, 3.81312747e+00, 4.21666705e+00,
       3.63031098e+00, 1.26496765e+01, 3.29919744e+00, 2.62974010e+00,
       2.28709153e+00]), 'o': array([4.33167806e+03, 2.73146636e+03, 2.07260544e+0