In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
os.chdir(os.path.join(os.getcwd(),".."))

from pathlib import Path
import numpy as np
from scipy import signal
from scipy.signal import hann
from scipy.io import wavfile
from utils.functions import *
import matplotlib.pyplot as plt
%matplotlib widget
import ipywidgets as widgets
import IPython.display as ipd
from ipywidgets import interact, interactive, interactive_output, interact_manual, HBox, VBox

## Μέρος 1

Δημιουργία δύο ημιτονικών σημάτων.

In [3]:
def create_sinusoid(rate, f_s, s_m, duration=1):
    """
    Function that generates simple sinusoid signals.
        rate(int): The sampling rate of the signal
        f_s(int or float): The sinusoid frequency
        s_m(int or float): The signal magnitude
        duration(int): The signal duration in seconds
    """
    time = np.linspace(0,duration, num=rate*duration)
    s = s_m*np.sin(2*np.pi*f_s*time)
    w = hann(rate*duration)
    y = s*w
    return y

In [4]:
sw441 = create_sinusoid(44100, 1e3, 1)
sw882 = create_sinusoid(88200, 1e3, 1)

## Σύγκριση τύπων Dither

Δημιουργία διαδραστικής εφαρμογής για την απεικόνιση των αποτελεσμάτων εφαρμογής διαφορετικών τύπων Dither και Noise Shaping στα σήματα.  

In [5]:
def plot_spectrum_interactive(qbits, dith, ns):
#     fig = plt.gcf()
#     _ = plt.clf()
    fig, ax = plt.subplots(1, 1, figsize=(8, 6), num="interactive_app", clear=True)
    out00 = quantise_audio(sw441,qbits,1,-1,ns,dith)
    ax.clear()
    ax = plotSpectrum(out00, 44100, db=True, logx=True, ax=ax)
    ax.set_xlabel("Frequency (Hz)")
    ax.set_ylabel("Magnitude (dB)")
    ax.set_ylim([-200,5])
    ax.set_xlim([1,22050])
    ax.set_title("")
    ax.set_title("Quantization : {0}, Dither : {1},  Noise Shaping: {2}".format(qbits,dith,ns))
    ax.grid()
    plt.show()

In [6]:
qbits_wdgt = widgets.Dropdown(options=[16,8], value=16, description="Quantization bits :")
dith_wdgt = widgets.Dropdown(
                options=[("No dither", 0), ("RPDF", 1), ("TPDF", 2), ("HP-TPDF", 3)],
                value=1,
                description="Dither type :",
            )
ns_wdgt = widgets.Dropdown(options=[("No noise shaping", 0), ("2nd Order NS", 2), ("3rd Order NS", 3)], value=0, description="Noise shaping :")
controls = HBox([qbits_wdgt, dith_wdgt, ns_wdgt])

In [7]:
app = interactive_output(plot_spectrum_interactive, {"qbits":qbits_wdgt, "dith":dith_wdgt, "ns":ns_wdgt})

In [8]:
display(controls,app)

HBox(children=(Dropdown(description='Quantization bits :', options=(16, 8), value=16), Dropdown(description='D…

Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': "Canvas(toolbar=Toolbar(toolitems=[('Ho…

In [9]:
Out16_44_rpdf = quantise_audio(sw441,16,1,-1,2,1)
Out16_88_rpdf = quantise_audio(sw882,16,1,-1,2,1)
Out16_44_tpdf = quantise_audio(sw441,16,1,-1,2,2)
Out16_88_tpdf = quantise_audio(sw882,16,1,-1,2,2)

In [10]:
fig01, ax = plt.subplots(1, 2, figsize=(14, 6))
ax[0] = plotSpectrum(Out16_44_rpdf, 44100, db=True, logx=True, ax=ax[0])
ax[0] = plotSpectrum(Out16_88_rpdf, 88100, db=True, logx=True, ax=ax[0])
ax[0].set_xlabel("Frequency (Hz)")
ax[0].set_ylabel("Magnitude (dB)")
ax[0].set_ylim([-200,5])
ax[0].set_xlim([1,22050])
ax[0].set_title("RPDF Dither and 2nd order Noise Shaping")
ax[0].grid()
ax[1] = plotSpectrum(Out16_44_tpdf, 44100, db=True, logx=True, ax=ax[1])
ax[1] = plotSpectrum(Out16_88_tpdf, 88100, db=True, logx=True, ax=ax[1])
ax[1].set_xlabel("Frequency (Hz)")
ax[1].set_ylabel("Magnitude (dB)")
ax[1].set_ylim([-200,5])
ax[1].set_xlim([1,22050])
ax[1].set_title("TPDF Dither and 2nd order Noise Shaping")
ax[1].grid()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Eισαγωγή θορύβου Dither τύπου TPDF

In [11]:
Out16_44=quantise_audio(sw441,16,1,-1,2,2)
Out16_88=quantise_audio(sw882,16,1,-1,2,2)

In [12]:
fig1, ax = plt.subplots(1, 1, figsize=(8, 6))
ax = plotSpectrum(Out16_44, 44100, db=True, logx=True, ax=ax)
ax = plotSpectrum(Out16_88, 88100, db=True, logx=True, ax=ax)
ax.set_xlabel("Frequency (Hz)")
ax.set_ylabel("Magnitude (dB)")
ax.set_ylim([-200,5])
ax.set_xlim([1,22050])
# ax.set_title("No Dither or Noise Shaping")
ax.grid()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Δημιουργία frequency vectors και υπολογισμός FFT

In [13]:
l1 = np.size(Out16_44)
l2 = np.size(Out16_88)

In [14]:
fs1 = 44100
fs2 = 88200
fvec1 = [x for x in np.arange(0,(fs1-(1/l1)), fs1/l1)]
fvec2 = [x for x in np.arange(0,(fs2-(1/l2)), fs2/l2)]

In [15]:
Out16_44_sp = np.abs(np.fft.fft(Out16_44))
Out16_88_sp = np.abs(np.fft.fft(Out16_88))

In [16]:
m = np.max(Out16_44_sp[:(l1 // 2 + 1)])
c1 = np.argmax(Out16_44_sp[:(l1 // 2 + 1)])
print("Max value {0:.5f} at index {1}".format(m,c1))

Max value 11021.09482 at index 1000


In [17]:
m = np.max(Out16_88_sp)
c2 = np.argmax(Out16_88_sp)
print("Max value {0:.5f} at index {1}".format(m,c2))

Max value 22047.92276 at index 1000


### Υπολογισμός Total Harmonic Distortion+Noise (THD+N)

In [18]:
thdn16_44=10*np.log10(calc_thdn(Out16_44_sp,np.array(fvec1),c1))

In [19]:
thdn16_88=10*np.log10(calc_thdn(Out16_88_sp,np.array(fvec2),c2))
thd_n_str = "THD+N (16 bit 44.1 kHz) = {0:.2f}\nTHD+N (16 bit 88.2 kHz) = {1:.2f}".format(thdn16_44, thdn16_88)
print(thd_n_str)

THD+N (16 bit 44.1 kHz) = -46.87
THD+N (16 bit 88.2 kHz) = -52.90


In [21]:
# ax = plotSpectrum(Out16_88, 88200, db=True, logx=True, ax=ax)

In [20]:
# Sets the title at cell 12
fig1.suptitle(thd_n_str, fontsize=14, weight="normal", ha="left")

Text(0.5, 0.98, 'THD+N (16 bit 44.1 kHz) = -46.87\nTHD+N (16 bit 88.2 kHz) = -52.90')

## Κβάντιση με 8 bit

In [21]:
Out8_44=quantise_audio(Out16_44,8,1,-1,0,1);
Out8_88=quantise_audio(Out16_88,8,1,-1,0,1);
l1 = np.size(Out8_44)
l2 = np.size(Out8_88)
fvec1 = [x for x in np.arange(0,(fs1-(1/l1)), fs1/l1)]
fvec2 = [x for x in np.arange(0,(fs2-(1/l2)), fs2/l2)]
Out8_44_sp = np.abs(np.fft.fft(Out8_44))
Out8_88_sp = np.abs(np.fft.fft(Out8_88))
m = np.max(Out8_44_sp)
c1 = np.argmax(Out8_44_sp[:l1 // 2 + 1])
print("Max value {0:.5f} at index {1}".format(m,c1))
c2 = np.argmax(Out8_88_sp[:l2 // 2 + 1])
print("Max value {0:.5f} at index {1}".format(m,c2))

Max value 11020.82645 at index 1000
Max value 11020.82645 at index 1000


In [22]:
print(Out8_88_sp[1001])

11212.5941880289


In [23]:
thdn8_44=10*np.log10(calc_thdn(Out8_44_sp,np.array(fvec1),c1))
thdn8_88=10*np.log10(calc_thdn(Out8_88_sp,np.array(fvec2),c2))

In [24]:
fig2, ax = plt.subplots(1, 1, figsize=(8, 6))
ax = plotSpectrum(Out8_44, 44100, db=True, logx=True, ax=ax)
ax = plotSpectrum(Out8_88, 88200, db=True, logx=True, ax=ax)
ax.set_xlabel("Frequency (Hz)")
ax.set_ylabel("SNR (dB)")
ax.set_ylim([-150,5])
ax.grid()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [25]:
thd_n_8_str = "THD+N (16 bit 44.1 kHz) = {0:.2f}\nTHD+N (16 bit 88.2 kHz) = {1:.2f}".format(thdn8_44, thdn8_88)
print(thd_n_8_str)
fig2.suptitle(thd_n_8_str, fontsize=14, weight="normal", ha="left")

THD+N (16 bit 44.1 kHz) = -41.61
THD+N (16 bit 88.2 kHz) = -45.31


Text(0.5, 0.98, 'THD+N (16 bit 44.1 kHz) = -41.61\nTHD+N (16 bit 88.2 kHz) = -45.31')

# Μέρος Β 

In [26]:
source_audio_dir = Path("source_audio_files")
rate, speech = wavfile.read(Path(source_audio_dir / "speech.wav"))

  


In [27]:
ipd.display(ipd.Audio(data=speech, rate=rate))

In [28]:
print("max : {0:.4f} min : {1:.4f}".format(speech.max(), speech.mean()))

max : 9420.0000 min : -6.9889


In [29]:
speech = speech/32767

In [30]:
print("max : {0:.4f} min : {1:.4f}".format(speech.max(), speech.mean()))

max : 0.2875 min : -0.0002


In [31]:
dataOvs = Interpolate_zeros(speech,2)

In [32]:
firx2 = np.array([-850, 0, 245, 0, -541, 0, 1041, 0, -1865, 0, 3303, 0, -6400, 0, 20670, 32767, 20670, 0, -6400,
0, 3303, 0, -1865, 0, 1041, 0, -541, 0, 245, 0, -850])

In [33]:
from scipy.signal import filtfilt

In [34]:
dataOvsFilt = filtfilt(firx2/32767,1,dataOvs)

In [35]:
data8bitOvs = quantise_audio(dataOvsFilt,8,1,-1,2,0)

In [36]:
data8bit = quantise_audio(speech,8,1,-1,0,0)

In [37]:
print(np.max(speech))

0.28748435926389354


In [38]:
print(np.max(data8bit))

0.2901960784313725


In [39]:
fig3, ax = plt.subplots(1, 1, figsize=(8, 6))
ax = plotSpectrum(dataOvsFilt, 44100, db=True, logx=False, ax=ax)
ax = plotSpectrum(dataOvsFilt, 88200, db=True, logx=False, ax=ax)
ax.set_xlabel("Frequency (Hz)")
ax.set_ylabel("SNR (dB)")
ax.set_ylim([-150,5])
ax.legend(["44100", "88200"])
ax.grid()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [41]:
output_directory = Path("generated_audio_files_python/askisi3")
wavfile.write(Path(output_directory / "out88.wav"), 88200, np.array(data8bitOvs*1e5, dtype=np.int16))

In [42]:
rate, out88 = wavfile.read(Path(output_directory / "out88.wav"))
ipd.display(ipd.Audio(data=out88, rate=88200))

In [43]:
fig4 = plt.figure()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [44]:
plt.cla()
plt.plot(speech)
plt.plot(dataOvsFilt)
plt.plot(data8bitOvs)
# plt.plot(data8bit)

[<matplotlib.lines.Line2D at 0x7f991dbd0910>]

In [45]:
plt.legend(["speech", "dataOvsFilt", "data8bitOvs"])

<matplotlib.legend.Legend at 0x7f991dbf4f90>