# Welcome to PYNQ Audio


## Install Dependencies

In [None]:
# Refresh the apt package index, then install the command-line audio tools
# (sox, ffmpeg, flac) followed by the Python packages imported in the next
# cell (gtts, pysndfx, speech_recognition, pydub).
!apt update
!apt install -y sox ffmpeg flac
!pip3 install gtts pysndfx speechRecognition pydub

In [None]:
import speech_recognition as sr
from pynq.overlays.base import BaseOverlay
from pysndfx import AudioEffectsChain
from gtts import gTTS
from pydub import AudioSegment
from IPython.display import Audio

## Create new audio object

In [None]:
# Instantiate the base overlay from `base.bit` and keep a handle to its
# `audio` attribute; every later cell drives the audio through `pAudio`.
base = BaseOverlay("base.bit")
pAudio = base.audio

## Bypass audio
Users can select either `LINE_IN` or `HP+MIC` as the input port.

The example below selects the microphone (`MIC`) as the input. To choose `MIC`:
```python
pAudio.select_microphone()
```
or choose `LINE_IN`:
```python
pAudio.select_line_in()
```

**Try it first with your earbuds out**

In [None]:
# Use the microphone as the input port; call pAudio.select_line_in()
# instead to take input from LINE_IN.
pAudio.select_microphone()

In [None]:
# Run the audio bypass for 5 seconds.
# NOTE(review): presumably this routes the selected input straight to the
# output for the given duration -- confirm against the PYNQ audio API.
pAudio.bypass(seconds=5)

## Record
Record a 5-second sample and save it into a file.

In [None]:
# Record a 5-second sample, then save it to a WAV file so the later cells
# can reload and process it.
pAudio.record(5)
pAudio.save("data/recording.wav")

## Load and play
Load a sample and play the loaded sample.

In [None]:
# Load the previously saved recording, then play the loaded sample.
pAudio.load("data/recording.wav")
pAudio.play()

## Adding effects
Create a new file with effects added. [Information for effects can be found here](https://github.com/carlthome/python-audio-effects/blob/master/pysndfx/dsp.py)

In [None]:
# Build an effects chain that adds a delay effect and a reverb effect;
# the chain is applied to a file in the next cell.
fx = AudioEffectsChain().delay().reverb()

In [None]:
# Run the effects chain on the recording and create a new file with the
# effects added.
# NOTE(review): presumably the first argument is the input file and the
# second the output file -- confirm against the pysndfx documentation.
fx("data/recording.wav","data/recording.ogg")

## Play in notebook
Since the samples are in 24-bit PCM format, 
users can play the audio directly in the notebook.
The cell below plays the processed file, `data/recording.ogg`.

In [None]:
# Show an inline audio player for the effects-processed recording.
Audio("data/recording.ogg")

## Using Text to Speech
You can use this wrapper for Google's text-to-speech service to produce natural sounding speech.

In [None]:
# Synthesize speech for `words` with Google's text-to-speech service and
# store it as both MP3 and WAV under data/.
words = "Hello World"
# Pass the language by keyword: in current gTTS releases the second
# positional parameter is `tld` (the Google domain suffix), not `lang`, so
# a positional "en" would be taken as the domain and the request would fail.
tts = gTTS(words, lang="en", slow=False)
tts.save("data/hello.mp3")
# Convert the MP3 to WAV; the speech-to-text and plotting cells further
# down read "data/hello.wav".
mp3 = AudioSegment.from_mp3("data/hello.mp3")
mp3.export("data/hello.wav", format="wav")

In [None]:
# Show an inline audio player for the synthesized speech.
Audio("data/hello.wav")

## Using Speech to Text
You can use this wrapper for Google's speech recognition service to extract words from an audio file.

In [None]:
# Create the recognizer that the next cell uses for speech-to-text.
r = sr.Recognizer()

# Open the WAV file as an audio source and read it into `audio`.
# NOTE(review): record() without a duration presumably reads the whole
# file -- confirm against the SpeechRecognition documentation.
with sr.AudioFile("data/hello.wav") as source:
    audio = r.record(source)

In [None]:
# Use Google's speech recognition service to extract the words from the
# loaded audio; the result is shown as the cell output.
r.recognize_google(audio)

## Plotting PCM data

Users can display the audio data in notebook:

1. Plot the audio signal's amplitude over time.
2. Plot the frequency spectrum of the audio signal.
3. Plot the spectrogram of the audio signal (frequency spectrum over time).

The next cell reads the saved audio file and converts it into a `numpy` array.
Note that if the audio sample width is not a standard `numpy` integer size,
additional processing is required. In the following example, the `sample_width`
is read from the wave file itself (for example, 24-bit dual-channel PCM audio
has a `sample_width` of 3 bytes).

In [None]:
%matplotlib inline
import wave
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.fftpack import fft

wav_path = "data/hello.wav"

# Read the stream parameters from the WAV header, then all of the raw
# PCM bytes.
with wave.open(wav_path, 'r') as wav_file:
    num_channels = wav_file.getnchannels()
    sample_width = wav_file.getsampwidth()
    sample_rate = wav_file.getframerate()
    num_frames = wav_file.getnframes()
    raw_frames = wav_file.readframes(-1)

# Samples may be narrower than 4 bytes (e.g. 3 bytes for 24-bit audio), so
# each one is copied into the low bytes of a 4-byte slot.
raw_bytes = np.frombuffer(raw_frames, dtype=np.uint8)
temp_buffer = np.empty((num_frames, num_channels, 4), dtype=np.uint8)
temp_buffer[..., :sample_width] = raw_bytes.reshape(
    -1, num_channels, sample_width)

# Sign-extend: fill the unused high bytes with 0xFF when the top bit of the
# sample's most significant byte is set, and with 0x00 otherwise.
temp_buffer[..., sample_width:] = (
    (temp_buffer[..., sample_width - 1:sample_width] >> 7) * 255)

# Reinterpret each 4-byte slot as one little-endian int32, giving an array
# of shape (num_frames, num_channels).
frames = temp_buffer.view('<i4').reshape(num_frames, num_channels)

### 1. Amplitude over time

In [None]:
# Plot each channel's amplitude against time, one figure per channel.
#
# All channels share one time axis: sample k occurs at k / sample_rate
# seconds. Building it from integer sample indices guarantees exactly
# num_frames points. np.arange with a fractional step can come out one
# element longer or shorter because of floating-point rounding, which would
# make plt.plot fail on mismatched x/y lengths.
time_axis = np.arange(num_frames) / sample_rate

for channel_index in range(num_channels):
    plt.figure(num=None, figsize=(15, 3))
    plt.title('Audio in Time Domain (Channel {})'.format(channel_index))
    plt.xlabel('Time in s')
    plt.ylabel('Amplitude')
    plt.plot(time_axis, frames[:, channel_index])
    plt.show()

### 2. Frequency spectrum

In [None]:
# Plot each channel's magnitude spectrum on a logarithmic frequency axis
# restricted to 20 Hz - 20 kHz, one figure per channel.
for channel_index in range(num_channels):
    plt.figure(num=None, figsize=(15, 3))
    plt.title('Audio in Frequency Domain (Channel {})'.format(channel_index))
    plt.xlabel('Frequency in Hz')
    plt.xscale('log')
    plt.ylabel('Magnitude')
    temp = fft(frames[:, channel_index])
    # Keep bins 1 .. N/2 - 1: this drops the DC bin (index 0) and the upper
    # half of the FFT output.
    yf = temp[1:len(temp)//2]
    # FFT bin k corresponds to k * sample_rate / N Hz. The axis must start at
    # bin 1 to line up with yf; starting it at 0 Hz would shift every bin.
    xf = np.arange(1, len(temp)//2) * sample_rate / len(temp)
    plt.xlim(20,20000)
    plt.plot(xf, abs(yf))
    plt.show()

### 3. Frequency spectrum over time
Use the `classic` plot style for better display.

In [None]:
# Plot a spectrogram (frequency content over time) per channel.
#
# These two settings do not depend on the channel, so they are applied once
# before the loop.
# Silence NumPy divide-by-zero / invalid-value warnings for this plot.
np.seterr(divide='ignore', invalid='ignore')
# Use the `classic` plot style for better display.
matplotlib.style.use("classic")

for channel_index in range(num_channels):
    plt.figure(num=None, figsize=(15, 3))
    plt.title('Signal Spectrogram (Channel {})'.format(channel_index))
    plt.xlabel('Time in s')
    # NOTE(review): specgram may re-autoscale the axes when it draws; if
    # these limits do not take effect, move this call after plt.specgram
    # -- confirm against the matplotlib specgram documentation.
    plt.ylim(20,20000)

    plt.ylabel('Frequency in Hz')
    plt.specgram(frames[:, channel_index], Fs=sample_rate)
    # Render each figure explicitly, as the other plotting cells do.
    plt.show()