# Next week: visualizing frequency using a spectrogram

In [None]:
import matplotlib.pyplot as plt
import IPython.display as ipd
import pandas as pd
import numpy as np
import librosa
import scipy
import IPython.display as ipd

# Path (relative to the working directory) of the demo clip; the cells
# below load their audio from this same `audio_path` variable.
audio_path = "asset/log_scale_perception.wav"
# Bare expression kept as the last statement of the cell so the notebook
# renders the returned Audio object (an inline player for the clip).
ipd.Audio(audio_path)

## Method 1: Librosa

In [None]:
# Load the clip at its native sample rate (sr=None disables resampling).
audio, sr = librosa.load(audio_path, sr=None)

# parameters of the short-time Fourier transform:
# (algorithm that creates the spectrogram)
win_length = 2**11  # number of samples in each window
n_fft = win_length  # FFT size equals the window size
hop_length = win_length // 4  # step between consecutive windows
window = scipy.signal.get_window("triang", Nx=win_length)

# Complex STFT matrix: one row per frequency bin, one column per frame.
S = librosa.stft(
    audio,
    n_fft=n_fft,
    hop_length=hop_length,
    win_length=win_length,
    window=window,
)
S_magnitude = np.abs(S)  # |a+bi| = sqrt(a^2 + b^2)
# Decibels relative to the loudest bin, so the maximum maps to 0 dB.
S_db = librosa.amplitude_to_db(S_magnitude, ref=np.max)

# Without `extent`, imshow labels the axes with frame / bin indices, which
# contradicts the "sec" and "Hz" labels below. Map the image onto physical
# units instead: columns span the clip duration, rows span 0 Hz .. Nyquist.
duration_sec = audio.shape[-1] / sr
nyquist_hz = sr / 2

im = plt.imshow(
    S_db,
    cmap="inferno",
    aspect="auto",
    origin="lower",
    extent=[0, duration_sec, 0, nyquist_hz],
)
plt.colorbar(im, format="%+2.0f dB")
plt.xlabel("Time (sec)")
plt.ylabel("Frequency (Hz)")
plt.show()

## Method 2: Scipy (what we'll use)

In [None]:
# Load the clip at its native sample rate (sr=None disables resampling).
audio, sr = librosa.load(audio_path, sr=None)

# parameters of the short-time Fourier transform:
# (algorithm that creates the spectrogram)
nperseg = win_length = 2**11  # number of samples in each window
nfft = n_fft = win_length  # FFT size equals the window size
hop_length = win_length // 4  # step between consecutive windows
window = scipy.signal.get_window("triang", Nx=win_length)

# scipy.signal.stft also uses the sample rate to output
# frequency (in Hz) and time (in seconds) vectors,
# corresponding to the rows and columns of the stft matrix
# in "s_scipy"
fs = sr
# scipy expresses the step as an overlap between consecutive windows
noverlap = nperseg - hop_length

# Pass the triangular window built above (it was previously computed but
# ignored in favour of a hard-coded "hann"), so this cell analyses the
# signal with the same window as the librosa version.
freq_scipy, time_scipy, s_scipy = scipy.signal.stft(
    audio,
    fs=fs,
    window=window,
    nfft=nfft,
    nperseg=nperseg,
    noverlap=noverlap,
)

print(f"freq vector shape: {freq_scipy.shape}")
print(f"time vector shape: {time_scipy.shape}")
print(f"stft matrix shape: {s_scipy.shape}")

# Decibels relative to the loudest bin, so the maximum maps to 0 dB.
s_scipy_db = librosa.amplitude_to_db(np.abs(s_scipy), ref=np.max)

# Use the time / frequency vectors returned by scipy to put the image on
# physical axes; without `extent` imshow would show frame / bin indices
# under the "sec" and "Hz" labels.
im = plt.imshow(
    s_scipy_db,
    cmap="inferno",
    aspect="auto",
    origin="lower",
    extent=[time_scipy[0], time_scipy[-1], freq_scipy[0], freq_scipy[-1]],
)
plt.colorbar(im, format="%+2.0f dB")
plt.xlabel("Time (sec)")
plt.ylabel("Frequency (Hz)")
plt.show()

## Recording from Microphone

In [None]:

import scipy.io.wavfile
import numpy as np
import pyaudio

def record_audio(n_seconds: int = 5) -> str:
    """
    Record audio using computer microphone.

    Waits for the user to press Enter, captures mono 16-bit audio at
    48 kHz from the default input device, and writes it to
    "microphone_sample.wav" in the current working directory.

    Args:
        n_seconds: Length of the recording in seconds. The capture is made
            in whole 1024-sample chunks, so the saved clip can be up to one
            chunk shorter than requested.

    Returns:
        Path of the WAV file that was written.
    """
    chunk = 1024  # samples read from the device per call
    bit_depth = pyaudio.paInt16
    n_channels = 1
    sample_rate = 48000
    outfile = "microphone_sample.wav"

    input("Press Enter to begin recording 🎤")
    # "\r" returns the cursor to the line start so the final status message
    # overwrites this one.
    print("🎤 Listening for music", end="\r")

    p = pyaudio.PyAudio()
    try:
        stream = p.open(
            format=bit_depth,
            channels=n_channels,
            rate=sample_rate,
            input=True,
            frames_per_buffer=chunk,
        )
        try:
            n_chunks = int(sample_rate / chunk * n_seconds)
            frames = [stream.read(chunk) for _ in range(n_chunks)]
        finally:
            # Release the input device even if a read fails part-way.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The raw bytes are 16-bit samples (paInt16), hence dtype=np.int16.
    audio_np = np.frombuffer(b"".join(frames), dtype=np.int16)
    scipy.io.wavfile.write(outfile, sample_rate, audio_np)
    print(f"✅ Recording saved to {outfile}", end="\n")

    return outfile

# Uncomment ONE of the lines below to analyse a microphone recording
# instead of the clip already referenced by `audio_path`:
#   - the first records a new sample,
#   - the second reuses a previously saved recording.
#audio_path = record_audio()

#audio_path = "microphone_sample.wav"

# Load at the native sample rate (sr=None disables resampling).
audio, sr = librosa.load(audio_path, sr=None)
# STFT with librosa's default parameters, converted to decibels relative
# to the loudest bin.
S_db = librosa.amplitude_to_db(np.abs(librosa.stft(audio)), ref=np.max)

# Map the image onto physical units so the axis labels are truthful:
# columns span the clip duration, rows span 0 Hz .. Nyquist. Without
# `extent`, imshow would show frame / bin indices instead.
duration_sec = audio.shape[-1] / sr
nyquist_hz = sr / 2

im = plt.imshow(
    S_db,
    cmap="inferno",
    aspect="auto",
    origin="lower",
    extent=[0, duration_sec, 0, nyquist_hz],
)
plt.colorbar(im, format="%+2.0f dB")
plt.xlabel("Time (sec)")
plt.ylabel("Frequency (Hz)")
plt.show()
