In [1]:
from wave import open

import numpy as np
from matplotlib import pyplot as plt
from matplotlib.animation import FuncAnimation
from matplotlib.axes import Axes
from scipy.fft import rfft, rfftfreq
from scipy.signal import stft

In [2]:
# Base name of the track: selects ./files/<song_name>.wav as the input and is reused
# for the figure title and for the names of the saved image/animation files.
song_name = "Always - Bon Jovi"

In [3]:
# NOTE: `open` here is `wave.open` (imported via `from wave import open`), which
# shadows the built-in `open`; in "rb" mode it returns a reader for the WAV file.
file = open(f"./files/{song_name}.wav", mode="rb")

In [4]:
# Fetch the WAV header in a single call; the first four fields of the returned
# tuple are (nchannels, sampwidth, framerate, nframes).
channels_count, sample_width, frame_rate, frames_count = file.getparams()[:4]

# Playback duration follows from the frame count and the frames-per-second rate.
length_in_seconds = frames_count / frame_rate

print(f"Number of channels: {channels_count}")
print(f"Sample width (in bytes): {sample_width}")
print(f"Frame rate (sampling frequency): {frame_rate}")
print(f"Number of frames: {frames_count}")
print(f"Length (in seconds): {length_in_seconds}")

Number of channels: 2
Sample width (in bytes): 2
Frame rate (sampling frequency): 44100
Number of frames: 15601536
Length (in seconds): 353.77632653061227


In [5]:
# Read every frame of the track in one call. The frame count is passed explicitly
# because a negative count is not part of the documented `readframes` contract.
raw_signal = file.readframes(file.getnframes())

# All audio data is now in memory, so release the underlying file handle.
file.close()

In [6]:
# Each sample consists of `sample_width` bytes that must be interpreted as a single number.
# WAV PCM data is little-endian; 8-bit samples are unsigned, wider ones are signed.
# Widths without a matching NumPy integer type (e.g. 3 bytes) are rejected explicitly.
pcm_dtypes = {1: "u1", 2: "<i2", 4: "<i4"}
if sample_width not in pcm_dtypes:
    raise ValueError(f"Unsupported sample width: {sample_width} bytes")
signal = np.frombuffer(raw_signal, dtype=pcm_dtypes[sample_width])

In [7]:
# Sanity check: interleaved audio holds one sample per channel per frame, so the
# decoded array should contain exactly frames * channels values.
print(
    f"Expected signal length: {frames_count * channels_count}",
    f"Actual signal length: {signal.size}",
    sep="\n",
)

Expected signal length: 31203072
Actual signal length: 31203072


In [8]:
def visualize_audio(axes: Axes, channel_signal: np.ndarray) -> None:
    """Plot the waveform of one audio channel against time in seconds.

    Args:
        axes: Matplotlib axes to draw on.
        channel_signal: Samples of a single channel, in playback order.

    The time axis is derived from the notebook-level `frame_rate`
    (frames per second).
    """
    # Sample i is played at i / frame_rate seconds. `endpoint=False` keeps that
    # spacing exact; including the endpoint would stretch the axis by one sample.
    samples_count = len(channel_signal)
    time_series = np.linspace(
        start=0,
        stop=samples_count / frame_rate,
        num=samples_count,
        endpoint=False,
    )

    # Adjust the horizontal axis labels to ensure they are evenly spaced
    # (one tick every `x_step` seconds).
    x_step = 5
    x_ticks = np.arange(time_series[0], time_series[-1], x_step)
    axes.set_xticks(x_ticks)

    # Remove the vertical axis labels
    axes.set_yticks([])

    axes.plot(time_series, channel_signal, linewidth=0.5)

In [9]:
# Waveform overview: one subplot per channel, saved to images/<song_name>.png.
fig = plt.figure(1)
fig.set_figwidth(fig.get_figwidth() * 30)

fig.suptitle(song_name)
fig.supxlabel("Time (in seconds)")

# `squeeze=False` always yields a 2-D array of axes, so a mono file (a single
# subplot) can be indexed the same way as a multi-channel one.
subplots = fig.subplots(nrows=channels_count, ncols=1, squeeze=False)[:, 0]

# Samples are interleaved, so channel i is every `channels_count`-th value from i.
for i, channel_axes in enumerate(subplots):
    visualize_audio(channel_axes, signal[i::channels_count])

# Must run after the axes exist; on an empty figure there is nothing to arrange.
fig.tight_layout()

fig.savefig(f"images/{song_name}.png", bbox_inches="tight")
plt.close(fig)

![Always - Bon Jovi](images/Always%20-%20Bon%20Jovi.png)

In [10]:
# Analysis window: a 10 ms slice starting 1 s into the track.
# `start`, `end` and `length` are all expressed in frames.
frames_analysis_length = frame_rate // 100  # 10ms
start = frame_rate  # Start from 1s
end = start + frames_analysis_length  # End at 1s 10ms
length = end - start

print(f"Start frequency analysis at: {start}")
# This line reports the end of the window, so it must not be labelled "Start".
print(f"End frequency analysis at: {end}")
print(f"Length: {length}")

Start frequency analysis at: 44100
Start frequency analysis at: 44541
Length: 441


In [11]:
def visualize_frequencies(axes: Axes, x: np.ndarray, y: np.ndarray) -> None:
    """Draw `y` against `x` as a line on the given axes."""
    axes.plot(x, y)

In [12]:
# Frequency of each bin of a real FFT over `length` samples; the sample spacing
# of 1 / frame_rate seconds makes the returned values cycles per second (Hz).
frequencies = rfftfreq(length, 1 / frame_rate)

In [13]:
# Magnitude spectrum of the analysis window [start, end), one entry per channel.
# Channels are interleaved, so channel c is every `channels_count`-th sample from c.
frequency_values = [
    np.abs(rfft(signal[channel::channels_count][start:end]))
    for channel in range(channels_count)
]

In [14]:
# Spectrum of the analysis window: one subplot per channel.
fig = plt.figure(2)
fig.set_figwidth(fig.get_figwidth() * 5)

# `squeeze=False` always yields a 2-D array of axes, so a mono file (a single
# subplot) can be indexed the same way as a multi-channel one.
subplots = fig.subplots(nrows=channels_count, ncols=1, squeeze=False)[:, 0]

for channel_axes, channel_values in zip(subplots, frequency_values):
    # Only the shape of the spectrum matters here, so hide the magnitude scale.
    channel_axes.set_yticks([])
    channel_axes.plot(frequencies, channel_values)

fig.savefig(f"images/{song_name}_freq_{start}_{end}.png", bbox_inches="tight")
plt.close(fig)

![Always - Bon Jovi (Frequency analysis)](images/Always%20-%20Bon%20Jovi_freq_44100_44541.png)

In [15]:
# Animated spectrum: each animation frame shows one STFT segment per channel.
fig = plt.figure(3)

# `squeeze=False` always yields a 2-D array of axes, so a mono file (a single
# subplot) can be indexed the same way as a multi-channel one.
subplots = fig.subplots(nrows=channels_count, ncols=1, squeeze=False)[:, 0]
matrices = []
lines = []

for i in range(channels_count):
    # Only the first 1000 analysis windows' worth of samples are transformed.
    channel_excerpt = signal[i::channels_count][:1000 * frames_analysis_length]
    freq, times, matrix = stft(channel_excerpt, frame_rate, nperseg=frames_analysis_length)
    # Transposed so that rows are STFT segments (time) and columns are frequency
    # bins; only the magnitudes are kept.
    normalized_matrix = np.abs(matrix.transpose())

    # Fix the vertical range up front so the scale does not jump between frames.
    subplots[i].set_ylim(0, normalized_matrix.max())
    # Start from the first segment rather than uninitialized `np.empty` memory.
    line, = subplots[i].plot(freq, normalized_matrix[0])

    matrices.append(normalized_matrix)
    lines.append(line)


def animate(frame_number: int):
    """Show STFT segment `frame_number` on every channel's line."""
    for channel_line, channel_matrix in zip(lines, matrices):
        channel_line.set_ydata(channel_matrix[frame_number])


# Never request more frames than there are STFT segments; otherwise `animate`
# would index past the end of the matrices for short recordings.
frames_to_render = min([200] + [len(channel_matrix) for channel_matrix in matrices])

animation = FuncAnimation(fig, animate, frames=frames_to_render, interval=50)
animation.save(f"animations/{song_name}.mp4")

plt.close(fig)

![Always - Bon Jovi](animations/Always%20-%20Bon%20Jovi.mp4)