In [None]:
# Mount the user's Google Drive inside the Colab VM so files stored there can be read.
from google.colab import drive
drive.mount('/content/gdrive')
# Base folder of the mounted drive. NOTE(review): not referenced by the cells
# visible in this notebook, which spell out full paths instead.
root_path = '/content/gdrive/MyDrive/'

Mounted at /content/gdrive


In [None]:
"""
Experiments with going back and forth between time-space and fft-space audio.
"""

from IPython.display import Audio
import numpy as np
import scipy.io.wavfile

In [None]:
# Recording to analyse (the reader below requires wav format).
INPUT_AUDIO = "/content/gdrive/MyDrive/APS360 Group Project/Experiments/FFT/1812_Overture.wav"
sample_rate, data = scipy.io.wavfile.read(INPUT_AUDIO)

# Taken before the transpose below, while the leading axis still counts samples.
num_samples = data.shape[0]

# Show the layout as loaded, swap the axes, then show the new layout.
print(data.shape)
data = np.transpose(data)
print(data.shape)

# Last expression of the cell: renders an audio player for the loaded clip.
Audio(data=data, rate=sample_rate)

(328545, 2)
(2, 328545)


# Experiment 1: Transforming Entire Audio

In [None]:
"""
Single DFFT

Calculate FFT of entire data at one time, then reconstruct.
"""

# Transform the whole recording in one go. The FFT runs along the last axis,
# so the result has one complex amplitude (bin) per input sample.
# Bin i is centred on i * (sample rate) / (num samples).
single_fft_freqs = np.fft.fft(data, axis=-1)

We can now reconstruct the audio in two different ways.

In [None]:
# 1. (Easier) Using np's built in inverse FFT.
print("Single DFFT, Inverse FFFT")

# np.fft.ifft always returns a complex array. The wav samples that were
# transformed are real, so the imaginary part is only floating-point round-off.
# Keep the real part explicitly so Audio receives real samples instead of
# casting a complex array to float itself (which emits a warning).
single_ifft_audio = np.fft.ifft(single_fft_freqs).real
Audio(data=single_ifft_audio, rate=sample_rate)

Single DFFT, Inverse FFFT


  data = np.array(data, dtype=float)


In [None]:
# 2. (More Insightful) Manually
#    Map real portions to cos waves.
#    Map imaginary portions to sin waves.
#    Superimpose and re-sample.

# This is *technically* possible, but isn't practical over this timescale.
# Because we are using the entire audio sample at one time, there's a huge
# number of components to calculate (billions).
# Doing it in time takes forever, in memory crashes the system.
# This method may be possible if we chunk the audio first.

# Experiment 2: Chunking Audio
First, the audio will be broken into fixed-length chunks (zero-padded at the end).

Then, each of those chunks will individually be FFTd and IFFTd.

Then, those results will be re-fused.

In [None]:
"""
Split the data into evenly sized chunks.
"""

# Number of samples in each chunk.
# NOTE: with a size of 1 every chunk's FFT is just the sample itself; raise
# this (e.g. to 1024) for the chunked experiments to be meaningful.
samples_per_chunk = 1

# The chunked experiments treat the audio as one 1-D stream of samples.
# `data` was transposed to (channels, samples) when it was loaded, so average
# the channels into a single mono track first. np.atleast_2d lets an already
# mono (1-D) recording pass through unchanged.
mono_data = np.atleast_2d(data).mean(axis=0)

# Zero-pad the end so the length is an exact multiple of the chunk size.
# The modulo of the negated length yields 0 (rather than a whole extra chunk)
# when the length already divides evenly.
padding_needed = (-len(mono_data)) % samples_per_chunk
padded_data = np.pad(mono_data, (0, padding_needed))

# Integer division: the section count given to np.split must be a whole number.
chunks = np.split(padded_data, len(padded_data) // samples_per_chunk)

In [None]:
"""
Break apart and reconstruct chunks using built in FFT/IFFT.
"""

# Real-valued buffer that receives the round-tripped chunks, in order.
reconstructed = np.zeros(padded_data.shape)

for i, chunk in enumerate(chunks):
  # Break apart chunk into frequency buckets.
  chunk_freqs = np.fft.fft(chunk)

  # Reconstruct chunk using inverse operation. The inverse FFT returns complex
  # values whose imaginary part is only round-off for real input; take the
  # real part explicitly instead of relying on the implicit complex -> float
  # cast, which discards it with a warning.
  start = i * samples_per_chunk
  reconstructed[start:start + samples_per_chunk] = np.fft.ifft(chunk_freqs).real

print("Chunk Transformed Audio")
Audio(data=reconstructed, rate=sample_rate)

Chunk Transformed Audio


  if __name__ == '__main__':


In [None]:
"""
Break apart chunks using FFT, but manually reconstruct.

Each sample is rebuilt as a sum of cos/sin waves weighted by the FFT output,
i.e. the inverse DFT of a real signal written out by hand:

  x[n] = (1/N) * sum_k( Re(X[k]) * cos(2*pi*f_k*t_n) - Im(X[k]) * sin(2*pi*f_k*t_n) )

where N is the chunk length, f_k = k * sample_rate / N and t_n = n / sample_rate.
With these coefficients the result matches the built-in inverse FFT up to
floating-point rounding, whatever the chunk size.

Each chunk must hold exactly `samples_per_chunk` real samples.
"""

manually_reconstructed = np.zeros(padded_data.shape)

# The frequency/time grid is the same for every chunk, so build it once.
# Bin k of an N-point FFT sits at k * sample_rate / N (the sample rate itself
# is NOT a bin), and sample n is taken at n / sample_rate (the chunk's end
# time is NOT a sample), hence arange rather than an endpoint-inclusive linspace.
bin_indices = np.arange(samples_per_chunk)
chunk_freqs = (bin_indices * sample_rate / samples_per_chunk).reshape(1, samples_per_chunk)
times = (bin_indices / sample_rate).reshape(1, samples_per_chunk)

# Each timestep is a row, and each frequency is a column.
times_freqs_grid = times.T @ (2 * np.pi * chunk_freqs)
cos_grid = np.cos(times_freqs_grid)
sin_grid = np.sin(times_freqs_grid)

# Break apart and reconstruct each chunk.
for i, chunk in enumerate(chunks):
  # Complex amplitude of every frequency bucket, as a row to broadcast over time.
  chunk_freq_amplitudes = np.fft.fft(chunk).reshape(1, samples_per_chunk)

  # Weight the cos waves by the (signed) real parts and the sin waves by the
  # negated imaginary parts, collapse all frequencies into a sum for each
  # timestep, and apply the 1/N normalisation of the inverse transform.
  reconstructed_chunk = np.sum(
    chunk_freq_amplitudes.real * cos_grid
    - chunk_freq_amplitudes.imag * sin_grid,
  1) / samples_per_chunk

  # Add our chunk to the overall time-space result.
  start = i * samples_per_chunk
  manually_reconstructed[start:start + samples_per_chunk] = reconstructed_chunk

print("Chunk Transformed Manually Reconstructed Audio")
Audio(data=manually_reconstructed, rate=sample_rate)


ValueError: ignored