Experiment with spec conversion

In [2]:
from utils.spectrogram_converter import SpectrogramConverter
from utils.spectrogram_params import SpectrogramParams
from utils.riff_util import image_util
import os
import pydub
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from IPython.display import Audio, display
import typing as T
from utils.spectrogram_image_converter import SpectrogramImageConverter

In [3]:
converter = SpectrogramConverter(SpectrogramParams())

In [4]:
# Set up
device = os.environ.get("RIFFUSION_TEST_DEVICE", "cuda")

# experiment label
label = "Bird vocalization-bird call-bird song"

# set paths
wav_source = "./AudioSet/wav/" + label
spec_dest = "./AudioSet/spec/" + label
os.makedirs(spec_dest, exist_ok=True)

# Convert wav to audiosegment
segment = pydub.AudioSegment.from_wav(wav_source + "/EtPIEB0fbzA.wav")

# Convert to mono if desired
use_stereo = False
if use_stereo:
    assert segment.channels == 2
else:
    segment = segment.set_channels(1)

# Define named sets of parameters
param_sets: T.Dict[str, SpectrogramParams] = {}

param_sets["default"] = SpectrogramParams(
    sample_rate=segment.frame_rate,
    stereo=use_stereo,
    step_size_ms=20,
    min_frequency=20,
    max_frequency=20000,
    num_frequencies=512,
)

segments: T.Dict[str, pydub.AudioSegment] = {
            "original": segment,
        } 
images: T.Dict[str, Image.Image] = {}
for name, params in param_sets.items():
    converter = SpectrogramImageConverter(params=params, device=device)
    images[name] = converter.spectrogram_image_from_audio(segment)
    segments[name] = converter.audio_from_spectrogram_image(
        image=images[name],
        apply_filters=True,
    )

# Save images to disk
for name, image in images.items():
    image_out = spec_dest + ".png"
    image.save(image_out, exif=image.getexif(), format="PNG")
    print(f"Saved {image_out}")

# Save segments to disk
for name, segment in segments.items():
    audio_out = spec_dest + ".wav"
    segment.export(audio_out, format="wav")
    print(f"Saved {audio_out}")

""" # Convert to np array of amplitudes
spec_test = converter.spectrogram_from_audio(segment)

# Convert array to spectrogram image
spec_test = image_util.image_from_spectrogram(spec_test)

# Save
spec_test.save(spec_dest + "test.png")
os.system(f" start {spec_dest}") """

Saved ./AudioSet/spec/Bird vocalization-bird call-bird song.png
Saved ./AudioSet/spec/Bird vocalization-bird call-bird song.wav
Saved ./AudioSet/spec/Bird vocalization-bird call-bird song.wav


' # Convert to np array of amplitudes\nspec_test = converter.spectrogram_from_audio(segment)\n\n# Convert array to spectrogram image\nspec_test = image_util.image_from_spectrogram(spec_test)\n\n# Save\nspec_test.save(spec_dest + "test.png")\nos.system(f" start {spec_dest}") '

In [5]:
""" from utils.spectrogram_image_converter import SpectrogramImageConverter

converter = SpectrogramImageConverter(SpectrogramParams())

# Open spectrogram image
image = Image.open(spec_dest + "test.png")

# Convert image to wav
audio_test = converter.audio_from_spectrogram_image(image)

# Save wav
audio_test.export(spec_dest + "test_conversion.wav", format="wav") """

' from utils.spectrogram_image_converter import SpectrogramImageConverter\n\nconverter = SpectrogramImageConverter(SpectrogramParams())\n\n# Open spectrogram image\nimage = Image.open(spec_dest + "test.png")\n\n# Convert image to wav\naudio_test = converter.audio_from_spectrogram_image(image)\n\n# Save wav\naudio_test.export(spec_dest + "test_conversion.wav", format="wav") '

In [6]:
""" from processor import *
from pydub import AudioSegment
import os

# experiment label
label = "Bird vocalization-bird call-bird song"

# set paths
wav_source = "./AudioSet/wav/" + label
spec_dest = "./AudioSet/spec/" + label
os.makedirs(spec_dest, exist_ok=True)

waveform, sample_rate = torchaudio.load(wav_source + "/EtPIEB0fbzA.wav")

processor = Processor(n_fft = 256, n_mels = 80, sample_rate = sample_rate)

spec = processor.wav_to_spec(waveform)

wav_from_spec = processor.spec_to_wav(spec)

processor.play_wav(wav_from_spec) """

' from processor import *\nfrom pydub import AudioSegment\nimport os\n\n# experiment label\nlabel = "Bird vocalization-bird call-bird song"\n\n# set paths\nwav_source = "./AudioSet/wav/" + label\nspec_dest = "./AudioSet/spec/" + label\nos.makedirs(spec_dest, exist_ok=True)\n\nwaveform, sample_rate = torchaudio.load(wav_source + "/EtPIEB0fbzA.wav")\n\nprocessor = Processor(n_fft = 256, n_mels = 80, sample_rate = sample_rate)\n\nspec = processor.wav_to_spec(waveform)\n\nwav_from_spec = processor.spec_to_wav(spec)\n\nprocessor.play_wav(wav_from_spec) '

In [7]:
""" # experiment label
label = "Bird vocalization-bird call-bird song"

# set paths
wav_source = "./utils/riff_util/test_data/tired_traveler/clips/clip_0_start_15795_ms_duration_5678_ms.wav"
spec_dest = "./utils/riff_util/test_data/tired_traveler/images/clip_0.png"

# Convert wav to audiosegment
test = AudioSegment.from_wav(wav_source)

# Convert to np array of amplitudes
spec_test = converter.spectrogram_from_audio(test)

# Convert array to spectrogram image
spec_test = image_util.image_from_spectrogram(spec_test)

# Save
spec_test.save(spec_dest)
os.system(f" start {spec_dest}") """

' # experiment label\nlabel = "Bird vocalization-bird call-bird song"\n\n# set paths\nwav_source = "./utils/riff_util/test_data/tired_traveler/clips/clip_0_start_15795_ms_duration_5678_ms.wav"\nspec_dest = "./utils/riff_util/test_data/tired_traveler/images/clip_0.png"\n\n# Convert wav to audiosegment\ntest = AudioSegment.from_wav(wav_source)\n\n# Convert to np array of amplitudes\nspec_test = converter.spectrogram_from_audio(test)\n\n# Convert array to spectrogram image\nspec_test = image_util.image_from_spectrogram(spec_test)\n\n# Save\nspec_test.save(spec_dest)\nos.system(f" start {spec_dest}") '

In [8]:
""" from utils.spectrogram_image_converter import SpectrogramImageConverter

converter = SpectrogramImageConverter(SpectrogramParams())

# Open spectrogram image
image = Image.open(spec_dest)

# Convert image to wav
audio_test = converter.audio_from_spectrogram_image(image)

# Save wav
audio_test.export(spec_dest + "test_conversion.wav", format="wav") """

' from utils.spectrogram_image_converter import SpectrogramImageConverter\n\nconverter = SpectrogramImageConverter(SpectrogramParams())\n\n# Open spectrogram image\nimage = Image.open(spec_dest)\n\n# Convert image to wav\naudio_test = converter.audio_from_spectrogram_image(image)\n\n# Save wav\naudio_test.export(spec_dest + "test_conversion.wav", format="wav") '