Experiment with spectrogram conversion

In [None]:
from utils.spectrogram_converter import SpectrogramConverter
from utils.spectrogram_params import SpectrogramParams
from utils.riff_util import image_util
import os
import pydub
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from IPython.display import Audio, display
import typing as T
from utils.spectrogram_image_converter import SpectrogramImageConverter

In [None]:
# Notebook-level converter built from a default-constructed SpectrogramParams.
# NOTE(review): the batch-conversion loop in a later cell rebinds the name
# `converter` to a SpectrogramImageConverter, so after that cell has run this
# object is no longer reachable under this name.
converter = SpectrogramConverter(SpectrogramParams())

In [None]:
# Batch-convert every WAV clip of one AudioSet label into a spectrogram PNG.

# Device string handed to SpectrogramImageConverter. It can be overridden with
# the RIFFUSION_TEST_DEVICE environment variable and defaults to "cuda".
device = os.environ.get("RIFFUSION_TEST_DEVICE", "cuda")

# Experiment label; it doubles as the sub-directory name for input and output.
label = "Bird vocalization-bird call-bird song"

# Input (wav) and output (spectrogram image) directories for this label.
wav_source = "./AudioSet/wav/" + label
spec_dest = "./AudioSet/spec/" + label
os.makedirs(spec_dest, exist_ok=True)

# The channel layout is the same for every clip, so it is chosen once here
# instead of being re-assigned on each loop iteration.
use_stereo = False

# Only hand *.wav entries to the decoder: a stray file in the directory
# (notes, thumbnails, hidden OS files) would otherwise abort the whole run.
# Sorting makes the processing order reproducible across machines.
wav_names = sorted(
    entry for entry in os.listdir(wav_source) if entry.lower().endswith(".wav")
)

for wav in wav_names:
    # Decode the clip into a pydub AudioSegment.
    segment = pydub.AudioSegment.from_wav(os.path.join(wav_source, wav))

    if use_stereo:
        # Raise explicitly instead of asserting: asserts vanish under `-O`.
        if segment.channels != 2:
            raise ValueError(
                f"{wav}: expected 2 channels for stereo, found {segment.channels}"
            )
    else:
        # Downmix to mono.
        segment = segment.set_channels(1)

    # Named parameter sets. They are rebuilt per clip because the sample rate
    # is taken from the clip itself.
    param_sets: T.Dict[str, SpectrogramParams] = {
        "default": SpectrogramParams(
            sample_rate=segment.frame_rate,
            stereo=use_stereo,
            step_size_ms=20,
            min_frequency=20,
            max_frequency=20000,
            num_frequencies=512,
        ),
    }
    print(segment.frame_rate)

    # Audio keyed by parameter-set name; "original" is the untouched input.
    segments: T.Dict[str, pydub.AudioSegment] = {"original": segment}
    images: T.Dict[str, Image.Image] = {}
    for name, params in param_sets.items():
        # NOTE: this rebinds the notebook-level name `converter`.
        converter = SpectrogramImageConverter(params=params, device=device)
        images[name] = converter.spectrogram_image_from_audio(segment)
        # Round-trip back to audio so the reconstruction can be auditioned.
        segments[name] = converter.audio_from_spectrogram_image(
            image=images[name],
            apply_filters=True,
        )

    # Save images to disk, carrying over the EXIF data attached to the image.
    # NOTE: the file name does not include the parameter-set name, so adding a
    # second entry to `param_sets` would overwrite the first image.
    for name, image in images.items():
        image_out = os.path.join(spec_dest, wav + ".png")
        image.save(image_out, exif=image.getexif(), format="PNG")
        print(f"Saved {image_out}")

# Disabled: export the audio segments to disk. Kept as comments rather than a
# bare string literal, which the notebook would echo as the cell's output.
# NOTE: `segments` only holds the last clip processed, and `audio_out` is the
# same path for every entry, so each export would overwrite the previous one.
# for name, segment in segments.items():
#     audio_out = spec_dest + ".wav"
#     segment.export(audio_out, format="wav")
#     print(f"Saved {audio_out}")