In [10]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
import numpy as np
import re
import shutil
import random
import pydub
import sys
sys.path.append('..')
from utils.spectrogram_image_converter import SpectrogramImageConverter
from utils.spectrogram_params import SpectrogramParams
import typing as T
from PIL import Image

In [None]:
# Convert every WAV clip of the experiment label into a spectrogram PNG.
device = os.environ.get("RIFFUSION_TEST_DEVICE", "cuda")

# experiment label
label = "Bird vocalization-bird call-bird song"

# set paths
wav_source = "./wav/" + label
spec_dest = "./spec/" + label
# Images go into the "unsplit" subfolder: the split cell reads its input from
# spec_dest + "/unsplit", and the metadata cell treats every entry directly
# under the spec folder as a directory, so loose PNGs there would break it.
unsplit_dest = os.path.join(spec_dest, "unsplit")
os.makedirs(unsplit_dest, exist_ok=True)

# Mono/stereo choice is the same for every clip, so it is set once up front
use_stereo = False

# os.listdir returns every entry in arbitrary order; keep only WAV files
# (anything else would be handed to from_wav) and sort for a stable run order
wav_files = sorted(
    entry for entry in os.listdir(wav_source) if entry.lower().endswith(".wav")
)

for wav in wav_files:
    # Convert wav to audiosegment
    segment = pydub.AudioSegment.from_wav(os.path.join(wav_source, wav))

    # Convert to mono if desired
    if use_stereo:
        assert segment.channels == 2
    else:
        segment = segment.set_channels(1)

    # Parameters use the clip's own sample rate, so they are built per file
    params = SpectrogramParams(
        sample_rate=segment.frame_rate,
        stereo=use_stereo,
        step_size_ms=20,
        min_frequency=20,
        max_frequency=20000,
        num_frequencies=512,
    )
    print(segment.frame_rate)

    converter = SpectrogramImageConverter(params=params, device=device)
    image = converter.spectrogram_image_from_audio(segment)
    # NOTE: the previous version also reconstructed audio from the image via
    # audio_from_spectrogram_image, but the result was never read; that
    # round-trip is dropped here.

    # Save image to disk (file name keeps the source name, e.g. "clip.wav.png")
    image_out = os.path.join(unsplit_dest, os.fsdecode(wav) + ".png")
    image.save(image_out, exif=image.getexif(), format="PNG")
    print(f"Saved {image_out}")


In [12]:
# Create splits and folders
def split_folder(input_folder, output_folder, split_ratio, seed=None):
    """Copy the files of ``input_folder`` into train/val/test subfolders.

    The files are shuffled, then the first ``split_ratio[0]`` share is copied
    to ``output_folder/train``, the next ``split_ratio[1]`` share to
    ``output_folder/val`` and everything that remains to ``output_folder/test``.
    Source files are copied, not moved.

    Args:
        input_folder: Directory holding the files to split. Only regular
            files are considered; subdirectories are ignored.
        output_folder: Directory in which ``train``, ``val`` and ``test`` are
            created (existing folders are reused).
        split_ratio: Sequence whose first two items are the train and
            validation fractions. The test split takes the remainder, so the
            third item is not read.
        seed: Optional seed for the shuffle. When given, the same input
            always yields the same split; when ``None`` the global ``random``
            state is used.
    """
    # Create output folders
    train_folder = os.path.join(output_folder, "train")
    validation_folder = os.path.join(output_folder, "val")
    test_folder = os.path.join(output_folder, "test")
    for split_dir in (train_folder, validation_folder, test_folder):
        os.makedirs(split_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order and includes
    # subdirectories (which shutil.copyfile cannot copy): keep regular files
    # only and sort them so a seeded shuffle starts from a stable order.
    files = sorted(
        name
        for name in os.listdir(input_folder)
        if os.path.isfile(os.path.join(input_folder, name))
    )

    # Shuffle the file list randomly
    if seed is None:
        random.shuffle(files)
    else:
        # A private generator leaves the global random state untouched
        random.Random(seed).shuffle(files)

    # Calculate the boundaries of each split; int() truncates, so any
    # rounding remainder ends up in the test split.
    total_files = len(files)
    train_end = int(total_files * split_ratio[0])
    validation_end = train_end + int(total_files * split_ratio[1])

    # Copy files to the respective folders
    for i, file in enumerate(files):
        if i < train_end:
            destination_folder = train_folder
        elif i < validation_end:
            destination_folder = validation_folder
        else:
            destination_folder = test_folder
        shutil.copyfile(
            os.path.join(input_folder, file),
            os.path.join(destination_folder, file),
        )

# Seed the global RNG used by random.shuffle inside split_folder so that
# re-running this cell on the same directory listing reproduces the same
# split instead of copying files into a different split each time.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

input_folder = spec_dest + "/unsplit"
output_folder = spec_dest
split_ratio = [0.8, 0.1, 0.1]  # train, validation, test fractions

split_folder(input_folder, output_folder, split_ratio)

In [16]:
# Metadata CSV settings

# Caption written next to every file name in metadata.csv
prompt = "a spectrogram of bird song"
# Root folder whose subfolders each receive a metadata.csv
base_dir = "./spec/Bird vocalization-bird call-bird song/"

def create_metadata(base_dir, folder, prompt):
    """Write ``metadata.csv`` pairing each file in ``base_dir/folder`` with ``prompt``.

    The CSV has the columns ``file_name`` and ``text`` and is written inside
    the folder it describes, replacing any earlier ``metadata.csv``.

    Args:
        base_dir: Directory that contains ``folder``.
        folder: Name of the subfolder whose files are listed.
        prompt: Text stored in the ``text`` column of every row.
    """
    metadata_name = "metadata.csv"
    folder_path = os.path.join(base_dir, folder)

    # Skip the metadata file itself (present when this is re-run) and any
    # subdirectories, so that only real data files are listed. Sorting makes
    # the row order independent of os.listdir's arbitrary ordering.
    file_names = sorted(
        os.fsdecode(entry)
        for entry in os.listdir(folder_path)
        if entry != metadata_name
        and os.path.isfile(os.path.join(folder_path, entry))
    )

    data = [(file_name, prompt) for file_name in file_names]
    df = pd.DataFrame(data, columns=['file_name', 'text'])
    output_csv_path = os.path.join(folder_path, metadata_name)
    df.to_csv(output_csv_path, index=False)
    
# Write a metadata.csv into every subfolder of base_dir. os.listdir also
# returns plain files, which create_metadata cannot list, so only directories
# are processed; sorting gives a stable processing order.
for folder in sorted(os.listdir(base_dir)):
    if os.path.isdir(os.path.join(base_dir, folder)):
        create_metadata(base_dir, folder, prompt)
