## Set up the dataset for experiments

In [1]:
import lancedb

  from .autonotebook import tqdm as notebook_tqdm


## Make different audio queries

In [6]:
import numpy as np
import soundfile as sf
import librosa
import random

def load_and_process_audio_files(file_range=(1, 11), chunk_size=44100 * 10,
                                 add_offset=False, pitch_shift=False, time_stretch=False):
    """Load song recordings, cut them into fixed-size chunks and optionally distort them.

    For every song number in ``range(file_range[0], file_range[1])`` and for both
    versions ``"a"`` and ``"b"``, the file
    ``../../data/CSD/english/wav/en{num:03}{letter}.wav`` is read, mixed down to
    mono if it has two dimensions, and split into ``len(data) // chunk_size``
    chunks. Each chunk can independently receive a random start offset, a random
    pitch shift and a random time stretch.

    Args:
        file_range: ``(first, stop)`` song numbers; ``stop`` is exclusive.
        chunk_size: Number of samples per chunk. The default is presumably
            10 seconds at 44.1 kHz -- confirm against the dataset's sample rate.
        add_offset: If true, shift each chunk's start by a random number of
            samples within +/- 20% of ``chunk_size``.
        pitch_shift: If true, draw a pitch shift from {-2, -1, 0, 1, 2}
            semitones for every chunk.
        time_stretch: If true, draw a stretch rate uniformly from [0.8, 1.2]
            for every chunk.

    Returns:
        A list of dicts, one per chunk, with the keys ``vector`` (array of
        exactly ``chunk_size`` samples), ``sample_rate``, ``pitch_shift``,
        ``time_stretch``, ``offset``, ``chunk_num`` (1-based), ``song_num``,
        ``song_version`` and ``filename``.
    """
    # List to hold dictionaries with chunked audio data
    sound_arrays = []

    # Largest absolute start offset, in samples (loop-invariant).
    max_offset = int(chunk_size * 0.2)

    # Process each file
    for num in range(file_range[0], file_range[1]):
        for letter in ["a", "b"]:
            # Load the audio file
            data, sample_rate = sf.read(f"../../data/CSD/english/wav/en{num:03}{letter}.wav")
            data = np.array(data)

            # Combine stereo channels into mono
            if np.ndim(data) == 2:
                data = np.mean(data, axis=1)

            # Calculate the total number of chunks based on the original chunk size
            num_chunks = len(data) // chunk_size

            for i in range(num_chunks):
                # Apply random offset to start chunking at a slightly different point
                if add_offset:
                    offset = random.randint(-max_offset, max_offset)
                else:
                    offset = 0
                # The start is clamped at 0, so for the first chunk a negative
                # offset is recorded in the metadata but has no effect.
                start = max(0, i * chunk_size + offset)
                end = min(len(data), start + chunk_size)
                chunk = data[start:end]

                # Draw the per-chunk distortion parameters. The drawn semitone
                # value is kept in its own local: rebinding the boolean
                # ``pitch_shift`` argument would turn pitch shifting off for
                # all remaining chunks as soon as a 0 was drawn.
                n_steps = random.choice([-2, -1, 0, 1, 2]) if pitch_shift else 0
                stretch_factor = random.uniform(0.8, 1.2) if time_stretch else 1.0

                # Apply pitch shifting if not zero
                if n_steps != 0:
                    chunk = librosa.effects.pitch_shift(chunk, sr=sample_rate, n_steps=n_steps)

                # Apply time stretching if not 1.0
                if stretch_factor != 1.0:
                    chunk = librosa.effects.time_stretch(chunk, rate=stretch_factor)

                # Time stretching changes the length; trim or zero-pad so every
                # stored vector has exactly chunk_size samples.
                chunk = chunk[:chunk_size]
                if len(chunk) < chunk_size:
                    chunk = np.pad(chunk, (0, chunk_size - len(chunk)), mode="constant")

                # Append the processed chunk to the list
                sound_arrays.append({
                    "vector": chunk,
                    "sample_rate": sample_rate,
                    "pitch_shift": n_steps,
                    "time_stretch": stretch_factor,
                    "offset": offset,
                    "chunk_num": i + 1,
                    "song_num": num,
                    "song_version": letter,
                    "filename": f"en{num:03}{letter}_chunk{i+1}_offset{offset}_ps{n_steps}_ts{stretch_factor:.2f}"
                })

    return sound_arrays


In [3]:
# Relative path that is handed to lancedb.connect below.
uri = "../../data/lancedb-data/audio-lancedb"
# Database handle; the later cells create their tables through it.
db = lancedb.connect(uri)


In [7]:
import pandas as pd

# Each entry is an (add_offset, pitch_shift, time_stretch) combination that is
# forwarded to load_and_process_audio_files.
settings = [
    (False, False, False),
    (True, False, False),
    (False, True, False),
    (False, False, True),
    (False, True, True),
    (True, True, True),
]
# Becomes True once the "audio_example_queries" table has been created.
db_setup = False

for setting in settings:
    print(setting)
    add_offset, pitch_shift, time_stretch = setting

    # Work through the songs in batches of ten file numbers.
    for first_file in range(1, 51, 10):
        print(f"Processing files {first_file} to {first_file+10}")
        sound_arrays = load_and_process_audio_files(
            file_range=(first_file, first_file + 10),
            chunk_size=44100 * 10,
            add_offset=add_offset,
            pitch_shift=pitch_shift,
            time_stretch=time_stretch,
        )

        # The very first batch creates the table; every later one appends.
        if not db_setup:
            tbl = db.create_table("audio_example_queries", data=sound_arrays)
            db_setup = True
            continue

        tbl.add(sound_arrays)

(False, False, False)
Processing files 1 to 11
Processing files 11 to 21
Processing files 21 to 31
Processing files 31 to 41
Processing files 41 to 51
(True, False, False)
Processing files 1 to 11
Processing files 11 to 21
Processing files 21 to 31
Processing files 31 to 41
Processing files 41 to 51
(False, True, False)
Processing files 1 to 11
Processing files 11 to 21
Processing files 21 to 31
Processing files 31 to 41
Processing files 41 to 51
(False, False, True)
Processing files 1 to 11
Processing files 11 to 21
Processing files 21 to 31
Processing files 31 to 41
Processing files 41 to 51
(False, True, True)
Processing files 1 to 11
Processing files 11 to 21
Processing files 21 to 31
Processing files 31 to 41
Processing files 41 to 51
(True, True, True)
Processing files 1 to 11
Processing files 11 to 21
Processing files 21 to 31
Processing files 31 to 41
Processing files 41 to 51


In [8]:
import pandas as pd


# Becomes True once the "audio_dataset" table has been created.
db_setup = False

# All distortions are switched off for this table.
add_offset, pitch_shift, time_stretch = False, False, False

# Work through the songs in batches of ten file numbers.
for first_file in range(1, 51, 10):
    print(f"Processing files {first_file} to {first_file+10}")
    sound_arrays = load_and_process_audio_files(
        file_range=(first_file, first_file + 10),
        chunk_size=44100 * 10,
        add_offset=add_offset,
        pitch_shift=pitch_shift,
        time_stretch=time_stretch,
    )

    # The first batch creates the table; every later one appends to it.
    if not db_setup:
        tbl = db.create_table("audio_dataset", data=sound_arrays)
        db_setup = True
        continue

    tbl.add(sound_arrays)
        

Processing files 1 to 11
Processing files 11 to 21
Processing files 21 to 31
Processing files 31 to 41
Processing files 41 to 51


: 