# Synthetic Datasets

In [None]:
import os
import pathlib

import numpy as np
from py5canvas import *

import torch
import torchaudio

import torchvision as tv
from torchvision.transforms import v2

import matplotlib.pyplot as plt
import IPython.display as ipd

In [None]:
# Number of images to generate per class.
n_samples = 10

# Black & white (True) or colour (False) shapes?
bnw = False

# Root directory under which all synthetic datasets are written.
DATASET_DIR = pathlib.Path("datasets/synthetic")

## Images

In [None]:
# Image samples are stored in the "images" subfolder of the dataset root.
IMG_DS = DATASET_DIR.joinpath("images")

# 512 x 512 drawing canvas shared by all image-generation cells.
C = create_canvas(512, 512)

### Image Generation Functions

In [None]:
def draw_equilateral_triangle(centre, size, rotation):
    """Draw a closed triangle whose three vertices lie on a circle around `centre`.

    centre:   indexable (x, y) position of the circle's centre
    size:     diameter of the circle (vertices sit at size / 2 from the centre)
    rotation: angle of the first vertex, in radians
    """
    radius = size / 2
    step = TWO_PI / 3  # angular spacing between consecutive vertices
    corners = []
    for k in range(3):
        theta = rotation + step * k
        corners.append((centre[0] + radius * cos(theta), centre[1] + radius * sin(theta)))

    begin_shape()
    for px, py in corners:
        vertex(px, py)
    end_shape(close=True)

### Dataset Generation

In [None]:
# Class index -> shape. 0: rectangles, 1: ellipses, 2: triangles
n_classes = 3
class_names = ["rectangle", "ellipse", "triangle"]

for sample_class in range(n_classes):

    print(f"Generating samples for class {sample_class} ({class_names[sample_class]})")

    # One subfolder per class (the layout torchvision's ImageFolder expects)
    out_dir = IMG_DS / f"{class_names[sample_class]}"
    out_dir.mkdir(exist_ok=True, parents=True)

    for i in range(n_samples):
        background(255)

        # Random grey level + alpha, or random RGBA colour
        if bnw:
            fill(random(255), random(255))
        else:
            fill(random(255), random(255), random(255), random(255))

        rotation = random(0, 2 * PI)
        third_width = C.width / 3
        third_height = C.height / 3

        # Bracket the drawing with push()/pop() so that the translate/rotate
        # applied for one sample does not carry over into the following
        # samples (and classes).
        push()

        # draw
        if sample_class == 0:
            rect_mode(CENTER)
            translate(center)
            rotate(rotation)
            rect(
                random(-third_width, third_width),
                random(-third_height, third_height),
                random(10, third_width),
                random(10, third_height),
            )
        elif sample_class == 1:
            translate(center)
            rotate(rotation)
            ellipse(
                random(-third_width, third_width),
                random(-third_height, third_height),
                random(10, third_width),
                random(10, third_height),
            )
        else:
            centre = create_vector(
                random(third_width, C.width - third_width),
                random(third_height, C.height - third_height)
            )
            size = random(10, C.width / 2)
            draw_equilateral_triangle(centre, size, rotation)

        pop()

        # save; the sample index is zero-padded to the number of digits in n_samples
        fname = out_dir / f"{sample_class}_{i:0{len(str(n_samples))}}.png"
        save_image(fname)

### Dataset Loading

In [None]:
# Preprocessing pipeline applied to every loaded image:
# ToImage followed by ToDtype(float32, scale=True).
# Optional steps, currently disabled:
#   tv.transforms.Grayscale(num_output_channels=1)
#   tv.transforms.Resize(size=(28,28), antialias=True)
pipeline_steps = [
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
]
transforms = v2.Compose(pipeline_steps)

# Load the generated images from the per-class subfolders of IMG_DS.
img_ds = tv.datasets.ImageFolder(IMG_DS, transform=transforms)

In [None]:
# Show the class names known to the image dataset.
img_ds.classes

In [None]:
# Look up the integer label assigned to the 'ellipse' class.
img_ds.class_to_idx['ellipse']

In [None]:
# Preview only the first sample of the image dataset.
for img, cl in img_ds:
    print(img.shape, cl, img_ds.classes[cl])
    # Move the channel axis last, as imshow expects
    channels_last = img.permute(1, 2, 0)
    plt.imshow(channels_last)
    plt.axis("off")
    break

## Sound

In [None]:
# Sound samples are stored in the "sounds" subfolder of the dataset root.
SOUND_DS = DATASET_DIR.joinpath("sounds")

# Sample rate (Hz) used when playing back and saving audio.
SAMPLE_RATE = 8000

### Sound Generation Functions

#### Single pitch

In [None]:
# Example: single_pitch(440) gives one second of a 440 Hz A note sampled at 8000 Hz

def single_pitch(
    frequency,
    duration=1.0, # seconds
    sample_rate=8000,
):
    """Return a sine wave of constant `frequency` (Hz) as a 1-D numpy array.

    The array holds int(sample_rate * duration) samples in [-1, 1],
    starting at time 0 (the end point `duration` is excluded).
    """
    n_points = int(sample_rate * duration)
    timeline = np.linspace(0, duration, n_points, endpoint=False)
    return np.sin(2 * np.pi * frequency * timeline)

In [None]:
# Listen to a one-second A note.
audio_samples = single_pitch(
    frequency = 440,  # Hz (A note)
    sample_rate = SAMPLE_RATE,  # synthesize at the same rate used for playback
)

ipd.Audio(audio_samples, rate=SAMPLE_RATE)

#### Single sweep

In [None]:
def single_sweep(
    f_start,
    f_end,
    duration=1.0, # seconds
    sample_rate=8000,
):
    """Return a linear frequency sweep from `f_start` to `f_end` (Hz).

    The instantaneous frequency moves linearly from `f_start` at t = 0 to
    `f_end` at t = `duration`. The result is a 1-D numpy array of
    int(sample_rate * duration) samples in [-1, 1].
    """
    # Create time array
    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)

    # For a linear frequency sweep, we need to integrate the frequency to get the phase
    # frequency(t) = f_start + (f_end - f_start) * t / duration
    # Phase = 2π * ∫ frequency(t) dt
    #       = 2π * (f_start * t + (f_end - f_start) * t² / (2 * duration))
    # Dividing by `duration` is what makes the sweep reach f_end exactly at the
    # end of the signal for any duration, not only for 1 second.
    phase = 2 * np.pi * (f_start * t + (f_end - f_start) * t**2 / (2 * duration))

    # Generate sine wave with changing frequency
    audio_samples = np.sin(phase)

    return audio_samples

In [None]:
# ascending
audio_samples = single_sweep(
    f_start = 440,  # Hz (A note)
    f_end = 880,    # Hz (A note one octave up)
    sample_rate = SAMPLE_RATE,  # synthesize at the same rate used for playback
)

ipd.Audio(audio_samples, rate=SAMPLE_RATE)

In [None]:
# descending
audio_samples = single_sweep(
    f_start = 880,  # Hz (A note one octave up)
    f_end = 440,    # Hz (A note)
    sample_rate = SAMPLE_RATE,  # synthesize at the same rate used for playback
)

ipd.Audio(audio_samples, rate=SAMPLE_RATE)

In [None]:
# Random descending sweep: start at a random pitch and drop by 100-299 Hz
up_pitch = np.random.randint(500, 1300)
down_pitch = up_pitch - np.random.randint(100, 300)
audio_samples = single_sweep(
    up_pitch,
    down_pitch,
    sample_rate=SAMPLE_RATE,  # synthesize at the same rate used for playback
)
ipd.Audio(audio_samples, rate=SAMPLE_RATE)

#### Down Up Down (& vice versa)

In [None]:
def piecewise_sweep(
    f_start,
    f_mid,
    f_end,
    duration=1, # seconds
    sample_rate=8000
):
    """Return a two-segment linear frequency sweep as a 1-D numpy array.

    The instantaneous frequency moves linearly from `f_start` to `f_mid`
    over the first half of `duration`, then from `f_mid` to `f_end` over
    the second half. The phase is continuous across the midpoint.
    The result holds int(sample_rate * duration) samples in [-1, 1].
    """
    # Create time array
    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)

    # Each segment lasts `midpoint` seconds
    midpoint = duration / 2

    # Phase = 2π * ∫ frequency(t) dt, integrated segment by segment.
    # First half: frequency(t) = f_start + (f_mid - f_start) * t / midpoint
    phase_first = 2 * np.pi * (f_start * t + (f_mid - f_start) * t**2 / (2 * midpoint))

    # Phase accumulated at exactly t = midpoint (first-half formula evaluated
    # analytically). Using the last first-half *sample* instead would drop one
    # sample's worth of phase and create a discontinuity.
    phase_at_midpoint = np.pi * midpoint * (f_start + f_mid)

    # Second half: frequency(t) = f_mid + (f_end - f_mid) * (t - midpoint) / midpoint
    t_rel = t - midpoint
    phase_second = phase_at_midpoint + 2 * np.pi * (
        f_mid * t_rel + (f_end - f_mid) * t_rel**2 / (2 * midpoint)
    )

    # Select the segment by time, so odd sample counts are split correctly
    phase = np.where(t < midpoint, phase_first, phase_second)

    # Generate sine wave with changing frequency
    audio_samples = np.sin(phase)

    return audio_samples

In [None]:
# up then down
audio_samples = piecewise_sweep(
    f_start = 440,  # Hz (A note)
    f_mid = 880,    # Hz (A note one octave up)
    f_end = 440,    # Hz (back to A note)
    sample_rate = SAMPLE_RATE,  # synthesize at the same rate used for playback
)
ipd.Audio(audio_samples, rate=SAMPLE_RATE)

In [None]:
# down then up
audio_samples = piecewise_sweep(
    f_start = 880,  # Hz (A note one octave up)
    f_mid = 440,    # Hz (A note)
    f_end = 880,    # Hz (back to one octave up)
    sample_rate = SAMPLE_RATE,  # synthesize at the same rate used for playback
)
ipd.Audio(audio_samples, rate=SAMPLE_RATE)

### Dataset Generation

In [None]:
# Number of sounds to generate per class
n_samples = 10

# Class indices (0-based, matching class_names):
# 0: fixed pitch; 1: up sweep; 2: down sweep; 3: down up down; 4: up down up
n_classes = 5
class_names = ["fixed_pitch", "up_sweep", "down_sweep", "down_up_down", "up_down_up"]

for sample_class in range(n_classes):

    msg = f"Generating samples for class {sample_class} ({class_names[sample_class]})"

    # One subfolder per class
    out_dir = SOUND_DS / f"{class_names[sample_class]}"
    out_dir.mkdir(exist_ok=True, parents=True)

    for i in range(n_samples):

        # All generators are given SAMPLE_RATE explicitly so that the
        # synthesis rate always matches the rate written to the WAV file.
        if sample_class == 0:
            audio_samples = single_pitch(
                np.random.randint(200, 1200),
                sample_rate=SAMPLE_RATE,
            )
        elif sample_class == 1:
            # up
            down_pitch = np.random.randint(200, 1000)
            up_pitch = down_pitch + np.random.randint(100, 300)
            audio_samples = single_sweep(
                down_pitch,
                up_pitch,
                sample_rate=SAMPLE_RATE,
            )
        elif sample_class == 2:
            # down
            up_pitch = np.random.randint(500, 1300)
            down_pitch = up_pitch - np.random.randint(100, 300)
            audio_samples = single_sweep(
                up_pitch,
                down_pitch,
                sample_rate=SAMPLE_RATE,
            )
        elif sample_class == 3:
            # down up down
            down_pitch = np.random.randint(200, 1000)
            up_pitch = down_pitch + np.random.randint(100, 300)
            audio_samples = piecewise_sweep(
                down_pitch,
                up_pitch,
                down_pitch,
                sample_rate=SAMPLE_RATE,
            )
        elif sample_class == 4:
            # up down up
            up_pitch = np.random.randint(500, 1300)
            down_pitch = up_pitch - np.random.randint(100, 300)
            audio_samples = piecewise_sweep(
                up_pitch,
                down_pitch,
                up_pitch,
                sample_rate=SAMPLE_RATE,
            )

        # Convert numpy array to a float32 torch tensor
        audio_tensor = torch.from_numpy(audio_samples).float()

        # Save as WAV file
        # For mono audio, add a leading channel dimension: (samples,) -> (1, samples)
        audio_tensor = audio_tensor.unsqueeze(0)

        # save; the sample index is zero-padded to the number of digits in n_samples
        fname = out_dir / f"{sample_class}_{i:0{len(str(n_samples))}}.wav"
        ipd.clear_output(wait=True)
        print(f"{msg} | {fname}")

        torchaudio.save(fname, audio_tensor, SAMPLE_RATE)


### Dataset Loading

In [None]:
# File extensions (lower case, with leading dot) recognised as audio.
AUDIO_EXTENSIONS = [".wav"]

# Source: https://github.com/bellchenx/AudioFolder-Dataloader-PyTorch/blob/master/dataloader.py

def is_audio_file(filename):
    """Return True if `filename` ends with one of AUDIO_EXTENSIONS (case-insensitive)."""
    suffixes = tuple(AUDIO_EXTENSIONS)
    return filename.lower().endswith(suffixes)

def make_dataset(dirname, class_to_idx):
    """Collect (path, class_index) pairs for every audio file under `dirname`.

    Each immediate subdirectory of `dirname` is treated as a class label and
    is walked recursively; files accepted by `is_audio_file` are paired with
    `class_to_idx[label]`. Labels, directories and file names are visited in
    sorted order, so the result is deterministic.

    dirname:      root directory (a leading "~" is expanded)
    class_to_idx: mapping from subdirectory name to integer label
    Returns a list of (path, class_index) tuples.
    Raises KeyError if an audio file sits in a subdirectory that is missing
    from `class_to_idx`.
    """
    audio = []
    dirname = os.path.expanduser(dirname)
    for label in sorted(os.listdir(dirname)):
        class_dir = os.path.join(dirname, label)
        # Plain files directly under the root are not classes
        if not os.path.isdir(class_dir):
            continue
        for root, _, fnames in sorted(os.walk(class_dir)):
            audio.extend(
                (os.path.join(root, fname), class_to_idx[label])
                for fname in sorted(fnames)
                if is_audio_file(fname)
            )
    return audio

def find_classes(dirname):
    """Discover class names from the immediate subdirectories of `dirname`.

    dirname: root directory (a leading "~" is expanded, as in `make_dataset`)
    Returns (classes, class_to_idx): the sorted list of subdirectory names and
    a dict mapping each name to its position in that list.
    """
    dirname = os.path.expanduser(dirname)
    classes = sorted(
        entry
        for entry in os.listdir(dirname)
        if os.path.isdir(os.path.join(dirname, entry))
    )
    class_to_idx = {name: idx for idx, name in enumerate(classes)}
    return classes, class_to_idx

class AudioFolder(torch.utils.data.Dataset):
    """Dataset of audio files arranged as root/<class_name>/.../<file>.

    Classes are discovered with `find_classes` and files with `make_dataset`.
    Indexing returns (audio, label), where `audio` is what `torchaudio.load`
    returns for the file (after the optional `transform`) and `label` is the
    integer class index.
    """

    def __init__(
        self,
        root,
        transform=None
    ):
        """Index the audio files under `root`.

        root:      dataset root directory (str or path-like)
        transform: optional callable applied to each loaded audio tensor
        Raises RuntimeError if no supported audio file is found.
        """
        classes, class_to_idx = find_classes(root)
        audios = make_dataset(root, class_to_idx)
        if not audios:
            # Formatted rather than concatenated so that a pathlib.Path root
            # does not raise TypeError while building the message.
            raise RuntimeError(
                f"Found 0 audios in subfolders of: {root}\n"
                f"Supported audio extensions are: {','.join(AUDIO_EXTENSIONS)}"
            )

        self.root = root
        self.audios = audios  # list of (path, class_index)
        self.classes = classes
        self.class_to_idx = class_to_idx
        self.idx_to_class = {v: k for k, v in class_to_idx.items()}
        self.transform = transform

    def __getitem__(self, index):
        """Load and return (audio, label) for the file at `index`."""
        path, label = self.audios[index]
        # The file's own sample rate is discarded
        audio, _ = torchaudio.load(path)
        if self.transform is not None:
            audio = self.transform(audio)
        return audio, label

    def __len__(self):
        """Return the number of indexed audio files."""
        return len(self.audios)

In [None]:
# Index the generated sounds (one subfolder per class under SOUND_DS).
audio_ds = AudioFolder(SOUND_DS)

In [None]:
# Show the class names found by the audio dataset.
audio_ds.classes

In [None]:
# Preview only the first sample of the audio dataset.
for sample, cl in audio_ds:
    print(sample.shape, cl, audio_ds.idx_to_class[cl])
    player = ipd.Audio(sample, rate=SAMPLE_RATE)
    ipd.display(player)
    break

## TODO

- Create nice(r) class samples!
- Add more noise at various levels of the generation process!