In [None]:
import numpy as np
from IPython.display import Audio, display
from scipy.io import wavfile
import scipy.signal as signal
import warnings

# Read the source recording, ignoring scipy's WavFileWarning while doing so.
audio_path = "../../resources/flute1.wav"
warnings.simplefilter("ignore", wavfile.WavFileWarning)
sr, audio = wavfile.read(audio_path)

# Playback copy of the untouched recording (transposed for IPython's Audio),
# scaled so the loudest sample has magnitude 1.
# The conversion to float happens BEFORE the absolute value is taken: on int16
# data np.abs(-32768) wraps around to -32768 and would corrupt the peak.
waveform = audio.T.astype(np.float32)
input_peak = np.max(np.abs(waveform))
if input_peak > 0:  # an all-zero recording would otherwise divide by zero
    waveform = waveform / input_peak

# The granular processing below works on a single channel: average the
# channels of a multi-channel file into one.
if audio.ndim > 1:
    audio = np.mean(audio, axis=1)

def grain_processing(
        grain_size: float,
        density: int,
        jitter: float,
        stretch_factor: float = 1.0,
        random_pitch: bool = False,
        pitch_range: float = 0.5):
    samples_per_grain = int(grain_size * sr)
    grain_spacing = max(int(sr / density), 1)

    output_length = int(len(audio) * stretch_factor)

    # output buffer
    output = np.zeros(output_length)
    window = np.hanning(samples_per_grain).astype(np.float32)

    grains_total = int((len(audio) - samples_per_grain) / grain_spacing)

    # start processing
    position = 0

    for i in range(grains_total):
        # Random jitter to grain read offset
        random_offset = int(np.random.uniform(-jitter, jitter) * grain_spacing)

        # compute jittered grain start
        grain_start = position + random_offset

        # Ensure we clamp the grain_start as we could end up outside the bounds of the audio
        grain_start = np.clip(grain_start, 0, len(audio) - samples_per_grain)

        # clip start to ensure grains within bounds and apply window
        grain = audio[grain_start: grain_start + samples_per_grain] * window

        if random_pitch:
            semitones = np.random.uniform(-pitch_range, pitch_range)
            pitch_factor = 2 ** (semitones / 12.0)

            num_samples = int(len(grain) / pitch_factor)
            grain = signal.resample(grain, num_samples)

            # Pad or trim to original grain size for consistent output alignment
            if len(grain) < samples_per_grain:
                grain = np.pad(grain, (0, samples_per_grain - len(grain)))
            else:
                grain = grain[:samples_per_grain]

        # grain write position output
        output_position = int(position * stretch_factor)

        # Trim grain if overrun of buffer
        if output_position + samples_per_grain > output_length:
            grain = grain[:output_length - output_position]

        # mix grain into output
        output[output_position: output_position + len(grain)] += grain
        position += grain_spacing

    peak = np.max(np.abs(output))
    output = (output / peak) * 0.8

    return output


# Reference playback of the unprocessed recording.
print("Original:")
display(Audio(waveform, rate=sr))

# Demo presets: the label printed above each player, paired with the keyword
# arguments handed to grain_processing. They are rendered in the listed order.
_presets = (
    ("Slight:",
     dict(grain_size=0.05, density=100, jitter=0.5, stretch_factor=0.5)),
    ("Fragmented:",
     dict(grain_size=0.05, density=10000, jitter=20000, stretch_factor=1.5)),
    ("Chopped:",
     dict(grain_size=0.05, density=100, jitter=500)),
    ("Stretched Excessively:",
     dict(grain_size=0.2, density=100, jitter=500)),
    ("Cloud of Sound:",
     dict(grain_size=0.2, density=5000, jitter=100000)),
    ("Slight (Random Pitch):",
     dict(grain_size=0.05,
          density=100,
          jitter=0.5,
          stretch_factor=0.5,
          pitch_range=2.0,
          random_pitch=True)),
)

for _label, _settings in _presets:
    print(_label)
    display(Audio(grain_processing(**_settings), rate=sr))

# Other ideas
# Vary density per second over time (increasing/decreasing)
# vary grain sizes
# periodically reverse grains

