In [None]:
import sys
sys.path.append("..")

import random
import math
import itertools
from copy import deepcopy
from io import BytesIO
from pathlib import Path
from typing import Optional, Callable, List, Tuple, Iterable, Generator, Union, Dict

import PIL.Image
import PIL.ImageDraw
import plotly
import plotly.express as px
import plotly.graph_objects as go
plotly.io.templates.default = "plotly_dark"
import numpy as np
import pandas as pd
pd.options.plotting.backend = "plotly"
from sklearn.manifold import TSNE

from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset, IterableDataset, RandomSampler
import torchvision.transforms as VT
import torchvision.transforms.functional as VF
import torchaudio.transforms as AT
from torchvision.utils import make_grid
from IPython.display import display, Audio
import torchaudio
from torchaudio.io import StreamReader

from src.datasets import *
from src.algo import GreedyLibrary
from src.util.image import *
from src.util import to_torch_device, iter_batches
from src.patchdb import PatchDB, PatchDBIndex
from src.models.encoder import *
from src.util.audio import *
from src.util.files import *
from src.util.embedding import *
from scripts.train_vae_audio import VAEAudioConv

In [None]:
@torch.no_grad()
def play(audio, shape=(200, 1024), normalize=False):
    if normalize:
        audio_max = audio.abs().max()
        if audio_max:
            audio = audio / audio_max
    display(Audio(audio.clamp(-1, 1), rate=SAMPLERATE, normalize=False))
    display(plot_audio(audio, shape))
    
def read_stream(stream, seconds: float = 3.):
    chunks = []
    max_samples = int(seconds * SAMPLERATE)
    num_samples = 0
    for chunk, in stream:
        chunks.append(chunk.mean(-1))
        num_samples += chunks[-1].shape[-1]
        if num_samples >= max_samples:
            break
    return torch.concat(chunks)[:max_samples]

# audio slice dataset

In [None]:
FILENAMES1 = ['~/Music/Alejandro Jodorowsky - [1971] El Topo OST (vinyl rip)/side1.mp3', '~/Music/Aphex Twin/Acoustica Alarm Will Sound Performs Aphex Twin/01 Cock_Ver 10.mp3', '~/Music/BR80-backup/ROLAND/LIVEREC/LIVE0000.WAV', '~/Music/BR80-backup/ROLAND/MASTERING/42FUNK.WAV', '~/Music/Bertolt Brecht & Kurt Weill - [1954] The Threepenny Opera (original off-broadway cast)/01 - prologue (spoken) gerald price.mp3', '~/Music/COIL - Absinthe/COIL - animal are you.mp3', '~/Music/COIL - Black Antlers/01-the gimp-sometimes.mp3', "~/Music/COIL - live at All Tomorrow's Parties, April 4, 2003...and the Ambulance Died in His Arms/01 - Triple Sun Introduction.mp3", "~/Music/Coil - [1991] Love's Secret Domain/01 - Disco Hospital.mp3", '~/Music/Crosby Stills  Nash & Young/carry on/Crosby Stills  Nash & Young - after the dolphin.mp3', '~/Music/Felix Kubin - Jane B. ertrinkt mit den Pferden/01 Wagner 99.mp3', "~/Music/Hitchhiker's Guide - Radio Play/Hitchhiker'sGuideEpisode-02,mp3", '~/Music/King Crimson/Discipline [30th Anniversary Edition] [Bonus Track]/01 Elephant Talk.mp3', "~/Music/King Crimson/Larks' Tongues in Aspic/01 Larks' Tongues in Aspic, Pt. 1.mp3", '~/Music/King Crimson/Three of a Perfect Pair- 30th Anniversary [Bonus Tracks]/01 Three of a Perfect Pair.mp3', '~/Music/King Crimson/Vrooom Vrooom Disc 1/01 Vrooom Vrooom.mp3', '~/Music/King Crimson/Vrooom Vrooom Disc 2/01 Conundrum.mp3', '~/Music/MODULE/42_modus.wav', '~/Music/MODULE/ATOMIK/INTRO.WAV', '~/Music/MODULE/FILMTHEA/sample.wav', '~/Music/MODULE/PATTERN/hoast84.wav', '~/Music/MODULE/TRIBB/02- MultiDrum_mp3.wav', '~/Music/MODULE/for_gonz/ATOMIK/INTRO.WAV', '~/Music/MODULE/sendung/UnitedSchneegl.wav', '~/Music/MODULE/werner/dolby/recycle samples/pianoarpeggio.wav', '~/Music/Primus/Primus - Antipop (Complete CD)(AlbumWrap)_ALBW.mp3', '~/Music/Ray Kurzweil The Age of Spiritual Machines/(audiobook) Ray Kurzweil - The Age of Spiritual Machines - 1 of 4.mp3', '~/Music/Scirocco/01 Zug.mp3', '~/Music/Symphony X/01 - Symphony X - The Damnation Game.mp3', '~/Music/VOLCANO THE BEAR - Amidst The Noise And Twigs (2007)/01 - Volcano The Bear - The Sting Of Haste.mp3', '~/Music/VOLCANO THE BEAR - Classic Erasmus Fusion (2006)/A01. Classic Erasmus Fusion.mp3', '~/Music/VOLCANO THE BEAR - Guess the Birds (2002)/01 - urchins at the harp.mp3', '~/Music/VOLCANO THE BEAR - Xvol/01. moon chorus.mp3', '~/Music/Volcano the bear - [2001] Five Hundred Boy Piano/01. Hairy Queen.mp3', '~/Music/Ys/01 emily.mp3', '~/Music/anke/DR-100_0809-mucke-tanja-traum-ist-aus.wav', '~/Music/diffusion/known-unknowns-02.wav', '~/Music/francis/scarlatti-k119.wav', '~/Music/grafft/Lotte/210429_1859.mp3', '~/Music/grafft/MUSIC/20200505_Bauchentscheidung.mp3', '~/Music/olli/24.07.19 eberhardt finaaaaaal.wav', '~/Music/record/20220624_untitled.wav', '~/Music/the who/Tommy/the who - 1921.mp3', '~/Music/theDropper/01 CD Track 01.mp3', '~/Music/yaggediyo.mp3']
FILENAMES2 = ['~/Music/Alejandro Jodorowsky - [1971] El Topo OST (vinyl rip)/side2.mp3', '~/Music/Aphex Twin/Acoustica Alarm Will Sound Performs Aphex Twin/02 Logon Rock Witch.mp3', '~/Music/BR80-backup/ROLAND/LIVEREC/LIVE0001.WAV', '~/Music/BR80-backup/ROLAND/MASTERING/44BOND.WAV', '~/Music/Bertolt Brecht & Kurt Weill - [1954] The Threepenny Opera (original off-broadway cast)/02 - overture.mp3', '~/Music/COIL - Black Antlers/02-sex with sun ra (part 1 - saturnalia).mp3', "~/Music/COIL - live at All Tomorrow's Parties, April 4, 2003...and the Ambulance Died in His Arms/02 - Snow Falls Into Military Temples.mp3", "~/Music/Coil - [1991] Love's Secret Domain/02 - Teenage Lightning 1.mp3", '~/Music/Crosby Stills  Nash & Young/carry on/Crosby Stills  Nash & Young - almost cut my hair.mp3', '~/Music/Felix Kubin - Jane B. ertrinkt mit den Pferden/02 Vater Muss Die Stube Peitschen.mp3', "~/Music/Hitchhiker's Guide - Radio Play/Hitchhiker'sGuideEpisode-03.mp3", '~/Music/King Crimson/Discipline [30th Anniversary Edition] [Bonus Track]/02 Frame by Frame.mp3', "~/Music/King Crimson/Larks' Tongues in Aspic/02 Book of Saturday.mp3", '~/Music/King Crimson/Three of a Perfect Pair- 30th Anniversary [Bonus Tracks]/02 Modelk Man.mp3', '~/Music/King Crimson/Vrooom Vrooom Disc 1/02 Coda- Marine 475.mp3', '~/Music/King Crimson/Vrooom Vrooom Disc 2/02 Thela Hun Ginjeet.mp3', '~/Music/MODULE/43_monkeys have reached___.wav', '~/Music/MODULE/TRIBB/03- Unbenannt003_wav.wav', '~/Music/MODULE/sendung/buchstab01.wav', '~/Music/Ray Kurzweil The Age of Spiritual Machines/(audiobook) Ray Kurzweil - The Age of Spiritual Machines - 2 of 4.mp3', '~/Music/Scirocco/02 Nini Toscanè.mp3', '~/Music/Symphony X/02 - Symphony X - Dressed To Kill.mp3', '~/Music/VOLCANO THE BEAR - Amidst The Noise And Twigs (2007)/02 - Volcano The Bear - Before We Came To This Religion.mp3', '~/Music/VOLCANO THE BEAR - Classic Erasmus Fusion (2006)/A02. Did You Ever Feel Like Jesus¿.mp3', '~/Music/VOLCANO THE BEAR - Guess the Birds (2002)/02 - maureen memorium.mp3', '~/Music/VOLCANO THE BEAR - Xvol/02. snang dushko.mp3', '~/Music/Volcano the bear - [2001] Five Hundred Boy Piano/02. Seeker.mp3', '~/Music/Ys/02 monkey & bear.mp3', '~/Music/anke/DR-100_0809-mucke-tanja.wav', '~/Music/diffusion/known-unknowns-03.wav', '~/Music/francis/urdance_gsm_movt1.wav', '~/Music/grafft/Lotte/210429_1959.mp3', '~/Music/grafft/MUSIC/20200505_Eingecremt.mp3', '~/Music/olli/Du Schweigst_REV2_=FSM=__44.1-24.wav', '~/Music/the who/Tommy/the who - Amazing journey.mp3', '~/Music/theDropper/02 CD Track 02.mp3']

In [None]:
SAMPLERATE = 44100
SLICE_SIZE = SAMPLERATE * 1
INTERLEAVE_FILES = 1

ds = AudioSliceIterableDataset(
    #"~/Music/", recursive=True, 
    FILENAMES1,
    sample_rate=SAMPLERATE,
    slice_size=SLICE_SIZE,
    # stride=50,
    interleave_files=INTERLEAVE_FILES,
    #shuffle_files=True,
    mono=True,
    seek_offset=0,
    #verbose=True,
)
# ds = IterableShuffle(ds, 1000)
for i, audio in zip(range(5), ds):
    play(audio)

In [None]:
count = 0
for audio in tqdm(ds):
    count += 1
print(f"{count:,.2f} seconds")

In [None]:
if 0:
    ds = AudioSliceIterableDataset(
        "~/Music/", recursive=True,
        sample_rate=SAMPLERATE,
        slice_size=SLICE_SIZE,
        interleave_files=INTERLEAVE_FILES,
        #shuffle_files=True,
        mono=True,
        seek_offset=0,
        max_slices_per_file=1,
        #verbose=True,
        with_filename=True,
    )

    files = []
    path_map = dict()
    for audio, filename in ds:
        filename = Path(filename)
        path_map.setdefault(filename.parent, []).append(str(filename).replace("/home/bergi", "~"))

    print([
        f[1] for f in path_map.values() if len(f) > 1
    ])


# spec dataset

In [None]:
NUM_MEL = 128
WIN_LENGTH = SAMPLERATE // 30
HOP_LENGTH = SAMPLERATE // NUM_MEL

ds_spec = TransformIterableDataset(
    ds, 
    transforms=[
        AT.MelSpectrogram(
            sample_rate=SAMPLERATE,
            n_fft=1024 * 2,
            win_length=WIN_LENGTH,
            hop_length=HOP_LENGTH,
            n_mels=NUM_MEL,
            #f_max=1000,
            power=1.,
            #mel_scale="slaney",
            #normalized=True,
        ),
        lambda x: (x[:, :, :NUM_MEL] / x.max()).clamp(0, 1),
    ],
)
img = next(iter(ds_spec))
print("spec shape:", img.shape[-2:])
VF.to_pil_image(VF.resize(img, [s * 2 for s in img.shape[-2:]], VF.InterpolationMode.NEAREST))

In [None]:
grid = [spec for i, spec in zip(range(5*5), ds_spec)]
VF.to_pil_image(make_grid(grid, nrow=5))

# spec patch dataset

In [None]:
ds_patch = ImagePatchIterableDataset(
    ds_spec,
    shape=(8, 8),
    #shape=(32, 32),
    interleave_images=INTERLEAVE_FILES,
)

grid = [patch for i, patch in zip(range(64*64), ds_patch)]
img = make_grid(grid, nrow=32)
VF.to_pil_image(VF.resize(img, [s * 3 for s in img.shape[-2:]], VF.InterpolationMode.NEAREST))

# freq band envelope

In [None]:
ds_freqband = ImagePatchIterableDataset(
    ds_spec,
    shape=(8, NUM_MEL),
    interleave_images=INTERLEAVE_FILES,
)

grid = [patch for i, patch in zip(range(64), ds_freqband)]
VF.to_pil_image(make_grid(grid, nrow=1))

# spec envelope

In [None]:
ds_env = ImagePatchIterableDataset(
    ds_spec,
    shape=(NUM_MEL, 8),
    interleave_images=INTERLEAVE_FILES,
)

grid = [patch for i, patch in zip(range(64), ds_env)]
VF.to_pil_image(make_grid(grid, nrow=32))

In [None]:
AT.MelSpectrogram?