In [1]:
import soundcard as sc
import soundfile as sf
import cv2
import numpy as np
import time
import keyboard

import torch
import torchaudio

import torch.optim as optim
import torch.nn as nn
from torchvision import utils, datasets
import torchvision.transforms as T
import torch.nn.functional as F

from utils.audio_utils import *
from utils.vaes import *    
from utils.data_utils import calculate_mean_std, split_train_val_test

import matplotlib.pyplot as plt

import os

from datetime import datetime

import math

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [93]:
audio_stream = RealTimeAudioStream()
# vae = torch.load(r"models\vae_mnist_2dim.pth")
vae.eval()

angle = 0

with torch.no_grad():
    with audio_stream.get_recorder() as mic:
        while not audio_stream.done:
            angle += 2
            angle_r = math.radians(angle)
            
            r_m = torch.tensor([[math.cos(angle_r), -math.sin(angle_r)], 
                                [math.sin(angle_r), math.cos(angle_r),]])

            rms, zcr = audio_stream.step(mic)

            rms = (rms.mean().item() - 0.05) * 20 # 0.3
            zcr = (zcr.mean().item() - 0.05) * 40# 0.14
            print(angle_r, rms, zcr, end="\r")
            z = (torch.tensor([rms, zcr]) @ r_m).unsqueeze(0).to("cuda") 
            sample = vae.decoder(z)

            image = sample[0].permute(1, 2, 0).detach().cpu().numpy()
            image = cv2.resize(image, (2048, 1024))

            cv2.imshow("generation", image)

            k = cv2.waitKey(33)
            if k==27:    # Esc key to stop
                cv2.destroyAllWindows()
                break

cv2.destroyAllWindows()


#audio_stream.stream()

RealTimeAudioStream initialized with 44032 sample rate
61.086523819801535 2.367666692080784 2.726562599999999901111

In [2]:
# MNIST dataset

batch_size_train = 256
batch_size_test = 256

torch.backends.cudnn.enabled = False

# MNIST Dataset
train_dataset = datasets.MNIST(root='./mnist_data/', train=True, transform=T.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='./mnist_data/', train=False, transform=T.ToTensor(), download=False)


# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size_test, shuffle=False)

examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)
example_data.shape

torch.Size([256, 1, 28, 28])

In [57]:
# custom dataset

batch_size = 1024

WIDTH = 128
HEIGHT = 64

dataset_dir = r"data_synthetic\1_only_orbita_bw"

# calculate dataset mean and std
raw_transforms = T.Compose([
    T.Resize((HEIGHT, WIDTH)), 
    T.ToTensor(),
])

raw_data = datasets.ImageFolder(root = dataset_dir, transform = raw_transforms)

# DATA_MEAN, DATA_STD = calculate_mean_std(raw_data)
DATA_MEAN = [0.8716, 0.8716, 0.8716]
DATA_STD  = [0.3035, 0.3035, 0.3035]

In [58]:
data_transforms = T.Compose([
    T.Resize((HEIGHT, WIDTH)), 
    T.ToTensor(),
    T.Normalize(mean=DATA_MEAN,std=DATA_STD),
    T.Grayscale(),
])

dataset = datasets.ImageFolder(root = dataset_dir, transform = data_transforms)

# split into Train, Val and Test
data = split_train_val_test(dataset, val=0.0, test=0.1, batch_size=batch_size)

examples = enumerate(data['train'])
batch_idx, (example_data, example_targets) = next(examples)
example_data.shape

torch.Size([1024, 1, 64, 128])

In [59]:
def eval_on_test(model, test_loader):
    model.eval()

    with torch.no_grad():
        latents = []
        labels = []
        for x, y in test_loader:
            mu, log_var = model.encoder(x.cuda())
            z = model.sampling(mu, log_var).cpu().numpy()

            latents.append(z)
            labels.append(y)

    latents = np.concatenate(latents, 0)
    labels = np.concatenate(labels, 0)
    model.train()

    return latents, labels

In [60]:
def visualize_latent_space(model, loss_items, experiment_name, test_loader):

    latents, labels = eval_on_test(model, test_loader)
    
    now = datetime.now()
    pic_name = now.strftime("%Y%m%d%H%M%S%f")

    extent = 5

    cmap = plt.cm.tab20
    bounds = np.linspace(0,10,11)
    fig, ax = plt.subplots()

    if extent is not None: 
        ax.set_xlim(-extent, extent)
        ax.set_ylim(-extent, extent)
    scat = ax.scatter(latents[:, 0], latents[:,1], s=2, marker='o', cmap=cmap, c=labels)
    cb = plt.colorbar(scat, spacing='proportional',ticks=bounds)

    title = f"Recon: {loss_items[0].item():2.3f}, KLD {loss_items[1].item():2.3f}"
    ax.set_title(title)

    path1 = rf'latent_space_vis\{experiment_name}'

    if not os.path.exists(path1):
        os.makedirs(path1)

    fig.savefig(path1 + rf'\{pic_name}.jpg')
    plt.close()

In [73]:
# return reconstruction error + KL divergence losses
def vae_loss(recon_x, x, mu, log_var):
    B, C, H, W = recon_x.shape
    # recons_loss = F.binary_cross_entropy(recon_x.view(B, -1), x.view(B, -1), reduction='mean')
    # EXPECTS [0, 1]
    recons_loss = F.mse_loss(recon_x.view(B, -1), x.view(B, -1), reduction="mean") * 2000
    KLD = -0.5 * torch.mean(1 + log_var - mu.pow(2) - log_var.exp()) # 1 + log(sigma**2) - mu**2 - sigma**2
    return recons_loss, KLD

In [74]:
def norm_image(image):
    dtype = image.dtype
    image = image.astype(float)
    image = image - np.min(image)
    image = image / np.max(image) * 255
    image = image.astype(dtype)
    return image

def save_recon(x_recon, experiment_name):
    image = x_recon[0].permute(1, 2, 0).detach().cpu().numpy()
    now = datetime.now()
    pic_name = now.strftime("%Y%m%d%H%M%S%f")
    path =  rf"latent_space_vis\{experiment_name}\recons"
    if not os.path.exists(path):
        os.makedirs(path)

    cv2.imwrite(os.path.join(path, f"{pic_name}.jpg"), norm_image(image))

In [83]:
def train(model, loss_f, train_loader, test_loader, optimizer, scheduler, epoch, experiment_name, vis=False):
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.to(device)

    for batch_idx, (x, _) in enumerate(train_loader):
        x = x.to(device)
        model.train()
        optimizer.zero_grad()
        x_recon, mu, log_var = model(x)
        x_recon = x_recon[:, 0, None, :, :]

        rec, KLD = loss_f(x_recon, x, mu, log_var)
        loss = rec + KLD

        loss.backward()
        optimizer.step()
        scheduler.step()
        print(scheduler.get_last_lr())

        if batch_idx % 25 == 0:
            if vis: 
                visualize_latent_space(model, (rec, KLD), experiment_name, test_loader)
                save_recon(x_recon, experiment_name)
                
            print("Epoch {:3} Iteration {:3}: recon: {:12.2f}, kld: {:8.2f}".format(epoch, batch_idx, rec.item(), KLD.item()))

    path =  rf"models\{experiment_name}"
    if not os.path.exists(path):
        os.makedirs(path)

    save_model_to = rf"{path}\vae_{epoch}.pth"
    torch.save(model.state_dict(), save_model_to)

    return loss


In [76]:
embedding_size = 2


# build model
vae = VAE(sample_x=example_data, hidden_dims=None, z_dim=embedding_size)
optimizer = optim.Adam(vae.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=40)


In [70]:
# ### sanity check
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# vae.to(device)
# vae.train()

# for batch_idx, (x, _) in enumerate(data['train']):
#     x = x.to(device)
#     out, mu, log_var = vae(x)
#     print(out.shape)
#     break


In [84]:
num_epochs = 2000

experiment_name = f"1_only_orbita_bw"

for epoch in range(1, num_epochs + 1):
    train(vae, vae_loss, data['train'], data['test'], optimizer, scheduler, epoch, experiment_name, vis=True)

[0.008247240241651828]
Epoch   1 Iteration   0: recon:      1262.48, kld:     4.89
[0.007938926261463197]
[0.007612492823580613]
[0.007269952498698518]
[0.0069134171618262736]
[0.006545084971875471]
[0.006167226819280166]
[0.005782172325201836]
[0.00539229547863981]
[0.005000000000000558]
Epoch   2 Iteration   0: recon:      1283.06, kld:     4.89
[0.004607704521361306]
[0.004217827674799349]
[0.0038327731807208795]
[0.0034549150281257108]
[0.0030865828381749054]
[0.002730047501302658]
[0.002387507176420559]
[0.002061073738537855]
[0.001752759758349335]
Epoch   3 Iteration   0: recon:      1275.94, kld:     4.88
[0.0014644660940674405]
[0.0011979701719999568]
[0.0009549150281254004]
[0.0007367991782296202]
[0.0005449673790581957]
[0.00038060233744362015]
[0.0002447174185242519]
[0.00013815039801164678]
[6.155829702431955e-05]
Epoch   4 Iteration   0: recon:      1279.44, kld:     5.15
[1.5413331334363534e-05]
[0.0]
[1.541333133436018e-05]
[6.155829702432944e-05]
[0.0001381503980116365]

Exception in thread Exception ignored in: <function _ConnectionBase.__del__ at 0x000001D64B4E4CA0>
Traceback (most recent call last):
  File "c:\Users\dan\anaconda3\envs\sound_vae\lib\multiprocessing\connection.py", line 132, in __del__
QueueFeederThread:
Traceback (most recent call last):
  File "c:\Users\dan\anaconda3\envs\sound_vae\lib\multiprocessing\queues.py", line 239, in _feed
    self._close()
  File "c:\Users\dan\anaconda3\envs\sound_vae\lib\multiprocessing\connection.py", line 277, in _close
    reader_close()
  File "c:\Users\dan\anaconda3\envs\sound_vae\lib\multiprocessing\connection.py", line 177, in close
    _CloseHandle(self._handle)
OSError: [WinError 6] The handle is invalid
    self._close()
  File "c:\Users\dan\anaconda3\envs\sound_vae\lib\multiprocessing\connection.py", line 277, in _close
    _CloseHandle(self._handle)
OSError: [WinError 6] The handle is invalid

During handling of the above exception, another exception occurred:

Traceback (most recent call last

Epoch  48 Iteration   0: recon:      1240.87, kld:     5.10
[0.00038060233744371827]
[0.00024471741852429517]
[0.0001381503980116865]
[6.155829702432782e-05]
[1.5413331334375738e-05]
[0.0]
[1.541333133436018e-05]
[6.155829702431391e-05]
[0.00013815039801162382]
Epoch  49 Iteration   0: recon:      1216.16, kld:     5.08
[0.00024471741852428926]
[0.0003806023374435873]
[0.0005449673790582568]
[0.0007367991782296576]
[0.0009549150281254049]
[0.0011979701720000132]
[0.0014644660940674574]
[0.0017527597583493052]
[0.0020610737385378876]
Epoch  50 Iteration   0: recon:      1228.29, kld:     4.78
[0.00238750717642054]
[0.0027300475013025834]
[0.003086582838175034]
[0.00345491502812565]
[0.0038327731807210356]
[0.0042178276747994485]
[0.004607704521361419]
[0.005000000000000685]
[0.005392295478639952]
Epoch  51 Iteration   0: recon:      1253.71, kld:     5.16
[0.005782172325201923]
[0.0061672268192804755]
[0.006545084971875589]
[0.006913417161826474]
[0.007269952498698796]
[0.00761249282358

In [85]:
torch.save(vae, r"models\vae_orbita.pth")

PicklingError: Can't pickle <class 'utils.vaes.VAE'>: it's not the same object as utils.vaes.VAE