# Shape-To: Explore Visual Sketches as Musical Controller

Training notebook for the deep feature consistent variational autoencoder (DFC VAE)  

This notebook builds on the [PyTorch-VAE](https://github.com/AntixK/PyTorch-VAE) codebase, which is cloned in the cell below.

In [None]:
!pip install pytorch-lightning==2.1.2
!pip install --upgrade torch torchvision torchaudio


In [3]:
# Fetch the upstream PyTorch-VAE repository into ./PyTorch-VAE (relative to the current directory).
# NOTE(review): the later %cd cell enters a directory named `PyTorchVAE` (no hyphen),
# so the clone was presumably renamed/moved by hand — confirm before a fresh run.
!git clone https://github.com/AntixK/PyTorch-VAE

Cloning into 'PyTorch-VAE'...
remote: Enumerating objects: 859, done.[K
remote: Total 859 (delta 0), reused 0 (delta 0), pack-reused 859[K
Receiving objects: 100% (859/859), 46.47 MiB | 531.00 KiB/s, done.
Resolving deltas: 100% (619/619), done.


## Train

In [1]:
# Switch the kernel's working directory to the PyTorchVAE checkout; the relative
# config path ('./configs/...') used further down is resolved from here.
# NOTE(review): this is a machine-specific absolute path, and the saved output of this
# cell shows .../src/PyTorchVAE rather than .../web_app/PyTorchVAE — confirm which is current.
%cd /home/jasper/Documents/PhD/Y0/vae_sketch_to_sound/web_app/PyTorchVAE

/home/jasper/Documents/PhD/Y0/vae_sketch_to_sound/src/PyTorchVAE


In [2]:
!pwd

import os
import yaml
import argparse
import numpy as np
from pathlib import Path
from models import *
from experiment import VAEXperiment
import torch.backends.cudnn as cudnn
from pytorch_lightning import Trainer
# from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.loggers import CSVLogger
# from pytorch_lightning.utilities.seed import seed_everything
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from dataset import VAEDataset
# from pytorch_lightning.plugins import DDPPlugin


/home/jasper/Documents/PhD/Y0/vae_sketch_to_sound/src/PyTorchVAE


In [3]:
# Load the experiment configuration and build the logger, model and Lightning module from it.
# config_file = './configs/bhvae.yaml'
config_file = './configs/dfc_vae.yaml'

with open(config_file, 'r') as file:
    try:
        config = yaml.safe_load(file)
    except yaml.YAMLError as exc:
        # Report the parse error, then re-raise. Swallowing it would leave `config`
        # undefined (or silently stale from an earlier run of this cell) and the
        # lines below would fail with a misleading NameError/KeyError instead.
        print(exc)
        raise


# tb_logger =  TensorBoardLogger(save_dir=config['logging_params']['save_dir'],
#                                name=config['model_params']['name'],)
# Metrics are written as CSV under <save_dir>/<model name>/.
logger = CSVLogger(save_dir=config['logging_params']['save_dir'], name=config['model_params']['name'])
# NOTE(review): seeding is currently disabled, so weight initialisation is not reproducible.
# seed_everything(config['exp_params']['manual_seed'], True)

# The model class is looked up by name in `vae_models` and receives the whole
# `model_params` section as keyword arguments.
model = vae_models[config['model_params']['name']](**config['model_params'])
experiment = VAEXperiment(model,
                          config['exp_params'])



In [4]:
# Bare expression: the notebook shows the model's repr (its layer tree) as the cell output.
model

DFCVAE(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (2): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (3): Sequential(
      (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (4): Sequen

In [5]:
from PIL import Image
import os, os.path
import numpy as np
import torch  # imported explicitly: this cell must not depend on `from models import *` leaking `torch`

from torchvision.transforms.functional import to_pil_image

# Read every image in `path`, resize it to size x size, stack the results into one
# array, shuffle it reproducibly and rescale/invert the pixel values.

imgs = []
labels = []  # never filled in this cell, so img_data_y below is an empty array

path = "/home/jasper/Documents/PhD/Y0/vae_sketch_to_sound/datasets/crop_640/1"
path = os.path.join(path,'')
size = 64
split_seed = 42  # fixed seed so the shuffle (and any split sliced from it) is repeatable

valid_images = [".jpg",".gif",".png",".tga"]

# sorted(): os.listdir returns entries in arbitrary order, which would make the
# seeded shuffle below depend on the filesystem.
for i, f in enumerate(sorted(os.listdir(path))):
    ext = os.path.splitext(f)[1]
    if ext.lower() not in valid_images:
        continue
    # Context manager closes the file handle as soon as the pixels are copied out.
    with Image.open(os.path.join(path, f)) as pil_img:
        img = np.array(pil_img)
    img = torch.from_numpy(img)
    # Two leading axes are added before resizing and one is dropped afterwards;
    # assumes each image decodes to a 2-D (H, W) array — TODO confirm for non-grayscale files.
    img = torch.nn.functional.interpolate(img.unsqueeze(0).unsqueeze(0), size=(size,size), mode='bilinear',antialias=True)

    imgs.append(img[0].numpy())

    if i%500==0:
        print(f'processing index {i}')

if not imgs:
    # Fail here with a clear message instead of on .max()/.min() of an empty tensor below.
    raise RuntimeError(f'no images with extensions {valid_images} found in {path}')

img_data_x = np.array(imgs)
img_data_y = np.array(labels)
# Seeded, in-place shuffle along the first axis (replaces the unseeded np.random.shuffle).
np.random.default_rng(split_seed).shuffle(img_data_x)
print(f'collected data {img_data_x.shape}\ncollected label {img_data_y.shape}')

img_data_x = torch.from_numpy(img_data_x)

# Divide by 255 and invert, so an input value of 255 maps to 0 and 0 maps to 1.
img_data_x = 1-img_data_x/255.

print(img_data_x.shape)
print(img_data_x.max())
print(img_data_x.min())

processing index 0
processing index 500
processing index 1000
processing index 1500
processing index 2000
collected data (2217, 1, 64, 64)
collected label (0,)
torch.Size([2217, 1, 64, 64])
tensor(1.)
tensor(0.)


In [6]:
# Split the image tensor by position: the first 2000 entries train the model,
# everything after index 2000 is held out for validation.
train_images = img_data_x[:2000]
val_images = img_data_x[2000:]

data = VAEDataset(
    data_train=train_images,
    data_val=val_images,
    train_batch_size=64,
    val_batch_size=64,
    patch_size=64,
    num_workers=4,
    pin_memory=1,
)
data.setup()

In [9]:
from pytorch_lightning.callbacks import ProgressBar

class LitProgressBar(ProgressBar):
    """Progress-bar callback that adds no rendering of its own.

    It only tracks an enabled/disabled flag and forwards the batch-end hook to
    the base class. Passing an instance in the Trainer's ``callbacks`` is
    presumably meant to replace the default bar and keep the notebook output
    quiet during long runs — confirm against the Lightning docs.

    The flag lives in ``_enabled``. The original stored it as ``self.enable``,
    which shadowed the base-class ``enable()`` method with a bool, so any call
    to ``bar.enable()`` would raise ``TypeError``.
    """

    def __init__(self):
        super().__init__()  # let the base class initialise its own state first
        self._enabled = True

    @property
    def is_enabled(self):
        """Whether the bar is currently enabled."""
        return self._enabled

    def enable(self):
        """Mark the bar as enabled."""
        self._enabled = True

    def disable(self):
        """Mark the bar as disabled."""
        self._enabled = False

    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
        """Forward the hook to the base class; nothing is printed here on purpose."""
        super().on_train_batch_end(trainer, pl_module, outputs, batch, batch_idx)

bar = LitProgressBar()


In [10]:
# Everything produced by this run lives under the logger's run directory.
run_dir = logger.log_dir

# Checkpointing is evaluated every 3000 epochs, keeps the three best checkpoints
# ranked by "val_loss", and additionally keeps the most recent one.
checkpoint_cb = ModelCheckpoint(
    dirpath=os.path.join(run_dir, "checkpoints"),
    monitor="val_loss",
    save_top_k=3,
    save_last=True,
    every_n_epochs=3000,
)

# Single-GPU trainer; any remaining options come from the YAML `trainer_params` section.
runner = Trainer(
    logger=logger,
    callbacks=[bar, checkpoint_cb],
    log_every_n_steps=16,
    accelerator="gpu",
    devices=1,
    **config['trainer_params'],
)

# Output folders, presumably written to by the experiment during training — TODO confirm.
for subdir in ("Samples", "Reconstructions"):
    Path(f"{run_dir}/{subdir}").mkdir(exist_ok=True, parents=True)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [12]:
# Start training: `experiment` is the VAEXperiment built from the YAML config and
# `data` is the VAEDataset set up earlier. The saved output shows the run ending
# at max_epochs=10000, so expect this cell to take a long time.
runner.fit(experiment, datamodule=data)

You are using a CUDA device ('NVIDIA GeForce RTX 4070 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
/home/jasper/miniforge3/envs/s2s2/lib/python3.10/site-packages/lightning_fabric/loggers/csv_logs.py:198: Experiment logs directory logs/DFCVAE/version_9 exists and is not empty. Previous log files in this directory will be deleted when the new ones are saved!
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type   | Params
---------------------------------
0 | model | DFCVAE | 147 M 
---------------------------------
3.3 M     Trainable params
143 M     Non-trainable params
147 M     Total params
588.099   Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=10000` reached.
