In [4]:
%load_ext autoreload
%autoreload 2
import math
import matplotlib.pyplot as plt
import numpy as np
import torch
from diff import datasets
from dataclasses import dataclass

import os

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
import numpy as np

# from diff.positional_embeddings import PositionalEmbedding
from diff.models import MLP
from diff.noise_scheduler import NoiseScheduler


@dataclass
class Config:
    experiment_name: str = 'base'
    dataset: str = 'dino'
    train_batch_size: int = 32
    eval_batch_size: int = 1000
    num_epochs: int = 200
    learning_rate: float = 1e-3
    num_timesteps: int = 50
    beta_schedule: str = "linear"
    embedding_size: int = 128
    hidden_size: int = 128
    hidden_layers: int = 3
    time_embedding: str = "sinusoidal"
    input_embedding: str = "sinusoidal"
    save_images_step: int = 1


config = Config()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
device = torch.device('cuda')
dataset = datasets.get_dataset('moons')

dataloader = DataLoader(
    dataset, batch_size=config.train_batch_size, shuffle=True, drop_last=True)

model = MLP(
    hidden_size=config.hidden_size,
    hidden_layers=config.hidden_layers,
    emb_size=config.embedding_size,
    time_emb=config.time_embedding,
    input_emb=config.input_embedding).to(device)

noise_scheduler = NoiseScheduler(
    num_timesteps=config.num_timesteps,
    beta_schedule=config.beta_schedule)

optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=config.learning_rate,
)

global_step = 0
frames = []
losses = []

print("Training model...")

for epoch in range(config.num_epochs):
    model.train()
    for step, batch in enumerate(dataloader):
        batch = batch[0]
        noise = torch.randn(batch.shape)
        timesteps = torch.randint(
            0, noise_scheduler.num_timesteps, (batch.shape[0],)
        ).long()
        noisy = noise_scheduler.add_noise(batch, noise, timesteps)
        noise_pred = model(noisy.to(device), timesteps.to(device))
        loss = F.mse_loss(noise_pred, noise.to(device))
        loss.backward(loss)

        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        optimizer.zero_grad()

        # progress_bar.update(1)
        # logs = {"loss": loss.detach().item(), "step": global_step}
        losses.append(loss.detach().item())

        # progress_bar.set_postfix(**logs)
        if global_step % 300 == 0:
            print(f'{epoch}|{global_step} loss={loss.detach().item():.2f}')

        global_step += 1

    if epoch % config.save_images_step == 0 or epoch == config.num_epochs - 1:
        # generate data with the model to later visualize the learning process
        model.eval()
        sample = torch.randn(config.eval_batch_size, 2)
        timesteps = list(range(len(noise_scheduler)))[::-1]
        for i, t in enumerate(timesteps):
            t = torch.from_numpy(np.repeat(t, config.eval_batch_size)).long()
            with torch.no_grad():
                residual = model(sample.to(device), t.to(device)).to('cpu')
            sample = noise_scheduler.step(residual, t[0], sample)
        frames.append(sample.numpy())


AssertionError: Torch not compiled with CUDA enabled

In [ ]:
print("Saving model...")
outdir = f"exps/{config.experiment_name}"
os.makedirs(outdir, exist_ok=True)
torch.save(model.state_dict(), f"{outdir}/model.pth")

print("Saving images...")
imgdir = f"{outdir}/images"
os.makedirs(imgdir, exist_ok=True)
frames = np.stack(frames)
xmin, xmax = -6, 6
ymin, ymax = -6, 6
for i, frame in enumerate(frames):
    plt.figure(figsize=(10, 10))
    plt.scatter(frame[:, 0], frame[:, 1])
    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)
    plt.savefig(f"{imgdir}/{i:04}.png")
    plt.close()

print("Saving loss as numpy array...")
np.save(f"{outdir}/loss.npy", np.array(losses))

print("Saving frames...")
np.save(f"{outdir}/frames.npy", frames)