# In [1]:
import os
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
from torch import optim
from tqdm import tqdm

# In [2]:
import logging
# Root logger setup: show INFO and above, each line prefixed with a
# 12-hour clock time and the level name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s: %(message)s",
    datefmt="%I:%M:%S",
)

# In [None]:
class Diffusion:
  '''
  Utilities for a basic denoising diffusion model: a linear noise schedule,
  the closed-form forward (noising) step and the reverse sampling loop.

  Default parameters are the same as presented in the original paper
  (https://arxiv.org/abs/2006.11239).
  '''

  def __init__(self, noise_steps=1000, beta_start=1e-4, beta_end=0.02, img_size=64, device="cuda"):
    '''
    Store the hyper-parameters and precompute the noise schedule.

    Params:
      noise_steps : number of entries in the noise schedule
      beta_start  : first value of the linear beta schedule
      beta_end    : last value of the linear beta schedule
      img_size    : side length of the square images created by `sample`
      device      : device the schedule tensors (and samples) are moved to

    beta is used to generate noise; alpha = 1 - beta and alpha_hat is the
    cumulative product of alpha over the schedule.
    '''
    self.noise_steps = noise_steps
    self.beta_start = beta_start
    self.beta_end = beta_end
    self.img_size = img_size
    self.device = device

    self.beta = self.prepare_noise_schedule().to(device)
    self.alpha = 1 - self.beta
    self.alpha_hat = torch.cumprod(self.alpha, dim=0)

  def prepare_noise_schedule(self):
    '''
    Returns : 1-D tensor of `noise_steps` values spaced linearly from
    `beta_start` to `beta_end`.
    '''
    return torch.linspace(self.beta_start, self.beta_end, self.noise_steps)

  def noise_images(self, x, t):
    '''
    Function which creates noise in images. [FORWARD DIFFUSION PROCESS]
    Instead of adding noise at each timestep, we can directly reach the
    requested timestep (t) in one step.

    Params:
      x : images to be noised (assumed to be a 4-D batch so that the
          per-sample coefficients broadcast - TODO confirm with caller)
      t : 1-D integer tensor of timestep indices, one per sample in x

    Returns : sqrt(alpha_hat) * X + sqrt(1-alpha_hat) * noise and noise
    '''
    # Per-sample coefficients, reshaped to (batch, 1, 1, 1) for broadcasting.
    sqrt_alpha_hat = torch.sqrt(self.alpha_hat[t])[:, None, None, None]
    sqrt_one_minus_alpha_hat = torch.sqrt(1. - self.alpha_hat[t])[:, None, None, None]
    # The noise must be standard normal (randn), not uniform (rand), to match
    # the Gaussian noise drawn in `sample`.
    e = torch.randn_like(x)
    return sqrt_alpha_hat * x + sqrt_one_minus_alpha_hat * e, e

  def sample(self, model, n):
    '''
    Generate n new images by running the reverse diffusion loop.
    [REVERSE DIFFUSION PROCESS]

    Params:
      model : module called as model(x, t) to predict the noise present in x
              (assumed to return a tensor shaped like x - TODO confirm)
      n     : number of images to generate

    The model is put in eval mode for the loop and switched back to train
    mode afterwards.

    Returns : uint8 tensor with values in [0, 255]
    '''
    logging.info(f"Sampling {n} new images...")
    model.eval()
    with torch.no_grad():
      # Start from pure Gaussian noise.
      x = torch.randn((n, 3, self.img_size, self.img_size)).to(self.device)
      # Walk the schedule backwards from noise_steps - 1 down to 1.
      for i in tqdm(reversed(range(1, self.noise_steps)), position=0):
        t = (torch.ones(n) * i).long().to(self.device)
        predicted_noise = model(x, t)
        alpha = self.alpha[t][:, None, None, None]
        alpha_hat = self.alpha_hat[t][:, None, None, None]
        beta = self.beta[t][:, None, None, None]
        if i > 1:
          # Noise of each timestep ; final timestep will be clear image hence no noise
          noise = torch.randn_like(x)
        else:
          noise = torch.zeros_like(x)
        # x <- 1/sqrt(alpha) * (x - (1-alpha)/sqrt(1-alpha_hat) * predicted_noise) + sqrt(beta) * noise
        x = 1 / torch.sqrt(alpha) * (x - ((1 - alpha) / (torch.sqrt(1 - alpha_hat))) * predicted_noise) + torch.sqrt(beta) * noise
    model.train()
    # Map from [-1, 1] to [0, 1], then to 8-bit pixel values.
    x = (x.clamp(-1, 1) + 1) / 2
    x = (x * 255).type(torch.uint8)
    return x











# In [None]:
class UNet(nn.Module):
  '''
  Constructs the UNet architecture which is used in the original paper.
  Uses attention and conv blocks to prepare the encoder - decoder type
  structure of the diffusion model.

  Relies on the DoubleConv, Down, Up and SelfAttention building blocks,
  which must be defined before this class is instantiated.
  '''

  def __init__(self, c_in=3, c_out=3, time_dim=256, device="cuda"):
    '''
    Build the layers of the network.

    Params:
      c_in     : number of channels of the input passed to the first conv block
      c_out    : number of channels produced by the final 1x1 convolution
      time_dim : stored on the instance as `time_dim` (not used by the layers built here)
      device   : stored on the instance as `device`
    '''
    super().__init__()
    self.device = device
    self.time_dim = time_dim

    # Encoder: each Down block is followed by a self-attention block.
    # NOTE(review): the second SelfAttention argument is presumably the
    # feature-map side length - confirm against SelfAttention.
    self.inc = DoubleConv(c_in, 64)
    self.down1 = Down(64, 128)
    self.sa1 = SelfAttention(128, 32)
    self.down2 = Down(128, 256)
    self.sa2 = SelfAttention(256, 16)
    self.down3 = Down(256, 256)
    self.sa3 = SelfAttention(256, 8)

    # Bottleneck
    self.bot1 = DoubleConv(256, 512)
    self.bot2 = DoubleConv(512, 512)
    self.bot3 = DoubleConv(512, 256)

    # Decoder: each Up block is followed by a self-attention block.
    self.up1 = Up(512, 128)
    self.sa4 = SelfAttention(128, 16)
    self.up2 = Up(256, 64)
    self.sa5 = SelfAttention(64, 32)
    self.up3 = Up(128, 64)
    self.sa6 = SelfAttention(64, 64)
    self.outc = nn.Conv2d(64, c_out, kernel_size=1)

  def pos_encoding(self, t, channels):
    '''
    Placeholder for the encoding of timestep t into `channels` features;
    only this docstring is present so far.

    Original note: 9:28 (presumably a bookmark into a video walkthrough -
    TODO confirm and implement).
    '''



