In [1]:
from omegaconf import OmegaConf
import importlib
import torch
import sys

In [2]:
def instantiate_from_config(config):
    """Build the object described by a ``target``/``params`` config node.

    Args:
        config: Mapping-like node (e.g. a dict or an OmegaConf config) with a
            ``target`` entry holding a dotted import path and an optional
            ``params`` entry holding the keyword arguments for the call.

    Returns:
        The result of calling the resolved ``target`` with ``**params``.

    Raises:
        KeyError: If ``config`` has no ``target`` entry.
    """
    if "target" not in config:
        raise KeyError("Expected key `target` to instantiate.")
    # An explicit null `params` entry is treated like a missing one, so the
    # `**params` expansion below does not fail with a TypeError.
    params = config.get("params")
    if params is None:
        params = dict()
    return get_obj_from_str(config["target"])(**params)

def get_obj_from_str(string, reload=False):
    """Resolve a dotted path such as ``"package.module.Name"`` to an object.

    Args:
        string: Dotted import path. Everything before the last ``.`` is the
            module to import; the final component is the attribute fetched
            from that module.
        reload: If True, re-execute the module with ``importlib.reload``
            before looking up the attribute.

    Returns:
        The attribute named by the last component of ``string``.

    Raises:
        ValueError: If ``string`` contains no ``.`` separator.
        ImportError: If the module part cannot be imported.
        AttributeError: If the module has no attribute with that name.
    """
    module, sep, cls = string.rpartition(".")
    if not sep:
        # Without a separator there is no module part to import; fail with a
        # message that names the offending value instead of an unpacking error.
        raise ValueError(
            f"Expected a dotted path like 'package.module.Name', got {string!r}."
        )
    module_imp = importlib.import_module(module)
    if reload:
        # reload() returns the module object that ends up in sys.modules.
        module_imp = importlib.reload(module_imp)
    return getattr(module_imp, cls)


In [3]:
# YAML config to load (judging by the filename, the LSUN-churches LDM with a KL-f8 first stage).
# NOTE(review): absolute, user-specific path — the notebook will not run on another machine as-is.
cfg_path = "/kuacc/users/bbiner21/Github/latent-diffusion/configs/latent-diffusion/lsun_churches-ldm-kl-8.yaml"

# Parse the YAML file into an OmegaConf config object.
config = OmegaConf.load(cfg_path) 

# Build the object described by the config's `model` node (its `target` called with its `params`).
model = instantiate_from_config(config.model)


LatentDiffusion: Running in eps-prediction mode
modulator is False
*****Using Uformer-B******
DiffusionWrapper has 51.83 M params.
Keeping EMAs of 738.
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla' with 512 in_channels
Restored from models/first_stage_models/kl-f8/model.ckpt
checkpoint path is models/first_stage_models/kl-f8/model.ckpt
Training LatentDiffusion as an unconditional model.


In [4]:
# model

In [6]:
# All-ones dummy input of shape (4, 4, 32, 32) for a forward-pass smoke test.
x = torch.ones((4, 4, 32, 32))
num_timesteps = 1000
# One int64 timestep per leading-dimension entry of x, drawn from [0, num_timesteps).
t = torch.randint(low=0, high=num_timesteps, size=(x.size(0),), dtype=torch.long)


In [7]:
# Call the inner diffusion network directly on the dummy input and timesteps.
out = model.model.diffusion_model(x,t)
# Display the shape of the result as the cell output.
out.shape

input x shape is torch.Size([4, 4, 32, 32])
after input proj y shape is torch.Size([4, 1024, 32])
after dropout proj y shape is torch.Size([4, 1024, 32])
query shape is torch.Size([64, 1, 64, 32])
key shape is torch.Size([64, 1, 64, 32])
attn shape is torch.Size([64, 1, 64, 64])
pool0 shape is torch.Size([4, 256, 64])
query shape is torch.Size([16, 2, 64, 32])
key shape is torch.Size([16, 2, 64, 32])
attn shape is torch.Size([16, 2, 64, 64])
query shape is torch.Size([16, 2, 64, 32])
key shape is torch.Size([16, 2, 64, 32])
attn shape is torch.Size([16, 2, 64, 64])
pool1 shape is torch.Size([4, 64, 128])
query shape is torch.Size([4, 4, 64, 32])
key shape is torch.Size([4, 4, 64, 32])
attn shape is torch.Size([4, 4, 64, 64])
query shape is torch.Size([4, 4, 64, 32])
key shape is torch.Size([4, 4, 64, 32])
attn shape is torch.Size([4, 4, 64, 64])
query shape is torch.Size([4, 4, 64, 32])
key shape is torch.Size([4, 4, 64, 32])
attn shape is torch.Size([4, 4, 64, 64])
query shape is torc

torch.Size([4, 4, 32, 32])

In [30]:
# Display the device the model reports itself to be on.
model.device

device(type='cpu')

In [25]:
# Print the current module search path to check which directories are importable.
print(sys.path)

['/scratch/users/bbiner21/Github/latent-diffusion', '/kuacc/users/bbiner21/.conda/envs/taming/lib/python38.zip', '/kuacc/users/bbiner21/.conda/envs/taming/lib/python3.8', '/kuacc/users/bbiner21/.conda/envs/taming/lib/python3.8/lib-dynload', '', '/kuacc/users/bbiner21/.local/lib/python3.8/site-packages', '/kuacc/users/bbiner21/.conda/envs/taming/lib/python3.8/site-packages', '/scratch/users/bbiner21/Github/latent-diffusion', '/kuacc/users/bbiner21/Github/Uformer']


In [24]:
# Directory added to the module search path so `from model import Uformer` can resolve.
# NOTE(review): absolute, user-specific path — adjust when running elsewhere.
UFORMER_ROOT = "/kuacc/users/bbiner21/Github/Uformer"
# Guard the append so re-running this cell does not pile up duplicate entries.
if UFORMER_ROOT not in sys.path:
    sys.path.append(UFORMER_ROOT)

In [27]:
from model import Uformer

In [19]:
# Display the model's `monitor` attribute.
model.monitor

'val/loss_simple_ema'

In [28]:
# Stand-alone Uformer instance built with explicit hyper-parameters, one keyword per line.
model_restoration = Uformer(
    img_size=128,
    embed_dim=16,
    win_size=8,
    token_projection='linear',
    token_mlp='leff',
    modulator=True,
    dd_in=4,
)

In [29]:
# Display the module's repr (its layer structure) as the cell output.
model_restoration

Uformer(
  embed_dim=16, token_projection=linear, token_mlp=leff,win_size=8
  (pos_drop): Dropout(p=0.0, inplace=False)
  (input_proj): InputProj(
    (proj): Sequential(
      (0): Conv2d(4, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): LeakyReLU(negative_slope=0.01, inplace=True)
    )
  )
  (output_proj): OutputProj(
    (proj): Sequential(
      (0): Conv2d(32, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
  )
  (encoderlayer_0): BasicUformerLayer(
    dim=16, input_resolution=(128, 128), depth=2
    (blocks): ModuleList(
      (0): LeWinTransformerBlock(
        dim=16, input_resolution=(128, 128), num_heads=1, win_size=8, shift_size=0, mlp_ratio=4.0,modulator=None
        (norm1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
        (attn): WindowAttention(
          dim=16, win_size=(8, 8), num_heads=1
          (qkv): LinearProjection(
            (to_q): Linear(in_features=16, out_features=16, bias=True)
            (to_kv): Linear(in_