In [1]:
# just locally running the notebook found at https://github.com/openai/glide-text2im/blob/main/notebooks/text2im.ipynb
!pip install git+https://github.com/openai/glide-text2im

Collecting git+https://github.com/openai/glide-text2im
  Cloning https://github.com/openai/glide-text2im to /tmp/pip-req-build-5wrike12
  Running command git clone -q https://github.com/openai/glide-text2im /tmp/pip-req-build-5wrike12
  Resolved https://github.com/openai/glide-text2im to commit 9cc8e563851bd38f5ddb3e305127192cb0f02f5c


In [2]:
from PIL import Image
from IPython.display import display
import torch as th
import os

In [3]:
# Record the installed PyTorch version next to the results of this run.
print(th.__version__)

1.9.0


In [4]:
# NOTE(review): presumably set so that CUDA errors are reported at the call that
# caused them while the out-of-memory failures in this notebook are being
# debugged — confirm, and remove once the problem is resolved.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [5]:
from glide_text2im.download import load_checkpoint
from glide_text2im.model_creation import (
    create_model_and_diffusion,
    model_and_diffusion_defaults,
    model_and_diffusion_defaults_upsampler
)

In [6]:
# Use the GPU whenever PyTorch can see one. Running on CUDA sporadically gives
# me a "CUDA error: out of memory" in later cells; to force CPU execution
# instead, replace the next line with `has_cuda = False`.
has_cuda = th.cuda.is_available()
if has_cuda:
    device = th.device('cuda')
else:
    device = th.device('cpu')
print(device)

cuda


In [7]:
# Start from the library defaults for the base model and override two entries:
#   use_fp16           -> half precision only when a CUDA device is in use
#   timestep_respacing -> '100' (presumably 100 diffusion steps — confirm
#                         against the glide_text2im documentation)
options = model_and_diffusion_defaults()
options.update(use_fp16=has_cuda, timestep_respacing='100')

In [8]:
# Build the base model and its diffusion process from `options`.
model, diffusion = create_model_and_diffusion(**options)
# Put the model in evaluation mode.
model.eval()
# Convert to half precision only when running on CUDA (mirrors options['use_fp16']).
if has_cuda:
    model.convert_to_fp16()
# Move the model to the target device before loading the weights; the
# checkpoint itself is requested for that same device.
model.to(device)
model.load_state_dict(load_checkpoint('base', device))
# Sanity check: total number of parameter elements in the base model.
print('total base parameters', sum(x.numel() for x in model.parameters()))

total base parameters 385030726


In [9]:
# Start from the library defaults for the upsampler and override two entries:
#   use_fp16           -> half precision only when a CUDA device is in use
#   timestep_respacing -> 'fast27', i.e. 27 diffusion steps for very fast sampling
options_up = model_and_diffusion_defaults_upsampler()
options_up.update(use_fp16=has_cuda, timestep_respacing='fast27')

In [10]:
# Build the upsampler model and its diffusion process from `options_up`,
# following the same steps as for the base model.
model_up, diffusion_up = create_model_and_diffusion(**options_up)
# Put the model in evaluation mode.
model_up.eval()
# Convert to half precision only when running on CUDA (mirrors options_up['use_fp16']).
if has_cuda:
    model_up.convert_to_fp16()
# Move the model to the target device, then load the 'upsample' checkpoint
# requested for that same device.
model_up.to(device)
model_up.load_state_dict(load_checkpoint('upsample', device))
# NOTE: on GPU this cell has failed with "RuntimeError: CUDA error: out of memory"
# (see the recorded output); which statement triggers it is not established here.
print('total upsampler parameters', sum(x.numel() for x in model_up.parameters()))

RuntimeError: CUDA error: out of memory

In [13]:
# Stand-alone repeat of the parameter count printed at the end of the
# upsampler-loading cell (presumably re-run after that cell hit the OOM — confirm).
print('total upsampler parameters', sum(x.numel() for x in model_up.parameters()))

total upsampler parameters 398361286


In [12]:
# Debugging aid: print PyTorch's CUDA memory summary to see how much memory is
# allocated once both models have been loaded.
print(th.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    2926 MB |    2926 MB |    4413 MB |    1486 MB |
|       from large pool |    2836 MB |    2836 MB |    4297 MB |    1461 MB |
|       from small pool |      89 MB |      89 MB |     115 MB |      25 MB |
|---------------------------------------------------------------------------|
| Active memory         |    2926 MB |    2926 MB |    4413 MB |    1486 MB |
|       from large pool |    2836 MB |    2836 MB |    4297 MB |    1461 MB |
|       from small pool |      89 MB |      89 MB |     115 MB |      25 MB |
|---------------------------------------------------------------

In [14]:
def show_images(batch: th.Tensor):
    """Render a batch of images inline as a single horizontal strip.

    `batch` is indexed as (image, channel, row, column) with 3 channels; values
    are mapped linearly from [-1, 1] to [0, 255], and anything outside that
    range is clamped. The images are laid out side by side, left to right, in
    batch order.
    """
    height = batch.shape[2]
    # [-1, 1] -> [0, 255], rounded and clamped, as bytes on the CPU.
    pixels = th.clamp(th.round((batch + 1) * 127.5), 0, 255)
    pixels = pixels.to(th.uint8).cpu()
    # (image, channel, row, col) -> (row, image, col, channel); merging the two
    # middle axes then concatenates the images along the width.
    strip = pixels.permute(2, 0, 3, 1).reshape([height, -1, 3])
    display(Image.fromarray(strip.numpy()))

In [15]:
# Sampling parameters
prompt = "an oil painting of a corgi"
batch_size = 1
guidance_scale = 3.0

# Tune this parameter to control the sharpness of 256x256 images.
# a value of 1.0 is sharper but sometimes results in grainy artifacts.
upsample_temp = 0.997

In [23]:
## Sample from the base model

# Tokenize the prompt and pad it to the model's text context length in one
# step; the tokenizer returns the padded tokens together with their mask.
tokens, mask = model.tokenizer.padded_tokens_and_mask(
    model.tokenizer.encode(prompt),
    options['text_ctx'],
)


In [25]:

# Classifier-free guidance evaluates every sample both with and without the
# prompt, so the model is fed twice as many rows as images requested.
full_batch_size = 2 * batch_size

# The unconditional rows use an empty token list padded to the same context length.
uncond_tokens, uncond_mask = model.tokenizer.padded_tokens_and_mask(
    [],
    options['text_ctx'],
)


In [33]:
# Debugging aid: print PyTorch's CUDA memory summary again, to compare with
# the figures recorded right after the models were loaded.
print(th.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    2926 MB |    2926 MB |    4413 MB |    1486 MB |
|       from large pool |    2836 MB |    2836 MB |    4297 MB |    1461 MB |
|       from small pool |      89 MB |      89 MB |     115 MB |      25 MB |
|---------------------------------------------------------------------------|
| Active memory         |    2926 MB |    2926 MB |    4413 MB |    1486 MB |
|       from large pool |    2836 MB |    2836 MB |    4297 MB |    1461 MB |
|       from small pool |      89 MB |      89 MB |     115 MB |      25 MB |
|---------------------------------------------------------------

In [31]:
# Stand-alone version of the `tokens` tensor built for the model kwargs
# (presumably to isolate which allocation raises the CUDA OOM — confirm).
# Conditional rows come first, unconditional rows second.
mytokens = th.tensor(
    batch_size * [tokens] + batch_size * [uncond_tokens],
    device=device,
)

RuntimeError: CUDA error: out of memory

In [26]:

# Bundle the conditioning inputs that are passed to the model as keyword
# arguments. In both tensors the first `batch_size` rows carry the prompt and
# the last `batch_size` rows carry the empty (unconditional) prompt.
model_kwargs = {
    'tokens': th.tensor(
        batch_size * [tokens] + batch_size * [uncond_tokens],
        device=device,
    ),
    'mask': th.tensor(
        batch_size * [mask] + batch_size * [uncond_mask],
        dtype=th.bool,
        device=device,
    ),
}


RuntimeError: CUDA error: out of memory

In [None]:

# Create a classifier-free guidance sampling function

def model_fn(x_t, ts, **kwargs):
    """Evaluate the model with classifier-free guidance applied to its eps output.

    Only the first half of `x_t` is used: it is duplicated so the same inputs
    are evaluated for the first-half (conditional) and second-half
    (unconditional) rows of the keyword arguments. The first three output
    channels are treated as eps and replaced, in both halves, by
    `uncond + guidance_scale * (cond - uncond)`; the remaining channels are
    returned unchanged. Reads the notebook-level `model` and `guidance_scale`.
    """
    half_count = len(x_t) // 2
    first_half = x_t[:half_count]
    doubled = th.cat((first_half, first_half), dim=0)
    prediction = model(doubled, ts, **kwargs)
    eps = prediction[:, :3]
    rest = prediction[:, 3:]
    cond_eps, uncond_eps = th.split(eps, len(eps) // 2, dim=0)
    guided = uncond_eps + guidance_scale * (cond_eps - uncond_eps)
    # Both halves of the batch receive the same guided eps.
    guided_eps = th.cat((guided, guided), dim=0)
    return th.cat((guided_eps, rest), dim=1)

# Sample from the base model.
# del_cache() is called before and after the loop (presumably to drop cached
# state held by the model — confirm against glide_text2im).
model.del_cache()
sample_shape = (full_batch_size, 3, options["image_size"], options["image_size"])
samples = diffusion.p_sample_loop(
    model_fn,
    sample_shape,
    model_kwargs=model_kwargs,
    cond_fn=None,
    clip_denoised=True,
    progress=True,
    device=device,
)
# Keep only the first `batch_size` rows of the doubled batch.
samples = samples[:batch_size]
model.del_cache()


In [None]:

# Display the samples produced by the base model inline.
show_images(samples)

In [22]:
# Release cached, currently unused blocks held by PyTorch's CUDA caching
# allocator. The call returns None and matters only for its side effect, so
# its result is not printed. Memory still referenced by live tensors (for
# example the loaded models) is not released by this call.
th.cuda.empty_cache()

None
