## Translator

In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Single checkpoint identifier shared by the tokenizer and the seq2seq model,
# so the two can never drift apart.
# NOTE: the name `model` is rebound further down the notebook to the diffusion
# model; the translation pipeline built from it keeps its own reference.
ZH_EN_CHECKPOINT = "Helsinki-NLP/opus-mt-zh-en"

tokenizer = AutoTokenizer.from_pretrained(ZH_EN_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(ZH_EN_CHECKPOINT)

In [5]:
from transformers import pipeline
# Wrap the already-loaded tokenizer/model pair in a zh -> en translation pipeline.
translator = pipeline(
    task='translation_zh_to_en',
    model=model,
    tokenizer=tokenizer,
)

# Smoke test: the result of the call is the cell's displayed output.
sample_zh = "你好呀，欢迎来到我的世界！"
translator(sample_zh)

[{'translation_text': 'Hello, welcome to my world!'}]

## Text 2 Image

In [32]:
import argparse, os, sys, glob
import torch
import numpy as np
from omegaconf import OmegaConf
from PIL import Image
from tqdm import tqdm, trange
from einops import rearrange
from torchvision.utils import make_grid

from ldm.util import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler
from munch import Munch
import gc

In [8]:
def load_model_from_config(config, ckpt, verbose=False):
    """Instantiate the model described by ``config`` and load weights from ``ckpt``.

    Args:
        config: Configuration object exposing a ``model`` attribute, which is
            handed to ``instantiate_from_config``.
        ckpt: Path to a checkpoint file whose top-level mapping contains a
            ``"state_dict"`` entry.
        verbose: When true, print the missing and unexpected keys reported by
            ``load_state_dict``.

    Returns:
        The instantiated model in eval mode. It is moved to CUDA when CUDA is
        available and left on the CPU otherwise.

    Raises:
        KeyError: If the checkpoint has no ``"state_dict"`` entry.
    """
    print(f"Loading model from {ckpt}")
    # NOTE(review): torch.load unpickles the file, which can execute arbitrary
    # code -- only load checkpoints from a trusted source.
    pl_sd = torch.load(ckpt, map_location="cpu")
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)

    # strict=False tolerates key mismatches; they are only reported when verbose.
    missing, unexpected = model.load_state_dict(sd, strict=False)
    if verbose and len(missing) > 0:
        print("missing keys:")
        print(missing)
    if verbose and len(unexpected) > 0:
        print("unexpected keys:")
        print(unexpected)

    # Only move to the GPU when one exists; an unconditional .cuda() raises on
    # CPU-only machines.
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    return model

In [26]:
# Config file for the text-to-image model; the path is relative to the
# notebook's working directory.
LDM_CONFIG_PATH = "configs/latent-diffusion/txt2img-1p4B-eval.yaml"

config = OmegaConf.load(LDM_CONFIG_PATH)
config

{'model': {'base_learning_rate': 5e-05, 'target': 'ldm.models.diffusion.ddpm.LatentDiffusion', 'params': {'linear_start': 0.00085, 'linear_end': 0.012, 'num_timesteps_cond': 1, 'log_every_t': 200, 'timesteps': 1000, 'first_stage_key': 'image', 'cond_stage_key': 'caption', 'image_size': 32, 'channels': 4, 'cond_stage_trainable': True, 'conditioning_key': 'crossattn', 'monitor': 'val/loss_simple_ema', 'scale_factor': 0.18215, 'use_ema': False, 'unet_config': {'target': 'ldm.modules.diffusionmodules.openaimodel.UNetModel', 'params': {'image_size': 32, 'in_channels': 4, 'out_channels': 4, 'model_channels': 320, 'attention_resolutions': [4, 2, 1], 'num_res_blocks': 2, 'channel_mult': [1, 2, 4, 4], 'num_heads': 8, 'use_spatial_transformer': True, 'transformer_depth': 1, 'context_dim': 1280, 'use_checkpoint': True, 'legacy': False}}, 'first_stage_config': {'target': 'ldm.models.autoencoder.AutoencoderKL', 'params': {'embed_dim': 4, 'monitor': 'val/rec_loss', 'ddconfig': {'double_z': True, 'z_

In [10]:
# Pick the compute device once: GPU when available, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE: this rebinds the global name `model`, which earlier referred to the
# translation model.
LDM_CKPT_PATH = "models/ldm/text2img-large/model.ckpt"
model = load_model_from_config(config, LDM_CKPT_PATH)
model = model.to(device)

Loading model from models/ldm/text2img-large/model.ckpt
LatentDiffusion: Running in eps-prediction mode
DiffusionWrapper has 872.30 M params.
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla' with 512 in_channels


In [28]:
def generate_img(prompt, n_samples=4, ddim_steps=200, ddim_eta=0.0, scale=5.0,
                 H=256, W=256, n_iter=1, plms=False):
    """Sample images for ``prompt`` from the global ``model`` and return one grid image.

    Nothing is written to disk; the samples are only returned as an image.

    Args:
        prompt: Text used to condition the sampler.
        n_samples: Batch size per iteration; also the number of images per
            grid row.
        ddim_steps: Number of sampler steps (passed as ``S``).
        ddim_eta: ``eta`` value forwarded to the sampler.
        scale: Unconditional guidance scale. When it differs from 1.0 an
            empty-prompt conditioning is computed and passed to the sampler.
        H: Requested image height in pixels.
        W: Requested image width in pixels.
        n_iter: Number of sampling iterations; each adds one row to the grid.
        plms: Use ``PLMSSampler`` instead of ``DDIMSampler``.

    Returns:
        A ``PIL.Image`` holding all generated samples arranged in a grid.
    """
    sampler = PLMSSampler(model) if plms else DDIMSampler(model)

    # Latent shape handed to the sampler: 4 channels at 1/8 of the pixel size.
    # NOTE(review): presumably matches the autoencoder's latent channels and
    # downsampling factor -- confirm against the loaded config.
    latent_shape = [4, H // 8, W // 8]

    batches = []
    with torch.no_grad():
        with model.ema_scope():
            # Empty-prompt conditioning is only needed when guidance is active.
            uc = None
            if scale != 1.0:
                uc = model.get_learned_conditioning(n_samples * [""])

            for _ in trange(n_iter, desc="Sampling"):
                c = model.get_learned_conditioning(n_samples * [prompt])
                samples_ddim, _intermediates = sampler.sample(
                    S=ddim_steps,
                    conditioning=c,
                    batch_size=n_samples,
                    shape=latent_shape,
                    verbose=False,
                    unconditional_guidance_scale=scale,
                    unconditional_conditioning=uc,
                    eta=ddim_eta,
                )

                x_samples_ddim = model.decode_first_stage(samples_ddim)
                # Shift/scale by (x + 1) / 2 and clamp into [0, 1].
                x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
                batches.append(x_samples_ddim)

    # Flatten (iteration, batch) into one image axis and tile into a grid.
    grid = torch.stack(batches, 0)
    grid = rearrange(grid, 'n b c h w -> (n b) c h w')
    grid = make_grid(grid, nrow=n_samples)

    # Channel-last uint8 array in [0, 255] for PIL.
    grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()
    return Image.fromarray(grid.astype(np.uint8))

In [33]:
# Housekeeping: run a Python garbage-collection pass, then ask PyTorch to
# release its cached, currently unused CUDA memory.
# Order matters: collecting first presumably lets memory held by unreachable
# objects be returned to the cache before the cache is emptied.
gc.collect()
torch.cuda.empty_cache()

In [35]:
import gradio as gr

def greet(zh_input, en_input):
    """Turn the two text boxes into one English prompt and generate an image.

    The English text wins when both are given. Otherwise the Chinese text is
    translated with ``translator`` and the translation is used as the prompt.
    Surrounding whitespace is ignored, so a blank English box does not
    override a valid Chinese prompt.

    Args:
        zh_input: Chinese prompt; may be empty or ``None``.
        en_input: English prompt; may be empty or ``None``.

    Returns:
        Whatever ``generate_img`` returns for the chosen prompt.

    Raises:
        ValueError: If neither input contains any text.
    """
    # Normalise None / whitespace-only values to the empty string.
    en_text = (en_input or "").strip()
    zh_text = (zh_input or "").strip()

    if en_text:
        prompt = en_text
    elif zh_text:
        prompt = translator(zh_text)[0]['translation_text']
    else:
        # Fail fast instead of sending an empty string through the translator
        # and the sampler.
        raise ValueError("请输入中文或英文信息 (please enter a Chinese or an English prompt)")

    return generate_img(prompt)

# Two free-text inputs: Chinese and English. Per the description shown in the
# UI, either one is enough and English takes precedence when both are filled.
zh_box = gr.Text(placeholder="请输入中文信息", label='zh')
en_box = gr.Text(placeholder="请输入英文信息", label='en')

# One example per input language; the other field is left empty.
example_rows = [
    ["穿皮鞋的熊猫在打篮球", ""],
    ["", "Green ducks dance on the red roof"],
]

demo = gr.Interface(
    fn=greet,
    title="Text2Image",
    description="中文和英文只要输入任意一个即可，如果同时输入，以英文为主！",
    inputs=[zh_box, en_box],
    outputs=gr.Image(shape=(200, 200)),
    examples=example_rows,
)

# NOTE(review): share=True also publishes a temporary public URL for this
# demo -- confirm that exposing the model publicly is intended.
demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7861/
Running on public URL: https://18083.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)


(<gradio.routes.App at 0x7f9a934134c0>,
 'http://127.0.0.1:7861/',
 'https://18083.gradio.app')

Traceback (most recent call last):
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/gradio/routes.py", line 255, in run_predict
    output = await app.blocks.process_api(
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/gradio/blocks.py", line 599, in process_api
    predictions, duration = await self.call_function(fn_index, processed_input)
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/gradio/blocks.py", line 514, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/anyio/to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "/opt/conda/envs/ldm/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 867, in run
    result = context.run(func, *args)
  File "/opt/conda/envs/ldm/lib/python3.8/si