<a href="https://colab.research.google.com/github/cksdlakstp12/chegyedan-computational-hanmadang/blob/main/DalleFastAPIServer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install fastapi nest-asyncio pyngrok uvicorn
!pip install transformers

from google.colab import output, drive
output.clear()
drive.mount('/content/drive')

Mounted at /content/drive


# Imagen API

In [None]:
!pip install imagen-pytorch
output.clear()

In [None]:
from imagen_pytorch import Unet, Imagen, ImagenTrainer
from transformers import ElectraModel, ElectraTokenizer

## Load Imagen Model

In [None]:
# Settings that are identical for both U-Nets of the cascade.
shared_unet_kwargs = dict(dim=32, cond_dim=512, dim_mults=(1, 2, 4, 8))

# First U-Net of the cascade (listed first in `unets` below).
unet1 = Unet(
    num_resnet_blocks=3,
    layer_attns=(False, True, True, True),
    layer_cross_attns=(False, True, True, True),
    **shared_unet_kwargs,
)

# Second U-Net of the cascade: attention only in the last stage.
unet2 = Unet(
    num_resnet_blocks=(2, 4, 8, 8),
    layer_attns=(False, False, False, True),
    layer_cross_attns=(False, False, False, True),
    **shared_unet_kwargs,
)

# Cascade of the two U-Nets with image sizes 64 and 256, placed on the GPU.
imagen = Imagen(
    unets=(unet1, unet2),
    image_sizes=(64, 256),
    timesteps=1000,
    cond_drop_prob=0.1,
)
imagen = imagen.cuda()

# The trainer wrapper is what the notebook uses to load weights and to sample.
imagen_trainer = ImagenTrainer(imagen)

In [None]:
# Restore the saved Imagen weights from the mounted Google Drive (path is
# relative to the current working directory) and switch to eval mode.
imagen_checkpoint = './drive/MyDrive/KoImagen/unet1_1_unet2_1.pt'
imagen_trainer.load(imagen_checkpoint)
imagen_trainer.eval()

## Load Text Encoder and Tokenizer

In [None]:
# KoELECTRA provides both the tokenizer and the text encoder whose hidden
# states are passed to Imagen as text embeddings.
KOELECTRA_NAME = "monologg/koelectra-base-v3-discriminator"

tokenizer = ElectraTokenizer.from_pretrained(KOELECTRA_NAME)

# from_pretrained() leaves the weights on the CPU, while generate_images()
# feeds the encoder CUDA tensors. Move the encoder to the GPU so the forward
# pass does not fail with a device mismatch; eval() keeps dropout disabled.
electra_model = ElectraModel.from_pretrained(KOELECTRA_NAME).cuda().eval()

## Design API

In [None]:
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

import io
from starlette.responses import StreamingResponse

import cv2
import numpy as np

# Application object that the route decorators below register on.
app = FastAPI()

# CORS is left fully open (any origin, method and header).
# NOTE(review): a wildcard origin together with allow_credentials=True is very
# permissive — confirm that credentialed requests are actually required.
cors_options = dict(
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **cors_options)

@app.get("/")
def home():
  """Return a fixed greeting string for requests to the root path."""
  greeting = "Wellcome!"
  return greeting

@app.get("/generate_images/{text}")
async def generate_images(text: str):
  """Generate one image from `text` with Imagen and return it as a PNG.

  Args:
    text: Prompt taken from the URL path. It is tokenized to exactly 256
      tokens (padded or truncated) and encoded with the KoELECTRA model.

  Returns:
    StreamingResponse whose body is the PNG-encoded image
    (media type "image/png").

  Raises:
    RuntimeError: If OpenCV reports that PNG encoding failed.
  """
  # NOTE(review): sampling is a long blocking call inside an `async def`
  # handler, so other requests wait until it finishes.

  # torch is only imported further down the notebook (Dalle section); import it
  # here so this handler also works when only the Imagen cells have been run.
  import torch

  encoded = tokenizer.encode_plus(
      text,
      max_length = 256,
      add_special_tokens = True,
      padding = "max_length",
      truncation = True,
      return_tensors = "pt"  # tensors come back already shaped (1, 256)
  )

  # Feed the encoder on whatever device its weights live on instead of
  # assuming CUDA: CUDA inputs with CPU weights raise a device-mismatch error.
  encoder_device = next(electra_model.parameters()).device
  inputs = {
      key : encoded[key].to(encoder_device)
      for key in ('input_ids', 'token_type_ids', 'attention_mask')
  }

  with torch.no_grad():  # inference only, no autograd graph needed
    # Keep the leading batch dimension, i.e. (1, 256, hidden); flattening it
    # to (256, hidden) would be read as a batch of 256 prompts.
    # The Imagen model itself was placed on the GPU, hence .cuda().
    text_embeds = electra_model(**inputs).last_hidden_state.cuda()

  image = imagen_trainer.sample(
      text_embeds = text_embeds,
      cond_scale = 3.
  )

  # Assumes sample() returns a (batch, 3, H, W) float tensor with values in
  # [0, 1] (imagen-pytorch default) — TODO confirm for the installed version.
  pixels = image[0].detach().clamp(0., 1.).mul(255).round().byte()
  # Channel-first -> channel-last. A plain reshape() would scramble the pixels.
  pixels = pixels.permute(1, 2, 0).contiguous().cpu().numpy()
  # OpenCV treats 3-channel arrays as BGR, while the model output is RGB.
  pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)

  ok, png = cv2.imencode(".png", pixels)
  if not ok:
    raise RuntimeError("PNG encoding failed")
  return StreamingResponse(io.BytesIO(png.tobytes()), media_type="image/png")

# Dalle API

In [None]:
!pip install dalle2-pytorch
!git clone https://github.com/openai/CLIP.git
output.clear()

In [None]:
import torch
from dalle2_pytorch import OpenAIClipAdapter
from dalle2_pytorch import DALLE2, DiffusionPriorNetwork, DiffusionPrior, DiffusionPriorTrainer
from dalle2_pytorch import Unet, Decoder, DecoderTrainer
from dalle2_pytorch.tokenizer import tokenizer

In [None]:
# Device the DALL-E 2 components are moved to.
device = torch.device('cuda')

# Checkpoint locations on the mounted Google Drive, relative to the current
# working directory.
prior_path = './drive/MyDrive/DALLK/english_model/diffusion_prior/epoch1.pt'
decoder_path = './drive/MyDrive/DALLK/english_model/decoder/epoch1.pt'

## Load Model

### prior

In [None]:
# Use the CLIP model that OpenAI trained with unsupervised learning.
clip = OpenAIClipAdapter()

# Network used by the diffusion prior.
prior_network = DiffusionPriorNetwork(
    dim = 512,
    depth = 6,
    dim_head = 64,
    heads = 8
)

# Diffusion prior built from the network above and the CLIP adapter, both on `device`.
diffusion_prior = DiffusionPrior(
    net = prior_network.to(device),
    clip = clip.to(device),
    timesteps = 100,
    cond_drop_prob = 0.2
)

# The trainer wrapper is used here to restore the saved checkpoint.
diffusion_prior_trainer = DiffusionPriorTrainer(
    diffusion_prior.to(device),
    lr = 3e-4,
    wd = 1e-2,
    ema_beta = 0.99,
    ema_update_after_step = 1000,
    ema_update_every = 10,
)

# Load the saved diffusion prior model.
# load() returns the whole checkpoint (optimizer state included); the trailing
# semicolon stops the notebook from printing it as the cell output.
diffusion_prior_trainer.load(prior_path);

100%|████████████████████████████████████████| 354M/354M [00:01<00:00, 237MiB/s]


{'optimizer': {'state': {102: {'step': tensor(30821., device='cuda:0'),
    'exp_avg': tensor([[[ 2.2584e-08,  2.9911e-08, -2.6128e-08,  ..., -2.0072e-09,
               4.7375e-08,  2.1363e-08],
             [-5.8624e-09, -4.5767e-09, -5.6100e-09,  ..., -1.4388e-09,
               7.8343e-09,  6.2849e-10],
             [ 1.1785e-08,  3.5058e-08, -8.4858e-09,  ..., -3.2974e-08,
               4.5585e-08,  4.3443e-08],
             ...,
             [ 3.3445e-07,  2.3200e-07, -1.8368e-07,  ..., -8.1227e-08,
              -3.7280e-07, -1.0877e-08],
             [ 2.1200e-09, -1.3546e-08, -4.5747e-09,  ..., -6.0434e-09,
              -1.2878e-08,  4.5404e-09],
             [ 7.8751e-09, -2.5864e-10, -1.3218e-08,  ..., -3.2549e-08,
              -3.5595e-08,  1.1361e-08]]], device='cuda:0'),
    'exp_avg_sq': tensor([[[2.5613e-14, 1.2803e-13, 4.0022e-14,  ..., 1.9959e-14,
              1.5147e-13, 3.8138e-14],
             [1.4596e-15, 3.6181e-15, 6.6293e-16,  ..., 5.4207e-15,
            

### decoder 

In [None]:
# First decoder U-Net: conditioned on text encodings.
unet1 = Unet(
    dim = 128,
    image_embed_dim = 512,
    cond_dim = 128,
    channels = 3,
    dim_mults=(1, 2, 4, 8),
    text_embed_dim = 512,
    cond_on_text_encodings = True  # set to True for any unets that need to be conditioned on text encodings (ex. first unet in cascade)
)

# Second decoder U-Net: no text-encoding conditioning.
unet2 = Unet(
    dim = 16,
    image_embed_dim = 512,
    cond_dim = 128,
    channels = 3,
    dim_mults = (1, 2, 4, 8, 16)
)

# Decoder cascade over the two U-Nets with image sizes 128 and 256.
decoder = Decoder(
    unet = (unet1.to(device), unet2.to(device)),
    image_sizes = (128, 256),
    clip = clip.to(device),
    timesteps = 1000,
    sample_timesteps = (250, 27),
    image_cond_drop_prob = 0.1,
    text_cond_drop_prob = 0.5
)

# The trainer wrapper is used here to restore the saved checkpoint.
decoder_trainer = DecoderTrainer(
    decoder.to(device),
    lr = 3e-4,
    wd = 1e-2,
    ema_beta = 0.99,
    ema_update_after_step = 1000,
    ema_update_every = 10,
)

# Load the saved decoder model.
# The trailing semicolon keeps the notebook from displaying load()'s return
# value as the cell output.
decoder_trainer.load(decoder_path);

NameError: ignored

### dalle2

In [None]:
# Take the prior and decoder held by the two trainers and combine them into
# the text-to-image pipeline.
trained_prior = diffusion_prior_trainer.diffusion_prior
trained_decoder = decoder_trainer.decoder

dalle2 = DALLE2(prior=trained_prior, decoder=trained_decoder)
dalle2.eval()

## Design API

In [None]:
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

import io
from starlette.responses import StreamingResponse

import cv2
import numpy as np

import clip as c

# Application object that the route decorators below register on.
app = FastAPI()

# CORS is left fully open (any origin, method and header).
# NOTE(review): a wildcard origin together with allow_credentials=True is very
# permissive — confirm that credentialed requests are actually required.
cors_options = dict(
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **cors_options)

@app.get("/")
def home():
  """Return a fixed greeting string for requests to the root path."""
  greeting = "Wellcome!"
  return greeting

@app.get("/generate_images/{text}")
async def generate_images(text: str):
  """Generate one image from `text` with DALL-E 2 and return it as a PNG.

  Args:
    text: Prompt taken from the URL path; tokenized with CLIP's tokenizer.

  Returns:
    StreamingResponse whose body is the PNG-encoded image
    (media type "image/png").

  Raises:
    RuntimeError: If OpenCV reports that PNG encoding failed.
  """
  # NOTE(review): sampling is a long blocking call inside an `async def`
  # handler, so other requests wait until it finishes.

  # truncate=True: without it clip.tokenize raises for prompts longer than
  # CLIP's context length, turning a long prompt into a server error.
  tokens = c.tokenize(text, truncate = True).cuda()
  image = dalle2(
    tokens,
    cond_scale = 2
  )

  # Assumes the pipeline returns a (batch, 3, H, W) float tensor with values
  # in [0, 1] (dalle2-pytorch default) — TODO confirm for the installed version.
  pixels = image[0].detach().clamp(0., 1.).mul(255).round().byte()
  # Channel-first -> channel-last. A plain reshape() would scramble the pixels.
  pixels = pixels.permute(1, 2, 0).contiguous().cpu().numpy()
  # OpenCV treats 3-channel arrays as BGR, while the model output is RGB.
  pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)

  ok, png = cv2.imencode(".png", pixels)
  if not ok:
    raise RuntimeError("PNG encoding failed")
  return StreamingResponse(io.BytesIO(png.tobytes()), media_type="image/png")

# Hosting Server

Example request path (append it to the public URL printed by the cell below): `/generate_images/a man standing in front of a black hole`

In [None]:
import nest_asyncio
from pyngrok import ngrok
import uvicorn

# Port shared by the ngrok tunnel and the uvicorn server.
server_port = 8000

# Patch asyncio so uvicorn can start inside the notebook's already-running
# event loop.
nest_asyncio.apply()

# Open the public tunnel and show its address before the blocking server call.
ngrok_tunnel = ngrok.connect(server_port)
print("Public URL :", ngrok_tunnel.public_url)

uvicorn.run(app, port=server_port)