Código por Will Stedden @bonkerfield

Traducido por @hypereikon

In [None]:
#@markdown Ejecutar esta celda y reiniciar el entorno de ejecución, luego ejecutarla de nuevo y seguir con las siguientes.
import subprocess

CUDA_version = [s for s in subprocess.check_output(["nvcc", "--version"]).decode("UTF-8").split(", ") if s.startswith("release")][0].split(" ")[-1]
print("CUDA version:", CUDA_version)

if CUDA_version == "10.0":
    torch_version_suffix = "+cu100"
elif CUDA_version == "10.1":
    torch_version_suffix = "+cu101"
elif CUDA_version == "10.2":
    torch_version_suffix = ""
else:
    torch_version_suffix = "+cu110"

! pip install torch==1.7.1{torch_version_suffix} torchvision==0.8.2{torch_version_suffix} -f https://download.pytorch.org/whl/torch_stable.html ftfy regex

In [None]:
#@markdown instalar big-sleep
# Installs (or upgrades to) the newest big-sleep release from PyPI.
# NOTE(review): the version is not pinned, while the generation cell imports
# big_sleep.clip.tokenize and calls model.model() directly — confirm the
# installed release still exposes both.
!pip install big-sleep --upgrade

In [None]:
#@markdown Esta celda toma el texto y genera las imágenes.

#@markdown El parámetro span determina cuántas palabras se usan en cada inferencia, e iterations hasta cuántas iteraciones llega (mientras más iteraciones, más "profundo" será el resultado).
from IPython.display import Image, display
import string
import torch
from torchvision.utils import save_image
import numpy as np

from big_sleep import Imagine
from big_sleep.clip import tokenize

from nltk.corpus import stopwords

from skimage.measure import compare_ssim

import cv2
from pathlib import Path

import PIL
from PIL import ImageFont, ImageDraw

# Settings forwarded to big_sleep's Imagine(...) when the model is built.
# TEXT is also turned into the file-name stem used to read frames back.
TEXT = 'story_hallucinator'
SAVE_EVERY = 1
SAVE_PROGRESS = True
LEARNING_RATE = 0.1
ITERATIONS = 1

def train_step(self, epoch, i, rand=0):
    """Run one optimizer step and, when due, write the current image to disk.

    Args:
        self: object exposing ``model``, ``encoded_text``, ``optimizer``,
            ``gradient_accumulate_every``, ``save_every`` and ``textpath``
            (in this notebook, a ``big_sleep.Imagine`` instance).
        epoch: not used by this function; kept so existing call sites work.
        i: step index. A frame is written when ``(i + 1) % save_every == 0``
            and is numbered ``i // save_every``.
        rand: scale of a Gaussian scalar added to each accumulated loss.

    Returns:
        None.
    """
    for _ in range(self.gradient_accumulate_every):
        losses = self.model(self.encoded_text)
        # NOTE(review): the noise term is a plain scalar added to the loss, so
        # it presumably does not influence the gradients — confirm intent.
        loss = (sum(losses) / self.gradient_accumulate_every) + rand*np.random.randn()
        loss.backward()

    self.optimizer.step()
    self.optimizer.zero_grad()

    if (i + 1) % self.save_every == 0:
        with torch.no_grad():
            mres = self.model.model()
            # The last element of the generator output is the one saved.
            image = mres[-1].cpu()
            num = i // self.save_every
            save_image(image, Path(f'./{self.textpath}.{num}.png'))

# Build the big-sleep optimiser from the constants defined above.
model = Imagine(
    text = TEXT,
    save_every = SAVE_EVERY,
    lr = LEARNING_RATE,
    iterations = ITERATIONS,
    save_progress = SAVE_PROGRESS
)
# Stem of the frame files read back below: ./<filename>.<n>.png
filename = TEXT.replace(' ', '_')
### from https://a.ttent.io/n by Terise Cruven
all_text = "Los bailarines dicen por armar boche que si les cantan bailan toda la noche. Toda la noche si flor de zapallo en la cancha es aonde se ven los gallos." #@param {type: "string"}


# Warm-up train steps run before the main loop (and again after every restart).
burnin = 20
# Within an epoch, every `checkin_gap` steps the newest frame is compared with
# the epoch's first frame to decide whether to restart.
checkin_gap = 10
# Not referenced by the visible code; kept because other cells may read it.
long_sim_gap = 10
# Number of words in each prompt window (and the stride between epochs).
span =  5#@param {type: "number"}
# Train steps per epoch.
iterations =  600#@param {type: "number"}
# A frame is displayed whenever the global step is a multiple of this.
display_gap = 50
# SSIM score above which the run is considered stuck and restarted.
similarity = 0.9

words = all_text.split()
all_text_list = [" ".join(words[i:i+span]) for i in range(0, len(words), span)]
all_text_full = all_text_list

iter_num = 0
last_one = 0
rand = 0
# Strip punctuation from the warm-up prompt as well, so it is prepared the
# same way as every prompt used in the main loop and in the restart burn-in.
model.text = " ".join(words[:span]).translate(str.maketrans('', '', string.punctuation))
model.encoded_text = tokenize(model.text).cuda()
for j in range(burnin):
    train_step(model, 0, 0, rand)
def _caption_frame(frame_path, caption_words, font, split_at, adj=1):
    """Draw ``caption_words`` as outlined white text on a saved frame, in place.

    The caption is placed on one line, horizontally centred, 7/8 of the way
    down the image. If that line is wider than the image it is broken into
    two lines after ``split_at`` words. The file at ``frame_path`` is
    overwritten with the captioned image.

    NOTE(review): ``ImageDraw.textsize`` was removed in Pillow 10; this relies
    on the older Pillow available in the target environment — confirm.
    """
    img = PIL.Image.open(frame_path)
    W, H = img.size
    draw = ImageDraw.Draw(img)

    msgs = [" ".join(caption_words)]
    w, h = draw.textsize(msgs[0], font=font)
    if w > W:
        msgs = [" ".join(caption_words[:split_at]), " ".join(caption_words[split_at:])]

    # Black copies shifted by `adj` pixels in the eight directions around the
    # text form the outline; the white text is then drawn on top.
    outline_offsets = [
        (-adj, 0), (adj, 0), (0, adj), (0, -adj),
        (-adj, adj), (adj, adj), (-adj, -adj), (adj, -adj),
    ]
    for shift, msg in enumerate(msgs):
        w, h = draw.textsize(msg, font=font)
        x, y = (W-w)/2, 7*(H-h)/8 + shift*h
        for dx, dy in outline_offsets:
            draw.text((x+dx, y+dy), msg, fill="black", font=font)
        draw.text((x, y), msg, fill="white", font=font)
    img.save(frame_path)


# Loaded once up front instead of once per frame; a missing font file now
# fails here, before any training, rather than after the first full epoch.
caption_font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationMono-Bold.ttf", 18)
punctuation_table = str.maketrans('', '', string.punctuation)

# One epoch per `span`-word chunk of the text.
for epoch in range(0, len(words), span):
    restart_point = iter_num
    i = 0
    while i < iterations:
        # The prompt window slides forward one word at a time over the epoch,
        # ending up to span-1 words past the chunk start.
        # NOTE(review): on a short final chunk the window can slide past the
        # end of `words` and become empty — confirm this is acceptable.
        offset = (i*span)//iterations
        phrase = " ".join(words[epoch+offset:epoch+span+offset])
        model.text = phrase.translate(punctuation_table)
        model.encoded_text = tokenize(model.text).cuda()
        train_step(model, epoch, iter_num, rand)

        if iter_num % display_gap == 0:
            print(f'iter: {iter_num} text={phrase}')
            image_cur = Image(f'./{filename}.{iter_num}.png')
            display(image_cur)

        if i % checkin_gap == 0 and i > 0:
            imageA = cv2.imread(f'./{filename}.{iter_num}.png')
            imageB = cv2.imread(f'./{filename}.{restart_point}.png')
            # convert the images to grayscale
            grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
            grayB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)
            (score, diff) = compare_ssim(grayA, grayB, full=True)
            smooth = grayB.std()
            ext = ((grayB < 50) | (grayB > 205)).mean()
            print(f'iter{iter_num}: rand={rand} sim={score} smooth={smooth}, ext={ext}')
            # Restart the epoch when the newest frame is still too similar to
            # the epoch's first frame, or when that first frame has very low
            # contrast or consists mostly of very dark / very bright pixels.
            if score > similarity or smooth < 15 or ext > 0.9:
                print('restart!')
                model = Imagine(
                    text = TEXT,
                    save_every = SAVE_EVERY,
                    lr = LEARNING_RATE,
                    iterations = ITERATIONS,
                    save_progress = SAVE_PROGRESS
                )
                model.text = " ".join(words[epoch:epoch+span]).translate(punctuation_table)
                model.encoded_text = tokenize(model.text).cuda()
                for j in range(burnin):
                    train_step(model, epoch, iter_num, rand)
                # Rewind so the epoch's frames are regenerated from its start.
                iter_num = restart_point
                i = 0
                rand = 0
                continue
        i += 1
        iter_num += 1

    # Caption every frame produced during this epoch with the epoch's words.
    caption_words = words[epoch:epoch+span]
    for frame_idx in range(last_one, iter_num):
        _caption_frame(f'./{filename}.{frame_idx}.png', caption_words, caption_font, span // 2)
    last_one = iter_num



In [None]:
#@markdown crear video
# Frame rate of the output video; passed to ffmpeg's -framerate option below.
fps_mp4 = 25 #@param {type: "number"}
# Name of the video file that ffmpeg writes.
filename_mp4 = "video_story2hall.mp4" #@param {type: "string"}

# Encode the numbered frames story_hallucinator.<n>.png with libx264.
# NOTE(review): the input pattern is hard-coded rather than derived from the
# TEXT / filename values of the generation cell — keep them in sync.
!ffmpeg -framerate $fps_mp4 -i story_hallucinator.%d.png -c:v libx264 -crf 17 $filename_mp4

In [None]:
#@markdown eliminar todos los archivos para probar otro input
# Deletes every file in the working directory whose name starts with "story_"
# (the generated frames are named story_hallucinator.<n>.png).
# The default video name, video_story2hall.mp4, does not match this glob.
!rm story_*