# The Big Sleep: BigGAN+CLIP

https://openai.com/blog/clip/

https://github.com/lucidrains/big-sleep


![CLIP 1](https://openaiassets.blob.core.windows.net/$web/clip/draft/20210104b/overview-a.svg "CLIP 1")
![CLIP 2](https://openaiassets.blob.core.windows.net/$web/clip/draft/20210104b/overview-b.svg "CLIP 2")

In [None]:
#@title Imports
!pip install big-sleep --upgrade
!pip install gTTS

from gtts import gTTS
import tqdm
from tqdm.notebook import trange
from IPython.display import Image, display

from big_sleep import Imagine
import os
from pathlib import Path

import moviepy.editor as mpy
import glob


In [None]:
#@title Test Big Sleep
#@markdown Text to visualize
txt = 'green dog on the snow' #@param {type:"string"}

#@markdown Number of epochs 
epochs = 20 #@param {type:"integer"}
#@markdown Number of iterations per epoch 
iterations = 1000 #@param {type:"integer"}
#@markdown Save every
save_every = 100 #@param {type:"integer"}

# Smoke test: optimise a single prompt and preview the picture while training.
model = Imagine(
    text=txt,
    lr=5e-2,
    iterations=iterations,
    save_every=save_every,
    save_progress=True,
)

# Create the output directory and make it the working directory.
# NOTE(review): presumably Imagine writes its images relative to the current
# working directory, which is why the cell changes into it - confirm against big-sleep.
path = "/content/test_images"
out_dir = Path(path)
out_dir.mkdir(parents=True, exist_ok=True)
os.chdir(path)

# The preview path is the prompt with spaces turned into underscores, capped
# at 255 characters. It does not change during training, so build it once.
filename = txt.replace(' ', '_')[:255]
preview_png = f'{path}/{filename}.png'

for epoch in trange(epochs, desc='epochs'):
    for step in trange(iterations, desc='iteration'):
        model.train_step(epoch, step)

        # Show the picture on every `save_every`-th step, except step 0.
        if step != 0 and step % model.save_every == 0:
            display(Image(preview_png))

In [34]:
#@title Text preparation
#@markdown Drag-n-drop txt file to the Files sidebar, paste the path to the file you want to use, then run this cell
text_file = '/content/Test.txt' #@param {type:"string"}

#@markdown Every punctuation mark is replaced with this symbol, and the text is then split into smaller chunks at it
delim = '.' #@param {type:"string"}

# str.split() rejects an empty separator; fail early with a clearer message.
if not delim:
  raise ValueError("delim must not be empty")

# Characters that end a chunk. The backslash is written as an explicit '\\'
# (a bare '\]' is an invalid escape sequence). The ASCII hyphen is not in the
# set, so hyphenated words stay in one piece.
punctuation_marks = '!"\'#$%&()*+,—./:;<=>?@[\\]^_`{|}~'

# The punctuation set contains a non-ASCII dash, so read the file as UTF-8
# instead of relying on the platform default encoding.
with open(text_file, 'r', encoding='utf-8') as f:
  txt = f.read()

# Turn every punctuation mark into the delimiter, then join lines with spaces.
# '=' is part of the set, so separator rules made of '=' become delimiters too
# and need no special handling.
punct_to_delim = str.maketrans(dict.fromkeys(punctuation_marks, delim))
txt = txt.translate(punct_to_delim).replace('\n', ' ')

# Strip BEFORE filtering, so whitespace-only pieces are dropped instead of
# ending up in the list as empty strings.
txt_splitted = [chunk for chunk in map(str.strip, txt.split(delim)) if chunk]


In [None]:
#@title Print processed text
# First line: how many chunks the text was split into; second line: the chunks.
print(len(txt_splitted), txt_splitted, sep='\n')

In [None]:
#@title Visualize

#@markdown Number of epochs
epochs = 10 #@param {type:"integer"}
#@markdown Number of iterations per epoch
iterations = 100 #@param {type:"integer"}
#@markdown Save every
save_every = 10 #@param {type:"integer"}


# Train one model per sentence; each sentence gets its own zero-padded,
# numbered directory under /content/text_vis.
Path("/content/text_vis").mkdir(parents=True, exist_ok=True)
for s, sent in enumerate(tqdm.tqdm(txt_splitted)):
  # A blank sentence has nothing to visualise. Skip it BEFORE creating its
  # directory, so no empty directory is left behind for it.
  if not sent.strip():
    continue

  path = f"/content/text_vis/{s:06}"
  Path(path).mkdir(parents=True, exist_ok=True)
  # NOTE(review): presumably Imagine saves its images relative to the current
  # working directory, hence the chdir - confirm against big-sleep.
  os.chdir(path)
  model = Imagine(
      # NOTE(review): '/' is replaced presumably because the text is used to
      # build the output file name - verify against big-sleep.
      text = sent.replace('/',' '),
      save_every = save_every,
      lr = 5e-2,
      iterations = iterations,
      save_progress = True,
  )
  for epoch in trange(epochs, desc = 'epochs'):
    for i in trange(iterations, desc = 'iteration'):
        model.train_step(epoch, i)
  

In [None]:


# Accent name -> domain suffix handed to gTTS as its `tld` argument.
accents = {
    'Australia':'com.au','United Kingdom':'co.uk', 'United States':'com', 'Canada':'ca', 'India':'co.in', 'Ireland':'ie', 'South Africa':'co.za'
}

#@title Make clips
#@markdown This cell joins the generated images into a video clip for each sentence. Run the next cell to merge the clips

#@markdown FPS (with voice enabled the clip length is set by the narration instead)
fps = 1 #@param {type:"integer"}

#@markdown Add voice (the clip duration then follows the audio length)
add_voice = True #@param {type:"boolean"}
#@markdown Language (currently this notebook supports only English)
lang = 'en' #@param ['en']
#@markdown Accent (works for English language only)
acc = 'United Kingdom' #@param ['Australia','United Kingdom', 'United States', 'Canada', 'India', 'Ireland', 'South Africa']

import re


def _natural_key(name):
  """Sort key that compares digit runs numerically, so '...2.png' < '...10.png'."""
  # re.split with a capturing group alternates text / digits / text / ...,
  # so every odd position is a digit run and keys stay comparable.
  return [int(tok) if pos % 2 else tok
          for pos, tok in enumerate(re.split(r'(\d+)', name))]


Path("/content/vids").mkdir(parents=True, exist_ok=True)
for i in trange(len(txt_splitted), desc = 'clips'):
  # Sentences without an image directory are skipped.
  if not Path(f"/content/text_vis/{i:06}").is_dir():
    continue

  # NOTE(review): assumes progress frames may carry unpadded numbers in their
  # names; a plain string sort would then play frame 10 before frame 2.
  images_list = sorted(glob.glob(f"/content/text_vis/{i:06}/*.png"), key=_natural_key)
  if not images_list:
    print(f"Skipping clip {i:06}: no images found")
    continue

  sentence = txt_splitted[i]
  if add_voice and not sentence.strip():
    # gTTS rejects empty text, so there is nothing to narrate for this clip.
    print(f"Skipping clip {i:06}: empty sentence")
    continue

  clip = mpy.ImageSequenceClip(images_list, fps=fps)
  if add_voice:
    tts = gTTS(sentence, lang=lang, tld=accents[acc])
    tts.save(f"/content/vids/{i:06}.mp3")
    audio = mpy.AudioFileClip(f"/content/vids/{i:06}.mp3")
    # The narration decides how long the clip lasts; frames still advance
    # at `fps`.
    clip = clip.set_duration(audio.duration)
    clip = clip.set_audio(audio)
  clip.write_videofile(f"/content/vids/{i:06}.mp4",temp_audiofile="tempaudio.m4a",codec="libx264",remove_temp=False,audio_codec='aac')





In [None]:
#@title Concatinate videos
#@markdown The result video will be in **output.mp4**
%cd /content/vids
!find *.mp4 | sed 's:\ :\\\ :g'| sed 's/^/file /' > fl.txt; ffmpeg -f concat -i fl.txt -c copy ../output.mp4; rm fl.txt
