In [1]:
import os
import pandas as pd
import numpy as np
from google.colab import drive
from IPython.display import clear_output

drive.mount('/content/gdrive')

code_folder = '/content/gdrive/MyDrive/AI Music Visuals Share/New Codes'
output_basedir = os.path.join(code_folder, 'output_latent_noise')

fp = os.path.join(code_folder, 'prompts_excel.xlsx')

df_prompt = pd.read_excel(fp, 'prompts', index_col=0)
df_noise = pd.read_excel(fp, 'noise', dtype={'seed': str})
df_noise

Mounted at /content/gdrive


Unnamed: 0,name,seed,duration
0,rainbow spiral wave0,1011141381797677,10
1,vine cave,7078380737840840,10
2,falling in cave,5451964648290255,10
3,overgrown fantasy,5618257995059877,10
4,fractal vines,2922127385296522,10
5,red cave1,4866412378884770,10
6,succulent,5842234013956479,10


In [2]:
%%capture
!pip install diffusers
!pip install transformers scipy ftfy accelerate

!pip install -U --no-deps stable_diffusion_videos # stable_diffusion_videos wants old version of diffusers which doesn't allow for text embeddings
!pip install realesrgan av

from stable_diffusion_videos import make_video_pyav
import torch
from diffusers import StableDiffusionPipeline

In [3]:
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4",
                                               torch_dtype=torch.float16,
                                               safety_checker=None
                                               )  


pipe = pipe.to("cuda")


Downloading (…)ain/model_index.json:   0%|          | 0.00/543 [00:00<?, ?B/s]

Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/492M [00:00<?, ?B/s]

Downloading (…)rocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

Downloading (…)nfig-checkpoint.json:   0%|          | 0.00/209 [00:00<?, ?B/s]

Downloading (…)cheduler_config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

Downloading (…)_encoder/config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

Downloading (…)_checker/config.json:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

Downloading (…)tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

Downloading (…)_pytorch_model.bin";:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

Downloading (…)b28/unet/config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

Downloading (…)tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

Downloading (…)0b28/vae/config.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Downloading (…)_pytorch_model.bin";:   0%|          | 0.00/335M [00:00<?, ?B/s]



In [4]:
def make_latent_steps(start_latent, stop_latent, steps):
    delta_latent = (stop_latent - start_latent)/float(steps)
    latent_steps = [start_latent + delta_latent*i for i in range(steps + 1)]

    #Check that start and end values are equal to targets within rounding errors
    # assert torch.isclose(latent_steps[0], from_latent, atol=1e-4).all()
    # assert torch.isclose(latent_steps[-1], to_latent, atol=1e-2).all()

    return latent_steps

width = 512
height = 512

latent_width = width // 8
latent_height = height // 8

generator = torch.Generator(device="cuda")

In [5]:
skip_existing = False

max_seed_characters = 4 # Take the first few numbers of the seed for the name
num_interpolation_steps = 3
num_inference_steps = 5

# scales = [0.2,0.2,0.2,0.2,0.2,0.25,0.3,0.5,0.5,0.6,1]
scales = [0.2, 0.2, 0.2]
num_images = len(scales)

for i, row in df_noise.iterrows():

    prompt_name = row['name']
    guidance_scale = float(df_prompt['guidance_scale'][prompt_name])
    seed = int(row['seed'])
    duration = row['duration']

    output_name = "{}_{}".format(prompt_name, seed)
    output_dir = os.path.join(output_basedir, output_name)

    if os.path.exists(output_dir):
        if skip_existing:
            print("{} already exists, skipping".format(output_name))
            continue
        else:
            print("{} already exists, deleting images".format(output_name))
            for fn in os.listdir(output_dir):
              os.remove(os.path.join(output_dir, fn))
    else:
        os.makedirs(output_dir)

    fps = (num_interpolation_steps*num_images)/duration

    # Generate Initial Latent

    generator.manual_seed(seed)

    latent_choose = torch.randn(
    (1, pipe.unet.in_channels, latent_height, latent_width),
    generator = generator,
    device = "cuda"
        )

    j=0 

    current_latent = latent_choose

    for i in range(num_images):

      latent_noise = torch.randn(
        (1, pipe.unet.in_channels, latent_height, latent_width),
        device = "cuda"
      )

      scale = scales[i]
      target_latent = latent_choose + latent_noise*scale

      latent_steps = make_latent_steps(current_latent, target_latent, num_interpolation_steps)

      for latent_step in latent_steps:

        j += 1

        
        print("{}: Making image {} out of {} within batch {}".format(output_name, j, num_images*len(latent_steps), i))

        with torch.autocast("cuda"):
            images = pipe(
                [prompt_name],
                guidance_scale=guidance_scale,
                latents = latent_step,
                num_inference_steps = num_inference_steps
            )

        output_image = images.images[0]

        # for i, image in enumerate(images.images):
        output_image.save(os.path.join(output_dir, '{0:04d}.png'.format(j)))

        clear_output(wait=True)

      current_latent = target_latent



    make_video_pyav(output_dir, 
                    output_filepath=os.path.join(output_basedir, '{}.mp4'.format(output_name)),
                    fps=fps
                    )



succulent_5842234013956479: Making image 12 out of 12 within batch 2


  0%|          | 0/5 [00:00<?, ?it/s]