In [9]:
import os
import pandas as pd
import numpy as np
from google.colab import drive
from IPython.display import clear_output

# Mount Google Drive so the shared project folder is reachable from this runtime.
drive.mount('/content/gdrive')

# Project locations on the mounted drive.
code_folder = '/content/gdrive/MyDrive/AI Music Visuals Share/New Codes'
output_basedir = os.path.join(code_folder, 'output_latent_noise')

# Single workbook that holds both sheets read below.
fp = os.path.join(code_folder, 'prompts_excel.xlsx')

# 'prompts' sheet: the first column becomes the index, so rows are looked up by it.
df_prompt = pd.read_excel(fp, sheet_name='prompts', index_col=0)
# 'noise' sheet: the seed column is kept as text (presumably so long seeds are
# never routed through a float -- confirm); it is converted with int() at use.
df_noise = pd.read_excel(fp, sheet_name='noise', dtype={'seed': str})

# Keep only the rows flagged compute == 'y'. A boolean mask selects the same
# rows as where(...).dropna(how='all') did, but does not inject NaN first, so
# integer columns keep their dtype. copy() detaches the result from the
# unfiltered frame.
df_noise = df_noise[df_noise['compute'] == 'y'].copy()
df_noise

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Unnamed: 0,name,seed,duration,compute
7,red cave2,1488523408897636,20.0,y


In [3]:
%%capture
# Install the packages this notebook needs; the %%capture magic on the first
# line keeps pip's output out of the notebook.
# NOTE(review): none of these installs pin a version, so a fresh runtime may
# resolve different releases than the ones this notebook was developed with.
!pip install diffusers
!pip install transformers scipy ftfy accelerate

# --no-deps installs stable_diffusion_videos without its declared
# dependencies, so the diffusers release installed above is left in place
# (reason given in the inline note on the next line).
!pip install -U --no-deps stable_diffusion_videos # stable_diffusion_videos wants old version of diffusers which doesn't allow for text embeddings
!pip install realesrgan av

from stable_diffusion_videos import make_video_pyav
import torch
from diffusers import StableDiffusionPipeline

In [4]:
# Build the Stable Diffusion v1-4 pipeline with half-precision weights and no
# safety checker, and place it on the GPU in the same expression.
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float16,
    safety_checker=None,
).to("cuda")


Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]



In [5]:
def make_latent_steps(start_latent, stop_latent, steps):
    """Linearly interpolate from ``start_latent`` to ``stop_latent``.

    Args:
        start_latent: First point of the interpolation. Any value that
            supports subtraction, addition and multiplication/division by a
            scalar works (the notebook passes torch tensors).
        stop_latent: Last point of the interpolation, same kind of value as
            ``start_latent``.
        steps: Number of equal intervals between the two points. Must be at
            least 1.

    Returns:
        A list with ``steps + 1`` entries. Entry 0 is ``start_latent`` and the
        final entry is ``stop_latent`` up to floating-point rounding.

    Raises:
        ValueError: If ``steps`` is smaller than 1. Without this check a zero
            would divide by zero (NaN/inf entries for tensors) and a negative
            value would silently return an empty list.
    """
    if steps < 1:
        raise ValueError("steps must be a positive integer, got {!r}".format(steps))

    # Constant increment added once per interval.
    delta_latent = (stop_latent - start_latent) / float(steps)
    return [start_latent + delta_latent * i for i in range(steps + 1)]

# Output image size in pixels.
width, height = 512, 512

# Latent grid size: each pixel dimension integer-divided by 8
# (presumably the autoencoder's downsampling factor -- confirm).
latent_width, latent_height = width // 8, height // 8

# Random generator living on the GPU; the render loop calls manual_seed on it
# before drawing latents.
generator = torch.Generator(device="cuda")

In [11]:
# ---------------------------------------------------------------------------
# Render settings
# ---------------------------------------------------------------------------
skip_existing = False  # True: leave existing outputs alone; False: delete their frames and re-render

max_seed_characters = 4 # Take the first few numbers of the seed for the name
# NOTE(review): max_seed_characters is not read anywhere in this cell -- the
# full seed is used in the output name below.
num_interpolation_steps = 50  # frames rendered per interpolation segment
num_inference_steps = 50      # forwarded to the pipeline call unchanged

# One interpolation segment is rendered per entry. Each entry scales the random
# offset that is added to the base latent to form that segment's target.
# Alternative preset: scales = [0.2,0.2,0.2,0.2,0.2,0.25,0.3,0.5,0.5,0.6,1]
scales = [0.2, 0.2, 0.2]
num_images = len(scales)

# Shape shared by the base latent and every noise offset. The channel count is
# read from the UNet config (direct `unet.in_channels` access is deprecated).
latent_shape = (1, pipe.unet.config.in_channels, latent_height, latent_width)

# Frames actually written per video: every segment contributes
# num_interpolation_steps frames because its first interpolation point is skipped.
total_frames = num_images * num_interpolation_steps

for _, row in df_noise.iterrows():

    prompt_name = row['name']
    guidance_scale = float(df_prompt.loc[prompt_name, 'guidance_scale'])
    seed = int(row['seed'])
    duration = row['duration']

    output_name = "{}_{}".format(prompt_name, seed)
    output_dir = os.path.join(output_basedir, output_name)

    if os.path.exists(output_dir):
        if skip_existing:
            print("{} already exists, skipping".format(output_name))
            continue
        # Re-rendering: clear out whatever the previous run left behind.
        print("{} already exists, deleting images".format(output_name))
        for fn in os.listdir(output_dir):
            os.remove(os.path.join(output_dir, fn))
    else:
        os.makedirs(output_dir)

    # Frame rate at which total_frames frames play back in `duration`.
    fps = total_frames / duration

    # Seed once per row. The base latent AND every noise offset below are drawn
    # from this generator, so the whole video is reproducible from the seed.
    generator.manual_seed(seed)

    # Base latent that every segment target is an offset of.
    latent_choose = torch.randn(latent_shape, generator=generator, device="cuda")

    frame_number = 0
    current_latent = latent_choose

    for batch_index, scale in enumerate(scales):

        # Previously drawn from the global RNG, which ignored the seed.
        latent_noise = torch.randn(latent_shape, generator=generator, device="cuda")
        target_latent = latent_choose + latent_noise * scale

        latent_steps = make_latent_steps(current_latent, target_latent, num_interpolation_steps)

        # Skip index 0: it equals current_latent, which (after the first
        # segment) was already rendered as the previous segment's final frame.
        for latent_step in latent_steps[1:]:
            frame_number += 1

            print("{}: Making image {} out of {} within batch {}".format(
                output_name, frame_number, total_frames, batch_index))

            # NOTE(review): the prompt *name* from the noise sheet is passed as
            # the text prompt. If df_prompt carries a separate prompt-text
            # column, that is probably what should be sent here -- confirm.
            with torch.autocast("cuda"):
                images = pipe(
                    [prompt_name],
                    guidance_scale=guidance_scale,
                    latents=latent_step,
                    num_inference_steps=num_inference_steps,
                )

            output_image = images.images[0]

            # Zero-padded frame index in the file name.
            output_number_string = str(frame_number).zfill(6)
            output_image.save(os.path.join(output_dir, 'frame{}.png'.format(output_number_string)))

            # Replace the previous progress line instead of piling up output.
            clear_output(wait=True)

        # The next segment starts where this one was aiming.
        current_latent = target_latent

    # Assemble the frames written above into one mp4 next to the frame folders.
    make_video_pyav(output_dir,
                    output_filepath=os.path.join(output_basedir, '{}.mp4'.format(output_name)),
                    fps=fps
                    )



red cave2_1488523408897636: Making image 150 out of 153 within batch 2


  0%|          | 0/50 [00:00<?, ?it/s]