In [None]:
# Textual Inversion Training 
# -----------------------------------
# This notebook was built for stable diffusion fine-tuning
# It takes 3-8 training image urls as its primary input
# Additionally it can take any number of text prompts
# It outputs any specified number of images
# Output images are saved in Google Drive
# -----------------------------------
# You can run as a Google Colab Notebook
# Not 100% cleaned up, definitely a work in progress

In [None]:
# Resources / Reference Notebooks
# -----------------------------------
# https://huggingface.co/docs/diffusers/training/text_inversion
# https://github.com/fastai/diffusion-nbs/blob/master/Stable%20Diffusion%20Deep%20Dive.ipynb
# https://www.youtube.com/watch?v=_7rMfsA24Ls
# https://huggingface.co/
# https://www.fast.ai/

In [1]:
# Training Image URLs
# -----------------------------------
# Paste in urls for the images you want to train your new concept on.
# These could be images that inspire you or images of your artwork etc.
# Upload your own images to https://imgur.com/ for urls
# 5-8 images is usually adequate for a training.
# Follow the format below.

# urls = [
#   "https://i.imgur.com/xNZW4TW.jpg",
#   "https://i.imgur.com/9NMokla.jpg",
#   "https://i.imgur.com/A4p0djw.jpg",
#   "https://i.imgur.com/ocYQtxp.jpg",
#   "https://i.imgur.com/CuchxlL.jpg",
#   "https://i.imgur.com/9uDvP2j.jpg",
#   "https://i.imgur.com/63W9Cyv.jpg",
# ]

urls = [] # <----- paste your image urls here

In [None]:
# Text Prompts
# -----------------------------------
# Add as many text prompts as desired.
# All text prompts will be run after initial training.
# Output images will go to your specified Google Drive folder.
# All prompts will be appended with "in the style of <your-model>"
# More prompts can be given later.

prompts = []

In [None]:
# Setup Information / Variables
# -----------------------------------
# [needs refactoring]


import os

# Variables for model outputs...
images_per_prompt = 5          # images generated for each entry in `prompts`
total_output_images = 175      # target image count for the whole session
ahx_model_number = 11          # used to build the concept name and the Drive folder name

what_to_teach = "style" # ["object", "style"]
# `initializer_token` is a word that can summarise what your new concept is, to be used as a starting point
initializer_token = "painting"



# ------ DON'T CHANGE THESE ----------------

# Variables to save new model to hugging face...
save_concept_to_public_library = True
name_of_your_concept = f"ahx-model-{ahx_model_number}"
name_of_your_concept_dup = f"{name_of_your_concept}" # temporary for sanity check before uploading to concept library
# SECURITY: never hardcode an access token in a notebook -- anything committed
# here must be treated as leaked and revoked. The token is read from the
# HF_TOKEN_WRITE environment variable instead. When it is empty, the
# "Save the New Concept" cell falls back to the token file it reads from
# `HfFolder.path_token`.
hf_token_write = os.environ.get("HF_TOKEN_WRITE", "")

# Mount Google Drive so generated images can be written to it...
from google.colab import drive
drive.mount('/content/drive') # <-- shouldn't change
root_path = '/content/drive/My Drive' # <-- shouldn't change

# Make your folder in Google Drive and define here...
# NOTE(review): nothing in this cell creates the folder -- it is only named here.
your_path = f'/stable-diffusion/model-{ahx_model_number}-bulk' # <-- your folder
# Full output folder: the Drive root followed by the per-model sub-path.
google_drive_path = f'{root_path}{your_path}'

# `placeholder_token` is the token you are going to use to represent your new concept (so when you prompt the model, you will say "A `<my-placeholder-token>` in an amusement park"). We use angle brackets to differentiate a token from other words/tokens, to avoid collision.
# Here it is the concept name wrapped in angle brackets.
placeholder_token = f"<{name_of_your_concept}>" # {type:"string"}

In [None]:
# Library Installations
# -----------------------------------
# [boiler plate / needs refactoring]

!pip install -U -qq git+https://github.com/huggingface/diffusers.git
!pip install -qq accelerate transformers ftfy
!pip install -qq "ipywidgets>=7,<8"

In [None]:
# Memory Optimization
# -----------------------------------
# [boiler plate / needs refactoring]


!pip install -U --pre triton

from subprocess import getoutput
from IPython.display import HTML
from IPython.display import clear_output
import time

s = getoutput('nvidia-smi')
if 'T4' in s:
  gpu = 'T4'
elif 'P100' in s:
  gpu = 'P100'
elif 'V100' in s:
  gpu = 'V100'
elif 'A100' in s:
  gpu = 'A100'

while True:
    try: 
        gpu=='T4'or gpu=='P100'or gpu=='V100'or gpu=='A100'
        break
    except:
        pass
    print('[1;31mit seems that your GPU is not supported at the moment')
    time.sleep(5)

if (gpu=='T4'):
  %pip install -q https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/T4/xformers-0.0.13.dev0-py3-none-any.whl
  
elif (gpu=='P100'):
  %pip install -q https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/P100/xformers-0.0.13.dev0-py3-none-any.whl

elif (gpu=='V100'):
  %pip install -q https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/V100/xformers-0.0.13.dev0-py3-none-any.whl

elif (gpu=='A100'):
  %pip install -q https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/A100/xformers-0.0.13.dev0-py3-none-any.whl

In [None]:
# Hugging Face Login
# -----------------------------------
# [boiler plate / not needed]

# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably this stores the token that the "Save the New Concept"
# cell reads from `HfFolder.path_token` when `hf_token_write` is empty -- confirm.
from huggingface_hub import notebook_login
notebook_login()

In [13]:
# Import Libraries
# -----------------------------------
# [boiler plate / could use explanatory notes]


import argparse
import itertools
import random
import math
import os

import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from torch.utils.data import Dataset

import PIL
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed
from diffusers import AutoencoderKL, DDPMScheduler, PNDMScheduler, StableDiffusionPipeline, UNet2DConditionModel
from diffusers.optimization import get_scheduler
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from PIL import Image
from torchvision import transforms
from tqdm.auto import tqdm
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer

ModuleNotFoundError: No module named 'numpy'

In [14]:
# Image Grid Function 
# -----------------------------------
# [boiler plate / could use refactor]


def image_grid(imgs, rows, cols):
    """Paste images into a single grid image, filling it row by row.

    Every cell has the size of the first image; the images are pasted in
    order, left to right and then top to bottom.

    Args:
        imgs: sequence of PIL images; must contain exactly ``rows * cols`` items.
        rows: number of grid rows.
        cols: number of grid columns.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)``.

    Raises:
        ValueError: if ``imgs`` is empty (there is no image to take a size from).
        AssertionError: if ``len(imgs)`` does not equal ``rows * cols``.
    """
    # An empty list used to slip past the count check when rows * cols == 0 and
    # then crash on imgs[0] with an unhelpful IndexError.
    if len(imgs) == 0:
        raise ValueError("image_grid() needs at least one image, got none")
    assert len(imgs) == rows*cols, f"expected {rows*cols} images, got {len(imgs)}"

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols*w, rows*h))

    for i, img in enumerate(imgs):
        # column = i % cols, row = i // cols
        grid.paste(img, box=(i%cols*w, i//cols*h))
    return grid

In [17]:
# Stable Diffusion Version 
# -----------------------------------

pretrained_model_name_or_path = "stabilityai/stable-diffusion-2" 

# other options are...
# "stabilityai/stable-diffusion-2"
# "stabilityai/stable-diffusion-2-base"
# "CompVis/stable-diffusion-v1-4"
# "runwayml/stable-diffusion-v1-5"

In [18]:
# Setup / Check Training Images
# -----------------------------------
# [boiler plate / could use refactor]


import requests
import glob
from io import BytesIO
import matplotlib.pyplot as plt

def download_image(url):
  """Download ``url`` and decode it as an RGB PIL image.

  Args:
    url: address of the image to fetch.

  Returns:
    The decoded image converted to RGB, or ``None`` when the request fails,
    the server answers with an error status, or the payload cannot be decoded
    as an image. A message naming the skipped url is printed in that case.
  """
  try:
    # A timeout keeps one stalled host from hanging the whole cell.
    response = requests.get(url, timeout=30)
    # Treat 4xx/5xx answers as failures instead of trying to decode an error page.
    response.raise_for_status()
    return Image.open(BytesIO(response.content)).convert("RGB")
  except (requests.RequestException, OSError) as err:
    # OSError also covers PIL's "cannot identify image file" error.
    print(f"Skipping {url}: {err}")
    return None

# Download every url; failed downloads come back as None and are dropped.
images = [img for img in (download_image(url) for url in urls) if img is not None]
if not images:
  # Stop here with a clear message rather than failing later inside image_grid.
  raise ValueError("No training images were downloaded -- add image urls to `urls` first.")

# Folder the dataset is later built from.
save_path = "./my_concept"
os.makedirs(save_path, exist_ok=True)
for i, image in enumerate(images):
  image.save(f"{save_path}/{i}.jpeg")

# Preview all training images side by side in a single row.
grid = image_grid(images, 1, len(images))

plt.figure(figsize=(9, 9))
plt.axis('off')
plt.imshow(grid)

In [20]:
# Prompt Templates for Training
# -----------------------------------
# [boiler plate / could use refactor]


# Caption templates used when teaching an "object". The `{}` slot is filled
# with the placeholder token by the dataset, which picks one template at random
# per sample.
imagenet_templates_small = [
    "a photo of a {}",
    "a rendering of a {}",
    "a cropped photo of the {}",
    "the photo of a {}",
    "a photo of a clean {}",
    "a photo of a dirty {}",
    "a dark photo of the {}",
    "a photo of my {}",
    "a photo of the cool {}",
    "a close-up photo of a {}",
    "a bright photo of the {}",
    "a cropped photo of a {}",
    "a photo of the {}",
    "a good photo of the {}",
    "a photo of one {}",
    "a close-up photo of the {}",
    "a rendition of the {}",
    "a photo of the clean {}",
    "a rendition of a {}",
    "a photo of a nice {}",
    "a good photo of a {}",
    "a photo of the nice {}",
    "a photo of the small {}",
    "a photo of the weird {}",
    "a photo of the large {}",
    "a photo of a cool {}",
    "a photo of a small {}",
]

# Caption templates used when teaching a "style".
# NOTE(review): "a cropped painting..." and "a close-up painting..." each appear
# twice, so they are drawn more often by random.choice -- confirm this is intended.
imagenet_style_templates_small = [
    "a painting in the style of {}",
    "a rendering in the style of {}",
    "a cropped painting in the style of {}",
    "the painting in the style of {}",
    "a clean painting in the style of {}",
    "a dirty painting in the style of {}",
    "a dark painting in the style of {}",
    "a picture in the style of {}",
    "a cool painting in the style of {}",
    "a close-up painting in the style of {}",
    "a bright painting in the style of {}",
    "a cropped painting in the style of {}",
    "a good painting in the style of {}",
    "a close-up painting in the style of {}",
    "a rendition in the style of {}",
    "a nice painting in the style of {}",
    "a small painting in the style of {}",
    "a weird painting in the style of {}",
    "a large painting in the style of {}",
]

In [None]:
# Setup Dataset
# -----------------------------------
# [boiler plate / could use refactor]


class TextualInversionDataset(Dataset):
    """Pairs each training image with a randomly templated caption.

    Every item is a dict with:
      * ``input_ids``    - the tokenized caption, padded/truncated to the
                           tokenizer's ``model_max_length``;
      * ``pixel_values`` - the image as a float32 tensor in channel-first
                           layout, scaled to the range [-1, 1].

    Args:
        data_root: folder containing the training images.
        tokenizer: tokenizer used to encode the captions.
        learnable_property: "style" selects the style templates, anything else
            selects the object templates.
        size: side length (pixels) the images are resized to (square).
        repeats: how many times the image list is repeated when ``set == "train"``.
        interpolation: one of "linear", "bilinear", "bicubic", "lanczos".
        flip_p: probability of a random horizontal flip.
        set: dataset split name; only "train" applies ``repeats``.
        placeholder_token: text inserted into the caption templates.
        center_crop: crop the largest centred square before resizing.

    Raises:
        ValueError: if ``data_root`` contains no files.
        KeyError: if ``interpolation`` is not one of the supported names.
    """

    def __init__(
        self,
        data_root,
        tokenizer,
        learnable_property="object",  # [object, style]
        size=512,
        repeats=100,
        interpolation="bicubic",
        flip_p=0.5,
        set="train",
        placeholder_token="*",
        center_crop=False,
    ):

        self.data_root = data_root
        self.tokenizer = tokenizer
        self.learnable_property = learnable_property
        self.size = size
        self.placeholder_token = placeholder_token
        self.center_crop = center_crop
        self.flip_p = flip_p

        # Keep regular files only: a sub-directory in the folder (for example a
        # notebook checkpoint directory) would otherwise be opened as an image.
        # Sorting makes the index -> image mapping independent of listdir order.
        candidates = (os.path.join(self.data_root, name) for name in os.listdir(self.data_root))
        self.image_paths = sorted(path for path in candidates if os.path.isfile(path))

        self.num_images = len(self.image_paths)
        if self.num_images == 0:
            # Without this the first __getitem__ would fail with a modulo-by-zero.
            raise ValueError(f"No image files found in {self.data_root!r}")
        self._length = self.num_images

        if set == "train":
            self._length = self.num_images * repeats

        # Pillow 9.1 introduced the `Resampling` enum and Pillow 10 removed the
        # old `Image.LINEAR` alias, so resolve the filters in a way that works on
        # both. `LINEAR` was an alias of `BILINEAR`.
        resampling = getattr(PIL.Image, "Resampling", PIL.Image)
        self.interpolation = {
            "linear": resampling.BILINEAR,
            "bilinear": resampling.BILINEAR,
            "bicubic": resampling.BICUBIC,
            "lanczos": resampling.LANCZOS,
        }[interpolation]

        self.templates = imagenet_style_templates_small if learnable_property == "style" else imagenet_templates_small
        self.flip_transform = transforms.RandomHorizontalFlip(p=self.flip_p)

    def __len__(self):
        """Number of samples (image count, times ``repeats`` for the train split)."""
        return self._length

    def __getitem__(self, i):
        """Return the caption ids and the preprocessed image for sample ``i``."""
        example = {}
        # Indices wrap around the image list so `repeats` re-uses the same files.
        image = Image.open(self.image_paths[i % self.num_images])

        if not image.mode == "RGB":
            image = image.convert("RGB")

        # A fresh random caption is drawn on every access.
        placeholder_string = self.placeholder_token
        text = random.choice(self.templates).format(placeholder_string)

        example["input_ids"] = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.tokenizer.model_max_length,
            return_tensors="pt",
        ).input_ids[0]

        # default to score-sde preprocessing
        img = np.array(image).astype(np.uint8)

        if self.center_crop:
            # Largest centred square of the image.
            h, w = img.shape[0], img.shape[1]
            crop = min(h, w)
            img = img[(h - crop) // 2 : (h + crop) // 2, (w - crop) // 2 : (w + crop) // 2]

        image = Image.fromarray(img)
        image = image.resize((self.size, self.size), resample=self.interpolation)

        image = self.flip_transform(image)
        image = np.array(image).astype(np.uint8)
        # Map 0..255 to -1..1.
        image = (image / 127.5 - 1.0).astype(np.float32)

        # HWC -> CHW
        example["pixel_values"] = torch.from_numpy(image).permute(2, 0, 1)
        return example

In [22]:
# Load Tokenizer / Add Placeholder
# -----------------------------------
# This section loads the tokenizer and adds the placeholder token as an additional special token
# This is boiler plate and could use refactoring and explanatory notes


# Load the tokenizer stored in the "tokenizer" subfolder of the chosen base model.
tokenizer = CLIPTokenizer.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="tokenizer",
)

# Add the placeholder token in tokenizer
# `add_tokens` reports how many tokens were added; 0 is treated as "already
# present", in which case a different placeholder must be chosen.
num_added_tokens = tokenizer.add_tokens(placeholder_token)
if num_added_tokens == 0:
    raise ValueError(
        f"The tokenizer already contains the token {placeholder_token}. Please pass a different"
        " `placeholder_token` that is not already in the tokenizer."
    )

NameError: name 'CLIPTokenizer' is not defined

In [None]:
# Get Token Ids
# -----------------------------------
# This code will raise an error if the initializer string is not a single token
# It then converts the initializer token and the placeholder token to ids
# Clarification is needed on what the initializer token, placeholder token and ids are
# This is boiler plate and needs explanatory notes

# Encode the initializer word without special tokens so only the ids of the word
# itself come back.
token_ids = tokenizer.encode(initializer_token, add_special_tokens=False)

# Exactly one id is required. Checking `!= 1` (rather than `> 1`) also rejects
# an initializer that encodes to nothing, which would otherwise fail on
# `token_ids[0]` below with an IndexError.
if len(token_ids) != 1:
    raise ValueError("The initializer token must be a single token.")

initializer_token_id = token_ids[0]
placeholder_token_id = tokenizer.convert_tokens_to_ids(placeholder_token)

In [None]:
# Load the Stable Diffusion Model
# -----------------------------------


#@title Load the Stable Diffusion model
# Load models and create wrapper for stable diffusion
# pipeline = StableDiffusionPipeline.from_pretrained(pretrained_model_name_or_path)
# del pipeline
# The three components are loaded separately, each from its own subfolder of
# the chosen base model.
text_encoder = CLIPTextModel.from_pretrained(
    pretrained_model_name_or_path, subfolder="text_encoder"
)
vae = AutoencoderKL.from_pretrained(
    pretrained_model_name_or_path, subfolder="vae"
)
unet = UNet2DConditionModel.from_pretrained(
    pretrained_model_name_or_path, subfolder="unet"
)

In [None]:
# Resize Token Embeddings
# -----------------------------------
# We have added the placeholder_token in the tokenizer
# We resize the token embeddings here for a new embedding vector in the token embeddings for our placeholder_token
# This is boiler plate and needs further explanatory notes

# The tokenizer gained one token (the placeholder), so resize the text encoder's
# embedding table to the tokenizer's new length.
text_encoder.resize_token_embeddings(len(tokenizer))

In [None]:
# Initialize Placeholder Token
# -----------------------------------
# This section initializes the new added placeholder token
# This is initialized with the embeddings of the initializer token
# This is boiler plate code and needs further explanatory notes

# Work on the raw embedding weights and start the placeholder's row as a copy
# of the initializer token's row.
token_embeds = text_encoder.get_input_embeddings().weight.data
token_embeds[placeholder_token_id] = token_embeds[initializer_token_id]

In [None]:
# Freeze Model Parameters
# -----------------------------------
# We are only training the newly added embedding vector
# So we freeze the rest of the model parameters
# This is boiler plate and needs more notes

def freeze_params(params):
    """Switch off gradient tracking for every parameter yielded by ``params``.

    Args:
        params: iterable of objects exposing a ``requires_grad`` attribute.
    """
    for frozen in params:
        frozen.requires_grad = False

# This freezes vae and unet...
freeze_params(vae.parameters())
freeze_params(unet.parameters())

# This freezes all the parameters except for the token embeddings in text encoder...
# (the encoder layers, the final layer norm and the position embeddings are
# listed; the token embedding table is deliberately left out so it stays trainable)
params_to_freeze = itertools.chain(
    text_encoder.text_model.encoder.parameters(),
    text_encoder.text_model.final_layer_norm.parameters(),
    text_encoder.text_model.embeddings.position_embedding.parameters(),
)
freeze_params(params_to_freeze)

In [None]:
# Create the Training Data
# -----------------------------------
# This is also boiler plate and needs more notes
# The noise scheduler in particular might be a good section to play with

# First we create the dataset and the dataloader...
# Build the dataset from the images saved in `save_path`. Images are resized to
# the sample size recorded in the VAE's config (read through `.config`, the
# non-deprecated way to access model configuration values).
train_dataset = TextualInversionDataset(
      data_root=save_path,
      tokenizer=tokenizer,
      size=vae.config.sample_size,
      placeholder_token=placeholder_token,
      repeats=100,
      learnable_property=what_to_teach, #Option selected above between object and style
      center_crop=False,
      set="train",
)

def create_dataloader(train_batch_size=1):
    """Return a shuffling DataLoader over the module-level ``train_dataset``.

    Args:
        train_batch_size: number of samples per batch.
    """
    return torch.utils.data.DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)

# Now we create the noise scheduler for the training...
# `from_pretrained` is the supported way to load a scheduler from a model repo
# (passing a repo name to `from_config` is deprecated) and matches how the
# inference scheduler is loaded later in this notebook.
noise_scheduler = DDPMScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")

In [None]:
# The Actual Training
# -----------------------------------
# This section is boiler plate but has a lot of potential
# There are a lot of parameters that could be tweaked
# Good potential user inputs in a gradio interface

# First we define our hyperparameters for the training
# Tuning the learning_rate and the max_train_steps can improve results

# Setting up the training arguments / hyperparameters...
hyperparameters = {
    "learning_rate": 5e-04,             # base rate; multiplied up when "scale_lr" is True
    "scale_lr": True,                   # scale by batch size, accumulation steps and process count
    "max_train_steps": 2000,            # total optimisation steps
    "save_steps": 250,                  # write intermediate embeddings every N steps
    "train_batch_size": 4,
    "gradient_accumulation_steps": 1,
    "gradient_checkpointing": True,
    "mixed_precision": "fp16",
    "seed": 42,
    "output_dir": "sd-concept-output"   # where embeddings and the final pipeline are saved
}
# Create the output folder from the configured value so the folder and the
# setting cannot drift apart (previously a separate hardcoded `mkdir`).
os.makedirs(hyperparameters["output_dir"], exist_ok=True)

# And then the actual training function...
logger = get_logger(__name__)

def save_progress(text_encoder, placeholder_token_id, accelerator, save_path):
    """Write the placeholder token's current embedding to ``save_path``.

    The file holds a one-entry dict mapping the module-level ``placeholder_token``
    string to its embedding vector, detached and moved to the CPU.

    Args:
        text_encoder: (possibly accelerator-wrapped) text encoder being trained.
        placeholder_token_id: row of the embedding table to export.
        accelerator: used to unwrap ``text_encoder`` before reading its weights.
        save_path: destination file passed to ``torch.save``.
    """
    logger.info("Saving embeddings")
    embedding_layer = accelerator.unwrap_model(text_encoder).get_input_embeddings()
    trained_vector = embedding_layer.weight[placeholder_token_id]
    torch.save({placeholder_token: trained_vector.detach().cpu()}, save_path)

def training_function(text_encoder, vae, unet):
    """Train the placeholder token's embedding while everything else stays fixed.

    Settings come from the module-level ``hyperparameters`` dict; the function
    also uses the module-level ``tokenizer``, ``train_dataset``,
    ``noise_scheduler``, ``placeholder_token_id`` and
    ``pretrained_model_name_or_path``. The embedding is written to
    ``output_dir`` every ``save_steps`` optimisation steps, and at the end the
    full pipeline plus the final embedding are saved there.

    Args:
        text_encoder: text encoder whose input embedding table is optimised.
        vae: autoencoder used to turn images into latents (kept in eval mode).
        unet: denoising network whose prediction is compared with the target.

    Raises:
        ValueError: if the scheduler's prediction type is neither "epsilon"
            nor "v_prediction".
    """
    train_batch_size = hyperparameters["train_batch_size"]
    gradient_accumulation_steps = hyperparameters["gradient_accumulation_steps"]
    learning_rate = hyperparameters["learning_rate"]
    max_train_steps = hyperparameters["max_train_steps"]
    output_dir = hyperparameters["output_dir"]
    gradient_checkpointing = hyperparameters["gradient_checkpointing"]

    accelerator = Accelerator(
        gradient_accumulation_steps=gradient_accumulation_steps,
        mixed_precision=hyperparameters["mixed_precision"]
    )

    # Apply the configured seed. The "seed" hyperparameter used to be defined
    # but never applied, so random sampling started from an arbitrary state.
    set_seed(hyperparameters["seed"])

    if gradient_checkpointing:
        text_encoder.gradient_checkpointing_enable()
        unet.enable_gradient_checkpointing()

    train_dataloader = create_dataloader(train_batch_size)

    if hyperparameters["scale_lr"]:
        learning_rate = (
            learning_rate * gradient_accumulation_steps * train_batch_size * accelerator.num_processes
        )

    # Initialize the optimizer
    optimizer = torch.optim.AdamW(
        text_encoder.get_input_embeddings().parameters(),  # only optimize the embeddings
        lr=learning_rate,
    )

    text_encoder, optimizer, train_dataloader = accelerator.prepare(
        text_encoder, optimizer, train_dataloader
    )

    weight_dtype = torch.float32
    if accelerator.mixed_precision == "fp16":
        weight_dtype = torch.float16
    elif accelerator.mixed_precision == "bf16":
        weight_dtype = torch.bfloat16

    # Move vae and unet to device
    vae.to(accelerator.device, dtype=weight_dtype)
    unet.to(accelerator.device, dtype=weight_dtype)

    # Keep vae in eval mode as we don't train it
    vae.eval()
    # Keep unet in train mode to enable gradient checkpointing
    unet.train()

    # Mask over the vocabulary that is True for every token EXCEPT the
    # placeholder. It never changes, so build it once instead of every step.
    index_no_updates = torch.arange(len(tokenizer)) != placeholder_token_id
    # Snapshot of the embedding table before training. Zeroing gradients alone
    # does not keep the other rows fixed, because AdamW's weight decay is
    # applied to every row regardless of its gradient. Rows other than the
    # placeholder are therefore copied back from this snapshot after each step.
    orig_embeds_params = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight.data.clone()

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / gradient_accumulation_steps)
    num_train_epochs = math.ceil(max_train_steps / num_update_steps_per_epoch)

    # Train!
    total_batch_size = train_batch_size * accelerator.num_processes * gradient_accumulation_steps

    logger.info("***** Running training *****")
    logger.info(f"  Num examples = {len(train_dataset)}")
    logger.info(f"  Instantaneous batch size per device = {train_batch_size}")
    logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
    logger.info(f"  Gradient Accumulation steps = {gradient_accumulation_steps}")
    logger.info(f"  Total optimization steps = {max_train_steps}")
    # Only show the progress bar once on each machine.
    progress_bar = tqdm(range(max_train_steps), disable=not accelerator.is_local_main_process)
    progress_bar.set_description("Steps")
    global_step = 0

    for epoch in range(num_train_epochs):
        text_encoder.train()
        for step, batch in enumerate(train_dataloader):
            with accelerator.accumulate(text_encoder):
                # Convert images to latent space
                latents = vae.encode(batch["pixel_values"].to(dtype=weight_dtype)).latent_dist.sample().detach()
                latents = latents * 0.18215

                # Sample noise that we'll add to the latents
                noise = torch.randn_like(latents)
                bsz = latents.shape[0]
                # Sample a random timestep for each image
                timesteps = torch.randint(
                    0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device
                ).long()

                # Add noise to the latents according to the noise magnitude at each timestep
                # (this is the forward diffusion process)
                noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

                # Get the text embedding for conditioning
                encoder_hidden_states = text_encoder(batch["input_ids"])[0]

                # Predict the noise residual
                noise_pred = unet(noisy_latents, timesteps, encoder_hidden_states.to(weight_dtype)).sample

                # Get the target for loss depending on the prediction type
                if noise_scheduler.config.prediction_type == "epsilon":
                    target = noise
                elif noise_scheduler.config.prediction_type == "v_prediction":
                    target = noise_scheduler.get_velocity(latents, noise, timesteps)
                else:
                    raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}")

                loss = F.mse_loss(noise_pred, target, reduction="none").mean([1, 2, 3]).mean()
                accelerator.backward(loss)

                # Zero out the gradients for all token embeddings except the newly added
                # embeddings for the concept, as we only want to optimize the concept embeddings
                grads = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight.grad
                grads.data[index_no_updates, :] = 0

                optimizer.step()
                optimizer.zero_grad()

                # Undo any change the optimizer made to rows other than the
                # placeholder (see the snapshot comment above).
                with torch.no_grad():
                    embedding_weights = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight
                    embedding_weights[index_no_updates] = orig_embeds_params[index_no_updates]

            # Checks if the accelerator has performed an optimization step behind the scenes
            if accelerator.sync_gradients:
                progress_bar.update(1)
                global_step += 1
                if global_step % hyperparameters["save_steps"] == 0:
                    save_path = os.path.join(output_dir, f"learned_embeds-step-{global_step}.bin")
                    save_progress(text_encoder, placeholder_token_id, accelerator, save_path)

            logs = {"loss": loss.detach().item()}
            progress_bar.set_postfix(**logs)

            if global_step >= max_train_steps:
                break

        accelerator.wait_for_everyone()


    # Create the pipeline using the trained modules and save it.
    if accelerator.is_main_process:
        pipeline = StableDiffusionPipeline.from_pretrained(
            pretrained_model_name_or_path,
            text_encoder=accelerator.unwrap_model(text_encoder),
            tokenizer=tokenizer,
            vae=vae,
            unet=unet,
        )
        pipeline.save_pretrained(output_dir)
        # Also save the newly trained embeddings
        save_path = os.path.join(output_dir, f"learned_embeds.bin")
        save_progress(text_encoder, placeholder_token_id, accelerator, save_path)

import accelerate # <--- adds timestamps
accelerate.notebook_launcher(training_function, args=(text_encoder, vae, unet))

# Drop the gradients left over from training...
for param in itertools.chain(unet.parameters(), text_encoder.parameters()):
  if param.grad is not None:
    del param.grad # <--- frees some memory
# ...then release the cached GPU memory once, after all gradients are gone
# (this call used to sit inside the loop and ran once per parameter).
torch.cuda.empty_cache()

In [None]:
# Save the New Concept
# -----------------------------------
# This adds the newly trained concept to hugging face's library of concepts
# This makes it possible to reload later for subsequent prompting sessions
# It's all boiler plate and could probably be refactored

if(save_concept_to_public_library):
  from slugify import slugify
  from huggingface_hub import HfApi, HfFolder, CommitOperationAdd
  from huggingface_hub import create_repo
  repo_id = f"sd-concepts-library/{slugify(name_of_your_concept)}"
  output_dir = hyperparameters["output_dir"]
  if(not hf_token_write):
    with open(HfFolder.path_token, 'r') as fin: hf_token = fin.read();
  else:
    hf_token = hf_token_write
  #Join the Concepts Library organization if you aren't part of it already
  !curl -X POST -H 'Authorization: Bearer '$hf_token -H 'Content-Type: application/json' https://huggingface.co/organizations/sd-concepts-library/share/VcLXJtzwwxnHYCkNMLpSJCdnNFZHQwWywv
  images_upload = os.listdir("my_concept")
  image_string = ""
  repo_id = f"sd-concepts-library/{slugify(name_of_your_concept)}"
  for i, image in enumerate(images_upload):
      image_string = f'''{image_string}![{placeholder_token} {i}](https://huggingface.co/{repo_id}/resolve/main/concept_images/{image})
'''
  if(what_to_teach == "style"):
      what_to_teach_article = f"a `{what_to_teach}`"
  else:
      what_to_teach_article = f"an `{what_to_teach}`"
  readme_text = f'''---
license: mit
---
### {name_of_your_concept} on Stable Diffusion
This is the `{placeholder_token}` concept taught to Stable Diffusion via Textual Inversion. You can load this concept into the [Stable Conceptualizer](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_conceptualizer_inference.ipynb) notebook. You can also train your own concepts and load them into the concept libraries using [this notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_textual_inversion_training.ipynb).

Here is the new concept you will be able to use as {what_to_teach_article}:
{image_string}
'''
  #Save the readme to a file
  readme_file = open("README.md", "w")
  readme_file.write(readme_text)
  readme_file.close()
  #Save the token identifier to a file
  text_file = open("token_identifier.txt", "w")
  text_file.write(placeholder_token)
  text_file.close()
  #Save the type of teached thing to a file
  type_file = open("type_of_concept.txt","w")
  type_file.write(what_to_teach)
  type_file.close()
  operations = [
    CommitOperationAdd(path_in_repo="learned_embeds.bin", path_or_fileobj=f"{output_dir}/learned_embeds.bin"),
    CommitOperationAdd(path_in_repo="token_identifier.txt", path_or_fileobj="token_identifier.txt"),
    CommitOperationAdd(path_in_repo="type_of_concept.txt", path_or_fileobj="type_of_concept.txt"),
    CommitOperationAdd(path_in_repo="README.md", path_or_fileobj="README.md"),
  ]
  # create_repo(repo_id,private=True, token="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")
  print("--------------------->", hf_token)
  create_repo(repo_id,private=True, token=hf_token)
  api = HfApi()
  api.create_commit(
    repo_id=repo_id,
    operations=operations,
    commit_message=f"Upload the concept {name_of_your_concept} embeds and token",
    token=hf_token
  )
  api.upload_folder(
    folder_path=save_path,
    path_in_repo="concept_images",
    repo_id=repo_id,
    token=hf_token
  )

In [None]:
# Set Up the Pipeline
# -----------------------------------
# Once again this is boiler plate and should have notes added

from diffusers import DPMSolverMultistepScheduler
# Reload the pipeline that training saved into the output directory, replacing
# its scheduler with a DPMSolverMultistepScheduler loaded from the saved
# "scheduler" subfolder. Weights are loaded as float16 and moved to the GPU.
pipe = StableDiffusionPipeline.from_pretrained(
    hyperparameters["output_dir"],
    scheduler=DPMSolverMultistepScheduler.from_pretrained(hyperparameters["output_dir"], subfolder="scheduler"),
    torch_dtype=torch.float16,
).to("cuda")

In [None]:
# Saving Output Images 
# -----------------------------------
# This function adds output images to Google Drive
# This was a custom addition but could still use notes / refactoring

import cv2
import numpy as np

def add_image_to_drive(input_img, img_name):
  """Save ``input_img`` as ``<img_name>.png`` inside ``google_drive_path``.

  Args:
    input_img: image to save; anything ``np.array`` can convert
      (presumably an RGB PIL image from the pipeline -- confirm).
    img_name: file name without extension.

  Raises:
    IOError: if OpenCV reports that the file could not be written.
  """
  # The target folder is only named in the setup cell, never created there.
  os.makedirs(google_drive_path, exist_ok=True)
  # cv2.imwrite expects BGR channel order, so swap the channels of the RGB input.
  # (Same channel swap as before, now spelled with the constant that matches
  # the direction of the conversion.)
  image_to_add = cv2.cvtColor(np.array(input_img), cv2.COLOR_RGB2BGR)
  image_name = f'{img_name}.png'
  target_path = f"{google_drive_path}/{image_name}"
  # imwrite signals failure through its return value instead of raising; without
  # this check a failed write went unnoticed while the caller printed "added".
  if not cv2.imwrite(target_path, image_to_add):
    raise IOError(f"Could not write image to {target_path}")

In [None]:
# Run the Stable Diffusion Pipeline
# -----------------------------------
# This section runs the text-to-image pipeline
# It keeps a count and iterates through all the user's text prompts
# It produces multiple images for each prompt as specified with random seeds
# After the image is produced it gets saved to the specified Google Drive folder
# The prompt and image number are saved as the files name
# This is modified but needs more refactoring and notes

# Set a cumulative count for entire session...
# Running total of images produced in this session; later cells keep adding to it.
count = 0

# Check the model name before making images so nothing gets overwritten on Google Drive ...
if name_of_your_concept != name_of_your_concept_dup:
  raise Exception("Something is wrong with your concept name, see above.")

for prompt in prompts:
  # Every user prompt is rendered in the style of the trained concept.
  full_prompt = f'{prompt} in the style of <{name_of_your_concept}>'
  for i in range(images_per_prompt):
    image = pipe(full_prompt, num_inference_steps=30, guidance_scale=7.5).images
    # File name: the prompt with spaces replaced by dashes, the concept name and a 1-based image index.
    file_name = prompt.replace(" ", "-") + f"-in-the-style-of-{name_of_your_concept}--{i+1}"
    add_image_to_drive(image[0], file_name)
    count += 1
    print(f"{file_name} added -- {count}/{total_output_images}")

In [None]:
# Additional Images
# -----------------------------------
# If the desired image count hasn't been reached yet more are made
# These are essentially empty prompts in the style of the new concept
# Often these produce the most interesting results
# Could definitely use cleaning up

# Running index for the "no-prompt" file names. `count` carries over from the
# previous cell, so this loop only tops the session up to `total_output_images`.
i = 1
while count < total_output_images:
    # The prompt is just the concept token on its own.
    prompt = f"<{name_of_your_concept}>"
    image = pipe(prompt, num_inference_steps=30, guidance_scale=7.5).images
    file_name = f"no-prompt-{name_of_your_concept}--{i}"
    add_image_to_drive(image[0], file_name)
    count += 1
    i += 1
    print(f"{file_name} added -- {count}/{total_output_images}")

In [None]:
# Supplemental Prompts
# -----------------------------------
# This is turned off by default but can be run repeatedly after training
# Section can be turned on by changing the "False" below to "True"
# Add as many additional prompts as you want
# They will all be appended with "in the style of <new-concept>"
# Set the images per prompt to the desired number as well
# The cumulative count will continue 

# This is all custom but could use refactoring


# NOTE(review): the notes above describe this section as off by default, but the
# flag is True here. With an empty `supplemental_prompts` list nothing is
# generated either way -- confirm which default is intended.
run_supplemental_prompts = True

images_per_supplemental_prompt = 5

# supplemental_prompts = [
#     "empty cities",
#     "distant forms",
# ]

supplemental_prompts = []

if run_supplemental_prompts:
  # Progress counters for this cell only; `count` keeps the session-wide total.
  _total_supplemental_images = images_per_supplemental_prompt * len(supplemental_prompts)
  _supplemental_prompts_completed = 0

  for prompt in supplemental_prompts:
    full_prompt = f'{prompt} in the style of <{name_of_your_concept}>'
    for i in range(images_per_supplemental_prompt):
      image = pipe(full_prompt, num_inference_steps=30, guidance_scale=7.5).images
      # The session-wide `count` is appended to the file name.
      file_name = prompt.replace(" ", "-") + f"-in-the-style-of-{name_of_your_concept}--{i+1}--{count}"
      add_image_to_drive(image[0], file_name)
      _supplemental_prompts_completed += 1
      count += 1
      print(f"{file_name} added -- {_supplemental_prompts_completed}/{_total_supplemental_images} -- {count} total")

In [None]:
# Randomized Prompts
# -----------------------------------
# This is similar to the section above but randomizes prompts
# It combines "styles" and "subjects" into randomly paired prompt strings
# It will continue 
# This is turned off by default but can be run repeatedly after training
# Section can be turned on by changing the "False" below to "True"
# Add as many additional prompts as you want
# They will all be appended with "in the style of <new-concept>"
# Set the images per prompt to the desired number as well
# The cumulative count will continue 

# This is all custom but could use refactoring


# Set to True to generate images from randomly paired styles and subjects.
run_supplemental_prompts = False

import random

# Total number of images this cell generates when enabled.
images_per_supplemental_prompt = 25

# Fill both lists before enabling the section; each image pairs one random
# style with one random subject.
random_styles = []
random_subjects = []

if run_supplemental_prompts:
  # random.choice fails on an empty list with an error that says nothing about
  # what to fix, so check up front and say what is missing.
  if not random_styles or not random_subjects:
    raise ValueError("Add entries to `random_styles` and `random_subjects` before enabling randomized prompts.")

  _total_supplemental_images = images_per_supplemental_prompt
  _supplemental_prompts_completed = 0


  for i in range(images_per_supplemental_prompt):
    random_subject = random.choice(random_subjects)
    random_style = random.choice(random_styles)

    prompt = f"{random_style} of {random_subject}"
    full_prompt = f'{prompt} in the style of <{name_of_your_concept}> on a bright white background'

    image = pipe(full_prompt, num_inference_steps=30, guidance_scale=7.5).images
    # The session-wide `count` is appended to the file name.
    file_name = prompt.replace(" ", "-") + f"-in-the-style-of-{name_of_your_concept}--{i+1}--{count}"
    add_image_to_drive(image[0], file_name)
    _supplemental_prompts_completed += 1

    count += 1

    print(f"{file_name} added -- {_supplemental_prompts_completed}/{_total_supplemental_images}")

  print("SUPPLEMENTAL PROMPT IMAGE GENERATION COMPLETE -----------------------------------")

In [None]:
# Prompt Sets
# -----------------------------------
# These are some optional ideas for prompts
# They can be copied from and added to the prompt sets above
# Feel free to add to these lists; they aren't referenced anywhere in the code

# Stand-alone prompt ideas: each entry is a complete phrase on its own.
# Presumably meant to be copied into `prompts` or `supplemental_prompts` -- confirm.
_example_prompts = [
    "an abstract painting",
    "an oil painting",
    "a dark painting",
    "a light painting",
    "another world",
    "the voice",
    "voyaging",
    "winter",
    "alchemy",
    "light center",
    "white fire",
    "resurgence",
    "constellations",
    "mountains",
    "planets",
    "rocks",
    "stars",
    "abstract geometric forms",
    "abstract forms",
    "geometric forms",
    "sacred forms",
    "transcendental forms",
    "sacred visions",
    "an abstract landscape",
    "a desert landscape",
    "the ocean",
    "sunset clouds",
    "dawn clouds",
    "twilight",
    "dusk"
]

# Medium/technique phrases. They read naturally as the first half of the
# "<style> of <subject>" strings built in the Randomized Prompts section.
# Presumably meant to be copied into `random_styles` -- confirm.
_example_styles = [
    "a photograph",
    "a photographic print",
    "a black and white photograph",
    "a sculpture",
    "a stone sculpture",
    "a drawing",
    "a pencil drawing",
    "a charcoal drawing",
    "a polaroid photograph",
    "a concrete sculpture",
    "an intaglio print"
]

# Subject phrases for the second half of "<style> of <subject>".
# Presumably meant to be copied into `random_subjects` -- confirm.
# NOTE(review): "a painting" and "an oil painting" are each listed twice; in a
# list sampled with random.choice a repeated entry is drawn more often --
# confirm this weighting is intentional.
# NOTE(review): several entries ("an intaglio print", "a black and white
# photograph", "a polaroid photograph", "a pencil drawing", "a charcoal
# drawing") also appear in _example_styles, so a pairing can read like
# "a drawing of a pencil drawing" -- confirm that is wanted.
_example_subjects = [
    "empty cities",
    "an empty city",
    "empty buildings",
    "an empty building",
    "an empty landscape",
    "an intaglio print",
    "a painting",
    "a painting",
    "an oil painting",
    "an oil painting",
    "a black and white photograph",
    "a polaroid photograph",
    "a pencil drawing",
    "a charcoal drawing",
    "distant vessels",
    "distant detailed forms",
    "distant intricate forms",
    "detailed forms",
    "intricate forms",
    "an abstract form",
    "an intricate abstract form",
    "an ornate abstract form",
    "a mechanical abstract form",
    "a geometric abstract form",
    "a detailed abstract form",
    "an organic abstract form",
    "an abstract form with a face",
    "an intricate abstract form with a face",
    "an ornate abstract form with a face",
    "a mechanical abstract form with a face",
    "a geometric abstract form with a face",
    "a detailed abstract form with a face",
    "an organic abstract form with a face",
    "a horse",
    "a dog",
    "a bird",
    "a cat",
    "a frog",
    "a bear",
    "a dolphin",
    "a dragon",
    "a wizard",
    "a knight",
    "a bull",
    "a cow",
    "a car",
    "a snail",
    "a crow",
    "a rabbit",
    "a penguin",
    "a pig",
    "a chicken",
    "a rat",
    "a house",
    "an abstract shape",
    "an abstract machine",
    "a human figure",
    "an alien",
    "a robot",
    "a planet",
    "a tree",
    "a mountain",
    "a rock",
    "a spaceship",
    "a hand",
    "a foot"
]