# Face-Dell, AI face generation model

## Capturing photos

In [None]:
from webcam_capture import capture_photos

# Ask the local `webcam_capture` helper for 4 photos, targeting data/user/.
# (What exactly gets written there is defined by that helper, not here.)
capture_photos(
    num_photos=4,
    output_dir='data/user/',
)

## Variables definition

In [1]:
import torch
from transformers import CLIPTextModel, CLIPTokenizer
from diffusers import StableDiffusionPipeline, UNet2DConditionModel, PNDMScheduler

# Definitions
model_id = "runwayml/stable-diffusion-v1-5"

# Prefer the GPU when one is visible to torch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Each sub-model is loaded on its own so it stays reachable by name
# (`unet` and `scheduler` are used again by the training cells below).
tokenizer = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder")
unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="unet")
scheduler = PNDMScheduler.from_pretrained(model_id, subfolder="scheduler")

# Assemble the pipeline around the components loaded above; whatever is not
# passed explicitly is loaded by `from_pretrained` itself.
pipeline_kwargs = dict(
    scheduler=scheduler,
    unet=unet,
    tokenizer=tokenizer,
    text_encoder=text_encoder,
    torch_dtype=torch.float,
    use_safetensors=True,
)
pipe = StableDiffusionPipeline.from_pretrained(model_id, **pipeline_kwargs)

# Last expression of the cell: moves the pipeline and displays its summary.
pipe.to(device)

  from .autonotebook import tqdm as notebook_tqdm
  deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message)
Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00, 25.84it/s]


StableDiffusionPipeline {
  "_class_name": "StableDiffusionPipeline",
  "_diffusers_version": "0.28.2",
  "_name_or_path": "runwayml/stable-diffusion-v1-5",
  "feature_extractor": [
    "transformers",
    "CLIPImageProcessor"
  ],
  "image_encoder": [
    null,
    null
  ],
  "requires_safety_checker": true,
  "safety_checker": [
    "stable_diffusion",
    "StableDiffusionSafetyChecker"
  ],
  "scheduler": [
    "diffusers",
    "PNDMScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}

## Fine-tuning

In [2]:
import json
import pandas as pd

# Read the spreadsheet that lists every image together with its prompt
file_path = 'data/dataset.ods'
data = pd.read_excel(file_path, engine='odf')

# Keep the two relevant columns under the names `file_name` / `text`, and
# reduce each path to whatever follows its last backslash.
data_to_export = (
    data[['URL', 'Prompt']]
    .rename(columns={'URL': 'file_name', 'Prompt': 'text'})
    .assign(
        file_name=lambda frame: frame['file_name'].map(
            lambda path: path.rsplit("\\", 1)[-1]
        )
    )
)

# One {"file_name": ..., "text": ...} record per row
dataset_list = data_to_export.to_dict(orient='records')

# Persist the records as pretty-printed JSON
json_file_path = 'data/dataset.json'
with open(json_file_path, 'w') as json_file:
    json.dump(dataset_list, json_file, indent=4)

# Show where the file was written
json_file_path

'data/dataset.json'

In [4]:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from dataclasses import dataclass

@dataclass
class TrainingConfig:
    """Hyper-parameters and output settings for the fine-tuning run.

    Every attribute is type-annotated so that ``@dataclass`` registers it as a
    real field: values can be overridden per instance
    (``TrainingConfig(num_epochs=5)``) and show up in ``repr`` and ``==``.
    Without annotations the decorator generates an ``__init__`` that accepts
    no arguments at all.
    """

    image_size: int = 512  # the generated image resolution
    train_batch_size: int = 16
    eval_batch_size: int = 16  # how many images to sample during evaluation
    num_epochs: int = 50
    gradient_accumulation_steps: int = 1
    learning_rate: float = 1e-4
    lr_warmup_steps: int = 500
    save_image_epochs: int = 10
    save_model_epochs: int = 30
    mixed_precision: str = "fp16"  # `no` for float32, `fp16` for automatic mixed precision
    output_dir: str = "facegen"  # the model name locally and on the HF Hub

    push_to_hub: bool = False  # whether to upload the saved model to the HF Hub
    hub_model_id: str = "kurouge/facegen"  # the name of the repository to create on the HF Hub
    hub_private_repo: bool = True
    overwrite_output_dir: bool = True  # overwrite the old model when re-running the notebook
    seed: int = 0

# Default configuration used by the cells below
config = TrainingConfig()

In [6]:
from datasets import load_dataset

# Build the "train" split from the image folder on disk.
# NOTE(review): this reads data/images, while the metadata cells in this
# notebook write to data/dataset and data/dataset.json — confirm the folder.
dataset = load_dataset(
    "imagefolder",
    data_dir="data/images",
    split="train",
)

Downloading data: 100%|██████████| 92/92 [00:00<00:00, 92425.38files/s]
Generating train split: 92 examples [00:00, 4871.62 examples/s]


In [33]:
import os
import pandas as pd

DATASET_DIR = 'data/dataset'
# Metadata files live in the same folder as the images and must never be
# renamed to NNNN.png along with them.
METADATA_FILES = {'metadata.csv', 'metadata.json', 'metadata2.json'}

data_to_export = pd.read_csv(os.path.join(DATASET_DIR, 'metadata.csv'), sep=",", encoding='latin1')
# Export the CSV as JSON, then read it back as the frame whose names get rewritten
data_to_export.to_json(os.path.join(DATASET_DIR, 'metadata.json'), orient='records', indent=4)
json_data = pd.read_json(os.path.join(DATASET_DIR, 'metadata.json'))

# Plan every rename before touching the disk. Sorting makes the numbering
# deterministic (os.listdir order is arbitrary), and directories are skipped.
# Names are auto-incremented and zero-padded: 0000.png, 0001.png, ...
# NOTE(review): every file receives a .png suffix whatever its original
# extension, as before — confirm all source images really are PNGs.
image_files = sorted(
    name
    for name in os.listdir(DATASET_DIR)
    if name not in METADATA_FILES and os.path.isfile(os.path.join(DATASET_DIR, name))
)
rename_map = {old_name: f"{index:04d}.png" for index, old_name in enumerate(image_files)}

# Rename in two passes through a temporary name, so that a file which already
# carries a numeric name (e.g. 0003.png) can never be overwritten by another
# file that is being moved onto that name.
for old_name in image_files:
    os.rename(
        os.path.join(DATASET_DIR, old_name),
        os.path.join(DATASET_DIR, old_name + '.renaming'),
    )
for old_name, new_name in rename_map.items():
    os.rename(
        os.path.join(DATASET_DIR, old_name + '.renaming'),
        os.path.join(DATASET_DIR, new_name),
    )

# Apply the whole mapping in one step, so a name that was already rewritten is
# never remapped a second time; names absent from the folder stay unchanged.
json_data['file_name'] = json_data['file_name'].map(lambda name: rename_map.get(name, name))

# Save the metadata that points at the renamed files.
# Intended to run once per freshly exported folder: metadata.csv keeps the old names.
json_data.to_json(os.path.join(DATASET_DIR, 'metadata2.json'), orient='records', indent=4)

In [None]:
from torchvision import transforms

# Image preprocessing pipeline, applied in order:
#   resize to a square of side config.image_size -> random horizontal flip
#   -> tensor conversion -> Normalize with mean 0.5 / std 0.5
target_size = (config.image_size, config.image_size)
preprocess_steps = [
    transforms.Resize(target_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
]
preprocess = transforms.Compose(preprocess_steps)

def transform(examples):
    """Preprocess a batch of examples.

    Each entry of ``examples["image"]`` is converted to RGB and run through
    the module-level ``preprocess`` pipeline.

    Returns:
        A dict with the single key ``"images"`` holding the processed items,
        in the same order as the input.
    """
    processed = []
    for picture in examples["image"]:
        rgb_picture = picture.convert("RGB")
        processed.append(preprocess(rgb_picture))
    return {"images": processed}

# Register `transform` on the dataset so examples come back preprocessed
# under the "images" key.
dataset.set_transform(transform)

In [None]:
# Display the first example as a quick look at what the dataset returns.
dataset[0]

In [5]:
from accelerate import Accelerator
# Training setup
# Honour the precision requested in the config. fp16 mixed precision is meant
# for GPU runs, so fall back to full precision when no CUDA device is visible.
mixed_precision = config.mixed_precision if torch.cuda.is_available() else "no"
accelerator = Accelerator(mixed_precision=mixed_precision)

# The optimiser uses config.learning_rate. (A separate `learning_rate = 5e-6`
# used to be assigned after this line and was never read; it has been removed
# so the cell shows the rate that is actually applied.)
optimizer = torch.optim.Adam(unet.parameters(), lr=config.learning_rate)
training_dataloader = DataLoader(dataset, batch_size=config.train_batch_size, shuffle=True)
model = unet

# Only the model, optimiser and dataloader go through `prepare`. `scheduler`
# is the diffusion noise scheduler (PNDMScheduler), not a learning-rate
# scheduler, so it is kept as it is and not handed to Accelerate.
model, optimizer, training_dataloader = accelerator.prepare(
    model, optimizer, training_dataloader
)

NameError: name 'dataset' is not defined

In [None]:
# Diffusion fine-tuning is a regression problem: the UNet learns to predict the
# Gaussian noise that was mixed into the image latents, so the loss is MSE
# rather than a classification loss.
loss_function = torch.nn.MSELoss()

# Only the UNet is trained; the VAE and the text encoder stay frozen.
vae = pipe.vae
vae.requires_grad_(False)
text_encoder.requires_grad_(False)
model.train()

# NOTE(review): `transform` only returns an "images" entry, so no caption is
# available per batch. The UNet still needs text conditioning, so the loop
# conditions on the empty prompt. Replace this with per-image caption
# embeddings once captions are carried through the dataset transform.
empty_prompt_ids = tokenizer(
    "",
    padding="max_length",
    max_length=tokenizer.model_max_length,
    truncation=True,
    return_tensors="pt",
).input_ids.to(device)
with torch.no_grad():
    empty_prompt_embeds = text_encoder(empty_prompt_ids)[0]

# Single pass over the data, like the original cell (config.num_epochs is not used here).
for batch in training_dataloader:
    optimizer.zero_grad()

    # Batches are dicts produced by `transform`, not (inputs, targets) pairs.
    inputs = batch["images"].to(device)

    # Work in the VAE latent space, scaled as the pipeline expects.
    with torch.no_grad():
        latents = vae.encode(inputs).latent_dist.sample() * vae.config.scaling_factor

    # The regression target is the noise itself; each sample draws its own timestep.
    targets = torch.randn_like(latents)
    timesteps = torch.randint(
        0,
        scheduler.config.num_train_timesteps,
        (latents.shape[0],),
        device=latents.device,
    )
    noisy_latents = scheduler.add_noise(latents, targets, timesteps)

    encoder_hidden_states = empty_prompt_embeds.expand(latents.shape[0], -1, -1)
    outputs = model(noisy_latents, timesteps, encoder_hidden_states).sample

    loss = loss_function(outputs.float(), targets.float())
    accelerator.backward(loss)
    optimizer.step()
    # No `scheduler.step()` here: `scheduler` is the noise scheduler, whose
    # `step` is the inference-time denoising update and requires arguments.

In [2]:
# Look at one batch only: printing every batch dumps full image tensors into
# the notebook output. A per-key shape summary is enough for a sanity check.
# Assumes batches are dicts (as produced by `transform`) — TODO confirm.
first_batch = next(iter(training_dataloader))
{key: getattr(value, "shape", type(value)) for key, value in first_batch.items()}

NameError: name 'training_dataloader' is not defined

## Dreambooth


## Image generation

In [None]:
import torch
from diffusers import StableDiffusionPipeline, UNet2DConditionModel
ft_model_id = "FaceGen/media/dreambooth/FaceGen_LLDM61"
model_id = "runwayml/stable-diffusion-v1-5"

# Same fallback as the rest of the notebook: use the GPU only when torch sees one.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Fixed seed so the generated image can be reproduced on a re-run.
SEED = 0

# NOTE(review): the "/unet" suffix combined with subfolder="unet" appears to
# point at "<ft_model_id>/unet/unet" — confirm this matches the folder layout
# of the DreamBooth output before relying on it.
unet = UNet2DConditionModel.from_pretrained(f"{ft_model_id}/unet", subfolder="unet")

# Base pipeline with its UNet swapped for the fine-tuned one; the safety
# checker is disabled explicitly.
pipeline = StableDiffusionPipeline.from_pretrained(
    model_id,
    unet=unet,
    torch_dtype=torch.float,
    safety_checker=None,
    use_safetensors=True,
).to(device)

generator = torch.Generator(device=device).manual_seed(SEED)

# `images` holds a single image (index 0 of the pipeline result); the name is
# kept so anything referring to it keeps working.
images = pipeline(
    prompt="A photo of sks man",
    num_inference_steps=100,
    guidance_scale=10,
    generator=generator,
).images[0]
images