In [None]:
# @title Install requirements
# Clone the InstantID repository and make it the working directory, so the
# repo-relative paths used by later cells (gradio_demo/..., ./checkpoints) resolve.
!git clone https://github.com/InstantID/InstantID
%cd InstantID
# Install the demo requirements first, then pin timm and diffusers to fixed
# versions (these later installs take precedence over whatever the
# requirements file selected for the same packages).
!pip install -r gradio_demo/requirements.txt
!pip install timm==0.6.7
!pip install diffusers==0.27.2

In [None]:
# Compatibility shim between huggingface_hub and diffusers.
import huggingface_hub

# When this huggingface_hub build does not expose `cached_download`, publish
# `hf_hub_download` under that name; an existing attribute is left untouched.
# NOTE(review): this only makes the name resolvable — the two functions are not
# guaranteed to share a signature, so confirm nothing actually calls it.
try:
    huggingface_hub.cached_download
except AttributeError:
    huggingface_hub.cached_download = huggingface_hub.hf_hub_download


In [None]:
# @title Download required models

# Run the repository's download script from the repo root. The pipeline setup
# cell later reads ./checkpoints/ip-adapter.bin and the "antelopev2" face model
# from disk.
# NOTE(review): the exact set of files fetched is defined in download_models.py — confirm.
!python gradio_demo/download_models.py -y

In [None]:
# Pin huggingface_hub to the version range required by diffusers.
# NOTE(review): huggingface_hub was already imported by an earlier cell, so a
# runtime restart may be needed before this version is the one in use — confirm.
!pip install -q --force-reinstall "huggingface_hub==0.34.0"


In [None]:
# Install the pinned insightface release; the pipeline setup cell imports
# FaceAnalysis from it.
!pip install -q "insightface==0.7.3"


In [None]:
# Instalar dependências do insightface
!pip install -q "onnxruntime-gpu" "onnxruntime"


In [None]:
# Install controlnet-aux; the pipeline setup cell imports ZoeDetector from it.
!pip install -q "controlnet-aux"


In [None]:
# @title Set up the pipeline

import diffusers
from diffusers.utils import load_image
from diffusers.models import ControlNetModel, AutoencoderKL
from diffusers import DPMSolverMultistepScheduler
import cv2
import torch
import numpy as np
from PIL import Image

from insightface.app import FaceAnalysis
from pipeline_stable_diffusion_xl_instantid_img2img import (
    StableDiffusionXLInstantIDImg2ImgPipeline,
    draw_kps,
)
from controlnet_aux import ZoeDetector

# Face detector / embedder. With root="./" insightface looks for the
# 'antelopev2' model pack under ./models. CUDA is preferred, CPU is the fallback.
app = FaceAnalysis(name="antelopev2", root="./", providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
app.prepare(ctx_id=0, det_size=(640, 640))

# InstantID weights, expected under ./checkpoints after the download cell has run.
face_adapter = "./checkpoints/ip-adapter.bin"
controlnet_path = "./checkpoints/ControlNetModel"
# Public SDXL depth ControlNet, fetched from the Hugging Face Hub.
zoedepth_path = "diffusers/controlnet-zoe-depth-sdxl-1.0"

# Load the two ControlNets. IdentityNet must come from the InstantID checkpoint:
# loading it from the depth checkpoint (as before) yields two depth ControlNets
# and no identity conditioning at all.
identitynet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
zoedepthnet = ControlNetModel.from_pretrained(zoedepth_path, torch_dtype=torch.float16)

vae = AutoencoderKL.from_pretrained(
    "stabilityai/sdxl-vae",
    torch_dtype=torch.float16,
)

# Order matters: callers pass control images and conditioning scales as
# [face, depth], matching [identitynet, zoedepthnet] here.
pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    controlnet=[identitynet, zoedepthnet],
    torch_dtype=torch.float16,
)

# Replace the default scheduler with DPM-Solver multistep using Karras sigmas,
# keeping the rest of the original scheduler configuration.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config,
    use_karras_sigmas=True,
)
pipe.to("cuda")

# Attach the InstantID face adapter and set its scale.
pipe.load_ip_adapter_instantid(face_adapter)
pipe.set_ip_adapter_scale(0.8)


In [None]:
# === BATCH GENERATION FROM GOOGLE DRIVE ===
# For every real photo: detect the face, build the InstantID conditioning
# (identity embedding, keypoint image, depth map) and generate one image per
# style. Requires the pipeline setup cell to have been run first (it provides
# `app`, `pipe`, `draw_kps`, `ZoeDetector`, `cv2` and `np`).

import os
import glob
from PIL import Image
from google.colab import drive

# 1. Mount Google Drive
drive.mount("/content/drive")

# 2. Input and output folders
input_folder = "/content/drive/MyDrive/dataset/real_fake/faces_real"
output_folder = "/content/drive/MyDrive/dataset_gerado/instantid_xl"
os.makedirs(output_folder, exist_ok=True)

print("Lendo fotos reais em:", input_folder)

# 3. Real images to process (sorted for a stable processing order)
fotos_reais = sorted(
    glob.glob(os.path.join(input_folder, "*.jpg")) +
    glob.glob(os.path.join(input_folder, "*.jpeg")) +
    glob.glob(os.path.join(input_folder, "*.png"))
)

print("Encontradas", len(fotos_reais), "imagens.")

if len(fotos_reais) == 0:
    raise SystemExit(f"Nenhuma imagem encontrada em {input_folder}")

# 4. Styles (3 variations per face)
estilos = [
    "Professional face portrait, studio lighting, neutral expression, realistic skin texture",
    "Close-up photo, happy smiling expression, natural sunlight, outdoor park background",
    "Cinematic headshot, neon night lighting reflections on face, mysterious style",
]

# 5. Depth estimator that produces the control image for the depth ControlNet
zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
zoe.to("cuda")


def _area_bbox(face):
    """Return the bounding-box area of a detected face (bbox assumed to be [x1, y1, x2, y2])."""
    x1, y1, x2, y2 = face["bbox"]
    return (x2 - x1) * (y2 - y1)


# 6. Main loop
sem_rosto = []  # paths skipped because no face was detected
for i, caminho in enumerate(fotos_reais, start=1):
    nome_id = os.path.splitext(os.path.basename(caminho))[0]
    print(f"\n[{i}/{len(fotos_reais)}] Processando:", nome_id)

    face_image = Image.open(caminho).convert("RGB")

    # The detector is fed a BGR array; the identity embedding is mandatory for
    # the pipeline, so a photo with no detectable face cannot be processed.
    faces = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
    if not faces:
        print("   ✗ Nenhum rosto detectado; imagem ignorada.")
        sem_rosto.append(caminho)
        continue

    # Use only the largest face in the photo.
    face_info = max(faces, key=_area_bbox)
    face_emb = face_info["embedding"]
    face_kps = draw_kps(face_image, face_info["kps"])

    # The depth map is resized back to the photo size so the init image and
    # both control images share one resolution.
    depth_image = zoe(face_image).resize(face_image.size)

    # One control image per ControlNet, in pipeline order: [face keypoints, depth].
    images = [face_kps, depth_image]

    for v, prompt in enumerate(estilos, start=1):
        result = pipe(
            prompt=prompt,
            image_embeds=face_emb,
            image=face_image,
            control_image=images,
            num_inference_steps=25,
            guidance_scale=5.0,
        )
        img = result.images[0]

        out_path = os.path.join(output_folder, f"{nome_id}_fake_var{v}.png")
        img.save(out_path)
        print(f"   ✓ Variação {v} salva em:", out_path)

if sem_rosto:
    print(f"\n{len(sem_rosto)} imagem(ns) ignorada(s) por falta de rosto detectado.")

print("\n✓ FINALIZADO! Imagens geradas em:", output_folder)


In [None]:
# TEST: check whether the face detector finds a face in a single image.
# Requires `app` from the pipeline setup cell.
from PIL import Image
import cv2
import numpy as np
import os

teste_path = "/content/drive/MyDrive/dataset/real_fake/faces_real/real_id1_0000.mp4_frame_00000.jpg"  # adjust if the file name differs
print("Usando imagem de teste:", teste_path, os.path.exists(teste_path))

img = Image.open(teste_path).convert("RGB")
# The detector is fed a BGR array, so convert from PIL's RGB order.
img_np = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

infos = app.get(img_np)
print("Qtde de rostos detectados:", len(infos))

if infos:
    # Pick the face with the largest bounding-box area; the first detection is
    # not necessarily the largest one. Assumes bbox is [x1, y1, x2, y2].
    maior_rosto = max(
        infos,
        key=lambda f: (f["bbox"][2] - f["bbox"][0]) * (f["bbox"][3] - f["bbox"][1]),
    )
    print("BBox do maior rosto:", maior_rosto["bbox"])
else:
    print("Nenhum rosto detectado.")


In [None]:
# @title Load your LoRA!
# @markdown You can load a LoRA directly from Hugging Face by browsing [here](https://huggingface.co/models?library=diffusers&other=lora), or download a LoRA from CivitAI/Tensor.Art and place it on the colab folder.
# LoRA to apply: the source repository and the weight file inside it.
lora_repo_id = "Norod78/sdxl-chalkboarddrawing-lora"
lora_weight_file = "SDXL_ChalkBoardDrawing_LoRA_r8.safetensors"
pipe.load_lora_weights(lora_repo_id, weight_name=lora_weight_file)

# Enable sequential CPU offload on the pipeline once the LoRA is loaded.
pipe.enable_sequential_cpu_offload()

In [None]:
# @title Generate!

face_image_path = "/content/drive/MyDrive/dataset/real_fake/faces_real/real_id1_0000.mp4_frame_00000.jpg" # @param {type:"string"}
prompt = "A colorful ChalkBoardDrawing of a man" # @param {type:"string"}
negative_prompt = "blurry, ultra-realism, detailed" # @param {type:"string"}
# @markdown The higher the `denoising_strength`, the less similar the result is to the original image.
denoising_strength = 0.85 # @param {type:"slider", min:0, max:1, step:0.01}
guidance_scale = 7 # @param {type:"number"}
face_control_strength = 0.8 # @param {type:"slider", min:0, max:1, step: 0.01}
depth_control_strength = 0.8 # @param {type:"slider", min:0, max:1, step: 0.01}

# Build every conditioning input inside this cell, so it does not depend on
# variables left over from other cells (`face_emb` was never defined anywhere).
# Requires the pipeline setup cell (`app`, `pipe`, `draw_kps`, `ZoeDetector`,
# `Image`, `cv2`, `np`).
face_image = Image.open(face_image_path).convert("RGB")

# The detector is fed a BGR array; keep only the largest detected face.
face_infos = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
if not face_infos:
    raise ValueError(f"No face detected in {face_image_path}")
# Assumes bbox is [x1, y1, x2, y2].
largest_face = max(
    face_infos,
    key=lambda f: (f["bbox"][2] - f["bbox"][0]) * (f["bbox"][3] - f["bbox"][1]),
)
face_emb = largest_face["embedding"]
face_kps = draw_kps(face_image, largest_face["kps"])

# Depth map for the second ControlNet, resized back to the photo size.
zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
zoe.to("cuda")
depth_image = zoe(face_image).resize(face_image.size)

# One control image per ControlNet, in pipeline order: [face keypoints, depth].
images = [face_kps, depth_image]

image = pipe(
    prompt,
    negative_prompt=negative_prompt,
    width=1024,
    height=1024,
    image_embeds=face_emb,
    image=face_image,
    strength=denoising_strength,
    control_image=images,
    num_inference_steps=20,
    guidance_scale = guidance_scale,
    # Scales follow the same [face, depth] order as the control images.
    controlnet_conditioning_scale=[face_control_strength, depth_control_strength],
).images[0]
image