In [1]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt
import torch
import matplotlib.pyplot as plt
from diffusers import StableDiffusionPipeline, UNet2DConditionModel
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms

class SketchToAnimeDataset(Dataset):
    def __init__(self, sketch_dir, anime_dir, image_size=512):
        self.sketch_dir = sketch_dir
        self.anime_dir = anime_dir
        self.sketches = sorted(os.listdir(sketch_dir))
        self.animes = sorted(os.listdir(anime_dir))
        self.transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5])
        ])

    def __len__(self):
        return len(self.sketches)

    def __getitem__(self, idx):
        sketch_path = os.path.join(self.sketch_dir, self.sketches[idx])
        anime_path = os.path.join(self.anime_dir, self.animes[idx])

        sketch = Image.open(sketch_path).convert("RGB")
        anime = Image.open(anime_path).convert("RGB")

        return {
            "sketch": self.transform(sketch),
            "anime": self.transform(anime)
        }

# Inicializamos dataset
# dataset = SketchToAnimeDataset("dataset/sketches", "dataset/anime")
# loader = DataLoader(dataset, batch_size=2, shuffle=True)


# Ejemplo de uso
dataset = SketchToAnimeDataset(r"D:\Ciencias\Drawnime\data\train\sketches", r"D:\Ciencias\Drawnime\data\train\faces")
loader = DataLoader(dataset, batch_size=2, shuffle=True)

# solo 500 imagenes
loader = DataLoader(torch.utils.data.Subset(dataset, range(500)), batch_size=4, shuffle=True)

# Mostrar ejemplos
batch = next(iter(loader))
plt.figure(figsize=(10, 4))
for i in range(4):
    plt.subplot(2, 4, i+1)
    plt.imshow(((batch["sketch"][i].permute(1, 2, 0) * 0.5) + 0.5))
    plt.title("Sketch")
    plt.axis("off")
    plt.subplot(2, 4, i+5)
    plt.imshow(((batch["anime"][i].permute(1, 2, 0) * 0.5) + 0.5))
    plt.title("Anime")
    plt.axis("off")
plt.show()


NameError: name 'torch' is not defined

In [3]:
from diffusers import StableDiffusionPipeline
from diffusers.models.attention_processor import LoRAAttnProcessor
import torch
import os

model_id = "runwayml/stable-diffusion-v1-5"

pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    variant="fp16"
).to("cuda")

# Reducir uso de VRAM
pipe.enable_attention_slicing()
pipe.enable_vae_tiling()
pipe.unet.enable_gradient_checkpointing()

# Crear y asignar procesadores LoRA a todas las capas de atención
lora_attn_procs = {}
for name in pipe.unet.attn_processors.keys():
    lora_attn_procs[name] = LoRAAttnProcessor()

pipe.unet.set_attn_processor(lora_attn_procs)

# Verificamos parámetros entrenables
trainable_params = sum(p.numel() for p in pipe.unet.parameters() if p.requires_grad)
print(f"Parámetros LoRA entrenables: {trainable_params:,}")


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]`torch_dtype` is deprecated! Use `dtype` instead!
Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00, 16.38it/s]


Parámetros LoRA entrenables: 859,520,964


In [None]:
from torch import nn, optim
from tqdm.auto import tqdm

optimizer = optim.AdamW(
    [p for p in pipe.unet.parameters() if p.requires_grad],
    lr=1e-4
)
loss_fn = nn.MSELoss()

epochs = 5
pipe.unet.train()

for epoch in range(epochs):
    for batch in tqdm(loader, desc=f"Epoch {epoch+1}/{epochs}"):
        sketch = batch["sketch"].to("cuda", dtype=torch.float16)
        anime = batch["anime"].to("cuda", dtype=torch.float16)

        with torch.cuda.amp.autocast():
            # Codificar el sketch con el VAE
            latents = pipe.vae.encode(sketch).latent_dist.sample() * 0.18215

            # Predicción UNet (ruido → imagen anime)
            noise_pred = pipe.unet(latents, timestep=torch.tensor([0], device="cuda")).sample

            # Target: latentes del anime
            target_latents = pipe.vae.encode(anime).latent_dist.sample() * 0.18215

            loss = loss_fn(noise_pred, target_latents)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{epochs} - Loss: {loss.item():.4f}")

# Guardamos LoRA entrenado
os.makedirs("lora_anime", exist_ok=True)
pipe.unet.save_attn_procs("lora_anime")
print("✅ LoRA guardado en ./lora_anime/")


  scaler = torch.cuda.amp.GradScaler()  # para AMP seguro
Epoch 1/5:   0%|          | 1/500 [00:02<22:24,  2.70s/it]

✅ Epoch 1/5 — Loss: 1.2891


Epoch 1/5:   0%|          | 2/500 [00:12<54:51,  6.61s/it]

✅ Epoch 1/5 — Loss: nan


Epoch 1/5:   1%|          | 3/500 [00:20<1:02:44,  7.57s/it]

✅ Epoch 1/5 — Loss: nan


Epoch 1/5:   1%|          | 4/500 [00:28<1:04:22,  7.79s/it]

✅ Epoch 1/5 — Loss: nan


Epoch 1/5:   1%|          | 4/500 [00:36<1:16:15,  9.22s/it]


KeyboardInterrupt: 

In [None]:
from diffusers import StableDiffusionImg2ImgPipeline
from PIL import Image

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16
).to("cuda")

pipe.unet.load_attn_procs("./lora-anime-enhancer")

image = Image.open("test_sketch.png").convert("RGB")
prompt = "high quality anime style, clean lines, vibrant colors"

result = pipe(prompt=prompt, image=image, strength=0.7, guidance_scale=7.5)
result.images[0].save("output.png")