In [1]:
import torch
import torchvision.transforms as transforms
import torch.nn as nn
from diffusers import StableDiffusionImg2ImgPipeline
import math
from PIL import Image
import os
from torch.utils.data import Dataset , DataLoader
import torch.nn.functional as F
from torch.cuda.amp import autocast


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_id = "runwayml/stable-diffusion-v1-5"
# Load in the default float32. This pipeline class reports a `dtype` keyword
# as unexpected and ignores it, so passing one only produces a warning; the
# fine-tuning cells below also run the UNet and VAE in float32.
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id).to("cuda")

Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: 
```
pip install accelerate
```
.
Keyword arguments {'dtype': torch.float16} are not expected by StableDiffusionImg2ImgPipeline and will be ignored.
Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00,  9.75it/s]


We use this model because it is quite stable and already familiar with anime-style images.


We will modify only the key and value projections of the attention layers (`to_k` and `to_v`), not the query or output projections.

In [None]:
class LoRA(nn.Module):
    """Wrap a linear layer with a trainable low-rank additive update.

    The output is ``original(x) + (x @ lora_1 @ lora_2) * (alpha / rank)``.
    ``lora_2`` starts at zero, so a freshly wrapped layer returns exactly
    what the wrapped layer returns.

    Args:
        original: Layer to wrap. Must expose ``in_features`` and
            ``out_features`` and accept inputs whose last dimension is
            ``in_features``.
        rank: Inner dimension of the two low-rank factors.
        alpha: Numerator of the scaling factor ``alpha / rank``.
    """

    def __init__(self , original , rank , alpha):
        super().__init__()

        self.original = original
        self.in_feat = original.in_features
        self.out_feat = original.out_features
        self.scaling  = alpha / rank

        # Down-projection (in_feat -> rank) is random and up-projection
        # (rank -> out_feat) is zero, so their product is zero at start.
        self.lora_1 = nn.Parameter(torch.randn(self.in_feat , rank))
        self.lora_2 = nn.Parameter(torch.zeros(rank , self.out_feat))

    def forward(self , x):
        """Return the wrapped layer's output plus the scaled low-rank update."""
        # The input goes on the left so the feature dimension contracts:
        # (..., in_feat) @ (in_feat, rank) @ (rank, out_feat) -> (..., out_feat)
        low_rank = torch.matmul(torch.matmul(x , self.lora_1) , self.lora_2)
        return self.original(x) + low_rank * self.scaling

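We initialise `lora_2` to zeros so that the LoRA term is zero at the start: the wrapped layer initially computes exactly the original output, N = Wx + bias, and the model's behaviour stays unchanged until training updates the LoRA weights.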

In [None]:
# Freeze the whole UNet; only the LoRA factors created below stay trainable.
pipe.unet.requires_grad_(False)
pipe.unet.to('cuda' , dtype = torch.float16)
target_layers = ["to_k", "to_v"]
# Snapshot the module list first: the loop replaces children of the tree it
# is walking, so iterate over a fixed list instead of the live generator.
for name , layer in list(pipe.unet.named_modules()):
    parent_name , _ , layer_name = name.rpartition(".")
    # Match on the leaf name only. A substring test on the full dotted path
    # also matches "<...>.to_k.original" once a layer has been wrapped, which
    # would wrap the same layer again if this cell is re-run.
    if layer_name not in target_layers or not isinstance(layer , nn.Linear):
        continue

    parent = pipe.unet.get_submodule(parent_name)

    new = LoRA(layer , rank = 8 , alpha = 8)

    new.to("cuda" , dtype = torch.float16)
    setattr(parent , layer_name , new)



We replaced the `to_k` and `to_v` linear layers with LoRA-wrapped layers.

In [7]:
class Datasett(Dataset):
    """Image/caption pairs for fine-tuning.

    Images are read from ``img_dir``. The caption for the image at position
    ``index`` (in sorted filename order) is read from the file
    ``<index + 1, zero-padded to four digits>.txt`` inside ``cap_info``.

    Args:
        tokenizer: Callable applied to the caption text; its result must
            expose ``input_ids``.
        img_dir: Directory containing the training images.
        cap_info: Directory containing the numbered caption files.
        size: Side length the images are resized to.
    """

    def __init__(self , tokenizer , img_dir , cap_info , size = 512):
        self.tokenizer = tokenizer
        self.img_dir = img_dir
        self.cap_dir = cap_info
        self.size = size
        # os.listdir gives no ordering guarantee; sort so the position-based
        # caption lookup pairs the same files on every run and platform.
        self.images = sorted(f for f in os.listdir(img_dir) if f.endswith(("png" , "jpg")))
        self.transform = transforms.Compose([
            transforms.Resize((size, size)),
            transforms.ToTensor(),
            transforms.Normalize([0.5] , [0.5])
        ])

    def __len__(self): return len(self.images)

    def _caption_path(self , index):
        """Return the caption file path for the image at position ``index``."""
        # Captions are numbered from 1: "0001.txt", "0002.txt", ...
        return os.path.join(self.cap_dir , f"{index + 1:04d}.txt")

    def __getitem__(self, index):
        """Return ``(image_tensor, caption_token_ids)`` for one sample."""
        # Normalise negative positions so image and caption stay paired.
        if index < 0:
            index += len(self.images)

        img = self.images[index]
        img_path = os.path.join(self.img_dir , img)
        image = Image.open(img_path).convert('RGB')
        image = self.transform(image)

        file_path = self._caption_path(index)

        with open(file_path , "r")  as f:
            caption = f.read().strip()

        token = self.tokenizer(caption , padding = "max_length" , truncation = True , return_tensors = "pt").input_ids[0]

        return image , token

In [9]:
# Build the training set from the local image and caption folders.
train_da = Datasett(
    tokenizer=pipe.tokenizer,
    img_dir=r"C:\Users\sharm\Downloads\dell\finetune\dataset_512\anime_images",
    cap_info=r"C:\Users\sharm\Downloads\dell\finetune\dataset_512\info",
)

# Serve it as shuffled mini-batches of four samples.
train_loader = DataLoader(dataset=train_da, batch_size=4, shuffle=True)


In [11]:
# Train in full precision: cast the VAE and the UNet to float32.
pipe.vae.to(dtype = torch.float32)
pipe.unet.to(dtype = torch.float32)
# Optimise only the UNet parameters that still require gradients.
params = list(filter(lambda weight: weight.requires_grad , pipe.unet.parameters()))
optimizer = torch.optim.Adam(params = params , lr = 1e-5)

In [None]:
# Read these from the loaded components instead of hard-coding them.
latent_scale = pipe.vae.config.scaling_factor
num_train_steps = pipe.scheduler.config.num_train_timesteps

for epoch in range(2):
    running_loss = 0.0
    pipe.unet.train()
    for image , cap in train_loader:
        image , cap = image.to("cuda" , dtype = torch.float32) , cap.to("cuda")

        # The VAE and the text encoder are not trained, so skip autograd.
        with torch.no_grad():
            latents = pipe.vae.encode(image).latent_dist.sample() * latent_scale
            # Match the UNet's float32 inputs whatever dtype the encoder uses.
            text = pipe.text_encoder(cap)[0].to(dtype = torch.float32)

        # randn_like already matches the latents' device and dtype.
        noise = torch.randn_like(latents)
        timestep = torch.randint(0 , num_train_steps , (latents.shape[0],) , device = latents.device).long()
        noise_latents = pipe.scheduler.add_noise(latents , noise , timestep)

        pred = pipe.unet(noise_latents,
                  timestep , 
                  text).sample

        # The UNet is trained to predict the noise that was added.
        loss = F.mse_loss(pred , noise , reduction = "mean")

        # Clear gradients before backward so that anything left over from an
        # interrupted earlier run of this cell cannot leak into this step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        print(f"epoch{epoch} | loss: {loss.item()} | total loss {running_loss}")


In [None]:
# 1. Load the clean base model first
pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")

# 2. Re-create the LoRA wrappers on the fresh UNet, with the same rank/alpha
#    used for training. A plain UNet has no "lora_1"/"lora_2" parameters, so
#    without the wrappers strict=False would silently drop every LoRA tensor
#    and the unmodified base model would be used.
for name, layer in list(pipe.unet.named_modules()):
    parent_name, _, layer_name = name.rpartition(".")
    if layer_name in ("to_k", "to_v") and isinstance(layer, nn.Linear):
        wrapper = LoRA(layer, rank=8, alpha=8).to("cuda", dtype=torch.float16)
        setattr(pipe.unet.get_submodule(parent_name), layer_name, wrapper)

# 3. Load the LoRA-only checkpoint. weights_only=True restricts unpickling to
#    tensors and plain containers instead of arbitrary Python objects.
lora_checkpoint = torch.load(
    r"C:\Users\sharm\Downloads\dell\finetune\lora_only.pth",
    map_location="cuda",
    weights_only=True,
)

# 4. Inject the weights. strict=False is needed because the checkpoint holds
#    no base weights, but every tensor it does hold must land somewhere.
load_result = pipe.unet.load_state_dict(lora_checkpoint, strict=False)
if load_result.unexpected_keys:
    raise RuntimeError(
        f"{len(load_result.unexpected_keys)} checkpoint tensors did not match any UNet "
        f"parameter, e.g. {load_result.unexpected_keys[0]!r}"
    )
print("Successfully loaded Anime LoRA weights!")

prompt = "A professional anime portrait, sharp lines, studio ghibli style, high resolution"

# Load the init image; the pipeline is given an RGB PIL image.
image = Image.open(r"C:\Users\sharm\Downloads\dell\finetune\as.png").convert("RGB")

# guidance_scale is not passed, so the pipeline default applies.
result = pipe(
    prompt=prompt,
    image=image,  # PIL Image
    strength=0.75
).images[0]

result.save("ak.png")

Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: 
```
pip install accelerate
```
.
Loading pipeline components...:  29%|██▊       | 2/7 [00:02<00:05,  1.14s/it]`torch_dtype` is deprecated! Use `dtype` instead!
Loading pipeline components...: 100%|██████████| 7/7 [00:04<00:00,  1.70it/s]
  lora_checkpoint = torch.load(r"C:\Users\sharm\Downloads\dell\finetune\lora_only.pth")


Successfully loaded Anime LoRA weights!


 84%|████████▍ | 31/37 [04:44<00:52,  8.73s/it]

In [None]:


# Method B: Save only LoRA weights (manual extraction)
# NOTE: run this on the pipeline that was just trained, i.e. before `pipe` is
# re-created from the base model; a freshly loaded UNet has no LoRA parameters.
# Plain detached CPU tensors are stored instead of live CUDA Parameters, so the
# file carries no autograd state and can be loaded without a GPU.
lora_weights = {
    name: param.detach().cpu()
    for name, param in pipe.unet.named_parameters()
    if 'lora' in name.lower()
}

if lora_weights:
    torch.save(lora_weights, "lora_only.pth")
    print(f"Saved {len(lora_weights)} LoRA parameters")
else:
    print("No LoRA parameters found!")
