In [None]:
# First remove the existing mount at /content/drive, if there is one.
!fusermount -u /content/drive

# Then mount Google Drive again at /content/drive, requesting a forced remount.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
import os
# Make the project folder on the mounted Drive the current working directory.
os.chdir('/content/drive/MyDrive/Gen AI PROJECT/GenAI')
# Print the working directory that is now in effect as a sanity check.
print("Çalışma dizini:", os.getcwd())

In [None]:
import os
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
from accelerate import Accelerator
from transformers import CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
from peft import LoraConfig, get_peft_model
import pandas as pd

# --- Dataset definition ---
class CustomDataset(Dataset):
    """Image/prompt pairs described by a CSV file.

    The first CSV column is read as the image path and the second column as
    the prompt. Backslashes in the path column are replaced with forward
    slashes when the file is loaded.
    """

    def __init__(self, csv_file, transform=None):
        """Load ``csv_file`` and remember the optional image ``transform``.

        Args:
            csv_file: CSV location, passed straight to ``pandas.read_csv``.
            transform: Optional callable applied to every loaded image.
        """
        table = pd.read_csv(csv_file)
        path_column = table.iloc[:, 0].astype(str)
        # Literal (non-regex) replacement of backslashes with forward slashes.
        table.iloc[:, 0] = path_column.str.replace('\\', '/', regex=False)
        self.transform = transform
        self.df = table

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, prompt)`` for row ``idx``.

        The image is opened from the path in the first column, converted to
        RGB and, when a transform was supplied, passed through it.

        Raises:
            FileNotFoundError: If the image path does not exist.
        """
        img_path = self.df.iloc[idx, 0]
        prompt = self.df.iloc[idx, 1]

        if os.path.exists(img_path):
            image = Image.open(img_path).convert("RGB")
            return (self.transform(image) if self.transform else image), prompt

        raise FileNotFoundError(f"Dosya bulunamadı: {img_path}")

# --- Image transforms ---
# Resize to 512x512, convert to a tensor, then normalise each of the three
# channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(512, 512)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# --- Paths ---
CSV_PATH = '/content/drive/MyDrive/Gen AI PROJECT/GenAI/photo_labels.csv'

# Dataset over the CSV rows, served in shuffled batches of 4 by 2 workers.
dataset = CustomDataset(csv_file=CSV_PATH, transform=transform)
loader = DataLoader(
    dataset=dataset,
    batch_size=4,
    num_workers=2,
    shuffle=True,
)

# --- Accelerator and device ---
# fp16 mixed precision; training runs on whichever device the accelerator picked.
accelerator = Accelerator(mixed_precision="fp16")
device = accelerator.device

# --- Model components ---
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
vae = AutoencoderKL.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="vae")
unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet")
noise_scheduler = DDPMScheduler.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="scheduler")

# Neither the VAE nor the text encoder is handed to the optimizer below, so
# freeze them: no gradients are then computed or stored for weights that are
# never updated.
vae.requires_grad_(False)
text_encoder.requires_grad_(False)

vae.to(device)

# --- LoRA config ---
# Rank-4 adapters (alpha 16, dropout 0.05, no bias terms) on the modules named
# to_q / to_k / to_v.
lora_config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["to_q", "to_k", "to_v"],
    lora_dropout=0.05,
    bias="none"
)

unet_lora = get_peft_model(unet, lora_config)


# --- Optimizer ---
# Hand AdamW only the parameters that are actually trainable instead of every
# parameter of the wrapped UNet.
optimizer = torch.optim.AdamW(
    [param for param in unet_lora.parameters() if param.requires_grad],
    lr=1e-4,
)

# --- Prepare with the accelerator ---
unet_lora, text_encoder, optimizer, loader = accelerator.prepare(
    unet_lora, text_encoder, optimizer, loader
)

# --- Training loop ---
# Runs until max_train_steps optimizer steps have been taken; the outer epoch
# range is only an upper bound on how many passes over the loader are allowed.
max_train_steps = 1000
global_step = 0

# from_pretrained() leaves the UNet in eval mode; switch the LoRA-wrapped model
# to train mode so the lora_dropout set in the LoRA config takes effect.
unet_lora.train()

for epoch in range(1000):
    for images, prompts in loader:
        tokens = tokenizer(
            prompts,
            padding="max_length",
            truncation=True,
            max_length=tokenizer.model_max_length,
            return_tensors="pt"
        ).to(device)

        # The text encoder and the VAE only produce inputs for the UNet
        # (neither is in the optimizer), so run both without recording an
        # autograd graph.
        with torch.no_grad():
            text_embeds = text_encoder(**tokens).last_hidden_state

            # The dataset transform already normalises pixels with mean 0.5 /
            # std 0.5, i.e. to [-1, 1]. Rescaling again with `* 2 - 1` would
            # push them to [-3, 1], so the batch is encoded as-is.
            latents = vae.encode(images.to(device)).latent_dist.sample()
            latents = latents * vae.config.scaling_factor

        # Sample the noise and one random timestep per batch element.
        noise = torch.randn_like(latents)
        timesteps = torch.randint(
            0,
            noise_scheduler.config.num_train_timesteps,
            (latents.shape[0],),
            device=device
        )

        noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

        pred_noise = unet_lora(noisy_latents, timesteps, encoder_hidden_states=text_embeds).sample

        # Objective: mean squared error between predicted and sampled noise.
        loss = F.mse_loss(pred_noise, noise)

        accelerator.backward(loss)
        optimizer.step()
        optimizer.zero_grad()

        global_step += 1
        print(f"Step {global_step} - Loss: {loss.item():.4f}")

        if global_step >= max_train_steps:
            break
    # Leave the epoch loop too once the step budget is used up.
    if global_step >= max_train_steps:
        break

# --- Save the model ---
# accelerator.prepare() may hand back wrapped modules (for example in
# multi-process runs), which do not expose save_pretrained(). Wait for every
# process, then save the unwrapped models once from the main process.
accelerator.wait_for_everyone()
if accelerator.is_main_process:
    accelerator.unwrap_model(unet_lora).save_pretrained("sd15-lora-finetuned")
    tokenizer.save_pretrained("sd15-lora-finetuned")
    accelerator.unwrap_model(text_encoder).save_pretrained("sd15-lora-finetuned")


To generate an image, adjust the `prompt` variable in the cell below.

In [None]:
import torch
from diffusers import (
    StableDiffusionPipeline,
    EulerAncestralDiscreteScheduler,
    AutoencoderKL,
    UNet2DConditionModel,
)
from transformers import CLIPTextModel, CLIPTokenizer
from peft import PeftModel

# Model and LoRA paths
model_id = "runwayml/stable-diffusion-v1-5"
lora_path = "./sd15-lora-finetuned"  # Replace with your LoRA weights path

# Device setup: use the GPU when one is available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and text encoder
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14").to(device)

# Load VAE and UNet base models
vae = AutoencoderKL.from_pretrained(model_id, subfolder="vae").to(device)
unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="unet")

# Load LoRA weights into UNet
unet_lora = PeftModel.from_pretrained(unet, lora_path).to(device)

# Load Euler Ancestral Scheduler
scheduler = EulerAncestralDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")

# CLIPFeatureExtractor is a deprecated alias of CLIPImageProcessor, so the
# image processor class is used directly.
from transformers import CLIPImageProcessor

feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Assemble the pipeline from the individually loaded components; no safety
# checker is attached.
pipe = StableDiffusionPipeline(
    vae=vae,
    unet=unet_lora,
    tokenizer=tokenizer,
    text_encoder=text_encoder,
    scheduler=scheduler,
    safety_checker=None,
    feature_extractor=feature_extractor,
)
pipe = pipe.to(device)

# Inference parameters
prompt = "A man smiling towards the sea "
num_inference_steps = 50
guidance_scale = 7.5

# Run the pipeline at 512x512 and keep the first image it returns
image = pipe(
    prompt=prompt,
    height=512,
    width=512,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
).images[0]

# Write the result to disk, then open it with the image viewer
image.save("lora_output.png")
image.show()

After loading the model, you can reuse it by just adjusting the `prompt` variable.

In [None]:
# Inference parameters
prompt = "<fkylmz> side look smiling at beach"
num_inference_steps = 50
guidance_scale = 7.5

# Run the already-built pipeline at 512x512 and keep the first image it returns
image = pipe(
    prompt=prompt,
    height=512,
    width=512,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
).images[0]

# Write the result to disk, then open it with the image viewer
image.save("lora_output.png")
image.show()

In [None]:
# List the contents of the sd15-lora-finetuned directory.
!ls sd15-lora-finetuned

In [None]:
# Quietly install torch and torchvision with pip.
# NOTE(review): this cell sits after the cells that already import torch;
# confirm whether it is meant to run first.
!pip install -q torch torchvision

In [None]:
# Quietly install torch and torchvision with pip.
# NOTE(review): same command as the preceding cell; likely a duplicate.
!pip install -q torch torchvision
