In [None]:
# First remove the existing Drive mount, if there is one
# NOTE(review): force_remount=True below presumably makes this manual unmount redundant — confirm
!fusermount -u /content/drive

# Then mount Google Drive again
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import os
# Make the project folder on the mounted Drive the working directory, so the
# relative paths used by later cells (e.g. the LoRA output folder) resolve there.
os.chdir('/content/drive/MyDrive/Gen AI PROJECT/GenAI')
print("Çalışma dizini:", os.getcwd())

Çalışma dizini: /content/drive/MyDrive/Gen AI PROJECT/GenAI


In [None]:
import os
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
from accelerate import Accelerator
from transformers import CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
from peft import LoraConfig, get_peft_model
import pandas as pd

# --- Dataset definition ---
class CustomDataset(Dataset):
    """Image/caption dataset backed by a CSV file.

    The first CSV column holds the image path and the second column holds the
    text prompt for that image. Backslashes in paths are replaced by forward
    slashes so Windows-style paths also resolve on POSIX systems.

    Args:
        csv_file: Path of the CSV file to read.
        transform: Optional callable applied to the loaded RGB PIL image.

    Raises:
        IndexError: If the CSV has fewer than two columns.
    """

    def __init__(self, csv_file, transform=None):
        self.df = pd.read_csv(csv_file)
        path_col, prompt_col = self.df.columns[0], self.df.columns[1]
        self.df[path_col] = self.df[path_col].astype(str).str.replace('\\', '/', regex=False)
        # A blank caption is parsed as NaN (a float), which cannot be batched
        # or tokenized as text; store an empty string instead.
        self.df[prompt_col] = self.df[prompt_col].fillna("").astype(str)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (image/prompt pairs) in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, prompt)`` for row ``idx``.

        Raises:
            FileNotFoundError: If the image path of the row does not exist.
        """
        img_path = self.df.iloc[idx, 0]
        prompt = self.df.iloc[idx, 1]

        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Dosya bulunamadı: {img_path}")

        # convert() yields a fully loaded, independent image, so the file
        # handle can be closed as soon as the with-block exits.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image, prompt

# --- Image transforms ---
# Resize to 512x512, convert to a tensor, then normalize each of the three
# channels with mean 0.5 and std 0.5.
# NOTE(review): assuming ToTensor yields values in [0, 1], the output lies in
# [-1, 1]; no further rescaling should be applied downstream — confirm.
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
])

# --- Paths, dataset and data loader ---
CSV_PATH = '/content/drive/MyDrive/Gen AI PROJECT/GenAI/photo_labels.csv'

# Image/prompt pairs listed in the CSV, with the image transform applied.
dataset = CustomDataset(CSV_PATH, transform=transform)

# Shuffled mini-batches of 4 samples, loaded by 2 worker processes.
loader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

# --- Accelerator and device ---
# Request fp16 mixed precision and take the target device from the accelerator.
accelerator = Accelerator(mixed_precision="fp16")
device = accelerator.device

# --- Model components ---
# CLIP tokenizer and text encoder used to embed the prompts.
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")

# Stable Diffusion v1.5 parts; the VAE goes straight to the accelerator device.
vae = AutoencoderKL.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="vae"
).to(device)
unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="unet"
)
noise_scheduler = DDPMScheduler.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="scheduler"
)

# --- LoRA configuration ---
# Rank-4 adapters (alpha 16, dropout 0.05, no bias terms) on the modules
# named to_q / to_k / to_v.
lora_config = LoraConfig(
    target_modules=["to_q", "to_k", "to_v"],
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the UNet so the adapters above are attached to it.
unet_lora = get_peft_model(unet, lora_config)


# --- Optimizer ---
# Give AdamW only the parameters that require gradients (the LoRA adapter
# weights); parameters with requires_grad=False never receive gradients and
# do not belong in the optimizer. AdamW raises ValueError if the list is empty,
# which surfaces a mis-configured adapter immediately.
optimizer = torch.optim.AdamW(
    [param for param in unet_lora.parameters() if param.requires_grad],
    lr=1e-4,
)

# --- Prepare with Accelerator ---
# accelerator.prepare() returns the (possibly wrapped) UNet, text encoder,
# optimizer and data loader; the names are rebound so the code below uses the
# prepared objects.
unet_lora, text_encoder, optimizer, loader = accelerator.prepare(
    unet_lora, text_encoder, optimizer, loader
)

# --- Training loop ---
# Noise-prediction objective: encode images to latents, add noise at a random
# timestep, let the LoRA-wrapped UNet predict that noise, minimise the MSE.
max_train_steps = 1000
global_step = 0

# Explicitly switch to training mode so train-only layers (e.g. dropout) are active.
unet_lora.train()

for epoch in range(1000):
    for images, prompts in loader:
        tokens = tokenizer(
            prompts,
            padding="max_length",
            truncation=True,
            max_length=tokenizer.model_max_length,
            return_tensors="pt"
        ).to(device)

        # Neither the text encoder nor the VAE is optimised, so run both without
        # building an autograd graph. Previously the text encoder accumulated
        # gradients that were never zeroed (it is not part of the optimizer).
        with torch.no_grad():
            text_embeds = text_encoder(**tokens).last_hidden_state

            # `transform` already normalises images with mean 0.5 / std 0.5,
            # i.e. to [-1, 1]; rescaling again with `* 2 - 1` pushed the VAE
            # input to [-3, 1], so the images are passed through unchanged.
            latents = vae.encode(images.to(device)).latent_dist.sample()
            latents = latents * vae.config.scaling_factor

        noise = torch.randn_like(latents)
        # One random diffusion timestep per sample in the batch.
        timesteps = torch.randint(
            0,
            noise_scheduler.config.num_train_timesteps,
            (latents.shape[0],),
            device=device
        )

        noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

        pred_noise = unet_lora(noisy_latents, timesteps, encoder_hidden_states=text_embeds).sample

        loss = F.mse_loss(pred_noise, noise)

        # Backward goes through the accelerator so mixed precision is handled.
        accelerator.backward(loss)
        optimizer.step()
        optimizer.zero_grad()

        global_step += 1
        print(f"Step {global_step} - Loss: {loss.item():.4f}")

        if global_step >= max_train_steps:
            break
    # Leave the epoch loop as well once the step budget is used up.
    if global_step >= max_train_steps:
        break

# --- Save the model ---
# accelerator.prepare() may have wrapped the models; unwrap them before calling
# save_pretrained() so saving does not depend on the wrapper exposing it.
# All three artifacts are written into the same output folder.
accelerator.unwrap_model(unet_lora).save_pretrained("sd15-lora-finetuned")
tokenizer.save_pretrained("sd15-lora-finetuned")
accelerator.unwrap_model(text_encoder).save_pretrained("sd15-lora-finetuned")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/905 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/961k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.52k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

Step 1 - Loss: 0.1411
Step 2 - Loss: 0.2144
Step 3 - Loss: 0.0141
Step 4 - Loss: 0.3218
Step 5 - Loss: 0.0956
Step 6 - Loss: 0.1469
Step 7 - Loss: 0.2034
Step 8 - Loss: 0.0907
Step 9 - Loss: 0.1751
Step 10 - Loss: 0.1982
Step 11 - Loss: 0.1743
Step 12 - Loss: 0.2970
Step 13 - Loss: 0.0843
Step 14 - Loss: 0.1147
Step 15 - Loss: 0.1962
Step 16 - Loss: 0.2349
Step 17 - Loss: 0.2155
Step 18 - Loss: 0.1889
Step 19 - Loss: 0.0352
Step 20 - Loss: 0.1965
Step 21 - Loss: 0.3280
Step 22 - Loss: 0.2571
Step 23 - Loss: 0.0194
Step 24 - Loss: 0.2423
Step 25 - Loss: 0.2119
Step 26 - Loss: 0.0909
Step 27 - Loss: 0.2200
Step 28 - Loss: 0.0187
Step 29 - Loss: 0.2720
Step 30 - Loss: 0.0655
Step 31 - Loss: 0.0265
Step 32 - Loss: 0.4666
Step 33 - Loss: 0.1728
Step 34 - Loss: 0.0772
Step 35 - Loss: 0.1500
Step 36 - Loss: 0.3809
Step 37 - Loss: 0.1096
Step 38 - Loss: 0.1971
Step 39 - Loss: 0.0779
Step 40 - Loss: 0.0344
Step 41 - Loss: 0.0696
Step 42 - Loss: 0.1188
Step 43 - Loss: 0.2055
Step 44 - Loss: 0.50

To generate an image, adjust the `prompt` variable in the cell below.

In [12]:
import torch
from diffusers import (
    StableDiffusionPipeline,
    EulerAncestralDiscreteScheduler,
    AutoencoderKL,
    UNet2DConditionModel,
)
from transformers import CLIPTextModel, CLIPTokenizer
from peft import PeftModel

# Base model identifier and the folder holding the LoRA adapter
model_id = "runwayml/stable-diffusion-v1-5"
lora_path = "./sd15-lora-finetuned"  # Relative to the current working directory; replace with your LoRA weights path

# Use the GPU when one is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# CLIP tokenizer and text encoder; the encoder is placed on the target device
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = text_encoder.to(device)

# Base VAE (moved to the device) and base UNet from the Stable Diffusion checkpoint
vae = AutoencoderKL.from_pretrained(model_id, subfolder="vae")
vae = vae.to(device)
unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="unet")

# Attach the saved LoRA adapter to the UNet, then move the result to the device
unet_lora = PeftModel.from_pretrained(unet, lora_path)
unet_lora = unet_lora.to(device)

# Euler Ancestral scheduler built from the checkpoint's scheduler config
scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    model_id, subfolder="scheduler"
)

# CLIPFeatureExtractor is deprecated in transformers; CLIPImageProcessor is its
# replacement and loads the same preprocessing configuration.
from transformers import CLIPImageProcessor

feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Assemble the pipeline from the individually loaded parts. The safety checker
# is disabled here, so generated images are returned unfiltered.
pipe = StableDiffusionPipeline(
    vae=vae,
    unet=unet_lora,
    tokenizer=tokenizer,
    text_encoder=text_encoder,
    scheduler=scheduler,
    safety_checker=None,
    feature_extractor=feature_extractor,
)
pipe = pipe.to(device)

# Inference parameters
prompt = "A man smiling towards the sea "
num_inference_steps = 50
guidance_scale = 7.5

# Generate one 512x512 image and take the first (only) result
image = pipe(
    prompt,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
    height=512,
    width=512,
).images[0]

# Save the image to disk
image.save("lora_output.png")

# Render inline: image.show() launches an external viewer, which displays
# nothing in a headless notebook. A trailing expression uses the rich repr.
image

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


  0%|          | 0/50 [00:00<?, ?it/s]

After the model has been loaded, you can generate more images just by changing the `prompt` variable.

In [None]:
# Inference parameters (reuses the `pipe` built in the previous cell)
prompt = "<fkylmz> side look smiling at beach"
num_inference_steps = 50
guidance_scale = 7.5

# Generate one 512x512 image and take the first (only) result
image = pipe(
    prompt,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
    height=512,
    width=512,
).images[0]

# Save the image to disk (overwrites any earlier lora_output.png)
image.save("lora_output.png")

# Render inline: image.show() launches an external viewer, which displays
# nothing in a headless notebook. A trailing expression uses the rich repr.
image

  0%|          | 0/50 [00:00<?, ?it/s]

In [None]:
!ls sd15-lora-finetuned

adapter_config.json	   merges.txt	      special_tokens_map.json
adapter_model.safetensors  model.safetensors  tokenizer_config.json
config.json		   README.md	      vocab.json


In [None]:
!pip install -q torch torchvision

In [None]:
!pip install -q torch torchvision
