# sd+ipadapterPromptsuz

In [None]:
# =========================
# SD3.5-LARGE + IP-Adapter (Img2Img) — PROMPTSUZ, UPSCALE/RESIZE YOK
# =========================

# --- Kurulum (Colab ise aç) ---
# !pip -q install -U "diffusers>=0.33.0" "transformers>=4.43.0" accelerate safetensors pillow opencv-python bitsandbytes

# from huggingface_hub import login
# login()  # HF token gir

import os, glob, gc, contextlib, torch
import numpy as np
from PIL import Image
from tqdm import tqdm
import cv2
from itertools import islice

from diffusers import StableDiffusion3Img2ImgPipeline
from transformers import SiglipVisionModel, SiglipImageProcessor

# -------- Memory / performance --------
# Configure PyTorch's CUDA caching allocator through its environment variable.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64,expandable_segments:True"
torch.set_float32_matmul_precision("high")
# Use the GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# -------- Model / adapter IDs --------
# Hugging Face Hub repository ids; the IP-Adapter is pinned to a fixed revision.
MODEL_ID         = "stabilityai/stable-diffusion-3.5-large"
IPADAPTER_ID     = "InstantX/SD3.5-Large-IP-Adapter"
IPADAPTER_REV    = "f1f54ca369ae759f9278ae9c87d46def9f133c78"
IMAGE_ENCODER_ID = "google/siglip-so400m-patch14-384"

# -------- Prompt-free operation --------
BASE_PROMPT = ""   # empty -> intended to remove any text influence
NEG_PROMPT  = ""   # empty -> no negative prompt

# -------- Parameters --------
GUIDANCE           = 1.0      # the loop only enables CFG when this is > 1.0
STEPS              = 24       # passed as num_inference_steps
STRENGTH           = 0.24     # author's range 0.18-0.30; lower -> input structure is preserved more
IP_SCALE           = 0.90     # author's range 0.8-1.0; composition is taken strongly from the image
USE_EMBEDS         = True     # True: precompute IP-Adapter embeds; False: pass the image itself
VARIANTS_PER_IMAGE = 1        # number of outputs generated per input image
BASE_SEED          = 2025     # per-image seed is BASE_SEED + image index

# -------- Size handling --------

# When True, pad_to_mult16 pads images so width and height are multiples of 16.
PAD_TO_MULTIPLE_OF_16 = True

def list_images(folder: str):
    """Return the sorted paths of the image files directly inside *folder*.

    A file counts as an image when its name ends with one of the known
    raster extensions. The comparison is case-insensitive, so ``IMG.JPG``
    is found as well as ``img.jpg``. Hidden entries (names starting with
    ``.``) and sub-directories are ignored, and the search is not recursive.

    Args:
        folder: Directory to scan. An empty string means the current
            working directory.

    Returns:
        A list of ``os.path.join(folder, name)`` paths sorted as plain
        strings; empty when *folder* does not exist or holds no images.
    """
    exts = (".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tif", ".tiff")
    root = folder or "."
    if not os.path.isdir(root):
        return []
    # Listing the directory (instead of globbing) keeps folder names that
    # contain glob metacharacters such as "[" working, and lets us compare
    # extensions case-insensitively.
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(root)
        if not name.startswith(".")
        and name.lower().endswith(exts)
        and os.path.isfile(os.path.join(root, name))
    )

def pad_to_mult16(img: Image.Image) -> Image.Image:
    """Pad *img* on the right and bottom so both sides are multiples of 16.

    The image is returned unchanged when ``PAD_TO_MULTIPLE_OF_16`` is false
    or when its size is already aligned. Otherwise the missing columns and
    rows are filled by reflecting the image content (OpenCV
    ``BORDER_REFLECT_101``); nothing is added at the top or left, so the
    original pixels keep their coordinates.

    Args:
        img: Source image; the colour conversion used here treats it as RGB.

    Returns:
        The original object when no padding is needed, else a new padded image.
    """
    if PAD_TO_MULTIPLE_OF_16:
        width, height = img.size
        # Distance up to the next multiple of 16 (0 when already aligned).
        extra_right = -width % 16
        extra_bottom = -height % 16
        if extra_right or extra_bottom:
            bgr = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
            padded = cv2.copyMakeBorder(
                bgr,
                0,
                extra_bottom,
                0,
                extra_right,
                borderType=cv2.BORDER_REFLECT_101,
            )
            return Image.fromarray(cv2.cvtColor(padded, cv2.COLOR_BGR2RGB))
    return img

# -------- IO --------
# The first existing directory in CANDIDATE_DIRS becomes the input folder;
# the script stops with an assertion error when none of them exists.
CANDIDATE_DIRS = ["/content/drive/MyDrive/boat_dataset/fullphoto"]
INPUT_DIR = next((d for d in CANDIDATE_DIRS if os.path.isdir(d)), None)
assert INPUT_DIR, f"Girdi klasörü bulunamadı. Şunlardan biri olmalı: {CANDIDATE_DIRS}"

# Generated images are written here; the folder is created if missing.
OUT_DIR = "/content/boat_out_sd35_i2i_ip4"
os.makedirs(OUT_DIR, exist_ok=True)

# How many images to process (None: all of them)
max_images = None

# -------- SigLIP --------
# Image processor and vision encoder handed to the pipeline for the
# IP-Adapter; the encoder weights are loaded in float16.
feature_extractor = SiglipImageProcessor.from_pretrained(IMAGE_ENCODER_ID)
image_encoder = SiglipVisionModel.from_pretrained(IMAGE_ENCODER_ID, torch_dtype=torch.float16)

# -------- Pipeline --------
# Keyword arguments forwarded to from_pretrained: fp16 weights from safetensors.
pipe_kwargs = dict(
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
    low_cpu_mem_usage=True,
)

# Drop text encoder 3 (to save VRAM)
DROP_T5 = True
if DROP_T5:
    pipe_kwargs.update(text_encoder_3=None, tokenizer_3=None)

print("Model yükleniyor:", MODEL_ID)
pipe = StableDiffusion3Img2ImgPipeline.from_pretrained(
    MODEL_ID,
    feature_extractor=feature_extractor,  # image processor used for the IP-Adapter
    image_encoder=image_encoder,          # SigLIP encoder
    **pipe_kwargs,
)

# Remove T5 anyway in case the pipeline still carries it
if DROP_T5:
    if hasattr(pipe, "text_encoder_3"): pipe.text_encoder_3 = None
    if hasattr(pipe, "tokenizer_3"):    pipe.tokenizer_3 = None

# Disable the safety checker (optional); only touched when the attributes exist
if hasattr(pipe, "safety_checker"): pipe.safety_checker = None
if hasattr(pipe, "requires_safety_checker"): pipe.requires_safety_checker = False

# Load the IP-Adapter weights at the pinned revision and set its strength
pipe.load_ip_adapter(
    IPADAPTER_ID,
    weight_name="ip-adapter.bin",
    revision=IPADAPTER_REV,
)
pipe.set_ip_adapter_scale(IP_SCALE)

# VAE / attention optimisations
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()
# NOTE(review): `enable_sdpa` does not appear to be a documented diffusers
# pipeline method. If it is missing, the resulting AttributeError is caught
# below and attention slicing is enabled instead — confirm which path runs.
try:
    pipe.enable_sdpa()
except Exception:
    pipe.enable_attention_slicing()

# CPU offload on GPU runs; plain CPU placement otherwise
if device.type == "cuda":
    pipe.enable_model_cpu_offload()
else:
    pipe.to("cpu")

# Silence the pipeline's own progress bar (best effort); the main loop
# shows a tqdm bar over the images instead.
try:
    pipe.set_progress_bar_config(disable=True)
except Exception:
    pass

def _first_param_device(module):
    """Return the device of *module*'s first parameter, or None if unavailable."""
    if module is None:
        return None
    try:
        return next(module.parameters()).device
    except (AttributeError, StopIteration, TypeError):
        # No `parameters()` method, not callable, or no parameters at all.
        return None


def print_devices(pipe):
    """Print where each known pipeline component currently lives.

    For every component that exists on *pipe* and has at least one
    parameter, one line ``<label> <device>`` is printed. Missing or
    parameter-less components are skipped silently, so this diagnostic is
    safe to call on any pipeline object.

    Both ``unet`` and ``transformer`` are probed, so the denoiser is reported
    whichever of the two attribute names the pipeline uses.

    Args:
        pipe: Pipeline-like object whose components are looked up by
            attribute name.
    """
    components = (
        ("unet     :", "unet"),
        ("transformer:", "transformer"),
        ("vae      :", "vae"),
        ("te1      :", "text_encoder"),
        ("te2      :", "text_encoder_2"),
        ("img_enc  :", "image_encoder"),
    )
    for label, attr in components:
        found = _first_param_device(getattr(pipe, attr, None))
        if found is not None:
            print(label, found)

    # The adapter module may be exposed under either attribute name.
    adapter = getattr(pipe, "ip_adapter", None) or getattr(pipe, "image_proj_model", None)
    found = _first_param_device(adapter)
    if found is not None:
        print("ip_adapter:", found)

print_devices(pipe)

# -------- Collect images --------
all_imgs = list_images(INPUT_DIR)
assert all_imgs, f"Girdi klasöründe görsel yok: {INPUT_DIR}"

# Optionally limit the run to the first `max_images` files.
iterable = all_imgs if max_images is None else islice(all_imgs, max_images)
total_to_process = len(all_imgs) if max_images is None else min(len(all_imgs), int(max_images))

print(f"Bulunan görsel: {len(all_imgs)}; İşlenecek: {total_to_process}")
print(f"Çıkış klasörü: {OUT_DIR}")

processed = 0
skipped = 0

# Autocast context: fp16 autocast on CUDA, a no-op context on CPU.
autocast_ctx = (
    torch.autocast(device_type="cuda", dtype=torch.float16)
    if device.type == "cuda" else contextlib.nullcontext()
)

# Classifier-free guidance is only active above 1.0; this does not change
# between images, so compute it once.
do_cfg = (GUIDANCE > 1.0)

for idx, path in enumerate(tqdm(iterable, total=total_to_process, desc="Processing")):
    try:
        # The context manager closes the file handle as soon as the pixels
        # have been converted, instead of leaving it to the garbage collector.
        with Image.open(path) as src:
            img = src.convert("RGB")
    except Exception as e:
        print("Skip (okuma hatası):", path, e)
        skipped += 1
        continue

    # No rescaling -> only pad up to a multiple of 16 (optional).
    # NOTE(review): the saved result is not cropped back afterwards, so when
    # padding was added the output is not cropped to the source dimensions
    # even though the file name carries "ORIGSIZE" — confirm this is intended.
    base_img = pad_to_mult16(img)

    # Seed per image for deterministic generation. The same generator object
    # is shared by all variants of this image.
    g = torch.Generator(device=device).manual_seed(BASE_SEED + idx)

    ip_embeds = None
    if USE_EMBEDS:
        # Run the image encoder without autograd tracking: the embeddings are
        # only consumed for inference, so recording a graph just costs memory.
        with torch.inference_mode():
            ip_embeds = pipe.prepare_ip_adapter_image_embeds(
                ip_adapter_image=base_img,
                device=device,
                num_images_per_prompt=1,
                do_classifier_free_guidance=do_cfg,
            )

    # Arguments shared by every variant of this image.
    kwargs = dict(
        prompt=BASE_PROMPT,          # ""
        negative_prompt=NEG_PROMPT,  # ""
        image=base_img,              # padded input, never rescaled by this script
        strength=STRENGTH,
        guidance_scale=GUIDANCE,
        num_inference_steps=STEPS,
        generator=g,
    )
    if USE_EMBEDS:
        kwargs["ip_adapter_image_embeds"] = ip_embeds
    else:
        kwargs["ip_adapter_image"] = base_img

    # Output file stem: input stem plus a tag describing the settings used.
    name = os.path.splitext(os.path.basename(path))[0]

    for k in range(VARIANTS_PER_IMAGE):
        with autocast_ctx, torch.inference_mode():
            result = pipe(**kwargs)

        out_img = result.images[0]
        suffix = f"_PRMPLESS_ORIGSIZE_st{STRENGTH}_ip{IP_SCALE}_gs{GUIDANCE}_s{STEPS}"
        if PAD_TO_MULTIPLE_OF_16:
            suffix += "_PAD16"
        if VARIANTS_PER_IMAGE > 1:
            suffix += f"_v{k+1}"
        out_path = os.path.join(OUT_DIR, f"{name}{suffix}.png")
        out_img.save(out_path)

        del out_img, result

    processed += 1
    # Drop every per-image reference (including the embeddings) before
    # asking the allocator to release cached blocks.
    del base_img, img, ip_embeds, kwargs
    if device.type == "cuda":
        torch.cuda.empty_cache()
    gc.collect()

print(f"Bitti. İşlenen: {processed}, Atlanan: {skipped}, Çıktı klasörü: {OUT_DIR}")



Device: cuda
Model yükleniyor: stabilityai/stable-diffusion-3.5-large



A mixture of fp16 and non-fp16 filenames will be loaded.
Loaded fp16 filenames:
[text_encoder_3/model.fp16-00001-of-00002.safetensors, text_encoder_3/model.fp16-00002-of-00002.safetensors, text_encoder/model.fp16.safetensors, text_encoder_2/model.fp16.safetensors, text_encoder_3/model.safetensors.index.fp16.json]
Loaded non-fp16 filenames:
[vae/diffusion_pytorch_model.safetensors, transformer/diffusion_pytorch_model-00001-of-00002.safetensors, transformer/diffusion_pytorch_model.safetensors.index.json, transformer/diffusion_pytorch_model-00002-of-00002.safetensors
If this behavior is not expected, please check your folder structure.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

vae      : cpu
te1      : cpu
te2      : cpu
img_enc  : cpu
Bulunan görsel: 52; İşlenecek: 52
Çıkış klasörü: /content/boat_out_sd35_i2i_ip4


Processing: 100%|██████████| 52/52 [22:00<00:00, 25.39s/it]

Bitti. İşlenen: 52, Atlanan: 0, Çıktı klasörü: /content/boat_out_sd35_i2i_ip4





In [None]:
# Shell command: recursively zip the generated output folder into /content/file6.zip.
!zip -r /content/file6.zip /content/boat_out_sd35_i2i_ip4

  adding: content/boat_out_sd35_i2i_ip4/ (stored 0%)
  adding: content/boat_out_sd35_i2i_ip4/MVI_0895_NIR_Haze_frame410_jpg.rf.e7caf4f8b28f15b1f9983e152b14e543_PRMPLESS_ORIGSIZE_st0.24_ip0.9_gs1.0_s24_PAD16.png (deflated 0%)
  adding: content/boat_out_sd35_i2i_ip4/MVI_1587_VIS_frame585_jpg.rf.c7440040e62549436cd4bf3bea6c1282_PRMPLESS_ORIGSIZE_st0.24_ip0.9_gs1.0_s24_PAD16.png (deflated 1%)
  adding: content/boat_out_sd35_i2i_ip4/MVI_1587_VIS_frame150_jpg.rf.5f70fb969b4d20b195149ebb4e101139_PRMPLESS_ORIGSIZE_st0.24_ip0.9_gs1.0_s24_PAD16.png (deflated 0%)
  adding: content/boat_out_sd35_i2i_ip4/MVI_0790_VIS_OB_frame205_jpg.rf.ffb3b1e06b4bbc3c6218575f4de1c5a5_PRMPLESS_ORIGSIZE_st0.24_ip0.9_gs1.0_s24_PAD16.png (deflated 0%)
  adding: content/boat_out_sd35_i2i_ip4/MVI_1584_VIS_frame455_jpg.rf.be55fcaa2115ae121576be9c8d6c6ab3_PRMPLESS_ORIGSIZE_st0.24_ip0.9_gs1.0_s24_PAD16.png (deflated 0%)
  adding: content/boat_out_sd35_i2i_ip4/MVI_1523_NIR_frame525_jpg.rf.e0249e1bee87a2a2136fd6cc7a7957d1_PR