In [None]:
# 1.1  Çakışan Colab paketlerini temizle
!pip uninstall -y -q bigframes gcsfs

# 1.2  datasets'in istediği sürüm aralığında fsspec kur
!pip install -q "fsspec>=2023.1.0,<2025.3.0"

# 1.3  Gerekli kütüphanelerin en yeni sürümleri
!pip install -q --upgrade \
    transformers datasets accelerate evaluate peft pillow huggingface_hub


[0m

In [None]:
import inspect
import transformers
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

# Sanity check of the installed transformers build: print its version, confirm that
# the two seq2seq classes import, and list the first ten constructor parameters of
# Seq2SeqTrainingArguments (useful for spotting renamed arguments).
print("transformers sürümünüz:", transformers.__version__)
print("Seq2SeqTrainer bulundu:", Seq2SeqTrainer)
print("Seq2SeqTrainingArguments bulundu:", Seq2SeqTrainingArguments)
print(
    "TrainingArguments init parametreleri:",
    list(inspect.signature(Seq2SeqTrainingArguments).parameters)[:10],
    "…",
)


transformers sürümünüz: 4.51.3
Seq2SeqTrainer bulundu: <class 'transformers.trainer_seq2seq.Seq2SeqTrainer'>
Seq2SeqTrainingArguments bulundu: <class 'transformers.training_args_seq2seq.Seq2SeqTrainingArguments'>
TrainingArguments init parametreleri: ['output_dir', 'overwrite_output_dir', 'do_train', 'do_eval', 'do_predict', 'eval_strategy', 'prediction_loss_only', 'per_device_train_batch_size', 'per_device_eval_batch_size', 'per_gpu_train_batch_size'] …


In [None]:
from datasets import load_dataset
from transformers import (
    BlipProcessor, BlipForConditionalGeneration
)

# 2.1  Model + processor
MODEL_ID = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(MODEL_ID)
model     = BlipForConditionalGeneration.from_pretrained(MODEL_ID)

# 2.2  Flickr8k (90% train, 10% val) — the dataset has a caption_0 column
train = load_dataset("jxie/flickr8k", split="train[:90%]")
val   = load_dataset("jxie/flickr8k", split="train[90%:]")

# 2.3  Transform function
def transform(batch):
    """Convert a batch of raw rows into model inputs.

    Args:
        batch: a batched slice of the dataset; its "image" and "caption_0"
            columns are read.

    Returns:
        The processor output (pixel_values, input_ids, attention_mask) with an
        added "labels" entry.
    """
    enc = processor(
        images=batch["image"],
        text=batch["caption_0"],          # first caption of every image
        padding="max_length",
        truncation=True,
        max_length=30,
        return_tensors="pt"
    )
    # Labels mirror input_ids, except that padded positions are replaced by -100.
    # -100 is the default ignore_index of PyTorch's cross-entropy loss, so the
    # max_length padding no longer contributes to the training loss.
    enc["labels"] = enc["input_ids"].masked_fill(enc["attention_mask"] == 0, -100)
    return enc                                # input_ids must stay in the output

train = train.map(transform, batched=True, remove_columns=train.column_names)
val   = val.map(transform,   batched=True, remove_columns=val.column_names)

# Keep attention_mask as well, so the model is told which positions are padding.
MODEL_COLUMNS = ["pixel_values", "input_ids", "attention_mask", "labels"]
train.set_format("torch", columns=MODEL_COLUMNS)
val.set_format("torch",   columns=MODEL_COLUMNS)


Map:   0%|          | 0/5400 [00:00<?, ? examples/s]

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

In [None]:
from transformers import (
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq
)

# Single place for the output folder: used for checkpoints and for the final export.
OUTPUT_DIR = "blip_ft"

# 3.1  Training arguments
args = Seq2SeqTrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=1,
    learning_rate=1e-5,
    fp16=True,                       # mixed precision for GPU speed
    remove_unused_columns=False,     # keep every dataset column when batching
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=100,
    report_to="none"
)

# 3.2  Data collator — built on the actual text tokenizer (processor.tokenizer)
text_tokenizer = processor.tokenizer
data_collator  = DataCollatorForSeq2Seq(
    tokenizer=text_tokenizer,
    model=model,
    label_pad_token_id=-100
)

# 3.3  Seq2SeqTrainer
# `processing_class` replaces the deprecated `tokenizer` argument, which emitted a
# FutureWarning here and is slated for removal in a later transformers release.
trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    train_dataset=train,
    eval_dataset=val,
    data_collator=data_collator,
    processing_class=text_tokenizer
)

# 3.4  Run the training and export the result
trainer.train()
trainer.save_model(OUTPUT_DIR)
# The trainer only knows the tokenizer; save the full processor too so the folder
# also carries the image-preprocessing config and can be reloaded on its own.
processor.save_pretrained(OUTPUT_DIR)
print("✅ Fine‑tune tamam — blip_ft/ klasörü oluştu")


  trainer = Seq2SeqTrainer(
  batch["labels"] = torch.tensor(batch["labels"], dtype=torch.int64)


Epoch,Training Loss,Validation Loss
1,1.3933,1.290185


✅ Fine‑tune tamam — blip_ft/ klasörü oluştu


In [None]:
from datasets import load_dataset
import torch, evaluate
from tqdm.auto import tqdm

# -- 1. Before mapping: the raw split, needed only for the reference captions
val_raw = load_dataset("jxie/flickr8k", split="train[90%:]")
# Read the caption column once, so the loop below does not fetch a whole row
# (image included) for every prediction.
ref_captions = list(val_raw["caption_0"])

# -- 2. After mapping: the tensor-formatted validation split built earlier in the
#       notebook from the same "train[90%:]" slice. Binding it explicitly keeps this
#       cell runnable after Restart & Run All (val_ds was otherwise never defined).
val_ds = val
assert len(val_ds) == len(ref_captions), "validation tensors and references are misaligned"

bleu    = evaluate.load("bleu")
meteor  = evaluate.load("meteor")
rouge   = evaluate.load("rouge")

preds, refs = [], []

# Make sure dropout and other train-only behaviour is off while generating.
model.eval()

for i, ex in enumerate(tqdm(val_ds, desc="Val set tahmin")):
    # Add a batch dimension and move the image to the model's device.
    pixel_values = ex["pixel_values"].unsqueeze(0).to(model.device)

    with torch.no_grad():
        gen = model.generate(
            pixel_values=pixel_values,
            num_beams=5, max_length=30
        )
    pred = processor.decode(gen[0], skip_special_tokens=True)
    preds.append(pred)
    refs.append([ref_captions[i]])   # one-element reference list per prediction

# Check that both lists have the same length
print("Tahmin sayısı:", len(preds))
print("Referans sayısı:", len(refs))

# Compute the metrics
print("BLEU    :", bleu.compute(predictions=preds, references=refs)["bleu"])
print("METEOR  :", meteor.compute(predictions=preds, references=refs)["meteor"])
print("ROUGE-L :", rouge.compute(predictions=preds, references=refs)["rougeL"])


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Val set tahmin:   0%|          | 0/600 [00:00<?, ?it/s]

Tahmin sayısı: 600
Referans sayısı: 600
BLEU    : 0.11185973465733956
METEOR  : 0.370355083016415
ROUGE-L : 0.4178894685575346


In [None]:
from huggingface_hub import login

# Running this cell opens a box asking for "Token:" — enter your access token there.
login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from huggingface_hub import HfApi

api = HfApi()
# Create the target model repository under the logged-in account.
# exist_ok=True keeps the cell re-runnable: without it a second run fails
# because the repository already exists.
api.create_repo(
    repo_id="blip-flickr8k-ft",  # model name only, no user prefix
    private=True,                # private: visible only to the owner
    exist_ok=True
)


RepoUrl('https://huggingface.co/ialper/blip-flickr8k-ft', endpoint='https://huggingface.co', repo_type='model', repo_id='ialper/blip-flickr8k-ft')

In [None]:
from huggingface_hub import upload_folder

REPO = "ialper/blip-flickr8k-ft"
upload_folder(
    repo_id=REPO,
    repo_type="model",
    folder_path="blip_ft",   # folder holding the exported model
    path_in_repo="",
    # Skip the Trainer checkpoint sub-folders: they hold optimizer/scheduler/RNG
    # state and a second copy of the weights, none of which is needed for inference.
    ignore_patterns=["checkpoint-*/*"]
)
print(f"🎉 Model yüklendi: https://huggingface.co/{REPO}")


optimizer.pt:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

Upload 8 LFS files:   0%|          | 0/8 [00:00<?, ?it/s]

scaler.pt:   0%|          | 0.00/988 [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

🎉 Model yüklendi: https://huggingface.co/ialper/blip-flickr8k-ft
