In [None]:
# %pip install transformers datasets torch --quiet

Note: you may need to restart the kernel to use updated packages.


In [None]:
from datasets import load_dataset

# Fetch only the "train" split of the anime captions dataset.
dataset = load_dataset(
    "none-yet/anime-captions",
    split="train",
)

# Quick sanity check: report how many rows were loaded.
sample_count = len(dataset)
print("Total samples:", sample_count)

: 

In [None]:
from transformers import GitProcessor, GitForCausalLM

# The model weights and the matching processor are both loaded from the
# same "microsoft/git-base" checkpoint.
model = GitForCausalLM.from_pretrained("microsoft/git-base")
processor = GitProcessor.from_pretrained("microsoft/git-base")


In [None]:
# %pip install peft

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter configuration.
#
# NOTE(review): in the Hugging Face GIT implementation the vision encoder
# names its attention projections `q_proj` / `v_proj`, while the text decoder
# uses `query` / `value`. Listing only the first pair would leave the caption
# decoder without adapters, so both naming schemes are targeted here. Confirm
# against `model.named_modules()` for the installed transformers version.
lora_config = LoraConfig(
    r=8,          # rank of LoRA update matrices
    lora_alpha=16,
    target_modules=["q_proj", "v_proj", "query", "value"],  # modules to fine-tune
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the base model with the adapters. This rebinds `model`, so re-running
# this cell without reloading the base model would wrap it a second time.
model = get_peft_model(model, lora_config)

# Show how many parameters are trainable, to confirm the targets matched.
model.print_trainable_parameters()


In [None]:
from PIL import Image
import requests

def preprocess(example):
    img_data = example['image']
    
    # If it's a string, it could be a URL or file path
    if isinstance(img_data, str):
        if img_data.startswith("http"):
            image = Image.open(requests.get(img_data, stream=True).raw).convert("RGB")
        else:
            image = Image.open(img_data).convert("RGB")
    else:
        # Already a PIL Image
        image = img_data.convert("RGB")
    
    # Encode image + caption
    inputs = processor(images=image, text=example['text'], return_tensors="pt")
    return {
        "input_ids": inputs.input_ids[0],
        "attention_mask": inputs.attention_mask[0]
    }


In [None]:
# %pip install "accelerate>=0.26.0" --quiet


In [None]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./git-anime-finetuned",
    per_device_train_batch_size=2,  # lower if GPU memory is small
    gradient_accumulation_steps=4,  # simulate larger batch size
    learning_rate=5e-5,
    num_train_epochs=3,
    fp16=True,  # use mixed precision if GPU supports
    save_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)

trainer.train()


In [None]:
# Write the model and then the processor into the same output directory.
for artifact in (model, processor):
    artifact.save_pretrained("./git-anime-finetuned")
