In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load a pre-trained model for reward estimation
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
reward_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=1)

# Sample training data (prompt-response pairs with human-labeled scores)
data = [
    ("What is AI?", "AI stands for Artificial Intelligence.", 0.9),
    ("What is deep learning?", "Deep learning is a subset of machine learning.", 0.8),
    ("Tell me a joke.", "Why did the chicken cross the road?", 0.6)
]

# Convert data into tensors
inputs = tokenizer([d[1] for d in data], padding=True, truncation=True, return_tensors="pt")
labels = torch.tensor([d[2] for d in data]).unsqueeze(1)

# Define optimizer and loss function
optimizer = optim.AdamW(reward_model.parameters(), lr=5e-5)
criterion = nn.MSELoss()

# Training loop
reward_model.train()
for epoch in range(3):
    optimizer.zero_grad()
    outputs = reward_model(**inputs).logits
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")


  from .autonotebook import tqdm as notebook_tqdm
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.5691652297973633
Epoch 2, Loss: 0.34353339672088623
Epoch 3, Loss: 0.11656033992767334


In [2]:
from transformers import AutoModelForCausalLM
import random

# Load a small generative model (e.g., GPT-2)
generative_model = AutoModelForCausalLM.from_pretrained("gpt2")

def generate_samples(prompt, num_samples=5):
    """Generate multiple responses for a given prompt."""
    return [f"Generated response {i} for {prompt}" for i in range(num_samples)]

def rank_samples(samples):
    """Rank samples using the reward model."""
    inputs = tokenizer(samples, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        rewards = reward_model(**inputs).logits.squeeze()
    return [samples[i] for i in torch.argsort(rewards, descending=True)]

def fine_tune_model(model, dataset):
    """Fine-tune the generative model on high-reward samples."""
    model.train()
    optimizer = optim.AdamW(model.parameters(), lr=1e-5)
    
    for epoch in range(3):
        for sample in dataset:
            inputs = tokenizer(sample, return_tensors="pt")
            labels = inputs["input_ids"].clone()
            outputs = model(**inputs, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch+1} Fine-tuning loss: {loss.item()}")

# RAFT loop
for iteration in range(5):
    print(f"Iteration {iteration+1}...")
    prompt = "What is artificial intelligence?"
    samples = generate_samples(prompt)
    ranked_samples = rank_samples(samples)[:3]  # Select top-3 responses
    fine_tune_model(generative_model, ranked_samples)

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Iteration 1...
Epoch 1 Fine-tuning loss: 11.924062728881836
Epoch 2 Fine-tuning loss: 11.158195495605469
Epoch 3 Fine-tuning loss: 10.423539161682129
Iteration 2...
Epoch 1 Fine-tuning loss: 8.566413879394531
Epoch 2 Fine-tuning loss: 8.518267631530762
Epoch 3 Fine-tuning loss: 7.131834506988525
Iteration 3...
Epoch 1 Fine-tuning loss: 6.632737159729004
Epoch 2 Fine-tuning loss: 5.000913143157959
Epoch 3 Fine-tuning loss: 4.756820201873779
Iteration 4...
Epoch 1 Fine-tuning loss: 4.7517194747924805
Epoch 2 Fine-tuning loss: 4.390162944793701
Epoch 3 Fine-tuning loss: 3.733025312423706
Iteration 5...
Epoch 1 Fine-tuning loss: 2.687434673309326
Epoch 2 Fine-tuning loss: 1.3824565410614014
Epoch 3 Fine-tuning loss: 1.3723318576812744


In [4]:
from transformers import CLIPProcessor, CLIPModel
import requests
from PIL import Image

clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def compute_reward(image, text):
    """Compute the similarity score between an image and text."""
    inputs = clip_processor(text=[text], images=image, return_tensors="pt")
    scores = clip_model(**inputs).logits_per_image
    return scores.item()

# Example: Evaluating a generated image
image_path = "anime_boy.png"
image = Image.open(image_path)
reward_score = compute_reward(image, "A high-quality anime-style portrait")
print("Reward Score:", reward_score)


Reward Score: 27.507169723510742
