In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Report the installed PyTorch build and whether a CUDA device is visible,
# one value per line.
print(torch.__version__, torch.cuda.is_available(), sep="\n")

2.6.0+cu118
True


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
import torch
import tqdm

# Load model and tokenizer
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
# Prefer the GPU when one is visible; fall back to CPU otherwise.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): trust_remote_code=True allows Python shipped in the model
# repository to run locally — confirm it is actually required for this
# checkpoint and drop it if not.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)

# from_pretrained leaves the weights on the CPU by default. The generation
# helpers send their inputs to `model.device`, so without this move every
# generate() call runs on the CPU even when CUDA is available.
model = model.to(DEVICE)

# Load dataset (using a subset for efficiency)
dataset = load_dataset("squad", split="train[:500]")

  from .autonotebook import tqdm as notebook_tqdm
Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


In [3]:
from tqdm import tqdm

def generate_initial_answers(model, tokenizer, dataset, num_samples=1, max_length=100):
    """Generate first-pass answers for every question in ``dataset``.

    Args:
        model: Model exposing ``generate`` and ``device`` (a causal LM in this
            notebook).
        tokenizer: Tokenizer used to encode the question and decode the output.
        dataset: Iterable of samples, each providing a ``'question'`` string.
        num_samples: Number of answers to generate per question.
        max_length: Maximum number of newly generated tokens per answer. It is
            forwarded as ``max_new_tokens`` so the budget applies to the answer
            only; a cap on total length would be partly (or fully) consumed by
            the prompt.

    Returns:
        A list with one entry per sample; each entry is a list of
        ``num_samples`` decoded answer strings with the prompt removed.
    """
    generation_kwargs = {
        "max_new_tokens": max_length,
        "num_return_sequences": num_samples,
    }
    if num_samples > 1:
        # Several distinct candidates require sampling; do not rely on the
        # model's default generation config to have it switched on.
        generation_kwargs["do_sample"] = True

    initial_answers = []
    for sample in tqdm(dataset, desc="Generating Initial Answers", unit="sample"):
        input_text = sample['question']
        inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
        prompt_length = inputs["input_ids"].shape[1]
        outputs = model.generate(**inputs, **generation_kwargs)
        # A causal LM returns the prompt tokens followed by the continuation.
        # Decode only the continuation so the question text is not mistaken
        # for part of the answer by downstream scoring.
        decoded_answers = [
            tokenizer.decode(output[prompt_length:], skip_special_tokens=True).strip()
            for output in outputs
        ]
        initial_answers.append(decoded_answers)
    return initial_answers


In [4]:
def reward_function(predicted, reference):
    """Binary reward: 1 when ``reference`` occurs in ``predicted``, else 0.

    The comparison is case-insensitive and ignores whitespace surrounding the
    reference.

    Args:
        predicted: Model output to score.
        reference: Gold answer expected to appear inside ``predicted``.

    Returns:
        ``1`` if the (non-empty) reference is a substring of the prediction,
        otherwise ``0``.
    """
    needle = reference.strip().lower()
    if not needle:
        # The empty string is a substring of everything; without this guard a
        # blank reference would hand out a reward to any prediction.
        return 0
    return 1 if needle in predicted.lower() else 0


In [5]:
def generate_corrections(model, tokenizer, dataset, initial_answers, num_corrections=3, max_length=100):
    """Ask the model to revise each initial answer.

    For every (sample, answers) pair a prompt of the form
    ``Question / Initial Answer / Corrected Answer:`` is built per initial
    answer and ``num_corrections`` continuations are generated for it.

    Args:
        model: Model exposing ``generate`` and ``device`` (a causal LM in this
            notebook).
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        dataset: Iterable of samples, each providing a ``'question'`` string;
            must also support ``len``.
        initial_answers: One list of answer strings per sample, aligned with
            ``dataset``.
        num_corrections: Number of corrections to generate per initial answer.
        max_length: Maximum number of newly generated tokens per correction.
            It is forwarded as ``max_new_tokens``: the prompt already embeds
            the question and the initial answer, so a cap on total length
            would leave little or no room for the correction itself.

    Returns:
        A list with one entry per sample; each entry is a flat list of decoded
        corrections (``num_corrections`` per initial answer), prompt removed.
    """
    generation_kwargs = {
        "max_new_tokens": max_length,
        "num_return_sequences": num_corrections,
    }
    if num_corrections > 1:
        # Several distinct candidates require sampling; do not rely on the
        # model's default generation config to have it switched on.
        generation_kwargs["do_sample"] = True

    corrections = []
    for sample, answers in tqdm(zip(dataset, initial_answers), total=len(dataset), desc="Generating Corrections", unit="sample"):
        question = sample['question']
        corrected_versions = []
        for ans in answers:
            prompt = f"Question: {question}\nInitial Answer: {ans}\nCorrected Answer:"
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
            prompt_length = inputs["input_ids"].shape[1]
            outputs = model.generate(**inputs, **generation_kwargs)
            # Drop the echoed prompt: it contains the initial answer, so
            # keeping it would make every "correction" inherit whatever the
            # initial answer matched when rewards are computed.
            corrected_answers = [
                tokenizer.decode(output[prompt_length:], skip_special_tokens=True).strip()
                for output in outputs
            ]
            corrected_versions.extend(corrected_answers)
        corrections.append(corrected_versions)
    return corrections

In [6]:
def filter_corrections(initial_answers, corrected_answers, reference_answers):
    """Keep only samples whose best correction beats the initial answer.

    Args:
        initial_answers: One list of initial answers per sample; only the
            first answer of each list is scored and returned.
        corrected_answers: One list of candidate corrections per sample.
        reference_answers: One gold answer string per sample.

    Returns:
        A list of ``(initial_answer, best_correction)`` tuples for the samples
        where the highest-reward correction scores strictly higher than the
        first initial answer. When several corrections tie for the highest
        reward, the earliest one is used. Samples with no initial answer or
        no corrections are skipped, since they cannot show an improvement.
    """
    filtered_data = []
    for init, corrs, ref in tqdm(zip(initial_answers, corrected_answers, reference_answers), total=len(initial_answers), desc="Filtering Corrections", unit="sample"):
        if not init or not corrs:
            # max() on an empty list and init[0] would both raise here.
            continue
        # Score each candidate once and reuse the scores for the comparison.
        correction_rewards = [reward_function(corr, ref) for corr in corrs]
        best_index = max(range(len(corrs)), key=correction_rewards.__getitem__)
        if correction_rewards[best_index] > reward_function(init[0], ref):  # Ensures improvement
            filtered_data.append((init[0], corrs[best_index]))
    return filtered_data


In [14]:
def fine_tune_model(model, tokenizer, filtered_data, epochs=3):
    """Fine-tune ``model`` on the filtered (text, correction) pairs.

    Each pair is rendered as ``"Question: {first}\\nCorrected Answer: {second}"``,
    tokenized to a fixed length of 128, and used as a language-modelling
    example with the padded positions excluded from the loss.

    Args:
        model: Model to train; passed to ``Trainer`` and updated in place.
        tokenizer: Tokenizer used to encode the training text. If it has no
            pad token, its EOS token is assigned as the pad token.
        filtered_data: Iterable of 2-tuples of strings.
            NOTE(review): the pipeline in this notebook passes
            ``(initial_answer, best_correction)`` pairs, so the ``Question:``
            label in the template may not match the first element — confirm
            the intended training format.
        epochs: Number of training epochs.

    Returns:
        None. Checkpoints are written under ``./stasc_model`` and logs under
        ``./logs``.
    """
    # Imported here because the notebook's import cell only brings in
    # `load_dataset` from the datasets package.
    from datasets import Dataset

    if not filtered_data:
        print("No improved corrections to train on; skipping fine-tuning.")
        return

    if tokenizer.pad_token is None:
        # padding="max_length" below needs a pad token to be defined.
        tokenizer.pad_token = tokenizer.eos_token

    # Convert to dataset format
    train_data = [{"input_text": f"Question: {q}\nCorrected Answer: {a}"} for q, a in filtered_data]

    def tokenize_data(sample):
        encoded = dict(tokenizer(sample["input_text"], padding="max_length", truncation=True, max_length=128))
        # The Trainer needs `labels` to obtain a loss. Use the inputs as the
        # targets and mark padded positions with -100 so they are ignored.
        encoded["labels"] = [
            token_id if mask == 1 else -100
            for token_id, mask in zip(encoded["input_ids"], encoded["attention_mask"])
        ]
        return encoded

    # A plain list has no .map(); wrap it in a Dataset first.
    tokenized_dataset = Dataset.from_list(train_data).map(tokenize_data, remove_columns=["input_text"])

    # No evaluation strategy is configured: there is no eval dataset, and
    # asking for per-epoch evaluation without one makes the Trainer fail.
    training_args = TrainingArguments(
        output_dir="./stasc_model",
        per_device_train_batch_size=8,
        num_train_epochs=epochs,
        save_steps=500,
        save_total_limit=2,
        logging_dir="./logs"
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset
    )

    # A single train() call already runs `num_train_epochs` epochs (with its
    # own progress bar); calling it once per epoch would train epochs**2
    # passes and restart the optimizer and schedule each time.
    trainer.train()


In [None]:
# Gold answers: the first reference text listed for each SQuAD example.
reference_answers = [example['answers']['text'][0] for example in dataset]

# Stage 1 - draft an answer for every question.
initial_answers = generate_initial_answers(model, tokenizer, dataset)

# Stage 2 - have the model revise each draft.
corrections = generate_corrections(model, tokenizer, dataset, initial_answers)

# Stage 3 - keep only the drafts whose best revision scores higher.
filtered_data = filter_corrections(initial_answers, corrections, reference_answers)

# Stage 4 - fine-tune on the surviving (draft, revision) pairs.
fine_tune_model(model, tokenizer, filtered_data)


Generating Initial Answers:   0%|          | 0/500 [00:02<?, ?sample/s]


KeyboardInterrupt: 

: 