In [2]:
# Cell 1: Import necessary libraries
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub id of the checkpoint to load (example value; swap in the model you actually need)
model_checkpoint = "microsoft/phi-2"

# Prefer the GPU when CUDA is usable, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Fetch the pre-trained weights and the matching tokenizer
# NOTE(review): the recorded output of this cell shows a "Some weights ... were not used"
# warning; check that the installed transformers version matches this checkpoint's layout.
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Place the model on the selected device and report which one is in use
model = model.to(device)
print(f"Using device: {device}")

config.json: 100%|██████████| 866/866 [00:00<00:00, 6.18MB/s]
model.safetensors.index.json: 100%|██████████| 35.7k/35.7k [00:00<00:00, 2.67MB/s]
model-00001-of-00002.safetensors: 100%|██████████| 5.00G/5.00G [01:45<00:00, 47.5MB/s]
model-00002-of-00002.safetensors: 100%|██████████| 564M/564M [00:11<00:00, 48.3MB/s]
Downloading shards: 100%|██████████| 2/2 [02:02<00:00, 61.10s/it] 
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.89it/s]
Some weights of the model checkpoint at microsoft/phi-2 were not used when initializing PhiForCausalLM: ['model.layers.5.self_attn.k_proj.weight', 'model.layers.2.self_attn.q_proj.weight', 'model.layers.22.self_attn.q_proj.bias', 'model.layers.17.self_attn.q_proj.bias', 'model.layers.26.self_attn.q_proj.bias', 'model.layers.25.self_attn.v_proj.weight', 'model.layers.18.self_attn.v_proj.weight', 'model.layers.7.self_attn.q_proj.bias', 'model.layers.14.self_attn.k_proj.weight', 'model.layers.31.self_attn.k_proj.bias', 'model.layers.17.self

Using device: cuda


In [3]:
# Cell 2: Define the Self-Play Fine-Tuning (SPIN) function
def self_play_fine_tuning(model, tokenizer, dataset, num_iterations=3, batch_size=1):
    """
    Perform Self-Play Fine-Tuning (SPIN) on the given model.

    This is a stub that only fixes the intended signature; the algorithm
    itself has not been written yet, so calling it fails loudly instead of
    silently returning without training anything.

    Args:
        model: The pre-trained language model.
        tokenizer: The tokenizer for the language model.
        dataset: The dataset to use for fine-tuning, containing prompts and responses.
        num_iterations: The number of self-play iterations to perform.
        batch_size: The batch size for training.

    Raises:
        NotImplementedError: Always, until the SPIN loop is implemented.
    """
    # TODO: implement the SPIN algorithm. This should include generating
    # synthetic data, training the main player, and updating the opponent
    # player as described in the SPIN paper.
    raise NotImplementedError(
        "self_play_fine_tuning is a placeholder; the SPIN training loop is not implemented yet."
    )

# No fine-tuning data is wired in yet: `dataset` stays None until it is
# replaced with the real prompt/response dataset.
dataset = None

# Intended usage once a dataset is available:
# self_play_fine_tuning(model, tokenizer, dataset)

In [None]:
# Cell 3: Load the dataset
import pandas as pd

# Location of the CSV file (placeholder value: point this at the real dataset)
dataset_path = "path/to/dataset.csv"

# Read the CSV into a pandas DataFrame
df = pd.read_csv(dataset_path)

# Show the header line followed by the leading rows of the frame
print("Dataset preview:", df.head(), sep="\n")

In [None]:
# Cell 4: Prepare the data
from transformers import TextDataset, DataCollatorForLanguageModeling

# Tokenize the dataset
def tokenize_function(examples):
    """Run the module-level `tokenizer` over the "text" entry of `examples` and return its output."""
    texts = examples["text"]
    return tokenizer(texts)

# `df` is a pandas DataFrame: its `.map` works element-wise and it has no
# "train" split, so the `datasets`-style `map(..., batched=True)` / ["train"]
# calls cannot work on it. Tokenize the text column directly as one batch.
texts = df["text"].dropna().astype(str).tolist()
tokenized_datasets = tokenize_function({"text": texts})

# Prepare the dataset for training: one feature dict per example. A list
# provides the __len__/__getitem__ protocol of a map-style dataset.
train_dataset = [
    {name: values[i] for name, values in tokenized_datasets.items()}
    for i in range(len(texts))
]

# The collator pads every batch, which requires a pad token. If the tokenizer
# does not define one, reuse EOS. Caveat: the collator ignores pad-token
# positions in the labels, so EOS tokens are then excluded from the loss too.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [None]:
# Cell 5: Train the model
from transformers import Trainer, TrainingArguments

# Define the training arguments.
# No evaluation strategy is requested: the Trainer below receives no
# eval_dataset, so asking for per-epoch evaluation would fail. Leaving the
# argument out keeps the library default (no evaluation during training).
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=1,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Define the trainer (training data and collator come from the data-preparation cell)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)

# Train the model
trainer.train()

In [None]:
# Cell 6: Evaluate the model
import math
import numpy as np
from transformers import pipeline

# Define the evaluation function
def evaluate(model, tokenizer, text):
    """
    Generate a continuation of `text` and compute the perplexity of `text`.

    Relies on the module-level `device` to place the input tensor.

    Args:
        model: Causal language model used for both generation and scoring.
        tokenizer: Tokenizer matching `model`.
        text: Prompt to continue; also the text whose perplexity is computed.

    Returns:
        A `(generated_text, perplexity)` tuple: the first sequence returned by
        the text-generation pipeline, and the perplexity of `text` (not of the
        generated continuation) as a Python float.
    """
    # Sample a continuation of the prompt (do_sample=True, so the output varies between runs)
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
    generated_text = generator(text, max_length=100, do_sample=True, temperature=0.7)

    # Score the prompt itself: the model reports its loss when the prompt is
    # used as its own label, and perplexity is exp(loss).
    input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
    with torch.no_grad():
        outputs = model(input_ids=input_ids, labels=input_ids)

    # .item() turns the scalar loss tensor into a float, so math.exp yields a
    # plain float; a float has no .item() of its own, so it is returned as-is.
    perplexity = math.exp(outputs.loss.item())

    # Return the generated text and perplexity
    return generated_text[0]["generated_text"], perplexity

# Run the evaluation on a single example sentence
sample_text = "The quick brown fox jumps over the lazy dog."
generated_text, perplexity = evaluate(model, tokenizer, sample_text)

# Report the sampled continuation, then the perplexity rounded to two decimals
print("Generated text:", generated_text, sep="\n")
print(f"Perplexity: {np.round(perplexity, 2)}")