# Generative AI Model Exploration

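This notebook demonstrates how to use the generative AI model for text generation. It covers loading a trained model (or initializing a new one from the configuration), generating text, comparing sampling temperatures, and visualizing next-token probabilities.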

In [None]:
import sys
import os
import torch
import yaml
import matplotlib.pyplot as plt
import numpy as np
from transformers import AutoTokenizer

# Add the project root directory to the Python path
sys.path.append(os.path.abspath('..'))

from src.models.model import GenerativeModel
from src.inference.predict import generate_text

## Load Configuration

In [None]:
def load_config(config_path):
    """Load configuration from a YAML file.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        The object parsed by ``yaml.safe_load``, or an empty dict when the
        file contains no YAML document (e.g. it is empty).
    """
    # Decode as UTF-8 explicitly instead of relying on the platform's default
    # text encoding, which is not UTF-8 on every system.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
    # safe_load yields None for an empty document; hand back an empty dict so
    # later key lookups fail with a KeyError naming the missing key rather
    # than a TypeError on None.
    return {} if config is None else config

# Parse the model settings; the bare name on the last line displays them as the cell output
model_config = load_config(config_path='../configs/model_config.yaml')
model_config

## Initialize Model

Here we'll initialize the model from a trained checkpoint. If no checkpoint is available, we'll initialize a new model from the configuration.

In [None]:
def load_model(model_path=None):
    """Load a trained model from checkpoint or initialize a new model.

    Args:
        model_path: Optional path to a checkpoint file. The file may hold
            either a dict with the weights under ``'model_state_dict'`` or a
            bare state dict.

    Returns:
        A ``GenerativeModel`` built from the notebook-level ``model_config``.
        Checkpoint weights are loaded into it only when ``model_path`` is set
        and the file exists; otherwise the model is returned as constructed.
    """
    model = GenerativeModel(model_config)

    if model_path and os.path.exists(model_path):
        print(f"Loading model from {model_path}")
        # NOTE(security): torch.load unpickles the file. Only open checkpoints
        # from a trusted source, or pass weights_only=True on torch versions
        # that support it.
        checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
        # Accept both a training checkpoint ({'model_state_dict': ...}) and a
        # bare state dict saved with torch.save(model.state_dict(), path).
        if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
            state_dict = checkpoint['model_state_dict']
        else:
            state_dict = checkpoint
        model.load_state_dict(state_dict)
    else:
        if model_path:
            # A checkpoint was requested but is missing: say so explicitly so
            # output from a model without those weights is not mistaken for
            # output from the trained one.
            print(f"Checkpoint not found at {model_path}")
        print("Initializing new model from configuration")

    return model

# Seed torch before anything random happens so a freshly initialized model and
# the sampling in later cells repeat under Restart & Run All
SEED = 42
torch.manual_seed(SEED)

# Try to load a trained model or initialize a new one
# (load_model itself falls back to a new model when the file does not exist)
model_path = '../models/model_final.pt'  # Update this path to your model
model = load_model(model_path)

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_config["model_name"])

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model.to(device)

# This notebook only runs inference: switch to evaluation mode so layers such
# as dropout do not add noise to generations and probabilities.
# NOTE(review): assumes GenerativeModel is a torch.nn.Module — confirm.
model.eval();

## Text Generation

Now let's generate some text using the model.

In [None]:
def generate(input_text, max_length=100, temperature=0.8, top_k=50, top_p=0.95):
    """Run ``generate_text`` on ``input_text`` with the notebook's model and tokenizer.

    Args:
        input_text: Prompt handed to ``generate_text``.
        max_length: Forwarded unchanged as ``max_length``.
        temperature: Forwarded unchanged as ``temperature``.
        top_k: Forwarded unchanged as ``top_k``.
        top_p: Forwarded unchanged as ``top_p``.

    Returns:
        Whatever ``generate_text`` returns for these arguments.
    """
    # Gather the decoding settings once, then forward them by keyword.
    decoding_options = dict(
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
    )
    return generate_text(model, tokenizer, input_text, **decoding_options)

# Prompt for the demo; later cells reuse this same variable
input_text = "The future of artificial intelligence is"

# Sample a continuation with the default decoding settings and print it
generated_text = generate(input_text=input_text)
print(f"Generated text:\n{generated_text}")

## Experimenting with Generation Parameters

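Let's vary the sampling temperature to see how it affects the output. The other generation parameters (`max_length`, `top_k`, `top_p`) can be explored the same way by passing them to `generate`.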

In [None]:
# Test different temperatures
temperatures = [0.5, 0.7, 0.9, 1.2]
results = []

for temp in temperatures:
    # Restart torch's random number generator before every run so that the
    # temperature is the only thing that differs between the outputs, and so
    # re-running this cell reproduces them.
    # NOTE(review): assumes generate_text samples via torch's global RNG — confirm.
    torch.manual_seed(42)
    generated = generate(input_text, temperature=temp)
    results.append(generated)
    print(f"\nTemperature: {temp}")
    print(f"Generated text: {generated}")

## Visualizing Token Probabilities

Let's visualize the probability distribution for the next token in a sequence.

In [None]:
def get_next_token_probabilities(input_text, top_n=10):
    """Get the most probable next tokens after ``input_text``.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        input_text: Prompt to encode and feed to the model.
        top_n: Maximum number of candidates to return. Values larger than the
            number of logits are clamped to that number.

    Returns:
        A ``(tokens, probs)`` tuple: ``tokens`` is a list of decoded token
        strings with surrounding whitespace stripped (so distinct tokens can
        produce the same string), and ``probs`` is a NumPy array with the
        matching probabilities in descending order.
    """
    # Encode the prompt and move every tensor to the model's device
    inputs = tokenizer(input_text, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Forward pass only; no gradients are needed for inspection
    with torch.no_grad():
        outputs = model.model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])

    # Logits of the first sequence at its last position. Upcast to float32 so
    # the softmax stays accurate for half-precision models and the result can
    # be converted to NumPy (which has no bfloat16 dtype).
    next_token_logits = outputs.logits[0, -1, :].float()

    # Apply softmax to get probabilities
    next_token_probs = torch.softmax(next_token_logits, dim=0)

    # torch.topk raises when k exceeds the dimension size, so clamp the request
    k = min(top_n, next_token_probs.size(0))
    topk_probs, topk_indices = torch.topk(next_token_probs, k)

    # Decode each id on its own so every candidate gets its own label
    topk_tokens = [tokenizer.decode([idx]).strip() for idx in topk_indices.tolist()]

    return topk_tokens, topk_probs.cpu().numpy()

# Get next token probabilities
tokens, probs = get_next_token_probabilities(input_text)

# Plot probabilities
# Place the bars at integer positions and attach the tokens as tick labels.
# Passing the strings directly as x values would draw tokens that share a
# label (possible once whitespace is stripped) on top of each other.
positions = list(range(len(tokens)))
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(positions, probs)
ax.set_xticks(positions)
ax.set_xticklabels(tokens, rotation=45)
ax.set_title(f'Next token probabilities for: "{input_text}"')
ax.set_xlabel('Token')
ax.set_ylabel('Probability')
fig.tight_layout()
plt.show()

## Conclusion

This notebook demonstrated how to use the generative AI model for text generation, how the sampling temperature affects the generated text, and how to inspect the model's next-token probability distribution. Further explorations could include fine-tuning the model on specific domains or testing different decoding strategies.