In [None]:
# Implementation of framework
import torch
import numpy as np
import matplotlib.pyplot as plt
from transformers import AutoModelForCausalLM, AutoTokenizer
class SemanticICLFramework:
    """Score attention heads of a causal LM by how strongly they link entity pairs.

    The output-projection weights are looked up under the state-dict key
    ``transformer.h.{i}.attn.c_proj.weight``, which is the GPT-2 parameter layout.
    """

    def __init__(self, model_name):
        """Initialize framework with pretrained model."""
        self.model_name = model_name
        self.model = AutoModelForCausalLM.from_pretrained(model_name, output_attentions=True)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Reuse EOS as the padding token so that ``padding=True`` can be used.
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def analyze_attention_heads(self, input_ids, attention_mask):
        """Run the model and collect attention maps plus per-layer projection weights.

        Args:
            input_ids: Token-id tensor passed to the model.
            attention_mask: Mask tensor passed to the model.

        Returns:
            ``(attentions, ov_circuits)`` where ``attentions`` is the model's
            ``outputs.attentions`` and ``ov_circuits`` holds one ``c_proj.weight``
            tensor per layer.

        Raises:
            KeyError: If the model's state dict has no GPT-2 style ``c_proj`` keys.
        """
        # Inference only: no autograd graph is needed for the analysis.
        with torch.no_grad():
            outputs = self.model(input_ids, attention_mask=attention_mask, output_attentions=True)
        attentions = outputs.attentions

        # Build the state dict once instead of once per layer.
        state = self.model.state_dict()
        ov_circuits = [
            state[f"transformer.h.{i}.attn.c_proj.weight"]
            for i in range(len(attentions))
        ]

        return attentions, ov_circuits

    def compute_relation_index(self, attentions, ov_circuits, triplets, input_ids):
        """Compute a min-max normalized relation score for every (layer, head).

        For each ``(head, relation, tail)`` triplet whose head and tail strings both
        occur in the decoded tokens of the first sequence, the score is the attention
        weight from the tail position to the head position, multiplied by the
        normalized projection of the head's weight row onto the tail token's
        output embedding. Scores are averaged over the matched triplets.

        Args:
            attentions: Sequence of per-layer attention tensors indexed as
                ``[batch, head, query, key]``.
            ov_circuits: Per-layer weight tensors as returned by
                ``analyze_attention_heads``.
            triplets: Iterable of ``(head, relation, tail)`` string triples.
            input_ids: Token-id tensor; only the first sequence is used.

        Returns:
            ``numpy.ndarray`` of shape ``(num_layers, num_heads)`` scaled to [0, 1].
            All zeros when every head has the same raw score (for example when no
            triplet could be located in the text).
        """
        num_layers = len(attentions)
        if num_layers == 0:
            return np.zeros((0, 0))
        num_heads = attentions[0].size(1)
        relation_indices = np.zeros((num_layers, num_heads))

        token_ids = input_ids[0].tolist()
        tokens = [self.tokenizer.decode(t) for t in token_ids]

        # Resolve every triplet to (head position, tail position) once; this does
        # not depend on the layer or the head. First substring match wins.
        pairs = []
        for head, _, tail in triplets:
            head_pos = next((i for i, t in enumerate(tokens) if head in t), None)
            tail_pos = next((i for i, t in enumerate(tokens) if tail in t), None)
            if head_pos is not None and tail_pos is not None:
                pairs.append((head_pos, tail_pos))

        if pairs:
            vocab_matrix = self.model.get_output_embeddings().weight
            with torch.no_grad():
                for layer_idx, layer_attn in enumerate(attentions):
                    for head_idx in range(num_heads):
                        attention_weights = layer_attn[0, head_idx]

                        # NOTE(review): this takes row ``head_idx`` of the layer's
                        # c_proj weight, not the block of rows belonging to that
                        # head; confirm this is the intended "OV circuit".
                        ov_influence = torch.matmul(ov_circuits[layer_idx][head_idx], vocab_matrix.T)
                        ov_influence = ov_influence / ov_influence.max()

                        # ov_influence has one entry per vocabulary item, so it must
                        # be indexed with the tail token's id, not its position.
                        head_scores = [
                            attention_weights[tail_pos, head_pos].item()
                            * ov_influence[token_ids[tail_pos]].item()
                            for head_pos, tail_pos in pairs
                        ]
                        relation_indices[layer_idx, head_idx] = np.mean(head_scores)

        # Min-max normalize to [0, 1]; a constant matrix has no range to scale by.
        lowest = relation_indices.min()
        value_range = relation_indices.max() - lowest
        if value_range == 0:
            return np.zeros_like(relation_indices)
        return (relation_indices - lowest) / value_range
# Usage example:
def visualize_relation_indices(framework, relation_indices):
    """Show the layer-by-head relation-index heatmap in the notebook output.

    ``framework`` is accepted by the signature but is not used here.
    """
    heatmap = create_heatmap_html(relation_indices)
    display(heatmap)


# First, import required libraries and create helper functions
from IPython.display import HTML
import json

def create_heatmap_html(relation_indices, title="Semantic Relationship Analysis"):
    """Build an HTML/JavaScript heatmap of per-head relation indices.

    Args:
        relation_indices: 2-D numpy array; rows are rendered as layers and
            columns as attention heads. Values are used directly as colour
            intensity, so they are expected to lie in [0, 1].
        title: Heading shown above the heatmap. It is HTML-escaped.

    Returns:
        ``IPython.display.HTML`` object wrapping the generated markup.
    """
    # Imported locally so the function is self-contained.
    from html import escape
    from uuid import uuid4

    # Convert numpy array to list for JSON serialization
    data = relation_indices.tolist()
    safe_title = escape(str(title))
    # A unique id per call keeps several heatmaps on one page from colliding.
    container_id = f"heatmap-{uuid4().hex}"

    # Create the HTML string
    markup = f"""
    <div style="width: 800px; font-family: Arial, sans-serif;">
        <div style="margin-bottom: 20px;">
            <h2 style="color: #333;">{safe_title}</h2>
            <p style="color: #666;">Analyzing how attention heads encode semantic relationships</p>
        </div>

        <div style="display: flex; gap: 20px;">
            <!-- Heatmap -->
            <div style="flex: 2; position: relative;">
                <div id="{container_id}" style="position: relative;">
                    <!-- Heatmap will be rendered here -->
                </div>
                <div style="margin-top: 10px; text-align: center; color: #666;">
                    <div>Attention Heads</div>
                </div>
                <div style="position: absolute; left: -30px; top: 50%; transform: rotate(-90deg); color: #666;">
                    Layers
                </div>
            </div>

            <!-- Explanation Panel -->
            <div style="flex: 1; background: #f5f5f5; padding: 15px; border-radius: 5px;">
                <h3 style="margin-top: 0;">How to Read This Visualization</h3>
                <ul style="padding-left: 20px; color: #444;">
                    <li>Each cell represents an attention head</li>
                    <li>Darker blue indicates stronger relationship encoding</li>
                    <li>Rows represent layers in the transformer</li>
                    <li>Columns show attention heads within each layer</li>
                </ul>

                <h3>Key Patterns</h3>
                <ul style="padding-left: 20px; color: #444;">
                    <li>Middle layers often encode more semantic relationships</li>
                    <li>Some heads specialize in specific relationships</li>
                    <li>Earlier layers capture simpler relationships</li>
                    <li>Later layers show more complex patterns</li>
                </ul>
            </div>
        </div>
    </div>

    <script>
        // Wrapped in a function so that rendering a second heatmap does not
        // redeclare `data` / `container` in the page's global scope.
        (function () {{
            const data = {json.dumps(data)};
            const container = document.getElementById('{container_id}');

            // Create grid of cells
            data.forEach((row, i) => {{
                const rowDiv = document.createElement('div');
                rowDiv.style.display = 'flex';
                rowDiv.style.height = '20px';

                row.forEach((value, j) => {{
                    const cell = document.createElement('div');
                    cell.style.width = '20px';
                    cell.style.height = '100%';
                    cell.style.backgroundColor = `rgb(${{Math.floor(255 * (1 - value))}}, ${{Math.floor(255 * (1 - value))}}, 255)`;
                    cell.style.border = '1px solid white';
                    cell.title = `Layer ${{i+1}}, Head ${{j+1}}: ${{value.toFixed(3)}}`;
                    rowDiv.appendChild(cell);
                }});

                container.appendChild(rowDiv);
            }});
        }})();
    </script>
    """
    return HTML(markup)



# Initialize framework with GPT2-XL
framework = SemanticICLFramework("gpt2-xl")

# Example semantic relationship triplets
examples = [
    {
        "text": "The pen is used for writing.",
        "triplets": [("pen", "Used-for", "writing")]
    },
    {
        "text": "A cat chases a mouse.",
        "triplets": [("cat", "Chases", "mouse")]
    }
]

relation_indices_list = []

# Process examples
for example in examples:
    # Tokenize input
    tokens = framework.tokenizer(example["text"],
                                return_tensors="pt",
                                truncation=True,
                                padding=True)

    # Get attention patterns
    attentions, ov_circuits = framework.analyze_attention_heads(
        tokens["input_ids"],
        tokens["attention_mask"]
    )

    # Compute relation indices
    relation_indices = framework.compute_relation_index(
        attentions,
        ov_circuits,
        example["triplets"],
        tokens["input_ids"]
    )

    relation_indices_list.append(relation_indices)

# Average results and visualize
mean_relation_indices = np.mean(relation_indices_list, axis=0)
# Plot the average over all examples; `relation_indices` alone only holds the
# scores of the last example processed by the loop above.
visualize_relation_indices(framework, mean_relation_indices)





In [67]:
import random
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
from dataclasses import dataclass
from typing import List, Dict
import torch
from IPython.display import HTML, display
from jinja2 import Template


@dataclass
class ModelConfig:
    """Settings consumed by ``ConceptLearner``."""
    model_name: str  # checkpoint identifier passed to ``from_pretrained``
    n_prefix_tokens: int = 10  # number of concept tokens added per task
    learning_rate: float = 1e-4  # stored only; no code in this cell reads it


class ConceptLearner:
    """Wraps a causal LM and its tokenizer and derives per-token attention scores."""

    def __init__(self, config: "ModelConfig"):
        """Load the model and tokenizer named by ``config.model_name``."""
        self.config = config
        self.model = AutoModelForCausalLM.from_pretrained(config.model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        self.model.resize_token_embeddings(len(self.tokenizer))

    def add_concept_tokens(self, tasks: List[str]):
        """
        Add new concept tokens for each task.

        ``config.n_prefix_tokens`` tokens named ``<{task}_token_{i}>`` are added to
        the tokenizer per task, and the embedding matrix is resized to match.
        """
        new_tokens = [f"<{task}_token_{i}>" for task in tasks for i in range(self.config.n_prefix_tokens)]
        self.tokenizer.add_tokens(new_tokens)
        self.model.resize_token_embeddings(len(self.tokenizer))

    def compute_token_contributions(self, text: str) -> List[Dict]:
        """
        Compute token contributions using self-attention scores aggregated across layers.

        For every token, the weight it places on itself (the diagonal of the
        attention map) is averaged over heads and layers, then normalized so the
        scores of one text sum to 1.

        Args:
            text: Input text; it is tokenized as a single sequence.

        Returns:
            One ``{"token": str, "score": float}`` dict per input token.
        """
        inputs = self.tokenizer(text, return_tensors="pt")
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = self.model(**inputs, output_attentions=True)

        # Aggregate self-attention contributions across all layers
        layer_contributions = [
            layer_att.mean(dim=1).diagonal(offset=0, dim1=-2, dim2=-1)  # Self-attention per token
            for layer_att in outputs.attentions
        ]  # List of tensors with shape [batch_size, seq_length]

        attention_importance = torch.stack(layer_contributions, dim=0).mean(dim=0)  # Aggregate across layers

        # Normalize scores for better interpretability
        normalized_importance = attention_importance / attention_importance.sum(dim=-1, keepdim=True)

        # Select the single batch row explicitly. ``squeeze()`` would also drop the
        # sequence axis for a one-token input and leave a scalar that cannot be zipped.
        token_ids = inputs["input_ids"][0].tolist()
        scores = normalized_importance[0].tolist()

        # Map tokens to scores
        tokens = self.tokenizer.convert_ids_to_tokens(token_ids)
        return [{"token": token, "score": float(score)} for token, score in zip(tokens, scores)]


class DemonstrationSelector:
    """Ranks candidate demonstrations by how likely the model finds their label."""

    def __init__(self, concept_learner: "ConceptLearner"):
        """Keep a reference to the learner whose model and tokenizer are used for scoring."""
        self.concept_learner = concept_learner

    def select_demonstrations(self, candidates: List[Dict], k: int) -> List[Dict]:
        """
        Select top-k demonstrations based on their scores.

        Args:
            candidates: Dicts with ``"input"`` and ``"output"`` keys.
            k: Number of demonstrations to keep.

        Returns:
            The ``k`` highest-scoring candidates, best first.
        """
        scores = [(self._compute_demo_score(c), c) for c in candidates]
        scores.sort(reverse=True, key=lambda x: x[0])
        return [c for _, c in scores[:k]]

    def _compute_demo_score(self, demo: Dict) -> float:
        """
        Score a demonstration by the next-token probability of its label tokens.

        The model reads ``demo["input"]``; the probabilities it assigns, at the
        last position, to each token of ``demo["output"]`` are averaged.

        Raises:
            ValueError: If the label yields no token id inside the model's
                output distribution.
        """
        tokenizer = self.concept_learner.tokenizer
        inputs = tokenizer(demo["input"], return_tensors="pt")
        # Only logits are needed; inference runs without autograd.
        with torch.no_grad():
            outputs = self.concept_learner.model(**inputs)

        # Extract logits and probabilities
        logits = outputs.logits[:, -1, :]  # Logits for the last token
        probs = torch.softmax(logits, dim=-1)

        # Tokenize the expected label
        label_tokens = tokenizer(demo["output"], add_special_tokens=False)["input_ids"]

        # Filter out-of-bounds tokens. The bound is the width of the distribution
        # itself: ``tokenizer.vocab_size`` does not count tokens added through
        # ``add_tokens``, which are valid indices once the embeddings are resized.
        n_outputs = probs.size(-1)
        label_tokens = [token_id for token_id in label_tokens if 0 <= token_id < n_outputs]

        if not label_tokens:
            raise ValueError(f"No valid tokens found for output: {demo['output']}")

        # Aggregate probabilities for all valid label tokens
        label_probs = [probs[0, token_id].item() for token_id in label_tokens]

        # Return the average probability for the label
        return sum(label_probs) / len(label_probs)

    def evaluate_demonstrations(self, test_set: List[Dict], selected_demos: List[Dict]):
        """
        Print the test examples; no metric is computed.

        ``selected_demos`` is accepted but not used.
        """
        print("Evaluating selected demonstrations on test set...")
        for example in test_set:
            print(f"Test Input: {example['input']}, Expected Output: {example['output']}")
        print("Evaluation complete.")


class DashboardVisualizer:
    """Renders demonstration scores and per-token contributions as HTML cards."""

    def __init__(self, demonstration_scores):
        """
        Initialize DashboardVisualizer with data for visualization.

        Args:
            demonstration_scores: List of dicts, each with a ``"score"`` and a
                ``"token_contributions"`` list of ``{"token", "score"}`` dicts.
        """
        self.demonstration_scores = demonstration_scores  # Scores for all demonstrations

    def create_dashboard(self):
        """
        Render the dashboard and display it in the notebook output.

        Token text comes from a tokenizer and may contain ``<``, ``>`` or ``&``,
        so the template is rendered with autoescaping enabled.
        """
        html_template = '''
        <div style="font-family: Arial, sans-serif; max-width: 900px; margin: 20px auto;">
            <style>
                .section-title {
                    font-weight: bold;
                    margin: 10px 0;
                    color: #2c5282;
                    font-size: 20px;
                }
                .demonstration-card {
                    margin: 10px 0;
                    padding: 15px;
                    border-radius: 8px;
                    background-color: #f8f9fa;
                    border: 1px solid #ddd;
                }
                .demonstration-header {
                    font-size: 16px;
                    font-weight: bold;
                    color: #2c5282;
                    margin-bottom: 10px;
                }
                .token-container {
                    margin-top: 10px;
                    display: flex;
                    flex-wrap: wrap;
                }
                .token {
                    display: inline-block;
                    margin: 5px;
                    padding: 5px 8px;
                    border-radius: 4px;
                    font-size: 14px;
                    position: relative;
                    cursor: pointer;
                    background-color: rgb(255, 255, 255);
                }
                .token[data-score] {
                    background-color: rgba(255, 69, 0, calc(var(--score) * 0.8 + 0.2));
                    color: black;
                }
                .token:hover {
                    background-color: rgba(255, 0, 0, 1);
                }
                .token:hover .tooltip {
                    display: block;
                }
                .tooltip {
                    display: none;
                    position: absolute;
                    top: -30px;
                    left: 50%;
                    transform: translateX(-50%);
                    background-color: #333;
                    color: white;
                    padding: 5px 8px;
                    border-radius: 4px;
                    font-size: 12px;
                    z-index: 10;
                }
            </style>
            
            <div class="section-title">Selected Demonstrations and Token Contributions</div>
            <div id="demonstration-container">
                {% for demo in demonstration_scores %}
                <div class="demonstration-card">
                    <div class="demonstration-header">Demonstration Score: {{ "%.3f"|format(demo.score) }}</div>
                    <div>
                        <b>Token Contributions:</b>
                        <div class="token-container">
                            {% for token in demo.token_contributions %}
                            <div class="token" style="--score: {{ token.score }}" data-score="{{ token.score }}">
                                {{ token.token }}
                                <span class="tooltip">Score: {{ "%.2f"|format(token.score) }}</span>
                            </div>
                            {% endfor %}
                        </div>
                    </div>
                </div>
                {% endfor %}
            </div>
        </div>
        '''
        # autoescape=True HTML-escapes every substituted value.
        template = Template(html_template, autoescape=True)
        rendered_html = template.render(
            demonstration_scores=self.demonstration_scores,
        )
        display(HTML(rendered_html))



# Fix the random seeds before any model code runs.
set_seed(42)

# Configuration
config = ModelConfig(model_name="gpt2", n_prefix_tokens=5, learning_rate=1e-3)
concept_learner = ConceptLearner(config)
demonstration_selector = DemonstrationSelector(concept_learner)

# Example Demonstrations
candidates = [
    {"input": "The movie was great.", "output": "positive"},
    {"input": "The book was boring.", "output": "negative"},
    {"input": "The article explained AI clearly.", "output": "informative"},
    {"input": "The novel was thrilling.", "output": "exciting"},
]

# Test Set
test_set = [
    {"input": "The film was engaging.", "output": "positive"},
    {"input": "The explanation was dry.", "output": "negative"},
]

# Select Demonstrations
selected_demos = demonstration_selector.select_demonstrations(candidates, k=2)

# Evaluate Selected Demonstrations
# (the selector defined in this cell only prints the test examples)
demonstration_selector.evaluate_demonstrations(test_set, selected_demos)

# Compute Token Contributions and Scores Dynamically
# Each selected demonstration is scored again here, so the model runs a second
# time per demonstration.
demonstration_scores = []
for demo in selected_demos:
    token_contributions = concept_learner.compute_token_contributions(demo["input"])
    demo_score = demonstration_selector._compute_demo_score(demo)
    demonstration_scores.append({"score": demo_score, "token_contributions": token_contributions})

# Create and Display Dashboard
dashboard = DashboardVisualizer(demonstration_scores=demonstration_scores)
dashboard.create_dashboard()


#


Evaluating selected demonstrations on test set...
Test Input: The film was engaging., Expected Output: positive
Test Input: The explanation was dry., Expected Output: negative
Evaluation complete.


In [72]:
import random
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
from dataclasses import dataclass
from typing import List, Dict
import torch
from IPython.display import HTML, display
from jinja2 import Template


@dataclass
class ModelConfig:
    """Settings consumed by ``ConceptLearner``."""
    model_name: str  # checkpoint identifier passed to ``from_pretrained``
    n_prefix_tokens: int = 10  # number of concept tokens added per task
    learning_rate: float = 1e-4  # stored only; no code in this cell reads it


class ConceptLearner:
    """Wraps a causal LM and its tokenizer and derives per-token attention scores."""

    def __init__(self, config: "ModelConfig"):
        """Load the model (with hidden states and attentions enabled) and its tokenizer."""
        self.config = config
        self.model = AutoModelForCausalLM.from_pretrained(
            config.model_name, output_hidden_states=True, output_attentions=True
        )
        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        self.model.resize_token_embeddings(len(self.tokenizer))

    def add_concept_tokens(self, tasks: List[str]):
        """
        Add new concept tokens for each task.

        ``config.n_prefix_tokens`` tokens named ``<{task}_token_{i}>`` are added to
        the tokenizer per task, and the embedding matrix is resized to match.
        """
        new_tokens = [f"<{task}_token_{i}>" for task in tasks for i in range(self.config.n_prefix_tokens)]
        self.tokenizer.add_tokens(new_tokens)
        self.model.resize_token_embeddings(len(self.tokenizer))

    def compute_token_contributions(self, text: str) -> List[Dict]:
        """
        Score each token by the attention it receives from all positions.

        Per layer, attention maps are averaged over heads and summed over the
        query axis, giving the total attention each token receives. Layers are
        combined with linearly increasing weights (0.1 for the first layer, 1.0
        for the last) and the result is normalized to sum to 1.

        Args:
            text: Input text; it is tokenized as a single sequence.

        Returns:
            One ``{"token": str, "score": float}`` dict per input token.
        """
        inputs = self.tokenizer(text, return_tensors="pt")
        # Inference only; attentions are requested explicitly so the method does
        # not depend on how the model was configured at load time.
        with torch.no_grad():
            outputs = self.model(**inputs, output_attentions=True)

        # Tuple with one [batch_size, num_heads, seq_length, seq_length] tensor per layer.
        attentions = outputs.attentions

        # Attention received per token: sum over the query axis (dim=1 after the
        # head average). Summing over the key axis instead adds up a softmax row,
        # which is 1 for every token and carries no information.
        layer_contributions = [
            layer_att.mean(dim=1).sum(dim=1)
            for layer_att in attentions
        ]  # List of [batch_size, seq_length] tensors

        # Aggregate across layers with task-specific weighting
        num_layers = len(attentions)
        layer_weights = torch.linspace(0.1, 1.0, steps=num_layers).to(layer_contributions[0].device)
        weighted_contributions = torch.stack(layer_contributions, dim=0) * layer_weights[:, None, None]
        token_contributions = weighted_contributions.sum(dim=0)  # Shape: [batch_size, seq_length]

        # Normalize contributions
        normalized_contributions = token_contributions / token_contributions.sum(dim=1, keepdim=True)

        # Select the single batch row explicitly; ``squeeze()`` would also drop the
        # sequence axis for a one-token input.
        token_ids = inputs["input_ids"][0].tolist()
        scores = normalized_contributions[0].tolist()

        # Map tokens to scores
        tokens = self.tokenizer.convert_ids_to_tokens(token_ids)
        return [{"token": token, "score": float(score)} for token, score in zip(tokens, scores)]


class DemonstrationSelector:
    """Ranks candidate demonstrations by input/label similarity in hidden-state space."""

    def __init__(self, concept_learner: "ConceptLearner"):
        """Keep a reference to the learner whose model and tokenizer are used for scoring."""
        self.concept_learner = concept_learner

    def _mean_last_hidden(self, text: str) -> torch.Tensor:
        """Return the last layer's hidden states for ``text``, averaged over tokens."""
        inputs = self.concept_learner.tokenizer(text, return_tensors="pt")
        # Request hidden states explicitly so scoring does not depend on how the
        # model was loaded; inference runs without autograd.
        with torch.no_grad():
            outputs = self.concept_learner.model(**inputs, output_hidden_states=True)
        return outputs.hidden_states[-1].mean(dim=1)

    def _compute_demo_score(self, demo: Dict) -> float:
        """
        Compute demonstration score using latent variable alignment.

        The score is the cosine similarity between the mean last-layer hidden
        state of ``demo["input"]`` and that of ``demo["output"]``.
        """
        input_embedding = self._mean_last_hidden(demo["input"])

        # Task-specific embedding (latent variable representation)
        task_embedding = self._get_task_embedding(demo["output"])

        # Compute alignment score (cosine similarity)
        return torch.cosine_similarity(input_embedding, task_embedding, dim=-1).item()

    def _get_task_embedding(self, task_label: str) -> torch.Tensor:
        """
        Generate task-specific embedding (e.g., using the output label).
        """
        return self._mean_last_hidden(task_label)  # Aggregate embedding for the task

    def select_demonstrations(self, candidates: List[Dict], k: int) -> List[Dict]:
        """
        Select top-k demonstrations based on their scores.

        Args:
            candidates: Dicts with ``"input"`` and ``"output"`` keys.
            k: Number of demonstrations to keep.

        Returns:
            The ``k`` highest-scoring candidates, best first.
        """
        scores = [(self._compute_demo_score(c), c) for c in candidates]
        scores.sort(reverse=True, key=lambda x: x[0])
        return [c for _, c in scores[:k]]


class DashboardVisualizer:
    """Renders demonstration scores and per-token contributions as HTML cards."""

    def __init__(self, demonstration_scores):
        """
        Initialize DashboardVisualizer with data for visualization.

        Args:
            demonstration_scores: List of dicts, each with a ``"score"`` and a
                ``"token_contributions"`` list of ``{"token", "score"}`` dicts.
        """
        self.demonstration_scores = demonstration_scores  # Scores for all demonstrations

    def create_dashboard(self):
        """
        Render the dashboard and display it in the notebook output.

        Token text comes from a tokenizer and may contain ``<``, ``>`` or ``&``,
        so the template is rendered with autoescaping enabled.
        """
        html_template = '''
        <div style="font-family: Arial, sans-serif; max-width: 900px; margin: 20px auto;">
            <style>
                .section-title {
                    font-weight: bold;
                    margin: 10px 0;
                    color: #2c5282;
                    font-size: 20px;
                }
                .demonstration-card {
                    margin: 10px 0;
                    padding: 15px;
                    border-radius: 8px;
                    background-color: #f8f9fa;
                    border: 1px solid #ddd;
                }
                .demonstration-header {
                    font-size: 16px;
                    font-weight: bold;
                    color: #2c5282;
                    margin-bottom: 10px;
                }
                .token-container {
                    margin-top: 10px;
                    display: flex;
                    flex-wrap: wrap;
                }
                .token {
                    display: inline-block;
                    margin: 5px;
                    padding: 5px 8px;
                    border-radius: 4px;
                    font-size: 14px;
                    position: relative;
                    cursor: pointer;
                    background-color: rgb(255, 255, 255);
                }
                .token[data-score] {
                    background-color: rgba(255, 69, 0, calc(var(--score) * 0.8 + 0.2));
                    color: black;
                }
                .token:hover {
                    background-color: rgba(255, 0, 0, 1);
                }
                .token:hover .tooltip {
                    display: block;
                }
                .tooltip {
                    display: none;
                    position: absolute;
                    top: -30px;
                    left: 50%;
                    transform: translateX(-50%);
                    background-color: #333;
                    color: white;
                    padding: 5px 8px;
                    border-radius: 4px;
                    font-size: 12px;
                    z-index: 10;
                }
            </style>
            
            <div class="section-title">Selected Demonstrations and Token Contributions</div>
            <div id="demonstration-container">
                {% for demo in demonstration_scores %}
                <div class="demonstration-card">
                    <div class="demonstration-header">Demonstration Score: {{ "%.3f"|format(demo['score']) }}</div>
                    <div>
                        <b>Token Contributions:</b>
                        <div class="token-container">
                            {% for token in demo['token_contributions'] %}
                            <div class="token" style="--score: {{ token['score'] }}" data-score="{{ token['score'] }}">
                                {{ token['token'] }}
                                <span class="tooltip">Score: {{ "%.2f"|format(token['score']) }}</span>
                            </div>
                            {% endfor %}
                        </div>
                    </div>
                </div>
                {% endfor %}
            </div>
        </div>
        '''
        # autoescape=True HTML-escapes every substituted value.
        template = Template(html_template, autoescape=True)
        rendered_html = template.render(
            demonstration_scores=self.demonstration_scores,
        )
        display(HTML(rendered_html))


# Seed for reproducibility
set_seed(42)

# Configuration
config = ModelConfig(model_name="gpt2", n_prefix_tokens=5, learning_rate=1e-3)
concept_learner = ConceptLearner(config)
demonstration_selector = DemonstrationSelector(concept_learner)

# Example Demonstrations
candidates = [
    {"input": "The movie was great.", "output": "positive"},
    {"input": "The book was boring.", "output": "negative"},
    {"input": "The article explained AI clearly.", "output": "informative"},
    {"input": "The novel was thrilling.", "output": "exciting"},
]

# Test Set
# NOTE(review): test_set is defined but not used anywhere in this cell.
test_set = [
    {"input": "The film was engaging.", "output": "positive"},
    {"input": "The explanation was dry.", "output": "negative"},
]

# Select Demonstrations
selected_demos = demonstration_selector.select_demonstrations(candidates, k=2)

# Compute Token Contributions and Scores Dynamically
# Each selected demonstration is scored again here, so the model runs a second
# time per demonstration.
demonstration_scores = []
for demo in selected_demos:
    token_contributions = concept_learner.compute_token_contributions(demo["input"])
    demo_score = demonstration_selector._compute_demo_score(demo)
    demonstration_scores.append({"score": demo_score, "token_contributions": token_contributions})

# Create and Display Dashboard
dashboard = DashboardVisualizer(demonstration_scores=demonstration_scores)
dashboard.create_dashboard()


In [74]:
class DemonstrationSelector:
    """Ranks candidate demonstrations by the log-likelihood of their label given their input."""

    def __init__(self, concept_learner: "ConceptLearner"):
        """Keep a reference to the learner whose model and tokenizer are used for scoring."""
        self.concept_learner = concept_learner

    def select_demonstrations(self, candidates: List[Dict], k: int):
        """
        Select top-k demonstrations based on model scores.

        Args:
            candidates: Dicts with ``"input"`` and ``"output"`` keys.
            k: Number of demonstrations to keep.

        Returns:
            The ``k`` highest-scoring candidates, best first.
        """
        scores = [(self._compute_concept_score(c), c) for c in candidates]
        scores.sort(reverse=True, key=lambda x: x[0])
        return [c for _, c in scores[:k]]

    def _compute_concept_score(self, candidate: Dict):
        """
        Compute demonstration relevance score using log probabilities.

        The input and output token ids are concatenated and run through the model
        once; the score is the mean log-probability the model assigns to each
        output token given everything before it.

        Returns:
            Average log-probability (a float <= 0; higher is better).

        Raises:
            ValueError: If the input or the output tokenizes to zero tokens.
        """
        tokenizer = self.concept_learner.tokenizer
        input_ids = tokenizer(candidate["input"], return_tensors="pt")["input_ids"]
        target_ids = tokenizer(candidate["output"], return_tensors="pt")["input_ids"]

        n_input = input_ids.size(1)
        if n_input == 0 or target_ids.size(1) == 0:
            raise ValueError("Both 'input' and 'output' must contain at least one token")

        full_ids = torch.cat([input_ids, target_ids], dim=1)
        with torch.no_grad():
            logits = self.concept_learner.model(input_ids=full_ids).logits

        # The logits at position t predict token t + 1, so the output tokens are
        # predicted by positions n_input - 1 up to the second-to-last position.
        prediction_logits = logits[0, n_input - 1 : -1, :]
        log_probs = torch.log_softmax(prediction_logits, dim=-1)
        target_log_probs = log_probs.gather(1, target_ids[0].unsqueeze(1)).squeeze(1)
        return target_log_probs.mean().item()  # Average log probabilities

    def evaluate_demonstrations(self, test_set: List[Dict], selected_demos: List[Dict]):
        """
        Evaluate selected demonstrations on a test set.

        For every test example a prompt is built from the selected demonstrations
        (one ``"<input> <output>"`` line each) followed by the test input, and the
        model's language-modelling loss on that prompt is recorded.

        Returns:
            The loss averaged over the test set (also printed).

        Raises:
            ValueError: If ``test_set`` is empty.
        """
        if not test_set:
            raise ValueError("test_set must not be empty")

        tokenizer = self.concept_learner.tokenizer
        # The tokenizer needs text, so each demonstration dict is flattened to a line.
        demo_lines = [f"{demo['input']} {demo['output']}" for demo in selected_demos]

        total_loss = 0.0
        for example in test_set:
            prompt = "\n".join(demo_lines + [example["input"]])
            inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
            with torch.no_grad():
                outputs = self.concept_learner.model(**inputs, labels=inputs["input_ids"])
            total_loss += outputs.loss.item()

        avg_loss = total_loss / len(test_set)
        print(f"Evaluation Loss: {avg_loss}")
        return avg_loss


class DashboardVisualizer:
    """Renders demonstration scores and per-token contributions as HTML cards."""

    def __init__(self, demonstration_scores):
        """
        Initialize DashboardVisualizer with data for visualization.

        Args:
            demonstration_scores: List of dicts, each with a ``"score"`` and a
                ``"token_contributions"`` list of ``{"token", "score"}`` dicts.
        """
        self.demonstration_scores = demonstration_scores  # Scores for all demonstrations

    def create_dashboard(self):
        """
        Render the dashboard and display it in the notebook output.

        Token text may contain ``<``, ``>`` or ``&``, so the template is rendered
        with autoescaping enabled.
        """
        html_template = '''
        <div style="font-family: Arial, sans-serif; max-width: 900px; margin: 20px auto;">
            <style>
                .section-title {
                    font-weight: bold;
                    margin: 10px 0;
                    color: #2c5282;
                    font-size: 20px;
                }
                .demonstration-card {
                    margin: 10px 0;
                    padding: 15px;
                    border-radius: 8px;
                    background-color: #f8f9fa;
                    border: 1px solid #ddd;
                }
                .demonstration-header {
                    font-size: 16px;
                    font-weight: bold;
                    color: #2c5282;
                    margin-bottom: 10px;
                }
                .token-container {
                    margin-top: 10px;
                    display: flex;
                    flex-wrap: wrap;
                }
                .token {
                    display: inline-block;
                    margin: 5px;
                    padding: 5px 8px;
                    border-radius: 4px;
                    font-size: 14px;
                    position: relative;
                    cursor: pointer;
                    background-color: rgb(255, 255, 255);
                }
                .token[data-score] {
                    background-color: rgba(255, 69, 0, calc(var(--score) * 0.8 + 0.2));
                    color: black;
                }
                .token:hover {
                    background-color: rgba(255, 0, 0, 1);
                }
                .token:hover .tooltip {
                    display: block;
                }
                .tooltip {
                    display: none;
                    position: absolute;
                    top: -30px;
                    left: 50%;
                    transform: translateX(-50%);
                    background-color: #333;
                    color: white;
                    padding: 5px 8px;
                    border-radius: 4px;
                    font-size: 12px;
                    z-index: 10;
                }
            </style>
            
            <div class="section-title">Selected Demonstrations and Token Contributions</div>
            <div id="demonstration-container">
                {% for demo in demonstration_scores %}
                <div class="demonstration-card">
                    <div class="demonstration-header">Demonstration Score: {{ "%.3f"|format(demo.score) }}</div>
                    <div>
                        <b>Token Contributions:</b>
                        <div class="token-container">
                            {% for token in demo.token_contributions %}
                            <div class="token" style="--score: {{ token.score }}" data-score="{{ token.score }}">
                                {{ token.token }}
                                <span class="tooltip">Score: {{ "%.2f"|format(token.score) }}</span>
                            </div>
                            {% endfor %}
                        </div>
                    </div>
                </div>
                {% endfor %}
            </div>
        </div>
        '''
        # autoescape=True HTML-escapes every substituted value.
        template = Template(html_template, autoescape=True)
        rendered_html = template.render(
            demonstration_scores=self.demonstration_scores,
        )
        display(HTML(rendered_html))


def main():
    """Select two demonstrations with the model and show them with mock scores.

    Selection uses the real model. The per-token contributions and the
    demonstration score shown on the dashboard are random placeholders drawn
    with ``random.uniform``.
    """
    set_seed(42)

    # Configuration
    learner = ConceptLearner(
        ModelConfig(model_name="gpt2", n_prefix_tokens=5, learning_rate=1e-4)
    )
    selector = DemonstrationSelector(learner)

    # Example Demonstrations
    candidates = [
        {"input": "The movie was great.", "output": "positive"},
        {"input": "The book was boring.", "output": "negative"},
        {"input": "The article explained AI clearly.", "output": "informative"},
        {"input": "The novel was thrilling.", "output": "exciting"},
    ]

    # Mock Token Contributions for each selected Demonstration
    demonstration_scores = []
    for demo in selector.select_demonstrations(candidates, k=2):
        # Whitespace-split words stand in for tokens; one random score per word.
        token_contributions = []
        for word in demo["input"].split():
            token_contributions.append({"token": word, "score": random.uniform(0, 1)})
        # The demonstration score is drawn after the per-word scores.
        mock_score = random.uniform(0.5, 1)
        demonstration_scores.append({"score": mock_score, "token_contributions": token_contributions})

    # Create and Display Dashboard
    DashboardVisualizer(demonstration_scores=demonstration_scores).create_dashboard()


# Run the main example
# (loads the model named in main(), scores the candidates and renders the dashboard)
main()


In [26]:
demo['input']

'The movie was great.'

In [21]:
token_contributions

[{'token': 'The', 'score': 1.0},
 {'token': 'Ġmovie', 'score': 0.9999999403953552},
 {'token': 'Ġwas', 'score': 1.0},
 {'token': 'Ġgreat', 'score': 1.0},
 {'token': '.', 'score': 1.0}]

In [15]:
# Mock Data for Demonstration Scores with Token Contributions
# Hand-written values in the shape DashboardVisualizer expects: one dict per
# demonstration with a "score" and a list of {"token", "score"} dicts.
demonstration_scores = [
    {
        "score": 0.89,
        "token_contributions": [
            {"token": "The", "score": 0.4},
            {"token": "movie", "score": 0.8},
            {"token": "was", "score": 0.5},
            {"token": "great", "score": 0.9},
        ],
    },
    {
        "score": 0.76,
        "token_contributions": [
            {"token": "The", "score": 0.3},
            {"token": "book", "score": 0.7},
            {"token": "was", "score": 0.4},
            {"token": "boring", "score": 0.6},
        ],
    },
]

# Create and Display Dashboard
dashboard = DashboardVisualizer(
    demonstration_scores=demonstration_scores,
)
dashboard.create_dashboard()


In [None]:
# Mock Data for Demonstration Scores with Token Contributions
demonstration_scores = [
    {
        "score": 0.89,
        "token_contributions": [
            {"token": "The", "score": 0.4},
            {"token": "movie", "score": 0.8},
            {"token": "was", "score": 0.5},
            {"token": "great", "score": 0.9},
        ],
    },
    {
        "score": 0.76,
        "token_contributions": [
            {"token": "The", "score": 0.3},
            {"token": "book", "score": 0.7},
            {"token": "was", "score": 0.4},
            {"token": "boring", "score": 0.6},
        ],
    },
]

# Create and Display Dashboard
# DashboardVisualizer only accepts `demonstration_scores`; token contributions
# are carried inside each demonstration dict, so no separate argument is passed.
dashboard = DashboardVisualizer(
    demonstration_scores=demonstration_scores,
)
dashboard.create_dashboard()
