In [106]:
# Reload the strategy module before importing the class from it, so that the
# VoiceInHeadStrategy name bound below comes from the freshly reloaded module.
import importlib
from pipeline import voice_in_head_strategy
importlib.reload(voice_in_head_strategy)
from pipeline.voice_in_head_strategy import VoiceInHeadStrategy

from pipeline.rollout_generator import RolloutGenerator

from pipeline.intervention_inserter import InterventionInserter

# Wire the pipeline together: the inserter is constructed around the strategy,
# and the generator is capped at 10000 tokens.
strategy = VoiceInHeadStrategy()
inserter = InterventionInserter(strategy)
rollout_generator = RolloutGenerator(max_tokens = 10000)
# User prompt and a hand-written reasoning trace (wrapped in <think> tags) to intervene on.
prompt = "What are some fun things to do in London?"
rollout = "<think> Okay, so I need to come up with some fun things to do in London. Let me think... I know London is a big city with lots of attractions. First, I remember the British Museum is a must-see. It has so many artifacts from around the world. Then there's the Tower of London, which is a historic site with the Crown Jewels. Wait, is that right? Yeah, the Tower of London is famous for the Crown Jewels and the Tower Bridge nearby. Oh, and the London Dungeons is a fun and spooky attraction where you can experience the darker side of London's history with interactive exhibits and spooky tours. It's a great option for those looking for a bit of thrills and chills. Don't forget to check out the London Eye for panoramic views of the city from high above the Thames.</think>"

# Text passed to the inserter as `intervention_text`.
goal_intervention = "Go for a day trip to Croydon."

# `apply` returns a pair; only the first element (the intervened text) is used in this cell.
intervened_text, suggested_interventions = inserter.apply(rollout=rollout, intervention_text=goal_intervention, prompt=prompt)
# Take the first element of the generator's result.
# NOTE(review): the second argument (1) is presumably the number of rollouts to generate -- confirm against RolloutGenerator.generate.
continued_output = rollout_generator.generate(intervened_text, 1)[0]
display(continued_output)

# Sentence embeddings

In [14]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Checkpoint loaded when the caller does not supply a model.
_DEFAULT_EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'

# Lazily filled cache so the default model is constructed at most once per
# definition of this cell, rather than on every compute_similarity call.
_embedding_model_cache = {}


def _get_default_embedding_model():
    """Return the shared default SentenceTransformer, constructing it on first use."""
    if _DEFAULT_EMBEDDING_MODEL_NAME not in _embedding_model_cache:
        _embedding_model_cache[_DEFAULT_EMBEDDING_MODEL_NAME] = SentenceTransformer(
            _DEFAULT_EMBEDDING_MODEL_NAME
        )
    return _embedding_model_cache[_DEFAULT_EMBEDDING_MODEL_NAME]


def compute_similarity(text1: str, text2: str, model=None) -> float:
    """
    Compute cosine similarity between two texts using sentence embeddings.

    Args:
        text1: First text to compare
        text2: Second text to compare
        model: Optional encoder exposing ``encode(list_of_texts)`` that returns
            one embedding vector per text. When omitted, a cached
            SentenceTransformer for 'sentence-transformers/all-MiniLM-L6-v2'
            is used.

    Returns:
        Cosine similarity score in the range [-1, 1]. Returns 0.0 when either
        embedding has zero norm, since the cosine is undefined in that case.
    """
    if model is None:
        model = _get_default_embedding_model()

    # Encode both texts in a single call; index 0 is text1, index 1 is text2.
    embeddings = model.encode([text1, text2])
    first = np.asarray(embeddings[0])
    second = np.asarray(embeddings[1])

    # Guard the division: a zero-norm vector would otherwise yield NaN.
    norm_product = np.linalg.norm(first) * np.linalg.norm(second)
    if norm_product == 0:
        return 0.0

    return float(np.dot(first, second) / norm_product)

# Example usage: score a short instruction (text1) against a longer
# reasoning excerpt (text2) and print the result to four decimal places.
text1 = "Use a python script to solve this"
text2 = """squared is 4,000,000. So, the square root of 1,434,234 must be between 1000 and 2000. Let me narrow it down further. Let's try 1200 squared: 1,440,000. Oh, that's pretty close. So, 1200 squared is 1,440,000, which"""
similarity_score = compute_similarity(text1, text2)
print(f"Similarity between texts: {similarity_score:.4f}")


Similarity between texts: 0.1240


In [15]:
# A second excerpt, this one explicitly mentioning a Python script, compared against text1.
# text1 and compute_similarity are not defined in this cell; they must already exist in the kernel.
text3 = """ squared is 4,000,000. Wait, 1434234 is between those two, so the square root must be between 1000 and 2000. Let me narrow it down more. Maybe I can use a calculator or a more systematic approach. Alternatively, I can use a Python script to compute it accurately. Let me write a simple Python code snippet to calculate the square root of 143423"""
# Bare last expression: the score is shown as the cell's output.
compute_similarity(text1, text3)

0.15942536294460297

In [None]:
# Smoke-check VoiceInHeadStrategy._select_best_intervention on two candidate excerpts.
from pipeline.voice_in_head_strategy import VoiceInHeadStrategy

# Reload the strategy module, then re-import the class so the name is bound
# to the definition from the reloaded module.
import importlib
from pipeline import voice_in_head_strategy
importlib.reload(voice_in_head_strategy)
from pipeline.voice_in_head_strategy import VoiceInHeadStrategy

# Fresh strategy instance built from the reloaded class.
strategy = VoiceInHeadStrategy()

# text1 is the target; text2 and text3 are the candidates to choose between.
text1 = "Use a python script to solve this"
text2 = """squared is 4,000,000. So, the square root of 1,434,234 must be between 1000 and 2000. Let me narrow it down further. Let's try 1200 squared: 1,440,000. Oh, that's pretty close. So, 1200 squared is 1,440,000, which"""
text3 = """ squared is 4,000,000. Wait, 1434234 is between those two, so the square root must be between 1000 and 2000. Let me narrow it down more. Maybe I can use a calculator or a more systematic approach. Alternatively, I can use a Python script to compute it accurately. Let me write a simple Python code snippet to calculate the square root of 143423"""

candidates = [text2, text3]
selected = strategy._select_best_intervention(text1, candidates)

# Emit the banner and a preview (first 200 characters plus "...") as
# newline-separated lines from a single print call.
print(
    "\n" + "=" * 80,
    "SELECTED INTERVENTION:",
    "=" * 80,
    selected[:200] + "...",
    sep="\n",
)
