In [1]:
import time, sys, os
import concurrent.futures
import numpy as np
import re
from datetime import datetime
from typing import Optional, List, Dict, Any, Tuple
from openai import OpenAI


class OnlineLM:
    """Online language model served through an OpenAI-compatible chat API.

    The client is pointed at the DeepInfra endpoint and authenticated with the
    ``DEEPINFRA_TOKEN`` environment variable.

    Args:
        model_name: Model identifier sent with every request.
        **kwargs: Optional settings. ``temperature`` (default ``0.7``) is the
            sampling temperature; negative values are sent as ``0``.

    Raises:
        ValueError: If ``DEEPINFRA_TOKEN`` is not set.
    """

    def __init__(self, model_name: str, **kwargs):
        self.model = model_name  # Model identifier passed to the API
        self.temperature = kwargs.get("temperature", 0.7)  # Default temperature
        self._initialize()

    def _initialize(self):
        """Create the OpenAI client bound to the DeepInfra endpoint."""
        if "DEEPINFRA_TOKEN" not in os.environ:
            raise ValueError("DEEPINFRA_TOKEN environment variable is not set")

        self.openai = OpenAI(
            api_key=os.environ["DEEPINFRA_TOKEN"],
            base_url="https://api.deepinfra.com/v1/openai",
        )

    def _fetch_response(self, message_data) -> str:
        """Request one chat completion and return its text.

        Args:
            message_data: ``(messages, max_tokens)`` tuple, where ``messages``
                is the chat history to send.

        Returns:
            The completion text. This method always returns a ``str``: a
            missing (``None``) completion becomes ``""`` and a failed request
            becomes ``"Error: <exception message>"``, so one bad request does
            not abort a whole batch and callers can concatenate the result.
        """
        messages, max_tokens = message_data

        try:
            chat_completion = self.openai.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=max_tokens,
                # Negative temperatures are clamped to 0.
                temperature=self.temperature if self.temperature > 0 else 0,
            )
            content = chat_completion.choices[0].message.content
        except Exception as e:
            # Best-effort: report the failure in-band, as a plain string
            # (not a tuple), so it has the same type as a successful result.
            return f"Error: {str(e)}"

        return content if content is not None else ""

    def generate(self,
                input_messages: List[Dict[str, Any]],
                max_new_tokens: int = 100,
                repeat_input: bool = False) -> Tuple[List[Any], Any]:
        """Generate one completion per conversation, issuing requests in parallel.

        Args:
            input_messages: Batch of conversations; each item is the list of
                chat messages sent as one request.
            max_new_tokens: Token limit passed to the API for every request.
            repeat_input: If true, each result is a copy of the input
                conversation whose last message has the completion appended to
                its ``content``. The input conversations are not modified.
                If false, each result is the completion text.

        Returns:
            ``(results, metadata)`` where ``results`` is in the same order as
            ``input_messages`` and ``metadata`` is
            ``{"model": <model name>, "online": True}``.
        """
        # Materialize once so the batch can be iterated for both the requests
        # and the result assembly, whatever sequence type the caller passed.
        conversations = list(input_messages)
        request_data = [(messages, max_new_tokens) for messages in conversations]

        # executor.map preserves input order, so results line up with inputs.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            api_results = list(executor.map(self._fetch_response, request_data))

        if repeat_input:
            results = []
            for messages, content in zip(conversations, api_results):
                # Copy the list and its last message so the caller's data is
                # left untouched when the completion is appended.
                new_message = list(messages)
                new_message[-1] = new_message[-1].copy()
                new_message[-1]['content'] += content
                results.append(new_message)
        else:
            results = api_results

        # For API compatibility, return both results and a metadata object
        metadata = {"model": self.model, "online": True}
        # Short pause kept from the original implementation (presumably to
        # space out successive batches -- TODO confirm it is still needed).
        time.sleep(0.02)
        return results, metadata

In [21]:
# Example usage: build the model client and the evaluation prompts.
from datasets import load_dataset


def first_user_message(messages):
    """Return the first message whose role is "user".

    Falls back to the first message when no message carries that role, which
    is what the previous expression (`messages["role" == "user"]`, i.e.
    `messages[False]` -> `messages[0]`) always returned.
    Assumes each message is a dict in the chat format passed to
    `OnlineLM.generate` -- TODO confirm against the dataset schema.
    """
    for message in messages:
        if message.get("role") == "user":
            return message
    return messages[0]


# Alternative model: "deepseek-ai/DeepSeek-R1"
model = OnlineLM("deepseek-ai/DeepSeek-R1-Distill-Llama-70B")

# Pin the dataset revision and keep the first 1000 training examples.
data = load_dataset(
    "MelinaLaimon/stream-of-search",
    revision="7fb82650ab67458027afff115964bcd04d3d2075",
    split='train').select(range(0, 1000))

# Each test prompt is a single-message conversation holding the user turn.
data = data.map(lambda x: { # type: ignore
    'test_prompt': [first_user_message(x["messages_sos"])],
})


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [23]:
# Show the Dataset repr (feature names and row count); `test_prompt` is the
# column added by the `map` call in the previous cell.
data

Dataset({
    features: ['nums', 'target', 'solution', 'rating', 'search_type', 'heuristic', 'messages_optimal', 'messages_sos_react', 'messages_sos', 'messages_deepseek', 'test_prompt'],
    num_rows: 1000
})

In [None]:
# TODO: you may need to change this path to your stream-of-search checkout.
# Entries in sys.path are used literally, so "~" must be expanded explicitly;
# otherwise the import below cannot find the `src` package.
sys.path.append(os.path.expanduser("~/projects/sos/stream-of-search"))
from src.result_parsers.countdown_trajectories import evaluate_countdown_trajectory

# One completion per test prompt, in dataset order.
completions, metadata = model.generate(data['test_prompt'], max_new_tokens=4096)

# Store the completions in the "completion" column, then evaluate every row.
data = data.add_column("completion", completions)
data = data.add_column("parsed_results", list(map(evaluate_countdown_trajectory, data)))

# `results` is the fraction of rows whose parsed result is marked solved.
results = sum([d['solved'] for d in data['parsed_results']]) / len(data['parsed_results'])


In [66]:
# Print the first few completions next to their parsed evaluation.
# The columns are looked up once, outside the loop, and the count is capped
# by the number of rows so short datasets do not raise IndexError.
preview_completions = data['completion']
preview_parsed = data['parsed_results']
for i in range(min(10, len(preview_completions))):
    print(f"Q {i}\n\n")
    print(preview_completions[i])
    print(preview_parsed[i])

Q 0


<think>
Okay, so I have this problem where I need to combine the numbers 53, 57, 13, and 48 using only addition, subtraction, multiplication, or division. The goal is to get 64, and I have to use each number exactly once. Hmm, let me think about how to approach this.

First, I'll list out the numbers: 53, 57, 13, 48. I need to use each one once, so I can't repeat any. I'm a bit overwhelmed because there are so many possible combinations. Maybe I should start by looking for pairs that can be combined to get something useful.

Let me consider the largest numbers first. 57 is the biggest, followed by 53. If I subtract 53 from 57, that gives me 4. That's a small number, but maybe useful later. Alternatively, if I subtract 57 from 53, I get -4, which could also be useful, but I'm not sure yet.

Looking at the smaller numbers, 13 and 48. If I subtract 13 from 48, I get 35. Adding them gives 61, which is close to 64. Wait, 61 plus 3 would be 64, but I don't have a 3 yet. Alternatively, 

In [67]:
# Solve rate: number of parsed results flagged as solved, divided by the
# total number of parsed results.
sum(entry['solved'] for entry in data['parsed_results']) / len(data['parsed_results'])

0.6

In [68]:
# Inspect only the first few parsed results; displaying the entire column
# floods the notebook output and bloats the saved file.
data['parsed_results'][:10]

[{'initial_numbers': [53, 57, 13, 48],
  'remarks': 'Could not find SOLUTION declaration.',
  'solved': False,
  'target': 64},
 {'initial_numbers': [9, 62, 67, 5],
  'remarks': "Trajectory is valid with operations: ['67-62=5', '9-5=4', '5*4=20']",
  'solved': True,
  'target': 20},
 {'initial_numbers': [87, 80, 47, 2],
  'remarks': "Trajectory is valid with operations: ['47*2=94', '94-80=14', '87-14=73']",
  'solved': True,
  'target': 73},
 {'initial_numbers': [10, 30, 42, 5],
  'remarks': "Trajectory is valid with operations: ['30+5=35', '42-10=32', '35+32=67']",
  'solved': True,
  'target': 67},
 {'initial_numbers': [66, 71, 74, 49],
  'remarks': 'Could not find SOLUTION declaration.',
  'solved': False,
  'target': 77},
 {'initial_numbers': [68, 72, 26, 44],
  'remarks': "Trajectory is valid with operations: ['72-68=4', '44+26=70', '70+4=74']",
  'solved': True,
  'target': 74},
 {'initial_numbers': [58, 82, 92, 13],
  'remarks': "Trajectory is valid with operations: ['92-58=34',

In [16]:
import glob


def strip_filename_prefix(pattern, prefix):
    """Rename every file matching `pattern` so its name no longer starts with `prefix`.

    Only the file name is edited. Directory components are left alone even if
    they contain `prefix`, and only the leading occurrence is removed.

    Args:
        pattern: Glob pattern selecting the files to rename.
        prefix: Leading text to remove from each matching file name.

    Returns:
        The list of new paths, in the order the files were renamed.
    """
    renamed = []
    for path in glob.glob(pattern):
        directory, filename = os.path.split(path)
        if not filename.startswith(prefix):
            continue  # nothing to strip from this file name
        target = os.path.join(directory, filename[len(prefix):])
        os.rename(path, target)
        renamed.append(target)
    return renamed


# Drop the "stream-of-search_" prefix from the qwen result files.
strip_filename_prefix("../results/qwen*/stream-of-search_*.json", "stream-of-search_")
