In [9]:
import time, sys, os
import concurrent.futures
import numpy as np
import re
from datetime import datetime
from typing import Optional, List, Dict, Any, Tuple
from openai import OpenAI


class OnlineLM:
    """Chat-completion client for an OpenAI-compatible endpoint (DeepInfra).

    The constructor reads the API key from the ``DEEPINFRA_TOKEN`` environment
    variable and raises ``ValueError`` when it is missing.

    Args:
        model_name: Model identifier sent with every request.
        **kwargs: Optional settings. Only ``temperature`` is read
            (default ``0.7``).
    """

    def __init__(self, model_name: str, **kwargs):
        self.model = model_name  # Model identifier passed to the API
        self.temperature = kwargs.get("temperature", 0.7)  # Sampling temperature
        self._initialize()

    def _initialize(self):
        """Create the OpenAI client pointed at the DeepInfra base URL.

        Raises:
            ValueError: If ``DEEPINFRA_TOKEN`` is not set in the environment.
        """
        if "DEEPINFRA_TOKEN" not in os.environ:
            raise ValueError("DEEPINFRA_TOKEN environment variable is not set")

        self.openai = OpenAI(
            api_key=os.environ["DEEPINFRA_TOKEN"],
            base_url="https://api.deepinfra.com/v1/openai",
        )

    def _fetch_response(self, message_data):
        """Run one chat-completion request.

        Args:
            message_data: ``(messages, max_tokens)`` pair, where ``messages``
                is the conversation to send.

        Returns:
            The content of the first choice as returned by the API (may be
            ``None``), or the string ``"Error: <exception text>"`` when the
            request raises. The return value is never a tuple, so callers can
            treat every result uniformly.
        """
        messages, max_tokens = message_data

        try:
            chat_completion = self.openai.chat.completions.create(
                model=self.model,
                messages=messages,
                max_tokens=max_tokens,
                # Negative temperatures are clamped to 0.
                temperature=self.temperature if self.temperature > 0 else 0,
            )
            return chat_completion.choices[0].message.content
        except Exception as e:
            # Best-effort: one failed request must not abort the whole batch,
            # so the failure is reported in-band as a plain string.
            return f"Error: {str(e)}"

    def generate(self,
                 input_messages: List[Dict[str, Any]],
                 max_new_tokens: int = 100,
                 repeat_input: bool = False) -> Tuple[List[Any], Any]:
        """Generate one completion per conversation, issuing requests in parallel.

        Args:
            input_messages: Sequence of conversations; each conversation is a
                list of ``{"role": ..., "content": ...}`` dicts.
            max_new_tokens: ``max_tokens`` value sent with every request.
            repeat_input: When true, each result is a copy of the input
                conversation whose last message has the generated text
                appended to its ``content``. The inputs are not mutated.
                When false, each result is the generated text itself.

        Returns:
            ``(results, metadata)`` where ``results`` is in the same order as
            ``input_messages`` and ``metadata`` is
            ``{"model": <model name>, "online": True}``.
        """
        conversations = list(input_messages)
        request_data = [(messages, max_new_tokens) for messages in conversations]

        # executor.map preserves input order, so results line up with inputs.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            api_results = list(executor.map(self._fetch_response, request_data))

        results = []
        for messages, content in zip(conversations, api_results):
            if repeat_input:
                # Copy the list and its last message so the caller's data
                # stays untouched.
                new_message = messages.copy()
                new_message[-1] = new_message[-1].copy()
                # The API may return None content; treat it as empty text.
                new_message[-1]['content'] += content or ""
                results.append(new_message)
            else:
                results.append(content)

        # For API compatibility, return both results and a metadata object
        metadata = {"model": self.model, "online": True}
        time.sleep(0.02)
        return results, metadata

In [12]:
from datasets import load_dataset

# Client for the full DeepSeek-R1 model (constructing it requires DEEPINFRA_TOKEN);
# the distilled 70B variant is kept below as a drop-in alternative.
model = OnlineLM("deepseek-ai/DeepSeek-R1")
# model = OnlineLM("deepseek-ai/DeepSeek-R1-Distill-Llama-70B")

# Only the first 10% of the train split is loaded.
data = load_dataset(
    "MelinaLaimon/stream-of-search",
    split="train[:10%]",
)

In [14]:
# Derive a `messages_deepseek` column from `messages_sos`: every message that
# has a 'role' key is copied, and user messages get an extra hint appended.
# NOTE(review): messages are not filtered by role, so any non-user turns in
# `messages_sos` are forwarded unchanged — confirm this is intended for the
# prompts sent to the model.
data = data.map(
    lambda example: {
        'messages_deepseek': [
            {
                'role': msg['role'],
                'content': (
                    msg['content'] + " Note that the solution must exist"
                    if msg['role'] == "user"
                    else msg['content']
                ),
            }
            for msg in example["messages_sos"]
            if 'role' in msg
        ]
    }
)

Map: 100%|██████████| 1000/1000 [00:00<00:00, 6442.26 examples/s]


In [17]:
# Display the first example to check the available fields.
data[0]

{'nums': [90, 11, 37, 95],
 'target': 55,
 'solution': ['90+95=185', '11*185=2035', '2035/37=55'],
 'rating': 0.813368055555555,
 'search_type': 'dfs',
 'heuristic': 'sum_heuristic',
 'messages_optimal': [{'content': "Combine these initial numbers [90, 11, 37, 95] using only arithmetic operations (+, -, *, /) to reach the target value 55. All initial numbers must be used exactly once.\nConclude with the final result in EXACTLY this format:\n```\nSOLUTION: YES/NO\nOPERATIONS: list of string of operations performed, each string involving only 1 operation. For example, ['A+B=C','C+D=E'] is allowed, ['A+B+D=E'] is not allowed\nRESULT: final_value\n```\n",
   'role': 'user'},
  {'content': "Current State: 55:[90, 11, 37, 95], Operations: []\nExploring Operation: 90+95=185, Resulting Numbers: [11, 37, 185]\nGenerated Node #2: [11, 37, 185] from Operation: 90+95=185\nCurrent State: 55:[11, 37, 185], Operations: ['90+95=185']\nExploring Operation: 11*185=2035, Resulting Numbers: [37, 2035]\nGe

In [None]:
# NOTE(review): machine-specific absolute path; the import below only works
# where this checkout exists.
sys.path.append("/cs/student/msc/ml/2024/ycheah/projects/sos/stream-of-search")
from src.result_parsers.countdown_trajectories import evaluate_countdown_trajectory
from datasets import Dataset, concatenate_datasets
from tqdm import tqdm

batch_size = 200
# data.iter() is a generator with no length, so give tqdm the batch count
# explicitly; otherwise the bar cannot show progress or an ETA.
num_batches = (len(data) + batch_size - 1) // batch_size

scored_batches = []
for data_batch in tqdm(data.iter(batch_size=batch_size), total=num_batches):
    data_batch = Dataset.from_dict(data_batch)

    # One completion per conversation, in input order.
    completions, metadata = model.generate(data_batch['messages_deepseek'], max_new_tokens=8192)
    data_batch = data_batch.add_column("completion", completions)
    # The evaluator receives each full row (including the new `completion`).
    data_batch = data_batch.add_column("parsed_results", list(map(evaluate_countdown_trajectory, data_batch)))
    scored_batches.append(data_batch)

if not scored_batches:
    raise ValueError("`data` is empty: no batches were generated")

# Concatenate once instead of rebuilding the dataset after every batch.
data_new = concatenate_datasets(scored_batches)

# Fraction of examples whose parsed result is marked as solved.
parsed_results = data_new['parsed_results']
results = sum(d['solved'] for d in parsed_results) / len(parsed_results)


0it [00:00, ?it/s]

In [None]:
import json

old_json_file_path = "data/sos_10k_b4_merged/train1_b4_t100_n10000_w_deepseek.json"
with open(old_json_file_path, encoding="utf-8") as f:
    data_old = json.load(f)

# Entries are paired with completions purely by position, so every JSON entry
# needs a completion. Fail before mutating anything rather than with an
# IndexError part-way through.
if len(data_old) > len(data_new):
    raise ValueError(
        f"{old_json_file_path} has {len(data_old)} entries but only "
        f"{len(data_new)} completions are available in data_new"
    )

# Read the column once; indexing data_new[i] would rebuild a full row per entry.
for entry, search_path in zip(data_old, data_new["completion"]):
    user_prompt = entry["messages_sos"][0]["content"]
    # Note: the stored user prompt is the original `messages_sos` prompt.
    entry["messages_deepseek"] = [
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": search_path},
    ]
    

In [None]:
# Write the augmented entries back to disk as indented JSON.
# Note: this is the same path the entries were loaded from, so the file is
# overwritten in place.
new_json_file_path = "data/sos_10k_b4_merged/train1_b4_t100_n10000_w_deepseek.json"
with open(new_json_file_path, "w+") as out_file:
    json.dump(data_old, fp=out_file, indent=4)

In [None]:
# Upload the merged JSON to the Hugging Face dataset repository.
from huggingface_hub import HfApi
api = HfApi()
# Note: the remote filename (path_in_repo) differs from the local one: it has
# no `_w_deepseek` suffix. The call is the cell's last expression, so the
# returned commit info is displayed.
api.upload_file(
    path_or_fileobj="data/sos_10k_b4_merged/train1_b4_t100_n10000_w_deepseek.json",
    path_in_repo="train1_b4_t100_n10000.json",
    repo_id="MelinaLaimon/stream-of-search",
    repo_type="dataset",
)

train1_b4_t100_n10000_w_deepseek.json: 100%|██████████| 479M/479M [00:26<00:00, 17.8MB/s] 


CommitInfo(commit_url='https://huggingface.co/datasets/MelinaLaimon/stream-of-search/commit/fbea142f66a888ef3c454d4fb0b431f480faafa8', commit_message='Upload train1_b4_t100_n10000.json with huggingface_hub', commit_description='', oid='fbea142f66a888ef3c454d4fb0b431f480faafa8', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MelinaLaimon/stream-of-search', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MelinaLaimon/stream-of-search'), pr_revision=None, pr_num=None)

In [27]:
# Show the keys of `entry`, i.e. whatever entry was bound last in the kernel.
# NOTE(review): depends on kernel state left by an earlier loop.
entry.keys()

dict_keys(['nums', 'target', 'solution', 'search_path', 'rating', 'search_type', 'optimal_path', 'heuristic'])

In [None]:
# Write `data_old` back to its source file. The file must be opened for
# writing and passed to json.dump as the target; indent=4 matches the other
# save cell in this notebook.
with open(old_json_file_path, "w") as f:
    json.dump(data_old, f, indent=4)

In [62]:
# Print the first (up to) 10 completions with their parsed evaluation.
# Slicing once avoids re-reading both full columns on every iteration and
# copes with fewer than 10 rows.
# NOTE(review): requires `data` to carry `completion` and `parsed_results`
# columns — confirm which dataset object this cell is meant to read.
preview = data[:10]
for i, (completion, parsed) in enumerate(zip(preview['completion'], preview['parsed_results'])):
    print(f"Q {i}\n\n")
    print(completion)
    print(parsed)

Q 0


<think>
Okay, so I have this problem where I need to combine the numbers 53, 57, 13, and 48 using only addition, subtraction, multiplication, and division to get the target value of 64. All four numbers must be used exactly once. Hmm, let's see how I can approach this.

First, I should probably list out all the numbers: 53, 57, 13, 48. My goal is to use each of them once with arithmetic operations to reach 64. I remember that sometimes combining numbers in different orders or using operations in a certain way can lead to the desired result. Maybe I should try different combinations step by step.

Let me start by considering if adding any two numbers gives me something useful. For example, 53 + 57 is 110, which is way too big. 57 + 13 is 70, which is also higher than 64. 53 + 13 is 66, which is just a bit over. Hmm, 66 is close to 64, maybe I can subtract something from it. But wait, I have to use all four numbers, so I need to involve 48 as well.

Alternatively, maybe I should th

In [55]:
# Fraction of examples whose parsed result is marked as solved.
parsed = data['parsed_results']
sum(item['solved'] for item in parsed) / len(parsed)

0.8