In [None]:
import json
import pandas as pd
import glob as glob

methods = {
    "Baseline": "qwen-2.5-x.5B-instruct-countdown-baseline",
    "OP": "qwen-2.5-x.5B-instruct-sft-lora-countdown-optimal-seq8k-5k",
    "SoS": "qwen-2.5-x.5B-instruct-sft-lora-countdown-search-seq8k-5k", 
    "RSoS": "qwen-2.5-x.5B-instruct-sft-lora-countdown-search-react-correct-seq10k-5k", 
    "Distill": "qwen-2.5-x.5B-instruct-sft-lora-countdown-deepseek-correct-seq8k-5k"
}

tasks = {
    "Countdown": "test_128", 
    "Countdown-3": "countdown_3num_128", 
    "Countdown-5": "countdown_5num_128",
    "KnK": "knk"
}

sizes = ["0.5B", "1.5B"]

def parse_results_from_json(file):
    try:
        with open(file, 'r') as f:
            data = json.load(f)
        # print("File:", file)
        # Extract the relevant information from the JSON data   
        if "knk" in file:
            return data["scores"]["2ppl"]    
        if "countdown" in file or "test" in file:
            # hyperparams = data[0]['hyperparams']
            return data[1]['mean']*100
    except Exception as e:
        print("Error reading file:", file)
        print("Error message:", e)
        return None

# initialize results to store for sizes, method_key, task_key
results = {size: {method_key: {task_key: None for task_key in tasks.keys()} for method_key in methods.keys()} for size in sizes}

for size_val in sizes:
    for method_key, method_val in methods.items():
        for task_key, task_val in tasks.items():
            folder_to_look_for = f"./{method_val}/{task_val}*.json".replace("x.5B", size_val) 
            specific_file = glob.glob(folder_to_look_for)
            if specific_file:
                results[size_val][method_key][task_key] = parse_results_from_json(specific_file[0])
            
# above works, put their filepath in a pandas df
                 
# put results in a pandas df
df = pd.DataFrame.from_dict({(i, j): results[i][j] 
                           for i in results.keys() 
                           for j in results[i].keys()},
                           orient='index')
# Tranpose
df = df.transpose()
# make the df float up to 2 decimals
df = df.round(2)
df.to_latex("results.tex", index=True, float_format="%.2f")
df

Unnamed: 0_level_0,0.5B,0.5B,0.5B,0.5B,0.5B,1.5B,1.5B,1.5B,1.5B,1.5B
Unnamed: 0_level_1,Baseline,OP,SoS,RSoS,Distill,Baseline,OP,SoS,RSoS,Distill
Countdown,0.0,2.34,37.5,0.78,25.78,0.78,5.47,49.22,52.34,44.53
Countdown-3,0.0,1.56,33.59,0.0,42.97,3.12,4.69,57.81,62.5,71.09
Countdown-5,0.0,0.78,0.0,0.0,0.78,0.0,0.78,0.0,0.0,1.56
KnK,1.0,1.5,0.0,0.0,1.5,7.0,2.5,0.0,0.0,12.0


In [10]:
sizes = ["1.5B"]

methods = {
    "No rejection \nsampling": "qwen-2.5-x.5B-instruct-sft-lora-countdown-search-react-seq10k-5k",
    "With rejection \nsampling": "qwen-2.5-x.5B-instruct-sft-lora-countdown-search-react-correct-seq10k-5k"
}

tasks = {
    "Countdown:": "test_128", 
}

results = {size: {method_key: {task_key: None for task_key in tasks.keys()} for method_key in methods.keys()} for size in sizes}
for size_val in sizes:
    for method_key, method_val in methods.items():
        for task_key, task_val in tasks.items():
            folder_to_look_for = f"./{method_val}/{task_val}*.json".replace("x.5B", size_val) 
            specific_file = glob.glob(folder_to_look_for)
            if specific_file:
                results[size_val][method_key][task_key] = parse_results_from_json(specific_file[0])
            
# above works, put their filepath in a pandas df
                
# put results in a pandas df
df = pd.DataFrame.from_dict({(i, j): results[i][j] 
                        for i in results.keys() 
                        for j in results[i].keys()},
                        orient='index')
# Tranpose
df = df.transpose()
# make the df float up to 2 decimals
df = df.round(2)
df.to_latex("results.tex", index=True, float_format="%.2f")
df

Unnamed: 0_level_0,1.5B,1.5B
Unnamed: 0_level_1,No rejection \nsampling,With rejection \nsampling
Countdown:,,52.34


In [19]:
results = {size: {method_key: {task_key: None for task_key in tasks.keys()} for method_key in methods.keys()} for size in training_sizes}
results

{'1k': {'SoS': {'Countdown:': None}, 'Distill': {'Countdown:': None}},
 '5k': {'SoS': {'Countdown:': None}, 'Distill': {'Countdown:': None}}}

In [21]:
training_sizes = ["1k", "5k"]

methods = {
    "SoS": "qwen-2.5-1.5B-instruct-sft-lora-countdown-search",
    "Distill": "qwen-2.5-1.5B-instruct-sft-lora-countdown-deepseek-correct-seq8k"
}

tasks = {
    "Countdown:": "test_128", 
}

results = {size: {method_key: {task_key: None for task_key in tasks.keys()} for method_key in methods.keys()} for size in training_sizes}
for size_val in training_sizes:
    for method_key, method_val in methods.items():
        for task_key, task_val in tasks.items():
            folder_to_look_for = f"./{method_val}*{size_val}/{task_val}*.json"
            specific_file = glob.glob(folder_to_look_for)
            if specific_file:
                results[size_val][method_key][task_key] = parse_results_from_json(specific_file[0])
            
# above works, put their filepath in a pandas df
                
# put results in a pandas df
df = pd.DataFrame.from_dict({(i, j): results[i][j] 
                        for i in results.keys() 
                        for j in results[i].keys()},
                        orient='index')
# Tranpose
df = df.transpose()
# make the df float up to 2 decimals
df = df.round(2)
df.to_latex("1k-vs-5k.tex", index=True, float_format="%.2f")
df

Unnamed: 0_level_0,1k,1k,5k,5k
Unnamed: 0_level_1,SoS,Distill,SoS,Distill
Countdown:,32.81,31.25,52.34,44.53


In [29]:
# load json
with open("/cs/student/msc/ml/2024/ycheah/projects/sos/stream-of-search/results/qwen-2.5-1.5B-instruct-sft-lora-countdown-deepseek-correct-seq8k-5k/test_128_20250406-195241.json", 'r') as f:
    data = json.load(f)
    
i = 2
print(f"Solved: {data[i]['parsed_results']['solved']} \n", data[i]['completion'])

Solved: True 
 system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.
user
Combine these initial numbers [53, 57, 13, 48] using only arithmetic operations (+, -, *, /) to reach the target value 64. All initial numbers must be used exactly once.
Conclude with the final result in EXACTLY this format:
```
SOLUTION: YES/NO
OPERATIONS: list of string of operations performed, each string involving only 1 operation. For example, ['A+B=C','C+D=E'] is allowed, ['A+B+D=E'] is not allowed
RESULT: final_value
```

Note that the solution does exist. Verify your solutions before your present your final results and backtrack to correct mistakes from before your mistakes if you have to.
assistant
Okay, let's see. I need to combine the numbers 53, 57, 13, and 48 using each exactly once with arithmetic operations to get 64. Hmm. Let me think about possible combinations.

First, maybe start by trying some operations. Let's look at the larger numbers first. 57 is close to 64. If we su