In [None]:
import json
import os

import numpy as np

res_dir = "/home/nee7ne/EfficientCoT/results_final "
methods = ['coconut', 'codi', 'icot_si', 'pause', 'softcot', 'effi_cot']
datasets = ["coin_flip", "commonsense_qa", "gsm8k", "multiarith", "svamp"]
models = ["small", "mistral"]
res = {}

for mo in models:
    res[mo] = {}
    for me in methods:
        res[mo][me] = {}
        for d in datasets:
            res[mo][me][d] = {}
            file = f"{res_dir}/{me}/{mo}/{d}/evaluation_results.jsonl"
            if me == "effi_cot":
                file = f"{res_dir}/{me}/vanilla/{mo}/{d}/evaluation_results.jsonl"
            if not os.path.exists(file):
                continue
            with open(file) as f:
                lines = f.readlines()
                lines = [json.loads(l) for l in lines[:15] if l != "\n"]
            summary = {}
            for l in lines:
                if l['eval_temp'] not in summary:
                    summary[l['eval_temp']] = {'acc': [], 'time': []}
                summary[l['eval_temp']]['acc'].append(l['numerical_accuracy'] * 100)
                summary[l['eval_temp']]['time'].append(l['ave_sample_time'])
            best_temp, max_acc = 0, -1
            for temp in summary:
                if np.mean(summary[temp]['acc']) > max_acc:
                    best_temp = temp
                    max_acc = np.mean(summary[temp]['acc'])
            res[mo][me][d] = {'acc': summary[best_temp]['acc'], 'time': summary[best_temp]['time'], "temp":best_temp}


In [2]:
for mo in models:
    for d in sorted(datasets):
        acc_line = ""
        time_line = ""
        m = ""
        for me in methods:
            m += me + "\t"
            cur_acc = "N/A\t"
            cur_time = "N/A\t"
            if len(res[mo][me][d]) > 0:
                cur_acc = f"{np.mean(res[mo][me][d]['acc']):.2f} \scriptsize"+"{± "+f"{np.std(res[mo][me][d]['acc']):.2f}"+"}\t"
                cur_time = f"{np.mean(res[mo][me][d]['time']):.2f} \scriptsize"+"{± "+f"{np.std(res[mo][me][d]['time']):.2f}"+"}\t"
            acc_line += cur_acc
            time_line += cur_time
        print(acc_line)
        print(time_line)

32.67 \scriptsize{± 1.93}	66.33 \scriptsize{± 11.05}	25.33 \scriptsize{± 0.85}	50.50 \scriptsize{± 0.00}	73.17 \scriptsize{± 1.25}	90.00 \scriptsize{± 9.91}	
1.99 \scriptsize{± 0.12}	4.08 \scriptsize{± 0.12}	2.22 \scriptsize{± 0.01}	3.14 \scriptsize{± 0.00}	1.31 \scriptsize{± 0.00}	1.32 \scriptsize{± 0.00}	
13.00 \scriptsize{± 0.41}	74.50 \scriptsize{± 12.29}	20.00 \scriptsize{± 1.41}	50.00 \scriptsize{± 0.00}	63.50 \scriptsize{± 0.41}	76.83 \scriptsize{± 10.02}	
1.62 \scriptsize{± 0.01}	3.89 \scriptsize{± 0.01}	2.24 \scriptsize{± 0.00}	2.67 \scriptsize{± 0.01}	1.27 \scriptsize{± 0.07}	1.26 \scriptsize{± 0.02}	
6.50 \scriptsize{± 1.22}	8.17 \scriptsize{± 5.07}	9.83 \scriptsize{± 0.24}	4.67 \scriptsize{± 0.62}	2.17 \scriptsize{± 0.47}	11.00 \scriptsize{± 6.72}	
1.58 \scriptsize{± 0.01}	2.82 \scriptsize{± 1.61}	1.93 \scriptsize{± 0.23}	3.40 \scriptsize{± 0.03}	1.35 \scriptsize{± 0.01}	1.31 \scriptsize{± 0.00}	
2.23 \scriptsize{± 0.45}	8.17 \scriptsize{± 7.34}	2.80 \scriptsize{± 0.90}	0.5

In [9]:
lines = """
{"numerical_accuracy": 0.43, "close_match_rate": 0.45, "mean_relative_error": 0.8347282617080535, "median_relative_error": 0.04339995352079944, "exp_num": 0, "dataset": "svamp", "eval_temp": 0.1, "ave_sample_time": 1.3277779865264892, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.42, "close_match_rate": 0.435, "mean_relative_error": 1.133639945357705, "median_relative_error": 0.0857843137254902, "exp_num": 0, "dataset": "svamp", "eval_temp": 0.3, "ave_sample_time": 1.3098631191253662, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.415, "close_match_rate": 0.435, "mean_relative_error": 0.7132307012366528, "median_relative_error": 0.08333333333333334, "exp_num": 0, "dataset": "svamp", "eval_temp": 0.5, "ave_sample_time": 1.2989231753349304, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.405, "close_match_rate": 0.44, "mean_relative_error": 1.5853724209651472, "median_relative_error": 0.07275132275132275, "exp_num": 0, "dataset": "svamp", "eval_temp": 0.7, "ave_sample_time": 1.2883724343776704, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.385, "close_match_rate": 0.405, "mean_relative_error": 0.78657116327356, "median_relative_error": 0.07671568627450981, "exp_num": 0, "dataset": "svamp", "eval_temp": 0.9, "ave_sample_time": 1.294522762298584, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.415, "close_match_rate": 0.425, "mean_relative_error": 0.6827791824832995, "median_relative_error": 0.0784313725490196, "exp_num": 1, "dataset": "svamp", "eval_temp": 0.1, "ave_sample_time": 1.3032299590110779, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.41, "close_match_rate": 0.42, "mean_relative_error": 0.623756740435207, "median_relative_error": 0.0784313725490196, "exp_num": 1, "dataset": "svamp", "eval_temp": 0.3, "ave_sample_time": 1.296975346803665, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.42, "close_match_rate": 0.44, "mean_relative_error": 0.5329078921214886, "median_relative_error": 0.05823249447848944, "exp_num": 1, "dataset": "svamp", "eval_temp": 0.5, "ave_sample_time": 1.2840074217319488, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.395, "close_match_rate": 0.41, "mean_relative_error": 0.7726723976203438, "median_relative_error": 0.12434750186428038, "exp_num": 1, "dataset": "svamp", "eval_temp": 0.7, "ave_sample_time": 1.2730780780315398, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.395, "close_match_rate": 0.405, "mean_relative_error": 1.032121964643419, "median_relative_error": 0.10743801652892562, "exp_num": 1, "dataset": "svamp", "eval_temp": 0.9, "ave_sample_time": 1.2838754749298096, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.465, "close_match_rate": 0.48, "mean_relative_error": 0.60495249288651, "median_relative_error": 0.030940009447331128, "exp_num": 2, "dataset": "svamp", "eval_temp": 0.1, "ave_sample_time": 1.2835328423976897, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.455, "close_match_rate": 0.47, "mean_relative_error": 0.5336702361338369, "median_relative_error": 0.038098693759071114, "exp_num": 2, "dataset": "svamp", "eval_temp": 0.3, "ave_sample_time": 1.2913209044933318, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.44, "close_match_rate": 0.455, "mean_relative_error": 0.6662466363779771, "median_relative_error": 0.040883408945420134, "exp_num": 2, "dataset": "svamp", "eval_temp": 0.5, "ave_sample_time": 1.2849030780792237, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.42, "close_match_rate": 0.435, "mean_relative_error": 0.763867277281309, "median_relative_error": 0.08088235294117646, "exp_num": 2, "dataset": "svamp", "eval_temp": 0.7, "ave_sample_time": 1.2854688930511475, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
{"numerical_accuracy": 0.44, "close_match_rate": 0.46, "mean_relative_error": 0.5904118060251882, "median_relative_error": 0.038461538461538464, "exp_num": 2, "dataset": "svamp", "eval_temp": 0.9, "ave_sample_time": 1.2886021506786347, "student": "princeton-nlp/Sheared-LLaMA-1.3B", "teacher": "meta-llama/Llama-2-7b-chat-hf", "st_linear_lr": 0.0001, "st_linear_wd": 0.001, "st_linear_epochs": 5, "st_llm_lr": 1e-07, "st_llm_wd": 1e-05, "st_llm_epochs": 2, "cg_linear_lr": 0.0001, "cg_linear_wd": 0.001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 1e-05, "cg_llm_epochs": 2, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}
"""
lines = lines.split("\n")[1:-1]
idx = [i for i, l in enumerate(lines) if l == "\n"]
if len(idx) == 0:
    idx = 0
elif idx[-1] == len(lines) - 1:
    idx = 0
else:
    idx = idx[-1] + 1
lines = [json.loads(l) for l in lines[idx:] if l != "\n"]
summary = {}
for l in lines:
    if l['eval_temp'] not in summary:
        summary[l['eval_temp']] = {'acc': [], 'time': []}
    summary[l['eval_temp']]['acc'].append(l['numerical_accuracy'] * 100)
    summary[l['eval_temp']]['time'].append(l['ave_sample_time'])
best_temp, max_acc = 0, -1
for temp in summary:
    if np.mean(summary[temp]['acc']) > max_acc:
        best_temp = temp
        max_acc = np.mean(summary[temp]['acc'])
        
print(f"{np.mean(summary[best_temp]['acc']):.2f} ± {np.std(summary[best_temp]['acc']):.2f}\t")
print(f"{np.mean(summary[best_temp]['time']):.2f} ± {np.std(summary[best_temp]['time']):.2f}\t")
print(best_temp)

43.67 ± 2.09	
1.30 ± 0.02	
0.1


In [7]:
import json
res_path = "/home/nee7ne/EfficientCoT/results/effi_cot/vanilla/mistral/multiarith/evaluation_results.jsonl"
cur_res = {}
with open(res_path, "r") as f:
    lines = f.readlines()
    idx = 0
    while idx < len(lines):
        if lines[idx].strip() == "":
            idx +=1
            continue
        elif json.loads(lines[idx])["exp_num"] != 0:
            idx += 1
            continue
        acc = []
        for temp in [0.1, 0.3, 0.5, 0.7, 0.9]:
            cur_line = json.loads(lines[idx])
            if cur_line['eval_temp'] != temp:
                break
            acc.append(cur_line['numerical_accuracy'])
            idx += 1
        cur_params = (cur_line['st_linear_lr'], cur_line['st_linear_wd'], cur_line['st_linear_epochs'], cur_line['st_llm_lr'], cur_line['st_llm_wd'], cur_line['st_llm_epochs'], cur_line['cg_linear_lr'], cur_line['cg_linear_wd'], cur_line['cg_linear_epochs'], cur_line['cg_llm_lr'], cur_line['cg_llm_wd'], cur_line['cg_llm_epochs'])
        cur_res[cur_params] = max(acc)

In [8]:
sorted([(k,v) for k, v in cur_res.items() if v >= 0.185], key=lambda x: x[1])

[((0.0001, 0.001, 3, 1e-05, 0.001, 2, 0.0001, 0.01, 3, 1e-05, 0.001, 1),
  0.189),
 ((0.0001, 0.01, 1, 1e-07, 0.001, 2, 0.01, 0.0001, 5, 1e-05, 0.001, 1), 0.189),
 ((0.0001, 0.01, 1, 1e-07, 0.001, 2, 0.01, 0.01, 5, 1e-07, 0.001, 1), 0.189),
 ((0.0001, 0.01, 1, 1e-07, 0.001, 2, 0.01, 0.0001, 5, 1e-07, 0.001, 2), 0.189),
 ((0.01, 0.001, 1, 1e-05, 1e-05, 1, 0.0001, 0.001, 1, 1e-07, 0.001, 2), 0.189),
 ((0.0001, 0.01, 1, 1e-07, 0.001, 2, 0.01, 0.01, 3, 1e-07, 1e-05, 1), 0.2),
 ((0.001, 0.0001, 3, 1e-07, 1e-05, 1, 0.01, 0.001, 5, 1e-05, 0.001, 1), 0.2),
 ((0.0001, 0.01, 1, 1e-07, 0.001, 2, 0.001, 0.0001, 1, 1e-05, 0.001, 2),
  0.217)]

In [7]:
cur_line ={"numerical_accuracy": 0.211, "close_match_rate": 0.211, "mean_relative_error": 0.4133522429329931, "median_relative_error": 0.2, "exp_num": 0, "dataset": "multiarith", "eval_temp": 0.9, "ave_sample_time": 1.3804727421866523, "student": "optimum/mistral-1.1b-testing", "teacher": "mistralai/Mistral-7B-Instruct-v0.2", "st_linear_lr": 0.0001, "st_linear_wd": 0.01, "st_linear_epochs": 1, "st_llm_lr": 1e-07, "st_llm_wd": 0.001, "st_llm_epochs": 2, "cg_linear_lr": 0.001, "cg_linear_wd": 0.0001, "cg_linear_epochs": 5, "cg_llm_lr": 1e-07, "cg_llm_wd": 0.001, "cg_llm_epochs": 1, "train_max_contemp_tokens": 5, "eval_max_contemp_tokens": 1}

(cur_line['st_linear_lr'], cur_line['st_linear_wd'], cur_line['st_linear_epochs'], cur_line['st_llm_lr'], cur_line['st_llm_wd'], cur_line['st_llm_epochs'], cur_line['cg_linear_lr'], cur_line['cg_linear_wd'], cur_line['cg_linear_epochs'], cur_line['cg_llm_lr'], cur_line['cg_llm_wd'], cur_line['cg_llm_epochs'])

(0.0001, 0.01, 1, 1e-07, 0.001, 2, 0.001, 0.0001, 5, 1e-07, 0.001, 1)

In [8]:
print(f"python main.py --config {'small' if 'llama' in cur_line['teacher'] else 'mistral'} --mode effi_cot --dataset {cur_line['dataset']} --device 3 --variation vanilla -stllr {cur_line['st_linear_lr']} -stlwd {cur_line['st_linear_wd']} -stle {cur_line['st_linear_epochs']} -stllmlr {cur_line['st_llm_lr']} -stllmwd {cur_line['st_llm_wd']} -stllme {cur_line['st_llm_epochs']} -cgllr {cur_line['cg_linear_lr']} -cglwd {cur_line['cg_linear_wd']} -cgle {cur_line['cg_linear_epochs']} -cgllmlr {cur_line['cg_llm_lr']} -cgllmwd {cur_line['cg_llm_wd']} -cgllme {cur_line['cg_llm_epochs']}")

python main.py --config mistral --mode effi_cot --dataset multiarith --device 3 --variation vanilla -stllr 0.0001 -stlwd 0.01 -stle 1 -stllmlr 1e-07 -stllmwd 0.001 -stllme 2 -cgllr 0.001 -cglwd 0.0001 -cgle 5 -cgllmlr 1e-07 -cgllmwd 0.001 -cgllme 1


In [None]:
python main.py --config mistral --mode effi_cot --dataset multiarith --device 1 --variation vanilla -stllr 0.0001 -stlwd 0.01 -stle 1 -stllmlr 1e-07 -stllmwd 0.001 -stllme 2 -cgllr 0.001 -cglwd 0.0001 -cgle 5 -cgllmlr 1e-07 -cgllmwd 0.001 -cgllme 1


SyntaxError: invalid syntax (1663213852.py, line 1)

In [None]:
python main.py --config mistral --mode effi_cot --dataset multiarith --device 2 --variation vanilla -stllr 0.0001 -stlwd 0.001 -stle 3 -stllmlr 1e-05 -stllmwd 0.001 -stllme 2 -cgllr 0.0001 -cglwd 0.01 -cgle 3 -cgllmlr 1e-05 -cgllmwd 0.001 -cgllme 1 && python main.py --config mistral --mode effi_cot --dataset multiarith --device 2 --variation vanilla -stllr 0.0001 -stlwd 0.01 -stle 1 -stllmlr 1e-07 -stllmwd 0.001 -stllme 2 -cgllr 0.01 -cglwd 0.0001 -cgle 5 -cgllmlr 1e-05 -cgllmwd 0.001 -cgllme 1 && python main.py --config mistral --mode effi_cot --dataset multiarith --device 2 --variation vanilla -stllr 0.0001 -stlwd 0.01 -stle 1 -stllmlr 1e-07 -stllmwd 0.001 -stllme 2 -cgllr 0.01 -cglwd 0.0001 -cgle 5 -cgllmlr 1e-07 -cgllmwd 0.001 -cgllme 2 && python main.py --config mistral --mode effi_cot --dataset multiarith --device 2 --variation vanilla -stllr 0.0001 -stlwd 0.01 -stle 1 -stllmlr 1e-07 -stllmwd 0.001 -stllme 2 -cgllr 0.01 -cglwd 0.01 -cgle 5 -cgllmlr 1e-07 -cgllmwd 0.001 -cgllme 1

In [9]:
best_temps = {model: {method: {d:res[model][method][d]["temp"] for d in res[model][method]} for method in res[model]} for model in res}

In [2]:
import shutil
import os 
root = "/data/nee7ne/effi_cot2/saved_models/coconut"
for model in os.listdir(root):
    for dataset in os.listdir(f"{root}/{model}"):
        path = f"{root}/{model}/{dataset}/"
        while "model" not in os.listdir(path)[0]:
            path += os.listdir(path)[0] + "/"
        shutil.move(f"{path}/{os.listdir(path)[0]}", f"{root}/{model}/{dataset}/{os.listdir(path)[0]}")
        shutil.rmtree(f"{root}/{model}/{dataset}/coconut")

In [4]:
for model in os.listdir(root):
    for dataset in os.listdir(f"{root}/{model}"):
        if os.path.exists(f"{root}/{model}/{dataset}/softcot"):
            shutil.rmtree(f"{root}/{model}/{dataset}/softcot")