In [6]:
import json
import glob


prefix_name = "no_chunk_11_14"
# Step 1: Create an empty list to hold all JSON objects.
all_data = []

# Step 2: Loop through each JSON file in the directory where your files are stored.
for filename in glob.glob('data/pile_val/'+prefix_name+'*.json'):
    with open(filename, 'r') as file:
        # Read the single line of JSON and parse it.
        json_data = json.loads(file.readline())
        # Append the dictionary to the list.
        all_data.append(json_data)

# Step 3: Write the list of dictionaries to a new JSON file.
# with open('data/pile_val/combined_data'+prefix_name+'.json', 'w') as outfile:
#     json.dump(all_data, outfile)

In [7]:
# Custom order for names
custom_order = ["forwards-70m", "forwards-160m", "forwards-410m", "forwards-1B", "reverse-160m", "stationary_reversal"]

# Mapping from name to its position in custom order
order_mapping = {name: index for index, name in enumerate(custom_order)}


In [8]:
# LM Runs
# LM_runs = [d for d in all_data if d["name"] != "stationary_reversal" and d["prefix_length"]==2047]
LM_runs = all_data

In [9]:
# sort by number after 'forwards-' and before 'm' in name, 
# or after 'reverse' and before 'm in name
# LM_runs.sort(key=lambda x: int(x["name"][x["name"].find('forwards-')+9:x["name"].find('m')]) if 'forwards-' in x["name"] else int(x["name"][x["name"].find('reverse')+7:x["name"].find('m')]))
LM_runs.sort(key=lambda x: order_mapping[x['name']])


KeyError: 'reversal'

In [10]:
# Create and write to csv
import csv
with open('data/pile_val/'+prefix_name+'_models.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    if prefix_name == "redo_drop_3000":
        writer.writerow(["Model", "Loss"])
    elif prefix_name == "no_chunk_11_14":
        writer.writerow(["Model/Method", "Loss"])
    else:
        raise Exception("Something not accounted for.")
    for data_dict in LM_runs:
        mean_string_3_decimal_places = "{:.3f}".format(data_dict["mean"])
        upper_bound = "{:.3f}".format(data_dict["mean"]+1.96*data_dict["std_on_mean"])
        lower_bound = "{:.3f}".format(data_dict["mean"]-1.96*data_dict["std_on_mean"])

        string_with_mean_and_standard_dev = mean_string_3_decimal_places + " (" + lower_bound + ", " + upper_bound + ")"
        if data_dict["name"] == "stationary_reversal":
            if data_dict["reverse_model_prior"]==True:
                name = "Bayesian Reversal(forwards-"+data_dict["model_size"]+", prior=reverse-160m)"
            elif data_dict["dist"]=="../data/distributions/pile_empirical.pt":
                name = "Bayesian Reversal (forwards-"+data_dict["model_size"]+", prior=empirical)"
            elif data_dict["dist"]=="../data/distributions/probs_30.pt":
                name = "Bayesian Reversal (forwards-"+data_dict["model_size"]+", prior=marginals(p(x_0),...,p(x_28)))"
            else:
                raise Exception("Something not accounted for.")
        else:
            name = data_dict["name"].capitalize()
        writer.writerow([name, string_with_mean_and_standard_dev])

In [11]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('data/pile_val/'+prefix_name+'_models.csv')

# Transpose the DataFrame
transposed_df = df.transpose()

# Save the transposed DataFrame to a new CSV file
transposed_df.to_csv('data/pile_val/'+prefix_name+'_transpose.csv', header=False)

In [12]:
LM_runs[-1]

{'name': 'forwards-410m',
 'dataset': 'pile_val',
 'mean': 3.084847347859782,
 'variance': 1.0104325663174263,
 'std_on_mean': 0.031947440521522034,
 'nlosses': 990,
 'dataset_name': 'pile_val',
 'num_examples': 1000,
 'full_data_set_chunk': False,
 'prefix_length': 29,
 'suffix_length': 1,
 'num_buffer': 0,
 'batch_size': 1,
 'device': 'cuda',
 'seed': 5491,
 'filename_prefix': 'no_chunk_11_14',
 'return_all_sequences': False,
 'filter_small_sequences': False,
 'model_size': '410m'}

In [46]:
def print_topk_runs(k, list_of_dicts):
    sorted_list_of_dicts = sorted(list_of_dicts, key=lambda x: x['mean'])
    keys_for_non_reversal = ["name", "mean", "std_on_mean"]
    keys_for_reversal = ["name", "mean", "std_on_mean", "reverse_model_prior", "dist", "dilution"]
    for i in range(0, k):
        key_value_pairs = []
        out_dict = sorted_list_of_dicts[i]
        if out_dict["name"] != "stationary_reversal":
            for key in keys_for_non_reversal: 
                value = out_dict[key]
                if isinstance(value, float):
                    formatted_value = f"{value:.{2}f}"
                else:
                    formatted_value = value                        
                key_value_pairs.append(f"{key}: {formatted_value}")
            print(" | ".join(key_value_pairs))
        else:
            for key in keys_for_reversal: 
                value = out_dict[key]
                if isinstance(value, float):
                    formatted_value = f"{value:.{2}f}"
                else:
                    formatted_value = value                        
                key_value_pairs.append(f"{key}: {formatted_value}")
            print(" | ".join(key_value_pairs))
    return
    

In [47]:
print_topk_runs(len(prefix_10_runs), prefix_10_runs)

name: reverse-160m | mean: 4.36 | std_on_mean: 0.14
name: forwards-410m | mean: 4.46 | std_on_mean: 0.15
name: forwards-160m | mean: 4.59 | std_on_mean: 0.16
name: forwards-70m | mean: 4.88 | std_on_mean: 0.16
name: stationary_reversal | mean: 5.27 | std_on_mean: 0.18 | reverse_model_prior: False | dist: ../data/distributions/probs_10.pt | dilution: 0.00
name: stationary_reversal | mean: 5.33 | std_on_mean: 0.18 | reverse_model_prior: False | dist: ../data/distributions/probs_10.pt | dilution: 0.20
name: stationary_reversal | mean: 5.43 | std_on_mean: 0.18 | reverse_model_prior: False | dist: ../data/distributions/probs_10.pt | dilution: 0.40
name: stationary_reversal | mean: 5.49 | std_on_mean: 0.20 | reverse_model_prior: True | dist: ../data/pile10k_empirical.pt | dilution: 0.60
name: stationary_reversal | mean: 5.51 | std_on_mean: 0.20 | reverse_model_prior: True | dist: ../data/pile10k_empirical.pt | dilution: 0.40
name: stationary_reversal | mean: 5.51 | std_on_mean: 0.20 | revers

In [48]:
prefix_10_runs

[{'name': 'stationary_reversal',
  'dataset': 'pile_val',
  'mean': 5.510788377225399,
  'variance': 4.154577491518643,
  'std_on_mean': 0.2038278070214818,
  'nbatches': 100,
  'num_examples': 100,
  'prefix_length': 10,
  'suffix_length': 1,
  'num_buffer': 0,
  'suffix_batch_size': 1,
  'vocab_batch_size': 6288,
  'device': 'cuda',
  'dist': '../data/pile10k_empirical.pt',
  'dilution': 0.4,
  'reverse_model_prior': True,
  'multiple_priors_start_idx': 0,
  'multiple_priors_end_idx': 0,
  'model_size': '160m'},
 {'name': 'stationary_reversal',
  'dataset': 'pile_val',
  'mean': 5.266462098956108,
  'variance': 3.407668333950254,
  'std_on_mean': 0.18459870893238267,
  'nbatches': 100,
  'num_examples': 100,
  'prefix_length': 10,
  'suffix_length': 1,
  'num_buffer': 0,
  'suffix_batch_size': 1,
  'vocab_batch_size': 6288,
  'device': 'cuda',
  'dist': '../data/distributions/probs_10.pt',
  'dilution': 0.0,
  'reverse_model_prior': False,
  'multiple_priors_start_idx': 0,
  'multipl