In [14]:
# Default settings for interactive (notebook) runs; the command-line parsing
# further down may overwrite them.
exp = 0                # index into benchs_names_list and the per-experiment test-family lists
select_models = False  # when True, families flagged in families_to_delete are filtered out
min_models = 3         # forwarded to prep_data(); n_train_models is min_models - 1

In [16]:
import argparse
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
import copy
from utils import *
from experiment import *
from definitions import *

#python run_experiment.py --experiment 0 --min_models 3
#python run_experiment.py --experiment 1 --min_models 2 --select_models

# Command-line interface.
#   --experiment    integer index used to pick the benchmark set
#                   (benchs_names_list[exp]) and the matching test-family list.
#   --min_models    forwarded to prep_data(); the number of training models per
#                   family is derived from it as min_models - 1.
#   --select_models when given, families flagged in families_to_delete are
#                   removed before the experiment runs.
# `choices` / `required` replace the former asserts so that invalid or missing
# options produce a regular argparse usage error (and still apply under -O).
parser = argparse.ArgumentParser(description='')
parser.add_argument('--experiment', type=int, choices=[0, 1, 2], required=True,
                    help="Index of the experiment to run (selects the benchmark set): 0, 1 or 2.")
parser.add_argument('--min_models', type=int, choices=[2, 3], required=True,
                    help="Minimum number of models per family (2 or 3); min_models-1 models are used for training.")
parser.add_argument('--select_models', action='store_true',
                    help="Filter out the families listed in families_to_delete before running.")
parser.set_defaults(select_models=False)
args = parser.parse_args()
exp = args.experiment
min_models = args.min_models
# Previously the flag was parsed but never read, so it had no effect.
select_models = args.select_models

def get_results(test_families, benchs_names):
    """Run one experiment for a given set of test families.

    Reads ``data/data_v2.csv``, prepares it with ``prep_data`` and
    ``prep_data2``, and hands the resulting arrays to ``run_exp``.

    Besides its arguments, this function reads the module-level names
    ``select_models``, ``families_to_delete``, ``min_models``,
    ``n_train_models`` and ``lower_bounds``.

    Args:
        test_families: forwarded to ``prep_data2`` (presumably the families
            held out for testing -- confirm against ``prep_data2``).
        benchs_names: iterable of benchmark names; forwarded to the data
            preparation helpers and used as keys into ``lower_bounds``.

    Returns:
        A 3-tuple ``(run_exp(...) result, Instruct_test, test_families)``.
    """
    frame = pd.read_csv('data/data_v2.csv')
    if select_models:
        # Keep only the rows whose Family2 is not flagged for removal.
        keep_rows = [fam not in families_to_delete for fam in np.array(frame['Family2'])]
        frame = frame.loc[keep_rows]

    # Downstream helpers work on 'Family'; overwrite it with the Family2 labels.
    frame['Family'] = frame['Family2']
    frame, _unique_families, _avail_families = prep_data(frame, benchs_names, min_models)

    (X_train, X2_train, F_train, D_train, Y_train,
     X_test, X2_test, F_test, D_test, Y_test,
     Instruct_test) = prep_data2(frame, test_families, benchs_names, n_train_models=n_train_models)

    # One lower bound per benchmark, as floats, with a leading axis of length 1.
    Cs = np.array([lower_bounds[name] for name in benchs_names]).astype(float)[None, :]

    # Single column of ones with as many rows as the corresponding X matrix.
    Inter_train = np.ones((X_train.shape[0], 1))
    Inter_test = np.ones((X_test.shape[0], 1))

    if n_train_models == 2:
        # With two training models, F is multiplied elementwise by D.
        F_train = F_train * D_train
        F_test = F_test * D_test

    outcome = run_exp(X_train, Inter_train, F_train, D_train, Y_train,
                      X_test, Inter_test, F_test, D_test, Y_test, Cs)
    return outcome, Instruct_test, test_families


if __name__=="__main__":
    # Script entry point: pick the benchmark set and test-family splits for the
    # chosen experiment, optionally drop weak model families, run get_results()
    # in parallel over the splits and save the collected outputs.
    #
    # NOTE: the names assigned here deliberately stay at module scope --
    # get_results() reads n_train_models and families_to_delete as globals.
    import os

    n_train_models = min_models-1
    benchs_names = benchs_names_list[exp]

    # Only n_train_models in {1, 2} is handled.
    if n_train_models not in (1, 2):
        raise NotImplementedError(f'n_train_models={n_train_models} is not supported')
    test_families_list = test_families[n_train_models]

    if select_models:
        thresh_MMLU = .35
        thresh_MMLU_PRO = .15
        data = pd.read_csv('data/data_v2.csv')
        data = data.sort_values(by=['Family','#Params (B)'])
        # After the sort, keep='last' retains, per Family, the row that sorts
        # last on '#Params (B)' (i.e. the largest model of that family).
        biggest_model_data = data.drop_duplicates(subset=['Family'], keep='last')
        # A family is flagged when that model falls below either threshold.
        below_thresh = (biggest_model_data['MMLU'] < thresh_MMLU) | (biggest_model_data['MMLU-PRO'] < thresh_MMLU_PRO)
        flagged_families = np.unique(biggest_model_data.loc[below_thresh, 'Family']).tolist()
        # get_results() filters on 'Family2', so translate the flagged
        # 'Family' values into the corresponding 'Family2' labels.
        families_to_delete = np.unique(data.loc[data['Family'].isin(flagged_families), 'Family2']).tolist()
        # Drop every test split that contains at least one flagged family.
        test_families_list = [[x for x in y if not any(z in families_to_delete for z in x)] for y in test_families_list]
        print('families_to_delete:',families_to_delete)

    # Create the output directory up front so that a missing folder cannot
    # make np.save fail after the (long-running) parallel job has finished.
    os.makedirs('results', exist_ok=True)

    # The loop variable is named `fams` to avoid shadowing the imported
    # `test_families` collection.
    errors = Parallel(n_jobs=-1, verbose=True)(delayed(get_results)(fams, benchs_names) for fams in test_families_list[exp])

    # The dict is stored as a pickled object array; load it back with
    # np.load(..., allow_pickle=True).item().
    np.save(f'results/errors_exp-{exp}_n-train-models-{n_train_models}_select-models-{select_models}.npy', {'out':errors})

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   9 | elapsed: 30.6min remaining: 15.3min
[Parallel(n_jobs=-1)]: Done   9 out of   9 | elapsed: 32.7min finished
