In [1]:
import argparse

import os
import random
import numpy as np
import pandas as pd

In [2]:
# ---------------------------------------------------------------------------
# Run configuration.
#
# Every option has a default, so the cell works with no arguments at all.
# Note that the two path defaults are absolute paths under /home/gean/...;
# pass --data_path / --results_path to run anywhere else.
# ---------------------------------------------------------------------------

# Defaults for the multi-valued options, kept apart from the parser wiring.
_default_subsets = [12, 36]
_default_seeds = [0, 1, 10, 42, 100, 123, 666, 1000, 1234, 12345]
_default_train_sizes = [43, 86, 129, 172, 344, 860]
_default_estimators = [
    'LinearRegression',
    'SGDRegressor',
    'Lasso',
    'BayesianRidge',
    'KNeighborsRegressor',
    'DecisionTreeRegressor',
    'SVR',
    'MLPRegressor',
    'RandomForestRegressor',
    'AdaBoostRegressor',
    'GradientBoostingRegressor',
    'DummyRegressor',
]

parser = argparse.ArgumentParser()

# Single-valued options.
parser.add_argument(
    '--data_path',
    type=str,
    default='/home/gean/Code/nns_performance_prediction/results/fast/test12/',
    help='k_best_archs csv location',
)
parser.add_argument(
    '--results_path',
    type=str,
    default='/home/gean/Code/nns_performance_prediction/results/fast/test12/',
    help='results dir location',
)
parser.add_argument(
    '--k',
    type=int,
    default=10,
    help='number of k best archs to extract accs',
)

# List-valued options. nargs cheat-sheet: '+' means one or more values,
# '*' means zero or more, '?' means zero or one.
parser.add_argument(
    '--data_subset',
    type=int,
    default=_default_subsets,
    nargs='+',
    help='one of the subsets from nasbench101 with 4, 12, 36, or 108 epochs',
)
parser.add_argument(
    '--seed',
    type=int,
    default=_default_seeds,
    nargs='+',
    help='seeds used for all the random procedures',
)
parser.add_argument(
    '--train_size',
    type=int,
    default=_default_train_sizes,
    nargs='+',
    help='training split size',
)
parser.add_argument(
    '--estimators',
    type=str,
    default=_default_estimators,
    nargs='+',
    help='list of sklearn estimators used for training',
)
parser.add_argument(
    '--verbose',
    type=int,
    default=1,
    help='control the logging prints. 0 for deactivate and 1 for activate',
)

# parse_known_args() collects unrecognised arguments in `unknown` instead of
# aborting -- presumably so the notebook kernel's own argv does not break the
# cell (TODO confirm).
args, unknown = parser.parse_known_args()

In [3]:
def get_dataframe(subset, n):
    """Load the k-best-architectures CSV for one (subset, train size) pair.

    The file is read from ``args.data_path`` and is named
    ``Subset<subset>_N<n>_K<args.k>_nasbench_best_archs.csv``.

    Parameters
    ----------
    subset
        Value formatted into the ``Subset<subset>`` part of the file name.
    n
        Value formatted into the ``_N<n>`` part of the file name.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.read_csv`` on that file.
    """
    file_name = "Subset{}_N{}_K{}_nasbench_best_archs.csv".format(subset, n, args.k)

    # os.path.join adds the separator only when it is missing, so data_path
    # works with or without a trailing slash (plain concatenation silently
    # produced a wrong path in the latter case).
    return pd.read_csv(os.path.join(args.data_path, file_name))

In [4]:
def save_results(performance_dict, subset, n):
    """Write the collected accuracies to a CSV file and log the file path.

    The file is written to ``args.results_path`` and is named
    ``Subset<subset>_N<n>_K<args.k>_nasbench_best_val_test_accs.csv``.
    Floats are written with six decimals and the DataFrame index is omitted.

    Parameters
    ----------
    performance_dict
        Mapping passed to ``pd.DataFrame.from_dict`` to build the table.
    subset
        Value formatted into the ``Subset<subset>`` part of the file name.
    n
        Value formatted into the ``_N<n>`` part of the file name.
    """
    file_name = "Subset{}_N{}_K{}_nasbench_best_val_test_accs.csv".format(subset, n, args.k)

    # Build the path once so the file written and the path logged can never
    # diverge. os.path.join also makes a results_path without a trailing
    # slash resolve correctly.
    out_path = os.path.join(args.results_path, file_name)

    df_results = pd.DataFrame.from_dict(performance_dict)
    df_results.to_csv(out_path, index=False, float_format='%.6f')

    print(out_path + " -> GENERATED")

In [None]:
if __name__ == '__main__':
    # Echo the effective configuration so each run's log is self-describing.
    print("data_path: ", args.data_path)
    print("results_path: ", args.results_path)
    print("k: ", args.k)
    print("data_subset: ", args.data_subset)
    print("seed: ", args.seed)
    print("train_size: ", args.train_size)
    print("estimators: ", args.estimators)
    print("verbose: ", args.verbose, end="\n\n")

    # One input CSV and one output CSV per (subset, train size) pair.
    for subset in args.data_subset:

        for n in args.train_size:
            df_whole = get_dataframe(subset, n)

            # Column-oriented accumulator: one entry per (seed, model, k),
            # handed to save_results once all seeds/models are processed.
            formatted_best_archs_file = {'K': [], 'Model': [], 'Seed': [], 'True_Val_Acc': [], 'True_Test_Acc': []}

            for seed in args.seed:

                for reg in args.estimators:
                    if args.verbose:
                        print("Subset{}, N{}, Seed{}, {}".format(subset, n, seed, reg))

                    # The row selection depends only on (model, seed), not on
                    # k, so it is done once here instead of once per k.
                    df = df_whole.loc[(df_whole['Model'] == reg) & (df_whole['Seed'] == seed)]
                    true_val_accs = df['True_Val_Acc'].tolist()
                    true_test_accs = df['True_Test_Acc'].tolist()

                    # Fail with a clear message rather than an opaque
                    # IndexError when the CSV has no rows for this combination.
                    if not true_val_accs:
                        raise ValueError(
                            "no rows for Model={!r}, Seed={!r} in the Subset{}_N{} best-archs file".format(
                                reg, seed, subset, n))

                    for k in range(1, args.k + 1):
                        # Best true accuracy among the first k rows. Rows are
                        # presumably ordered by predicted validation accuracy
                        # ("k best pred_val_accs") -- TODO confirm against the
                        # script that produces the input CSV.
                        # If fewer than k rows exist, the slice simply covers
                        # all of them.
                        formatted_best_archs_file['K'].append(k)
                        formatted_best_archs_file['Model'].append(reg)
                        formatted_best_archs_file['Seed'].append(seed)
                        formatted_best_archs_file['True_Val_Acc'].append(max(true_val_accs[:k]))
                        formatted_best_archs_file['True_Test_Acc'].append(max(true_test_accs[:k]))

            save_results(formatted_best_archs_file, subset, n)