In [31]:
import argparse

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

import numpy as np
import pandas as pd

In [37]:
# Command-line configuration for the "k best architectures" extraction.
# Every option that the driver loop iterates over (data_subset, seed,
# train_size, estimators) is declared with nargs='+', so a value given on the
# command line is always parsed into a list, matching the list-typed defaults.
parser = argparse.ArgumentParser()

parser.add_argument('--preds_path', type=str, default='/home/gean/Code/nns_performance_prediction/results/fast/test20/',
                    help='dataset location')
parser.add_argument('--results_path', type=str, default='/home/gean/Code/nns_performance_prediction/results/fast/test20/',
                    help='results dir location')
parser.add_argument('--k', type=int, default=1000,
                    help='number of k best archs to extract accs')

#'+' == 1 or more, '*' == 0 or more, '?' == 0 or 1.
# Without nargs a command-line value would be parsed as a bare int, and the
# `for subset in args.data_subset` / `for n in args.train_size` loops would
# raise TypeError.
parser.add_argument('--data_subset', type=int, default=[36, 108], nargs='+',
                    help='one of the subsets from nasbench101 with 4, 12, 36, or 108 epochs')
parser.add_argument('--seed', type=int, default=[0, 42], nargs='+',
                    help='seeds used for all the random procedures')
parser.add_argument('--train_size', type=int, default=[6880], nargs='+',
                    help='training split size')
parser.add_argument('--estimators', type=str, default=['LinearRegression', 'SGDRegressor', 'Lasso', 'BayesianRidge',
                                                       'KNeighborsRegressor', 'DecisionTreeRegressor', 'SVR', 'MLPRegressor',
                                                       'RandomForestRegressor', 'AdaBoostRegressor', 'GradientBoostingRegressor',
                                                       'DummyRegressor'], nargs='+',
                    help='list of sklearn estimators used for training')
parser.add_argument('--verbose', type=int, default=1,
                    help='control the logging prints. 0 for deactivate and 1 for activate')

# parse_known_args() returns unrecognised arguments in `unknown` instead of
# exiting -- presumably so the cell also runs when the host process (e.g. a
# notebook kernel) puts its own flags in sys.argv.
args, unknown = parser.parse_known_args()

In [33]:
def save_results(performance_dict, subset, n):
    """Write the collected results to a CSV file under ``args.results_path``.

    The file is named
    ``Subset<subset>_N<n>_K<args.k>_nasbench_best_archs.csv`` and is appended
    directly to ``args.results_path`` (plain string concatenation, so the
    directory is expected to end with a path separator).

    Args:
        performance_dict: mapping of column name -> list of values; it is
            turned into a DataFrame with ``pd.DataFrame.from_dict``.
        subset: data-subset identifier used in the file name.
        n: training-size identifier used in the file name.
    """
    csv_path = "".join([
        args.results_path,
        "Subset", str(subset),
        "_N", str(n),
        "_K", str(args.k),
        '_nasbench_best_archs.csv',
    ])

    results_frame = pd.DataFrame.from_dict(performance_dict)
    # No index column; floats are written with six decimal places.
    results_frame.to_csv(csv_path, index=False, float_format='%.6f')

In [34]:
def get_dataframe(data_path, file_name):
    """Read ``<data_path><file_name>.csv`` and return it as a DataFrame.

    Args:
        data_path: directory prefix; it is concatenated as-is, so it is
            expected to end with a path separator.
        file_name: CSV file name without the ``.csv`` extension.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that file.
    """
    csv_location = "".join((data_path, file_name, ".csv"))
    return pd.read_csv(csv_location)

In [35]:
def get_k_best_archs(dataframe, k_best_archs, seed, subset, reg, n=None, k=None):
    """Append one regressor's top-k predicted architectures to ``k_best_archs``.

    Rows of ``dataframe`` are ranked in descending order of the column
    ``Seed<seed>_Subset<subset>_N<n>_<reg>_Pred_Val_Acc`` and the first ``k``
    rows are kept. For those rows the predicted validation accuracy and the
    ``..._True_Val_Acc`` / ``..._True_Test_Acc`` columns of the same setup are
    appended to the accumulator, together with the seed, the model name and
    the 1-based rank.

    Args:
        dataframe: predictions table containing the three columns above;
            it is not modified.
        k_best_archs: dict with list values under the keys 'Seed', 'Model',
            'K', 'Pred_Val_Acc', 'True_Val_Acc' and 'True_Test_Acc'. It is
            extended in place.
        seed: seed identifier used in the column names.
        subset: data-subset identifier used in the column names.
        reg: estimator name used in the prediction column name.
        n: training-size identifier used in the column names. When omitted,
            the module-level variable ``n`` is used (the behaviour existing
            callers rely on).
        k: number of best rows to keep. Defaults to ``args.k``.

    Returns:
        The same ``k_best_archs`` dict that was passed in.

    Raises:
        NameError: if ``n`` is not given and no module-level ``n`` exists.
        KeyError: if an expected column is missing from ``dataframe``.
    """
    if n is None:
        # Backward compatibility: older call sites do not pass `n` and rely on
        # the driver loop's module-level loop variable.
        n = globals().get('n')
        if n is None:
            raise NameError("'n' was not passed and no module-level 'n' is defined")
    if k is None:
        k = args.k

    setup = "Seed{}_Subset{}_N{}".format(seed, subset, n)
    reg_pred_col = "{}_{}_Pred_Val_Acc".format(setup, reg)

    # sort_values returns a new frame, so the caller's dataframe is untouched
    # without an explicit copy.
    rows_best_preds = dataframe.sort_values(by=[reg_pred_col], ascending=False).iloc[:k]

    # The frame may hold fewer than k rows. Size the bookkeeping columns by the
    # number of rows actually selected so every list in the accumulator keeps
    # the same length.
    n_selected = len(rows_best_preds)

    k_best_archs['Seed'].extend([seed] * n_selected)
    k_best_archs['Model'].extend([reg] * n_selected)
    k_best_archs['K'].extend(range(1, n_selected + 1))
    k_best_archs['Pred_Val_Acc'].extend(rows_best_preds[reg_pred_col])
    k_best_archs['True_Val_Acc'].extend(rows_best_preds[setup + "_True_Val_Acc"])
    k_best_archs['True_Test_Acc'].extend(rows_best_preds[setup + "_True_Test_Acc"])

    return k_best_archs

In [39]:
if __name__ == '__main__':
    # Echo the effective configuration, one "<option>:  <value>" line each.
    for option in ('preds_path', 'results_path', 'k', 'data_subset', 'seed',
                   'train_size', 'estimators', 'verbose'):
        print("{}: ".format(option), getattr(args, option))

    # Accumulator of result columns; it is filled across all seeds and
    # estimators of one (n, subset) pair and then written out.
    k_best_archs = {column: [] for column in
                    ('Seed', 'Model', 'K', 'Pred_Val_Acc', 'True_Val_Acc', 'True_Test_Acc')}

    # NOTE: this loop variable must keep the name `n`: get_k_best_archs is
    # called without a training size and picks it up from this module-level
    # variable.
    for n in args.train_size:
        predictions_file = "N" + str(n) + "_nasbench_predictions"
        df_preds = get_dataframe(args.preds_path, predictions_file)

        for subset in args.data_subset:

            for seed in args.seed:
                if args.verbose:
                    print("\n\n\n########### N{}, Seed{}, Subset{} ###########".format(n, seed, subset))

                for reg in args.estimators:
                    if args.verbose:
                        print(reg)

                    k_best_archs = get_k_best_archs(df_preds, k_best_archs, seed, subset, reg)

            # One CSV per (subset, n) pair; start a fresh accumulator afterwards.
            save_results(k_best_archs, subset, n)
            k_best_archs = {column: [] for column in
                            ('Seed', 'Model', 'K', 'Pred_Val_Acc', 'True_Val_Acc', 'True_Test_Acc')}

            if args.verbose:
                print("\n\n\n########## Final csv for Subset{}, N{} GENERATED #########\n".format(subset, n))

preds_path:  /home/gean/Code/nns_performance_prediction/results/fast/test20/
results_path:  /home/gean/Code/nns_performance_prediction/results/fast/test20/
k:  1000
data_subset:  [36, 108]
seed:  [0, 42]
train_size:  [6880]
estimators:  ['LinearRegression', 'SGDRegressor', 'Lasso', 'BayesianRidge', 'KNeighborsRegressor', 'DecisionTreeRegressor', 'SVR', 'MLPRegressor', 'RandomForestRegressor', 'AdaBoostRegressor', 'GradientBoostingRegressor', 'DummyRegressor']
verbose:  1



########### N6880, Seed0, Subset36 ###########
LinearRegression
SGDRegressor
Lasso
BayesianRidge
KNeighborsRegressor
DecisionTreeRegressor
SVR
MLPRegressor
RandomForestRegressor
AdaBoostRegressor
GradientBoostingRegressor
DummyRegressor



########### N6880, Seed42, Subset36 ###########
LinearRegression
SGDRegressor
Lasso
BayesianRidge
KNeighborsRegressor
DecisionTreeRegressor
SVR
MLPRegressor
RandomForestRegressor
AdaBoostRegressor
GradientBoostingRegressor
DummyRegressor



########## Final csv for Subset36, N6880