In [2]:
import argparse
import os

import pandas as pd

In [3]:
parser = argparse.ArgumentParser()
 
parser.add_argument('--data_path', type=str, default='/home/gean/nns_performance_prediction/results/fast/test12/', 
                    help='location of the dataset')    
parser.add_argument('--results_path', type=str, default='/home/gean/nns_performance_prediction/results/fast/test12/', 
                    help='location of the results directory')    

#'+' == 1 or more, '*' == 0 or more, '?' == 0 or 1.
parser.add_argument('--dataset', type=str, default=['cifar10valid', 'cifar100', 'imagenet16_120'], nargs='+', 
                    help='one of the datasets from nasbench201, being cifar10valid, cifar100, or imagenet16_120')
parser.add_argument('--data_subset', type=int, default=[4, 108, 200], 
                    help='one of the subsets from nasbench101 with 4, 12, 36, or 108 epochs')
parser.add_argument('--train_size', type=int, default=[43, 86, 129, 172, 344, 860], 
                    help='[Int, Int...] representing the total number of train samples')
#all except mlp
parser.add_argument('--estimators', type=str, default=['LinearRegression', 'BayesianRidge', 'RandomForestRegressor', 
                                                       'GradientBoostingRegressor', 'DummyRegressor'], nargs='+',
                    help='list of sklearn estimators used for training') 
parser.add_argument('--verbose', type=int, default=1, 
                    help='control the logging prints. 0 for deactivate and 1 for activate') 

args, unknown = parser.parse_known_args() 

In [4]:
def main():    
    df_whole = pd.read_csv(str(args.data_path + 'nasbench201_allDatasets_allEpochs_allNs_metrics_absValues.csv'))
    
    for data in args.dataset:
        for subset in args.data_subset:
            for n in args.train_size:            
                df_run = df_whole.loc[(df_whole['Dataset'] == str(data)) & 
                                      (df_whole['Epoch'] == int(subset)) & 
                                      (df_whole['Train_Size'] == int(n))]
                
                if len(df_run) == 0:
                    continue
                
                else:
                
                    if args.verbose: 
                        print("=========== {}, Subset{}, N{} ===========".format(data, subset, n))

                    mae_nemenyi, mse_nemenyi = {}, {}

                    for reg in args.estimators:
                        reg_row = df_run.loc[(df_run['Model'] == str(reg))]
                        mae_nemenyi[str(reg)] = list(reg_row['MAE'])
                        mse_nemenyi[str(reg)] = list(reg_row['MSE'])

                    df_mae_nemenyi = pd.DataFrame.from_dict(mae_nemenyi)
                    df_mae_nemenyi = df_mae_nemenyi.rename(columns={"LinearRegression": "Linear Regression", 
                    "SGDRegressor": "Stochastic Gradient Descent", "BayesianRidge": "Bayesian Ridge",
                    "KNeighborsRegressor": "K-Nearest Neighbors", "DecisionTreeRegressor": "Decision Tree",
                    "SVR": "Support Vector Machine", "RandomForestRegressor": "Random Forest", 
                    "AdaBoostRegressor": "AdaBoost", "GradientBoostingRegressor": "Gradient Boosting", 
                    "DummyRegressor": "Dummy"})
                
                    df_mse_nemenyi = pd.DataFrame.from_dict(mse_nemenyi)
                    df_mse_nemenyi = df_mse_nemenyi.rename(columns={"LinearRegression": "Linear Regression", 
                    "SGDRegressor": "Stochastic Gradient Descent", "BayesianRidge": "Bayesian Ridge",
                    "KNeighborsRegressor": "K-Nearest Neighbors", "DecisionTreeRegressor": "Decision Tree",
                    "SVR": "Support Vector Machine", "RandomForestRegressor": "Random Forest", 
                    "AdaBoostRegressor": "AdaBoost", "GradientBoostingRegressor": "Gradient Boosting", 
                    "DummyRegressor": "Dummy"})
                    
                    path = str(args.results_path + 'nasbench201_' + str(data) + '_' + str(subset) + 'epochs_n' + str(n))                                           
                    df_mae_nemenyi.to_csv(str(path + '_mae_nemenyi.csv'), index=False, float_format='%.6f')
                    df_mse_nemenyi.to_csv(str(path + '_mse_nemenyi.csv'), index=False, float_format='%.6f')

In [5]:
if __name__ == '__main__':
    print("data_path:", args.data_path)
    print("results_path:", args.results_path)
    print("dataset:", args.dataset)
    print("data_subset:", args.data_subset)
    print("train_size:", args.train_size)
    print("estimators:", args.estimators)
    print("verbose:", args.verbose)
    
    main()

data_path: /home/gean/nns_performance_prediction/results/fast/test12/
results_path: /home/gean/nns_performance_prediction/results/fast/test12/
dataset: ['cifar10valid', 'cifar100', 'imagenet16_120']
data_subset: [4, 108, 200]
train_size: [43, 86, 129, 172, 344, 860]
estimators: ['LinearRegression', 'BayesianRidge', 'RandomForestRegressor', 'GradientBoostingRegressor', 'DummyRegressor']
verbose: 1
