In [19]:
import argparse
import os

import numpy as np
import pandas as pd

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [20]:
# Command-line configuration for the metrics aggregation below.
# Every list-valued option is declared with nargs='+' so that a value supplied on
# the command line is parsed into a list, exactly like its default. main() iterates
# over these options, so a bare int would fail there.
parser = argparse.ArgumentParser()

parser.add_argument('--data_path', type=str, default='/home/gean/nns_performance_prediction/results/fast/test12/',
                    help='location of the dataset')
parser.add_argument('--results_path', type=str, default='/home/gean/nns_performance_prediction/results/fast/test12/',
                    help='location of the results directory')

# nargs reminder: '+' == 1 or more, '*' == 0 or more, '?' == 0 or 1.
parser.add_argument('--dataset', type=str, default=['cifar10valid', 'cifar100', 'imagenet16_120'], nargs='+',
                    help='one of the datasets from nasbench201, being cifar10valid, cifar100, or imagenet16_120')
# NOTE(review): the help text below mentions nasbench101 epoch budgets while the
# defaults are 4/108/200 and the input files are named nasbench201_* -- confirm wording.
parser.add_argument('--data_subset', type=int, default=[4, 108, 200], nargs='+',
                    help='one of the subsets from nasbench101 with 4, 12, 36, or 108 epochs')
parser.add_argument('--train_size', type=int, default=[43, 86, 129, 172, 344, 860], nargs='+',
                    help='[Int, Int...] representing the total number of train samples')
parser.add_argument('--seed', type=int, default=[0, 1, 10, 42, 100, 123, 666, 1000, 1234, 12345], nargs='+',
                    help='seeds used for all the random procedures')
# Default estimator list: all except mlp.
parser.add_argument('--estimators', type=str, default=['LinearRegression', 'SGDRegressor', 'Lasso',
                                                       'BayesianRidge', 'KNeighborsRegressor', 'DecisionTreeRegressor',
                                                       'SVR', 'RandomForestRegressor', 'AdaBoostRegressor',
                                                       'GradientBoostingRegressor', 'DummyRegressor'], nargs='+',
                    help='list of sklearn estimators used for training')

parser.add_argument('--verbose', type=int, default=1,
                    help='control the logging prints. 0 for deactivate and 1 for activate')

# parse_known_args() tolerates arguments this parser does not declare and returns
# them separately in `unknown` instead of exiting with an error.
args, unknown = parser.parse_known_args()

In [21]:
def save_results(performance_dict, file_name):
    """Write a dict of result columns to a CSV file under ``args.results_path``.

    Args:
        performance_dict: mapping of column name to the list of values for that
            column; it is converted with ``pd.DataFrame.from_dict``.
        file_name: name of the CSV file to create inside ``args.results_path``.

    The file is written without the index column and floats are formatted
    with six decimal places.
    """
    df_results = pd.DataFrame.from_dict(performance_dict)
    # os.path.join instead of string concatenation, so a results path given
    # without a trailing separator still yields a valid file path.
    output_file = os.path.join(args.results_path, file_name)
    df_results.to_csv(output_file, index=False, float_format='%.6f')

In [22]:
def main():
    """Compute MAE/MSE/R2 for every stored prediction set and save two summaries.

    For each (dataset, epoch subset, train size) combination in ``args`` the file
    ``nasbench201_<dataset>_<subset>epochs_n<n>_predictions.csv`` is read from
    ``args.data_path``. Its ``model``, ``seed``, ``acc_valid_pred`` and
    ``acc_valid_true`` columns are used to compute the metrics per estimator and
    per seed. Combinations whose file is missing are reported and skipped, and so
    are (estimator, seed) pairs that have no rows in the file.

    Two CSV files are written through ``save_results``:
      * ``..._metrics_absValues.csv``: one row per dataset/epoch/size/seed/model.
      * ``..._metrics_averages.csv``: mean and standard deviation over the seeds
        (``np.std`` default, i.e. population standard deviation) per model.
    """
    abs_values = {'Dataset': [], 'Epoch': [], 'Train_Size': [], 'Seed': [], 'Model': [], 'MAE': [], 'MSE': [], 'R2': []}
    means_stds = {'Dataset': [], 'Epoch': [], 'Train_Size': [], 'Model': [], 'MAE_mean': [], 'MAE_std': [], 'MSE_mean': [],
                  'MSE_std': [], 'R2_mean': [], 'R2_std': []}

    for data in args.dataset:
        for subset in args.data_subset:
            for n in args.train_size:
                predictions_file = os.path.join(
                    args.data_path,
                    'nasbench201_' + str(data) + '_' + str(subset) + 'epochs_n' + str(n) + '_predictions.csv')
                try:
                    df_whole = pd.read_csv(predictions_file)
                except FileNotFoundError as e:
                    # Not every combination has to exist; report it and move on.
                    print(e)
                    continue

                if args.verbose:
                    print("\n\n\n=========================================================")
                    print("=========== {}, Subset{}, N{} ===========".format(data, subset, n))
                    print("=========================================================")

                for reg in args.estimators:
                    maes, mses, r2s = [], [], []

                    for seed in args.seed:
                        reg_seed_rows = df_whole.loc[(df_whole['model'] == str(reg)) & (df_whole['seed'] == int(seed))]
                        if reg_seed_rows.empty:
                            # The sklearn metrics raise ValueError on empty input, which
                            # would abort the whole run; report the gap and keep going.
                            print("No predictions for model {} with seed {} in {}".format(reg, seed, predictions_file))
                            continue

                        y_pred = reg_seed_rows['acc_valid_pred'] / 100  # scale to match nasbench101 standards
                        y_true = reg_seed_rows['acc_valid_true'] / 100  # scale to match nasbench101 standards

                        mae = mean_absolute_error(y_true, y_pred)
                        mse = mean_squared_error(y_true, y_pred)
                        r2 = r2_score(y_true, y_pred)
                        maes.append(mae)
                        mses.append(mse)
                        r2s.append(r2)

                        abs_values['Dataset'].append(data)
                        abs_values['Epoch'].append(subset)
                        abs_values['Train_Size'].append(n)
                        abs_values['Seed'].append(seed)
                        abs_values['Model'].append(reg)
                        abs_values['MAE'].append(mae)
                        abs_values['MSE'].append(mse)
                        abs_values['R2'].append(r2)

                        if args.verbose: print("{}\nMAE: {}, MSE: {}, R2: {}".format(reg, mae, mse, r2))

                    if not maes:
                        # No seed produced a result for this model: skip the summary
                        # row rather than writing NaN statistics.
                        continue

                    means_stds['Dataset'].append(data)
                    means_stds['Epoch'].append(subset)
                    means_stds['Train_Size'].append(n)
                    means_stds['Model'].append(reg)
                    means_stds['MAE_mean'].append(np.mean(maes))
                    means_stds['MAE_std'].append(np.std(maes))
                    means_stds['MSE_mean'].append(np.mean(mses))
                    means_stds['MSE_std'].append(np.std(mses))
                    means_stds['R2_mean'].append(np.mean(r2s))
                    means_stds['R2_std'].append(np.std(r2s))

    save_results(abs_values, 'nasbench201_allDatasets_allEpochs_allNs_metrics_absValues.csv')
    save_results(means_stds, 'nasbench201_allDatasets_allEpochs_allNs_metrics_averages.csv')

In [23]:
if __name__ == '__main__':
    # Echo the effective configuration, one option per line, before the run starts.
    for option in ('data_path', 'results_path', 'dataset', 'data_subset',
                   'train_size', 'seed', 'estimators', 'verbose'):
        print(option + ":", getattr(args, option))

    main()

data_path: /home/gean/nns_performance_prediction/results/fast/test12/
results_path: /home/gean/nns_performance_prediction/results/fast/test12/
dataset: ['cifar10valid', 'cifar100', 'imagenet16_120']
data_subset: [4, 108, 200]
train_size: [43, 86, 129, 172, 344, 860]
seed: [0, 1, 10, 42, 100, 123, 666, 1000, 1234, 12345]
estimators: ['LinearRegression', 'SGDRegressor', 'Lasso', 'BayesianRidge', 'KNeighborsRegressor', 'DecisionTreeRegressor', 'SVR', 'RandomForestRegressor', 'AdaBoostRegressor', 'GradientBoostingRegressor', 'DummyRegressor']
verbose: 1



LinearRegression
MAE: 0.06886884613785137, MSE: 0.008452652202438998, R2: 0.10170806689570377
LinearRegression
MAE: 0.08351126324220254, MSE: 0.011629844608014231, R2: -0.23558635990005738
LinearRegression
MAE: 0.0686531615049416, MSE: 0.008581001121546215, R2: 0.0886776324661549
LinearRegression
MAE: 0.06984646569952509, MSE: 0.009164936095601343, R2: 0.026946862237801517
LinearRegression
MAE: 0.07837007722307791, MSE: 0.010338935110868