In [9]:
import argparse
import os

import pandas as pd

In [17]:
# Command-line configuration for the oracle baseline.
parser = argparse.ArgumentParser()

# Where the tabular datasets are read from and where result CSVs are written.
parser.add_argument(
    '--data_path',
    type=str,
    default='/home/gean/Code/nns_performance_prediction/meta_datasets/',
    help='location of the dataset',
)
parser.add_argument(
    '--results_path',
    type=str,
    default='/home/gean/Code/nns_performance_prediction/results/oracle/',
    help='location of the results directory',
)

# Number of rows (K = 1..k) emitted per subset.
parser.add_argument(
    '--k',
    type=int,
    default=5000,
    help='number of k best archs to extract accs',
)

# nargs reminder: '+' == 1 or more, '*' == 0 or more, '?' == 0 or 1.
# With '+', several epoch budgets can be given after a single flag.
parser.add_argument(
    '--data_subset',
    type=int,
    nargs='+',
    default=[4, 12, 36, 108],
    help='one of the subsets from nasbench101 with 1, 4, 12, 36, or 108 epochs',
)
parser.add_argument(
    '--verbose',
    type=int,
    default=1,
    help='control the logging prints. 0 for deactivate and 1 for activate',
)

# parse_known_args() collects unrecognized arguments in `unknown` instead of
# exiting (presumably so the cell also runs with the extra arguments a notebook
# kernel is started with -- TODO confirm).
args, unknown = parser.parse_known_args()

In [18]:
def save_results(performance_dict, subset):
    """Write the oracle results of one subset to CSV and return them.

    The file is named ``nasbench101_<subset>epochs_k<args.k>_oracle.csv`` and is
    placed inside ``args.results_path``. Floats are written with six decimals
    and the DataFrame index is omitted.

    Args:
        performance_dict: mapping of column name to a list of values, handed to
            ``pd.DataFrame.from_dict``.
        subset: value embedded in the file name (the caller passes the epoch
            budget of the subset).

    Returns:
        The DataFrame built from ``performance_dict``.
    """
    df_results = pd.DataFrame.from_dict(performance_dict)

    file_name = 'nasbench101_{}epochs_k{}_oracle.csv'.format(subset, args.k)

    # Create the target directory on demand so a fresh checkout does not fail
    # in to_csv(); an empty results_path means "current directory".
    if args.results_path:
        os.makedirs(args.results_path, exist_ok=True)

    # os.path.join works whether or not results_path ends with a separator;
    # plain concatenation silently glued the directory name onto the file name.
    df_results.to_csv(os.path.join(args.results_path, file_name),
                      index=False, float_format='%.6f')

    return df_results

In [19]:
def main():
    """Compute and save the oracle baseline for every requested subset.

    For each epoch budget in ``args.data_subset`` the file
    ``nasbench101_<subset>epochs_tabular.csv`` is read from ``args.data_path``
    and three values are extracted:

    * ``best_test_acc``  -> best test acc of the whole subset
    * ``true_val_acc``   -> best valid acc of the whole subset
    * ``true_test_acc``  -> test acc of the arch with the best ``true_val_acc``
      of the whole subset

    The values are repeated for K = 1..``args.k`` and handed to
    ``save_results``. Subsets whose file is missing or contains no rows are
    reported and skipped so the remaining subsets are still processed.
    """
    for subset in args.data_subset:
        csv_path = os.path.join(args.data_path,
                                'nasbench101_' + str(subset) + 'epochs_tabular.csv')
        try:
            df_whole = pd.read_csv(csv_path)
        except (FileNotFoundError, pd.errors.EmptyDataError) as e:
            # Missing or zero-byte file: report it and move on to the next subset.
            print(e)
            continue

        if df_whole.empty:
            # A header-only file would otherwise crash on .iloc[0] below.
            print("No rows found in {}; skipping".format(csv_path))
            continue

        if args.verbose: print("\n\n######### Subset{}".format(subset))

        # A plain max is enough for the best test accuracy; no sort needed.
        best_test_acc = df_whole['final_test_accuracy'].max()

        # Row of the architecture with the best validation accuracy. The sort is
        # kept as-is so ties are resolved exactly as before.
        best_acc_val_row = df_whole.sort_values(by='final_validation_accuracy', ascending=False).iloc[0]
        true_val_acc = best_acc_val_row['final_validation_accuracy']
        true_test_acc = best_acc_val_row['final_test_accuracy']

        # results for each k does not change in the oracle
        df_dict = {'Epoch': [subset] * args.k,
                   'Model': ['Oracle'] * args.k,
                   'K': list(range(1, args.k + 1)),
                   'True_Val_Acc': [true_val_acc] * args.k,
                   'True_Test_Acc': [true_test_acc] * args.k,
                   'Best_Test_Acc': [best_test_acc] * args.k}

        # one per dataset-subset
        df_results = save_results(df_dict, subset)
        if args.verbose: print(df_results.head())

In [20]:
if __name__ == '__main__':
    # Echo the effective configuration, one option per line, before running.
    for _option in ('data_path', 'results_path', 'k', 'data_subset', 'verbose'):
        print(_option + ": ", getattr(args, _option))

    main()

data_path:  /home/gean/Code/nns_performance_prediction/meta_datasets/
results_path:  /home/gean/Code/nns_performance_prediction/results/oracle/
k:  5000
data_subset:  [4, 12, 36, 108]
verbose:  1


######### Subset4
   Epoch   Model  K  True_Val_Acc  True_Test_Acc  Best_Test_Acc
0      4  Oracle  1      0.708433       0.698651       0.701088
1      4  Oracle  2      0.708433       0.698651       0.701088
2      4  Oracle  3      0.708433       0.698651       0.701088
3      4  Oracle  4      0.708433       0.698651       0.701088
4      4  Oracle  5      0.708433       0.698651       0.701088


######### Subset12
   Epoch   Model  K  True_Val_Acc  True_Test_Acc  Best_Test_Acc
0     12  Oracle  1      0.876536       0.867755       0.867755
1     12  Oracle  2      0.876536       0.867755       0.867755
2     12  Oracle  3      0.876536       0.867755       0.867755
3     12  Oracle  4      0.876536       0.867755       0.867755
4     12  Oracle  5      0.876536       0.867755       0.86