## Imports

In [14]:
import argparse

import os
import random
import time
import json
import numpy as np
import pandas as pd

import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV, train_test_split

from sklearn.linear_model import LinearRegression, SGDRegressor, Lasso, BayesianRidge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.dummy import DummyRegressor

from joblib import dump, load

## Command-line Arguments Definitions

In [15]:
def _none_or_str(value):
    """Map the literal text 'none' (any casing) to ``None``; return other text unchanged.

    argparse passes every command-line value to ``type`` as a string, so with
    ``type=str`` the advertised choice ``--scoring_rs None`` would produce the
    string 'None' rather than the ``None`` object.
    """
    return None if value.strip().lower() == 'none' else value


parser = argparse.ArgumentParser()

# NOTE(review): the three path defaults are absolute locations on the original
# author's machine; pass explicit values when running anywhere else.
parser.add_argument('--data_path', type=str, default='/home/gean/Code/nns_performance_prediction/meta_datasets/', help='location of the dataset')
parser.add_argument('--model_path', type=str, default='/home/gean/Code/nns_performance_prediction/saved_models/', help='path to save the trained models')
parser.add_argument('--results_path', type=str, default='/home/gean/Code/nns_performance_prediction/results/', help='location of the results directory')
parser.add_argument('--target', type=str, default='final_validation_accuracy', help='target of the training/test')
parser.add_argument('--n_iter_rs', type=int, default=2, help='number of iterations for random search')
parser.add_argument('--cv_inner', type=int, default=3, help='number of partitions for the inner split of nested cross-validation')
# The converter turns the text 'None' into the None object (see _none_or_str).
parser.add_argument('--scoring_rs', type=_none_or_str, default=None, help='[neg_mean_absolute_error, neg_mean_squared_error, r2, None]')

# nargs reminder: '+' == 1 or more, '*' == 0 or more, '?' == 0 or 1.
# The commented-out variants below are smaller defaults kept for quick trial runs.
# parser.add_argument('--data_subset', type=int, default=[4, 12], nargs='+', help='one of the subsets from nasbench101 with 4, 12, 36, or 108 epochs')
parser.add_argument('--data_subset', type=int, default=[4, 12, 36, 108], nargs='+', help='one of the subsets from nasbench101 with 4, 12, 36, or 108 epochs')
# parser.add_argument('--seed', type=int, default=[0, 1], nargs='+', help='seeds used for all the random procedures')
parser.add_argument('--seed', type=int, default=[0, 1, 10, 42, 100, 123, 666, 1000, 1234, 12345], nargs='+', help='seeds used for all the random procedures')
# parser.add_argument('--train_size', type=int, default=[43, 86], nargs='+', help='[Int, Int...] representing the total number of train samples')
parser.add_argument('--train_size', type=int, default=[43, 86, 129, 172, 344, 860], nargs='+', help='[Int, Int...] representing the total number of train samples')
# parser.add_argument('--estimators', type=str, default=['linear_regression', 'sgd', 'lasso', 'bayesian_ridge', 'knn', 'dt', 'svm', 'mlp', 'random_forest',
#                                                     'ada_boost', 'gradient_boost', 'dummy'], nargs='+', help='list of sklearn estimators to be used for training')
# Default list leaves out mlp (expensive) and lasso (behaves much like dummy).
parser.add_argument('--estimators', type=str, default=['linear_regression', 'sgd', 'bayesian_ridge', 'knn', 'dt', 'svm', 'random_forest',
                                                    'ada_boost', 'gradient_boost', 'dummy'], nargs='+', help='list of sklearn estimators to be used for training')

parser.add_argument('--features_drop', type=str, default=['module_adjacency', 'halfway_training_time', 'halfway_train_accuracy', 'halfway_validation_accuracy',
                                                          'halfway_test_accuracy', 'final_training_time', 'final_train_accuracy', 'final_test_accuracy'],
                                                nargs='+', help='list of features to drop from nasbench101')

# parse_known_args (rather than parse_args) returns unrecognised arguments in
# `unknown` instead of exiting -- presumably so the extra arguments a notebook
# kernel is launched with do not abort the run.
#args = parser.parse_args(args=[])
args, unknown = parser.parse_known_args()

## Utils

In [16]:
def set_default_seed(seed):
    """Seed the global random number generators used by this notebook.

    Exports ``PYTHONHASHSEED`` into the process environment, then seeds the
    standard-library ``random`` module and NumPy's legacy global generator
    with the same value.

    NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so the
    exported value presumably only matters for processes spawned afterwards.
    """
    os.environ.update(PYTHONHASHSEED=str(seed))
    for reseed in (random.seed, np.random.seed):
        reseed(seed)

In [17]:
def get_preprocessed_numpy_dataset(data_subset):
    """Load one nasbench101 tabular CSV and split it into feature and target arrays.

    The file ``nasbench101_<data_subset>epochs_tabular.csv`` is read from
    ``args.data_path``, the columns listed in ``args.features_drop`` are
    removed, and the remaining frame is passed through ``pd.get_dummies``.

    Parameters
    ----------
    data_subset : value whose ``str()`` form identifies the epoch subset in
        the file name (the CLI declares it as int).

    Returns
    -------
    (X, y) : tuple of numpy arrays -- every column except ``args.target``,
        and the ``args.target`` column respectively.
    """
    # os.path.join keeps the path valid whether or not --data_path ends with a separator.
    csv_path = os.path.join(args.data_path, 'nasbench101_' + str(data_subset) + 'epochs_tabular.csv')

    df_whole = pd.read_csv(csv_path)
    df_whole = df_whole.drop(args.features_drop, axis=1)
    df_whole = pd.get_dummies(df_whole)

    y = df_whole[args.target].to_numpy()
    X = df_whole.drop([args.target], axis=1).to_numpy()

    return X, y

In [18]:
def get_estimators(seed):
    """Instantiate the regressors named in ``args.estimators``.

    Names are compared case-insensitively. The returned list always follows
    the fixed order of the table below, regardless of the order in which the
    names were given. Estimators that accept ``random_state`` receive ``seed``.

    The names 'lasso' and 'mlp' are currently disabled: no estimator is built
    for them even when requested.
    """
    requested = [name.lower() for name in args.estimators]

    # (cli name, zero-argument factory) -- factories defer construction so only
    # the requested estimators are ever instantiated.
    factories = (
        ("linear_regression", lambda: LinearRegression(n_jobs=-1)),
        ("sgd", lambda: SGDRegressor(random_state=seed)),
        # ("lasso", lambda: Lasso(random_state=seed)),
        ("bayesian_ridge", lambda: BayesianRidge()),
        ("knn", lambda: KNeighborsRegressor(n_jobs=-1)),
        ("dt", lambda: DecisionTreeRegressor(random_state=seed)),
        ("svm", lambda: SVR()),
        # ("mlp", lambda: MLPRegressor(random_state=seed)),
        ("random_forest", lambda: RandomForestRegressor(n_jobs=-1, random_state=seed)),
        ("ada_boost", lambda: AdaBoostRegressor(random_state=seed)),
        ("gradient_boost", lambda: GradientBoostingRegressor(random_state=seed)),
        ("dummy", lambda: DummyRegressor()),
    )

    return [build() for cli_name, build in factories if cli_name in requested]

In [19]:
def get_estimators_hyperparameters_to_search(estimators=None):
    """Return the random-search grids for the selected estimators.

    Parameters
    ----------
    estimators : iterable of str, optional
        Estimator names (case-insensitive), e.g. ``['knn', 'dummy']``.
        Defaults to ``args.estimators``.

    Returns
    -------
    tuple of dict
        One ``{parameter name: candidate values}`` dict per selected estimator,
        always in this fixed order: linear_regression, sgd, bayesian_ridge,
        knn, dt, svm, random_forest, ada_boost, gradient_boost, dummy.
        Callers that pair the result positionally with a list of estimator
        instances must build that list in the same order.

    The lasso and mlp grids are kept below as comments only: both are disabled,
    so no grid is returned for them even when they are requested.

    NOTE(review): parameter names and values such as 'normalize', 'mse',
    'squared_loss' or 'ls' follow the scikit-learn release the notebook was
    written against; several were renamed or removed in later releases --
    confirm against the installed version.
    """
    #linear regression
    hp_lr = {'fit_intercept': [False, True],
             'normalize': [False, True]}

    #stochastic gradient descent
    hp_sgd = {'loss': ["squared_loss", "huber", "epsilon_insensitive", "squared_epsilon_insensitive"],
              'penalty': ["l2", "l1", "elasticnet"],
              'fit_intercept': [False, True],
              'max_iter': [1000, 3000, 9000],
              'shuffle': [False, True],
              'learning_rate': ["constant", "optimal", "invscaling", "adaptive"],
              'early_stopping': [False, True],
              'n_iter_no_change': [5, 15, 45],
              'warm_start': [False, True]}

    #lasso (disabled)
#     hp_lasso = {'fit_intercept': [False, True],
#             'normalize': [False, True],
#            'max_iter': [1000, 3000, 9000],
#            'warm_start': [False, True],
#            'positive': [False, True],
#            'selection': ["cyclic", "random"],
#             'tol': [0.001, 0.0001, 0.00001]}

    #bayesian ridge
    hp_bayesian = {'n_iter': [1000, 3000, 9000],
                   'tol': [0.001, 0.0001, 0.00001],
                   'compute_score': [False, True],
                   'fit_intercept': [False, True],
                   'normalize': [False, True]}

    #k-nearest neighbors
    hp_knn = {'n_neighbors': list(range(1, 26)),
              'weights': ["uniform", "distance"],
              'algorithm': ["auto", "ball_tree", "kd_tree", "brute"],
              'leaf_size': [30, 90, 270],
              'p': [1, 2]}

    #decision tree
    hp_dt = {'criterion': ["mse", "friedman_mse", "mae"],
             'splitter': ["best", "random"],
             'max_depth': list(range(2, 51)) + [None],
             'min_samples_split': list(range(2, 51)),
             'min_samples_leaf': list(range(1, 51)),
             'max_features': ["auto", "sqrt", "log2"]}

    #support vector machine
    hp_svr = {'kernel': ["linear", "poly", "rbf", "sigmoid"],
              'gamma': ["scale", "auto"],
              'C': list(range(1, 51)),
              'max_iter': [1000, 3000, 9000]}

    #multilayer perceptron or feed-forward neural network (disabled)
#     hp_mlp = {'hidden_layer_sizes': [(np.random.randint(1, 900),), (np.random.randint(1, 900), np.random.randint(1, 900)),
#                                      (np.random.randint(1, 900), np.random.randint(1, 900), np.random.randint(1, 900))],
#          'activation': ["identity", "logistic", "tanh", "relu"],
#          'solver': ["lbfgs", "sgd", "adam"],
#          'learning_rate': ["constant", "invscaling", "adaptive"],
#          'learning_rate_init': [0.01, 0.001, 0.0001],
#          'max_iter': [1000, 3000, 9000],
#          'warm_start': [False, True],
#          'momentum': np.random.uniform(low=0.0, high=1.0, size=50),
#          'nesterovs_momentum': [False, True],
#          'early_stopping': [False, True],
#          'n_iter_no_change': [10, 30, 90],
#          'max_fun': [15000, 45000, 135000]}

    #random forest
    hp_random_forest = {'n_estimators': [100, 300, 900],
                        'criterion': ["mse", "mae"],
                        'min_samples_split': list(range(2, 51)),
                        'min_samples_leaf': list(range(1, 51)),
                        'max_features': ["auto", "sqrt", "log2"],
                        'oob_score': [False, True],
                        'warm_start': [False, True]}

    #ada boosting
    hp_ada_boost = {'n_estimators': [50, 150, 450],
                    'learning_rate': [1, 0.1, 0.01],
                    'loss': ["linear", "square", "exponential"]}

    #gradient boosting
    hp_gradient_boost = {'loss': ["ls", "lad", "huber", "quantile"],
                         'learning_rate': [0.1, 0.01, 0.001],
                         'n_estimators': [100, 300, 900],
                         'subsample': [0.1, 0.5, 1.0],
                         'criterion': ["friedman_mse", "mse", "mae"],
                         'min_samples_split': list(range(2, 51)),
                         'min_samples_leaf': list(range(1, 51)),
                         'max_depth': list(range(3, 51)),
                         'max_features': ["auto", "sqrt", "log2"],
                         'warm_start': [False, True],
                         'n_iter_no_change': [10, 30, 90, None]}

    #simple rule regressor
    hp_dummy = {'strategy': ["mean", "median", "quantile"],
                'quantile': [0.0, 0.25, 0.75, 1.0]}

    # Fixed (cli name, grid) order; selection below never reorders it.
    search_spaces = (
        ("linear_regression", hp_lr),
        ("sgd", hp_sgd),
        ("bayesian_ridge", hp_bayesian),
        ("knn", hp_knn),
        ("dt", hp_dt),
        ("svm", hp_svr),
        ("random_forest", hp_random_forest),
        ("ada_boost", hp_ada_boost),
        ("gradient_boost", hp_gradient_boost),
        ("dummy", hp_dummy),
    )

    if estimators is None:
        estimators = args.estimators
    selected = {name.lower() for name in estimators}

    return tuple(grid for cli_name, grid in search_spaces if cli_name in selected)

In [20]:
def save_estimator(estimator, file_name: str):
    """Persist ``estimator`` with joblib as ``<file_name>.joblib`` inside ``args.model_path``.

    The path is assembled with ``os.path.join`` so it stays valid whether or
    not ``args.model_path`` ends with a path separator.
    """
    dump(estimator, os.path.join(args.model_path, file_name + '.joblib'))

In [21]:
def load_estimator(file_name: str):
    """Load and return the object stored as ``<file_name>.joblib`` inside ``args.model_path``.

    The path is assembled with ``os.path.join`` so it stays valid whether or
    not ``args.model_path`` ends with a path separator.
    """
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code -- only load model files that come from a trusted source.
    return load(os.path.join(args.model_path, file_name + '.joblib'))

In [22]:
def train_and_test_whole(estimator, X_train, y_train, X_test, y_test):
    """Fit ``estimator`` on the training data, time the fit, and predict the test data.

    Parameters
    ----------
    estimator : object exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train : data passed to ``fit``.
    X_test : data passed to ``predict``.
    y_test : accepted for signature compatibility; not used here.

    Returns
    -------
    (y_pred, fit_seconds) : the value returned by ``predict`` and the elapsed
        fit time in seconds. Only the ``fit`` call is timed.
    """
    # perf_counter is a monotonic, high-resolution clock, so the interval
    # cannot come out negative or skewed if the system clock is adjusted.
    fit_started = time.perf_counter()
    estimator.fit(X_train, y_train)
    fit_seconds = time.perf_counter() - fit_started
    print("Training DONE")

    y_pred = estimator.predict(X_test)
    print("Testing DONE\n\n")

    return y_pred, fit_seconds

In [23]:
def save_results(performance_dict, n, file_name):
    """Write ``performance_dict`` as ``N<n>_<file_name>.csv`` inside ``args.results_path``.

    Parameters
    ----------
    performance_dict : mapping of column name -> sequence of values.
    n : value embedded in the file name after the leading 'N'.
    file_name : base name of the CSV, without extension.

    Columns may have different lengths (for example when only some estimators
    were run and the others have no entries); shorter columns are padded with
    missing values instead of raising ``ValueError``. The index is not written
    and floats are formatted with six decimals.
    """
    # One Series per column: pandas aligns them on their positional index and
    # fills the gaps, which a plain dict of unequal-length lists would not allow.
    df_results = pd.DataFrame({column: pd.Series(values)
                               for column, values in performance_dict.items()})

    # os.path.join keeps the path valid with or without a trailing separator.
    csv_path = os.path.join(args.results_path, "N" + str(n) + "_" + file_name + '.csv')
    df_results.to_csv(csv_path, index=False, float_format='%.6f')

## Train/Val/Test

### Random Search with K-Fold Cross-Validation (3 folds by default), then Train/Test with Hold-out

In [24]:
def train():
    """Run the whole experiment grid and write one fit-time CSV per training-set size.

    For every combination of training size ``n``, seed and dataset subset taken
    from ``args``:

    1. load the subset and split it into ``n`` training samples and a hold-out
       remainder;
    2. min-max scale both parts with a scaler fitted on the training part only;
    3. tune each estimator with ``RandomizedSearchCV`` on the training part;
    4. refit the best configuration on the training part (timed) and predict
       the hold-out part.

    Side effects: prints progress, and calls ``save_results`` once per training
    size with the collected fit times. Predictions are gathered in
    ``predictions`` but exporting them (and saving the fitted models) is
    currently disabled.
    """
    for n in args.train_size:
        predictions = {}
        # Fit times per estimator class name. Columns are created on first use,
        # so estimators that were not selected never leave an empty column
        # behind (unequal column lengths would make the CSV export fail).
        fit_times = {}

        for seed in args.seed:
            set_default_seed(seed)

            for subset in args.data_subset:
                X, y = get_preprocessed_numpy_dataset(subset)
                print("\n\n######### Seed", seed, ", Subset", subset, ", N", n)

                X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=n, random_state=seed, shuffle=True)

                # Fit the scaler on the training part only, then apply it to both parts.
                min_max_scaler = MinMaxScaler()
                min_max_scaler.fit(X_train)
                X_train = min_max_scaler.transform(X_train)
                X_test = min_max_scaler.transform(X_test)

                run_tag = 'Seed' + str(seed) + "_Subset" + str(subset) + "_N" + str(n)
                predictions[run_tag + "_Ytrue"] = y_test

                estimators = get_estimators(seed)
                hyperparameters = get_estimators_hyperparameters_to_search()

                # zip pairs estimators and search spaces by position, so both
                # helpers must return their entries in the same order.
                for reg, hp in zip(estimators, hyperparameters):
                    rs = RandomizedSearchCV(estimator=reg, param_distributions=hp, n_iter=args.n_iter_rs, scoring=args.scoring_rs,
                                            n_jobs=-1, cv=args.cv_inner, verbose=0, pre_dispatch='2*n_jobs', random_state=seed)

                    reg_name = type(reg).__name__
                    print("\n#", reg_name, "#")

                    print("\n#########", args.cv_inner, "Fold RANDOM SEARCH #########")
                    rs.fit(X_train, y_train)
                    print("DONE")

                    best_estimator = rs.best_estimator_
                    print("best estimator: ", best_estimator.get_params())

                    print("\n######### HOLD-OUT VALIDATION #########")
                    y_pred, fit_time = train_and_test_whole(best_estimator, X_train, y_train, X_test, y_test)

                    predictions[run_tag + "_" + reg_name + "_Ypred"] = y_pred
                    fit_times.setdefault(reg_name, []).append(fit_time)

#                     save_estimator(best_estimator, str("seed" + str(seed) + "_subset" + str(subset) + "_n" + str(n) + "_" + reg_name))

#         save_results(predictions, n, "nasbench_predictions")
        save_results(fit_times, n, "nasbench_fit_times")

### Main


In [25]:
if __name__ == '__main__':
    # Echo the effective configuration before the run starts, one setting per line.
    echoed_settings = (
        ("data_path: ", args.data_path),
        ("model_path: ", args.model_path),
        ("results_path: ", args.results_path),
        ("target: ", args.target),
        ("n_iter_rs: ", args.n_iter_rs),
        ("cv_inner: ", args.cv_inner),
        ("scoring_rs: ", args.scoring_rs),
        ("data_subset: ", args.data_subset),
        ("seed: ", args.seed),
        ("train_size: ", args.train_size),
        ("estimators: ", args.estimators),
        ("features drop: ", args.features_drop),
    )
    for label, value in echoed_settings:
        print(label, value)

    train()

data_path:  /home/gean/Code/nns_performance_prediction/meta_datasets/
model_path:  /home/gean/Code/nns_performance_prediction/saved_models/
results_path:  /home/gean/Code/nns_performance_prediction/results/
target:  final_validation_accuracy
n_iter_rs:  2
cv_inner:  3
scoring_rs:  None
data_subset:  [4, 12]
seed:  [0, 1]
train_size:  [43, 86]
estimators:  ['linear_regression', 'sgd', 'lasso', 'bayesian_ridge', 'knn', 'dt', 'svm', 'mlp', 'random_forest', 'ada_boost', 'gradient_boost', 'dummy']
features drop:  ['module_adjacency', 'halfway_training_time', 'halfway_train_accuracy', 'halfway_validation_accuracy', 'halfway_test_accuracy', 'final_training_time', 'final_train_accuracy', 'final_test_accuracy']


######### Seed 0 , Subset 4 , N 43

# LinearRegression #

######### 3 Fold RANDOM SEARCH #########
DONE
best estimator:  {'copy_X': True, 'fit_intercept': True, 'n_jobs': -1, 'normalize': True}

######### HOLD-OUT VALIDATION #########
Training DONE
Testing DONE



# SGDRegressor #

###

  warn("Warm-start fitting without increasing n_estimators does not "


Testing DONE



# AdaBoostRegressor #

######### 3 Fold RANDOM SEARCH #########
DONE
best estimator:  {'base_estimator': None, 'learning_rate': 1, 'loss': 'linear', 'n_estimators': 450, 'random_state': 0}

######### HOLD-OUT VALIDATION #########
Training DONE
Testing DONE



# GradientBoostingRegressor #

######### 3 Fold RANDOM SEARCH #########
DONE
best estimator:  {'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.001, 'loss': 'ls', 'max_depth': 15, 'max_features': 'auto', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 26, 'min_samples_split': 15, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 900, 'n_iter_no_change': 90, 'presort': 'deprecated', 'random_state': 0, 'subsample': 0.1, 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}

######### HOLD-OUT VALIDATION #########
Training DONE
Testing DONE



# DummyRegressor #

######### 3 Fold RANDOM SEARCH ####

  warn("Warm-start fitting without increasing n_estimators does not "


Testing DONE



# AdaBoostRegressor #

######### 3 Fold RANDOM SEARCH #########
DONE
best estimator:  {'base_estimator': None, 'learning_rate': 1, 'loss': 'linear', 'n_estimators': 450, 'random_state': 0}

######### HOLD-OUT VALIDATION #########
Training DONE
Testing DONE



# GradientBoostingRegressor #

######### 3 Fold RANDOM SEARCH #########
DONE
best estimator:  {'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': 'mse', 'init': None, 'learning_rate': 0.1, 'loss': 'quantile', 'max_depth': 36, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 29, 'min_samples_split': 27, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_iter_no_change': 30, 'presort': 'deprecated', 'random_state': 0, 'subsample': 0.1, 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': True}

######### HOLD-OUT VALIDATION #########
Training DONE
Testing DONE



# DummyRegressor #

######### 3 Fold RANDOM SEARCH #########


  warn("Warm-start fitting without increasing n_estimators does not "


Testing DONE



# AdaBoostRegressor #

######### 3 Fold RANDOM SEARCH #########
DONE
best estimator:  {'base_estimator': None, 'learning_rate': 0.01, 'loss': 'exponential', 'n_estimators': 50, 'random_state': 0}

######### HOLD-OUT VALIDATION #########
Training DONE
Testing DONE



# GradientBoostingRegressor #

######### 3 Fold RANDOM SEARCH #########
DONE
best estimator:  {'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.001, 'loss': 'ls', 'max_depth': 15, 'max_features': 'auto', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 26, 'min_samples_split': 15, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 900, 'n_iter_no_change': 90, 'presort': 'deprecated', 'random_state': 0, 'subsample': 0.1, 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}

######### HOLD-OUT VALIDATION #########
Training DONE
Testing DONE



# DummyRegressor #

######### 3 Fold RANDOM SEAR

  warn("Warm-start fitting without increasing n_estimators does not "


Testing DONE



# AdaBoostRegressor #

######### 3 Fold RANDOM SEARCH #########
DONE
best estimator:  {'base_estimator': None, 'learning_rate': 1, 'loss': 'linear', 'n_estimators': 450, 'random_state': 0}

######### HOLD-OUT VALIDATION #########
Training DONE
Testing DONE



# GradientBoostingRegressor #

######### 3 Fold RANDOM SEARCH #########
DONE
best estimator:  {'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.001, 'loss': 'ls', 'max_depth': 15, 'max_features': 'auto', 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 26, 'min_samples_split': 15, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 900, 'n_iter_no_change': 90, 'presort': 'deprecated', 'random_state': 0, 'subsample': 0.1, 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}

######### HOLD-OUT VALIDATION #########
Training DONE
Testing DONE



# DummyRegressor #

######### 3 Fold RANDOM SEARCH ####