In [1]:
import argparse

import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
# Notebook parameters.
# `subset` selects the rows whose 'Subset' / 'dataset' value is analysed and is
# also embedded in the GCN input file name and the exported CSV names.
subset = 108

# `n` is embedded in the baseline metrics file name and is compared against the
# 'train_size' column of the GCN runs.
n = 172

# Only rows whose 'Model' value is in this list are kept from the baseline metrics.
selected_models = [
    'LinearRegression',
    'RandomForestRegressor',
    'BayesianRidge',
    'DummyRegressor',
]

In [3]:
# Baseline metrics file for the configured `n`.
results_csv_path = ("/home/gean/Code/nns_performance_prediction/results/fast/test12/N"
                    + str(n) + "_nasbench_metrics_formatted.csv")

# Compact dtypes for the id and metric columns.
# NOTE(review): int16 tops out at 32767 -- confirm every seed in the file fits.
metric_dtypes = {
    'Seed': np.int16,
    'Subset': np.int16,
    'MAE': np.float32,
    'MSE': np.float32,
    'R2': np.float32,
}

# Read, keep only the selected models, then cast -- in that order.
df_results = (
    pd.read_csv(results_csv_path)
    .loc[lambda frame: frame['Model'].isin(selected_models)]
    .astype(metric_dtypes)
)
df_results.head(12)

Unnamed: 0,Seed,Subset,Model,MAE,MSE,R2
0,0,4,LinearRegression,0.114755,0.020631,-0.010661
3,0,4,BayesianRidge,0.114546,0.020379,0.001656
8,0,4,RandomForestRegressor,0.115189,0.020908,-0.02424
11,0,4,DummyRegressor,0.11983,0.021108,-0.034054
12,0,12,LinearRegression,0.163408,0.037073,0.04339
15,0,12,BayesianRidge,0.163551,0.0366,0.05558
20,0,12,RandomForestRegressor,0.162367,0.036204,0.065813
23,0,12,DummyRegressor,0.172058,0.039421,-0.017211
24,0,36,LinearRegression,0.055031,0.007533,0.113143
27,0,36,BayesianRidge,0.056005,0.007654,0.098931


In [4]:
# GCN run log for the configured subset.
gcn_runs_path = "/home/gean/Downloads/compilado_test/runs_p_test/runs_" + str(subset) + ".csv"

# Column names are aligned with the baseline frame (Subset, Seed, MAE, MSE, R2).
gcn_column_names = {"dataset": "Subset", "seed": "Seed", "mae": "MAE", "mse": "MSE", "r2": "R2"}

df_results_gcn = (
    pd.read_csv(gcn_runs_path)
    # Bookkeeping columns that are not used in the comparison.
    .drop(['id_run', 'loss', 'time'], axis=1)
    # Keep only the runs for this subset trained with `n` samples.
    .loc[lambda runs: (runs['dataset'] == subset) & (runs['train_size'] == n)]
    .rename(columns=gcn_column_names)
    # 'train_size' has served its purpose as a filter.
    .drop(['train_size'], axis=1)
    # Label the rows so they can sit next to the baseline models.
    .assign(Model='Graph Convolutional Network')
)
df_results_gcn

Unnamed: 0,Subset,Seed,MSE,MAE,R2,Model
3,108,0,0.004686,0.020649,0.073955,Graph Convolutional Network
7,108,1,0.004403,0.022709,0.076716,Graph Convolutional Network
11,108,10,0.004651,0.020515,0.072613,Graph Convolutional Network
15,108,42,0.004505,0.020573,0.07684,Graph Convolutional Network
19,108,100,0.004558,0.022712,0.072137,Graph Convolutional Network
23,108,123,0.004594,0.021148,0.075754,Graph Convolutional Network
27,108,666,0.004639,0.025696,0.053953,Graph Convolutional Network
30,108,1000,0.004841,0.03032,0.021601,Graph Convolutional Network
34,108,1234,0.004603,0.019918,0.06833,Graph Convolutional Network
38,108,12345,0.004512,0.021255,0.079664,Graph Convolutional Network


In [5]:
# Stack the baseline rows for the chosen subset on top of the GCN rows.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# with its defaults (original indices kept, columns not sorted) is the
# supported equivalent and yields the same frame.
df_results_subset = pd.concat([df_results[df_results['Subset'] == subset], df_results_gcn])

# reset_index() (without drop=True) keeps the previous row labels in an
# 'index' column, as before, so any code relying on that column still works.
df_results_subset = df_results_subset.reset_index()
df_results_subset

Unnamed: 0,index,Seed,Subset,Model,MAE,MSE,R2
0,36,0,108,LinearRegression,0.023835,0.003108,0.095885
1,39,0,108,BayesianRidge,0.024305,0.00318,0.075128
2,44,0,108,RandomForestRegressor,0.018282,0.00309,0.101176
3,47,0,108,DummyRegressor,0.028603,0.003514,-0.022175
4,84,1,108,LinearRegression,0.022131,0.003111,0.095645
5,87,1,108,BayesianRidge,0.021202,0.003082,0.104016
6,92,1,108,RandomForestRegressor,0.018144,0.003101,0.098367
7,95,1,108,DummyRegressor,0.023036,0.003506,-0.019182
8,132,10,108,LinearRegression,0.019767,0.003129,0.090575
9,135,10,108,BayesianRidge,0.019819,0.003142,0.08662


## Nemenyi test

##### MAE

In [6]:
# One entry per model (in order of first appearance): the model's MAE values,
# listed in the row order of df_results_subset.
mae_nemenyi = {
    model_name: list(df_results_subset.loc[df_results_subset['Model'] == model_name, 'MAE'])
    for model_name in df_results_subset['Model'].unique()
}

In [7]:
# Human-readable column labels; model names not listed here keep their key.
mae_column_labels = {
    "LinearRegression": "Linear Regression",
    "RandomForestRegressor": "Random Forest",
    "BayesianRidge": "Bayesian Ridge",
    "DummyRegressor": "Dummy",
}
mae_csv_path = ("/home/gean/Code/nns_performance_prediction/results/fast/test12/n"
                + str(n) + "_subset" + str(subset) + "_mae_nemenyi.csv")

# One column per model; rows are matched across models purely by list position.
df_mae_nemenyi = pd.DataFrame.from_dict(mae_nemenyi).rename(columns=mae_column_labels)

# Export without the row index, with six decimal places.
df_mae_nemenyi.to_csv(mae_csv_path, index=False, float_format='%.6f')
df_mae_nemenyi

Unnamed: 0,Linear Regression,Bayesian Ridge,Random Forest,Dummy,Graph Convolutional Network
0,0.023835,0.024305,0.018282,0.028603,0.020649
1,0.022131,0.021202,0.018144,0.023036,0.022709
2,0.019767,0.019819,0.018114,0.023007,0.020515
3,0.022832,0.022098,0.018257,0.023016,0.020573
4,0.023364,0.022822,0.018848,0.027802,0.022712
5,0.020999,0.020448,0.018405,0.023637,0.021148
6,0.021014,0.020866,0.018182,0.023219,0.025696
7,0.024963,0.024052,0.018638,0.022992,0.03032
8,0.024252,0.023317,0.023728,0.02461,0.019918
9,0.021922,0.020937,0.018369,0.023077,0.021255


##### MSE

In [8]:
# One entry per model (in order of first appearance): the model's MSE values,
# listed in the row order of df_results_subset.
mse_nemenyi = {
    model_name: list(df_results_subset.loc[df_results_subset['Model'] == model_name, 'MSE'])
    for model_name in df_results_subset['Model'].unique()
}

In [9]:
# Human-readable column labels; model names not listed here keep their key.
mse_column_labels = {
    "LinearRegression": "Linear Regression",
    "RandomForestRegressor": "Random Forest",
    "BayesianRidge": "Bayesian Ridge",
    "DummyRegressor": "Dummy",
}
mse_csv_path = ("/home/gean/Code/nns_performance_prediction/results/fast/test12/n"
                + str(n) + "_subset" + str(subset) + "_mse_nemenyi.csv")

# One column per model; rows are matched across models purely by list position.
df_mse_nemenyi = pd.DataFrame.from_dict(mse_nemenyi).rename(columns=mse_column_labels)

# Export without the row index, with six decimal places.
df_mse_nemenyi.to_csv(mse_csv_path, index=False, float_format='%.6f')

df_mse_nemenyi.head(12)

Unnamed: 0,Linear Regression,Bayesian Ridge,Random Forest,Dummy,Graph Convolutional Network
0,0.003108,0.00318,0.00309,0.003514,0.004686
1,0.003111,0.003082,0.003101,0.003506,0.004403
2,0.003129,0.003142,0.003054,0.003516,0.004651
3,0.003068,0.003057,0.003122,0.003511,0.004505
4,0.003156,0.003125,0.003081,0.003491,0.004558
5,0.003156,0.003134,0.003181,0.003687,0.004594
6,0.003098,0.003109,0.003097,0.00348,0.004639
7,0.003287,0.003232,0.003178,0.003555,0.004841
8,0.003343,0.003269,0.003556,0.003439,0.004603
9,0.003161,0.003115,0.003076,0.003497,0.004512
