In [41]:
import pickle
from os import listdir
import warnings
warnings.filterwarnings('ignore') # nao quero warning de convergência

import numpy as np
import pandas as pd

from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from ITELM import ITELM


In [42]:
# Distinct dataset names found in ../datasets: each file name is cut at the
# first '-train' / '-test' marker, and entries whose name mentions 'ipynb'
# or 'keijzer' are left out.
fnames = set(
    entry.split('-train')[0].split('-test')[0]
    for entry in listdir(path='../datasets')
    if not ('ipynb' in entry or 'keijzer' in entry)
)
print(fnames)

{'airfoil', 'ppb', 'forestfires', 'bioavailability', 'wineWhite', 'energyCooling', 'cpu', 'yacht', 'wineRed', 'concrete', 'energyHeating', 'towerData'}


In [43]:
def load_trainTest(dataname, fold):
    """Load one train/test fold of a dataset and split features from target.

    Reads ``../datasets/{dataname}-train-{fold}.dat`` and
    ``../datasets/{dataname}-test-{fold}.dat`` as comma-separated numeric
    tables. In each table the last column is taken as the target and every
    preceding column as a feature.

    Parameters
    ----------
    dataname : str
        Dataset prefix used in the file names.
    fold : int or str
        Fold identifier used in the file names.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` where the ``X_*`` arrays are
        2-D and the ``y_*`` arrays are 1-D.
    """
    def _read(split):
        # ndmin=2 keeps a file holding a single row as shape (1, n_cols);
        # without it np.loadtxt returns a 1-D array and the column slicing
        # below raises IndexError.
        return np.loadtxt(f'../datasets/{dataname}-{split}-{fold}.dat',
                          delimiter=',', ndmin=2)

    train = _read('train')
    test  = _read('test')

    X_train, y_train = train[:, :-1], train[:, -1]
    X_test, y_test   = test[:, :-1], test[:, -1]

    return X_train, y_train, X_test, y_test

In [44]:
def complexity(algname, model):
    """Count the non-zero parameters of a fitted model.

    The attribute that is inspected depends on which substring ``algname``
    contains (checked in this order):

    * ``'XGBoost'`` -- non-zero ``feature_importances_`` entries of the first
      element of every item in ``model.best_estimator_.estimators_``, summed.
    * ``'MLP'``     -- non-zero entries over all arrays in
      ``model.best_estimator_.coefs_``, summed.
    * ``'IT-ELM'``  -- non-zero entries of
      ``model.best_estimator_.modelCV.coef_`` (interactions are not counted).
    * anything else -- non-zero entries of ``model.coef_``.
    """
    def _nonzero(values):
        # Number of entries that differ from zero.
        return (values != 0).sum()

    if 'XGBoost' in algname:
        per_stage = []
        for stage in model.best_estimator_.estimators_:
            per_stage.append(_nonzero(stage[0].feature_importances_))
        return np.sum(per_stage)

    if 'MLP' in algname:
        per_layer = [_nonzero(weights) for weights in model.best_estimator_.coefs_]
        return np.sum(per_layer)

    if 'IT-ELM' in algname:
        return _nonzero(model.best_estimator_.modelCV.coef_)  # + interactions

    return _nonzero(model.coef_)

In [45]:
# Datasets to evaluate. This hard-coded list replaces the set discovered from
# the ../datasets listing earlier in the notebook; compared with that printed
# set it leaves out 'ppb', 'forestfires' and 'bioavailability'.
fnames = ['airfoil', 'wineWhite', 'energyCooling', 'cpu', 'yacht', 'wineRed', 'concrete', 'energyHeating', 'towerData']

In [52]:
# Score every stored model on its test fold.
# For each dataset, fold (0-4), repetition (0-5) and algorithm, the pickled
# model is loaded from models/, used to predict the test split, and its MAE,
# RMSE and complexity are appended to the parallel lists below (one entry per
# model, all lists stay aligned by position).
alg_l  = []
mae_l  = []
rmse_l = []
cplx_l = []
dat_l  = []
fold_l = []
it_l   = []

scaler = StandardScaler()

# Loop-invariant: the algorithms whose models are expected on disk.
algorithms = ['ElasticNet', 'IT-ELM', 'Lars', 'Lasso', 'MLP', 'Ridge', 'XGBoost']

for name in fnames:
    for fold in range(5):
        X_train, y_train, X_test, y_test = load_trainTest(name, fold)
        # The scaler is re-fit on each fold's training split and then applied
        # to the matching test split.
        X_trainS = scaler.fit_transform(X_train)
        X_testS  = scaler.transform(X_test)

        for rep in range(6):
            for alg in algorithms:

                # IT-ELM models are stored with a ' (Lasso)' suffix in the file name.
                if alg == 'IT-ELM':
                    fname = f'models/{name}_{alg} (Lasso)_{fold}_{rep}.pkl'
                else:
                    fname = f'models/{name}_{alg}_{fold}_{rep}.pkl'

                # NOTE(security): pickle.load can execute arbitrary code; only
                # load model files produced by a trusted run.
                # The context manager closes the file even if unpickling fails.
                with open(fname, 'rb') as f:
                    model = pickle.load(f)

                # Only the MLP is fed standardized inputs.
                # NOTE(review): presumably it was trained on scaled data while
                # the other models were not -- confirm against the training code.
                if alg == 'MLP':
                    y_hat = model.predict(X_testS)
                else:
                    y_hat = model.predict(X_test)
                mae  = np.absolute(y_hat - y_test).mean()
                rmse = np.sqrt(np.square(y_hat - y_test).mean())
                cplx = complexity(alg, model)

                alg_l.append(alg)
                mae_l.append(mae)
                rmse_l.append(rmse)
                cplx_l.append(cplx)
                dat_l.append(name)
                fold_l.append(fold)
                it_l.append(rep)

In [53]:
# Assemble the per-model results into one table (one row per evaluated model)
# and persist it to results.pkl.
df = pd.DataFrame({'Algorithm': alg_l, 'Dataset': dat_l, 'Fold': fold_l, 'Iteration': it_l, 'MAE': mae_l, 'RMSE': rmse_l, 'Complexity': cplx_l})

# The context manager guarantees the file is flushed and closed even if
# pickling raises.
with open('results.pkl', 'wb') as f:
    pickle.dump(df, f)

In [54]:
from IPython.display import display, HTML

def highlight_min(s):
    '''
    Colour the minimum of a Series red.

    Returns one CSS string per element of ``s``: 'color: red' for every
    entry equal to ``s.min()`` (ties are all marked) and '' for the rest.
    The list form is what ``DataFrame.style.apply`` expects from a
    column-wise styling function.
    '''
    is_min = s == s.min()
    return ['color: red' if v else '' for v in is_min]

# One table per dataset: per-algorithm medians (rounded to 2 decimals) of
# Complexity, MAE and RMSE, sorted by RMSE, with each column's minimum in red.
for name in fnames:
    # numeric_only=True skips the string 'Dataset' column; without it
    # pandas >= 2.0 raises TypeError when asked for the median of strings.
    dfG = df[df.Dataset==name].groupby(['Algorithm']).median(numeric_only=True).round(2)
    display(dfG[['Complexity','MAE','RMSE']].sort_values('RMSE').style.set_caption(f'{name}').apply(highlight_min))

#dfGrouped = df.groupby(['Dataset', 'Algorithm']).median().round(2)
#for name in fnames:
#    print(dfGrouped[dfGrouped.Dataset == name])

Unnamed: 0_level_0,Complexity,MAE,RMSE
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
XGBoost,1276.5,1.24,1.8
IT-ELM,101.5,2.08,2.78
Lars,5.0,3.7,4.74
MLP,3000.0,3.71,4.77
Ridge,5.0,3.76,4.8
Lasso,3.0,4.89,6.18
ElasticNet,3.0,4.91,6.2


Unnamed: 0_level_0,Complexity,MAE,RMSE
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
XGBoost,2237.5,0.5,0.66
MLP,30550.0,0.54,0.69
IT-ELM,86.0,0.56,0.72
Lars,10.0,0.59,0.75
Ridge,11.0,0.59,0.75
Lasso,6.0,0.6,0.76
ElasticNet,7.0,0.61,0.77


Unnamed: 0_level_0,Complexity,MAE,RMSE
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
XGBoost,1139.5,0.53,0.73
IT-ELM,252.5,0.73,0.95
Lars,7.0,2.25,3.2
Ridge,8.0,2.3,3.21
MLP,4500.0,2.37,3.29
Lasso,5.0,2.97,4.09
ElasticNet,5.0,3.17,4.4


Unnamed: 0_level_0,Complexity,MAE,RMSE
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
IT-ELM,35.5,6.13,13.9
MLP,28550.0,15.33,25.15
Lars,7.0,17.98,29.1
Ridge,7.0,18.41,29.15
Lasso,4.0,27.17,36.94
XGBoost,378.0,13.88,38.56
ElasticNet,4.0,29.29,39.48


Unnamed: 0_level_0,Complexity,MAE,RMSE
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
XGBoost,643.0,0.44,0.8
IT-ELM,54.5,0.61,0.96
MLP,28050.0,1.34,2.19
Lars,1.0,6.94,8.66
Lasso,3.0,7.03,8.68
Ridge,6.0,7.02,8.7
ElasticNet,6.0,6.84,8.72


Unnamed: 0_level_0,Complexity,MAE,RMSE
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
XGBoost,734.5,0.44,0.59
IT-ELM,33.0,0.5,0.63
MLP,900.0,0.49,0.63
Lars,9.0,0.51,0.64
ElasticNet,7.0,0.51,0.65
Lasso,7.0,0.51,0.65
Ridge,11.0,0.5,0.65


Unnamed: 0_level_0,Complexity,MAE,RMSE
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
XGBoost,1245.5,2.84,4.13
IT-ELM,103.0,4.36,5.83
MLP,4500.0,5.86,7.67
ElasticNet,8.0,8.14,10.26
Lasso,8.0,8.15,10.26
Lars,8.0,8.14,10.27
Ridge,8.0,8.16,10.27


Unnamed: 0_level_0,Complexity,MAE,RMSE
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
XGBoost,1681.5,0.24,0.33
IT-ELM,284.5,0.29,0.4
MLP,4500.0,1.93,2.68
Lars,7.0,2.07,2.86
Ridge,8.0,2.15,2.92
Lasso,5.0,3.14,4.17
ElasticNet,5.0,3.24,4.41


Unnamed: 0_level_0,Complexity,MAE,RMSE
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
XGBoost,2920.5,10.83,15.73
MLP,37550.0,12.05,17.47
IT-ELM,262.0,14.1,20.18
Ridge,25.0,21.36,29.31
Lars,24.0,21.56,29.71
Lasso,13.0,25.47,33.16
ElasticNet,20.0,32.62,43.58
