https://archive.ics.uci.edu/ml/datasets/student+performance

In [1]:
import numpy as np
import pandas as pd
from copy import deepcopy
from sklearn.utils import shuffle
from scipy.stats import ranksums
from catboost import CatBoostRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor,BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
# from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder,OrdinalEncoder
import warnings
from math import log2
from time import time

def KFoldCV(model, data, n_fold=10):
#     num = len(data)
    diff = int(len(data)/n_fold)
    results = np.zeros((n_fold, 4))
    for i in range(n_fold):
        begin = diff*i
        end = diff*(i+1)
#         if i == n_fold-1:
#             end = -1
        test = data[begin:end]
        train = deepcopy(data)
        train = np.delete(train, range(begin, end),axis=0)
        X_train, y_train = train[:,:-1], train[:,-1]
        X_test, y = test[:,:-1], test[:,-1]
        predictY = model.fit(X_train, y_train).predict(X_test)
        mae = np.mean(abs((y-predictY)))
        stdErr = np.std(((y-predictY)))
        error=sum((y-predictY)**2)
        RMSE=np.sqrt(error/len(y))
        MAC = np.dot(y,predictY)**2/(np.dot(y, y)*np.dot(predictY, predictY))
#         print(mae, stdErr, RMSE, MAC)
        results[i,:] = [mae, stdErr, RMSE, MAC]
    return results

warnings.filterwarnings('ignore')

# data = pd.read_csv('./student-mat.csv', sep=';')
# por = pd.read_csv('./student-por.csv', sep=';')
# combine = pd.merge(math, por, on=["school","sex","age","address","famsize","Pstatus","Medu","Fedu","Mjob","Fjob","reason","nursery","internet"])
# data = data.values
# data = shuffle(data)

In [2]:
data=pd.read_csv('./exams.csv')
data['reading_score']=data['reading score']
data.drop(columns=['math score'],inplace=True)
data.drop(columns=['reading score'],inplace=True)
data.drop(columns=['writing score'],inplace=True)

cats = ['gender', 'race/ethnicity', 'parental level of education', 'lunch',
       'test preparation course']
oe = OrdinalEncoder()
for c in cats:
    oe.fit(data[c].values.reshape(-1,1))
    data[c] = np.asarray(oe.transform(data[c].values.reshape(-1,1)),dtype=int)
#     np.asarray(data['gender'],dtype=int)
data = data.values
data = shuffle(data)

In [3]:
# data.shape (395, 33)
# np.savetxt('math.txt',data)
data.shape

(1000, 6)

In [8]:
t1 = time()
hidden_layer_sizes  = [100, 300, 500]
max_iter = [500, 1000,1500]
# hidden_layer_sizes  = [500]
# max_iter = [1000]
epochs=1
mlps = np.zeros((epochs,4))
for i in hidden_layer_sizes:
    for j in max_iter:
        for epoch in range(epochs):
            data = shuffle(data)
            model = MLPRegressor(hidden_layer_sizes=i, max_iter=j)
            result = KFoldCV(model, data)
            mlps[epoch,:] = np.mean(result, axis=0)
#             print(np.mean(result, axis=0))
        print(np.mean(mlps, axis=0))
#     [0.96503638 1.28823634 1.30661604 0.97660302]
print(time()-t1)

[10.66325083 12.95132916 13.0278646   0.96633683]
[10.3264142  12.69480942 12.72732946  0.96797832]
[10.34570099 12.67705502 12.7073647   0.96780365]
[10.34901236 12.66858709 12.71767891  0.96794842]
[10.33796365 12.67154079 12.69701477  0.96798629]
[10.31715129 12.67349924 12.7174369   0.96802783]
[10.35406534 12.61867828 12.69084182  0.96822781]
[10.3088277  12.60882769 12.70215479  0.96830827]
[10.37459678 12.68164709 12.75230233  0.96798554]
234.35040545463562


In [4]:
t1 = time()
hidden_layer_sizes  = [100, 300, 500]
max_iter = [500, 1000,1500]
hidden_layer_sizes  = [500]
max_iter = [500]
epochs=20
mlps = np.zeros((epochs,4))
for i in hidden_layer_sizes:
    for j in max_iter:
        for epoch in range(epochs):
            data = shuffle(data)
            model = MLPRegressor(hidden_layer_sizes=i, max_iter=j)
            result = KFoldCV(model, data)
            mlps[epoch,:] = np.mean(result, axis=0)
#             print(np.mean(result, axis=0))
        print(np.mean(mlps, axis=0))
#     [0.96503638 1.28823634 1.30661604 0.97660302]
print(time()-t1)

[10.33832021 12.64868867 12.71874668  0.96810553]
612.6678531169891


In [5]:
mlps = np.array([[10.33844826, 12.54313348, 12.7262093 ,  0.96862652],
       [10.32582301, 12.65637348, 12.70912657,  0.96804509],
       [10.37354187, 12.66155571, 12.76277178,  0.96806129],
       [10.31935821, 12.66241199, 12.69265866,  0.96798035],
       [10.31813866, 12.65630203, 12.71218706,  0.96809453],
       [10.30843906, 12.61516826, 12.67704782,  0.96826888],
       [10.33989355, 12.62697726, 12.7312375 ,  0.96824818],
       [10.32086251, 12.65109107, 12.71694892,  0.9681219 ],
       [10.35721493, 12.6673005 , 12.71473986,  0.96798332],
       [10.33973879, 12.61913049, 12.69705027,  0.96826056],
       [10.35732446, 12.64910193, 12.72302641,  0.96816994],
       [10.35598136, 12.68469268, 12.73404263,  0.96794306],
       [10.3657521 , 12.71902567, 12.7772913 ,  0.96772724],
       [10.33320945, 12.66198054, 12.73265444,  0.96809913],
       [10.36052839, 12.66301649, 12.74723794,  0.96805692],
       [10.3123423 , 12.63285127, 12.6896689 ,  0.9681747 ],
       [10.36518771, 12.65933583, 12.72378723,  0.96793156],
       [10.3192333 , 12.62615195, 12.71158864,  0.96831801],
       [10.32329558, 12.69390125, 12.70985159,  0.96790072],
       [10.33209071, 12.62427159, 12.68580677,  0.96809862]])

array([[10.33844826, 12.54313348, 12.7262093 ,  0.96862652],
       [10.32582301, 12.65637348, 12.70912657,  0.96804509],
       [10.37354187, 12.66155571, 12.76277178,  0.96806129],
       [10.31935821, 12.66241199, 12.69265866,  0.96798035],
       [10.31813866, 12.65630203, 12.71218706,  0.96809453],
       [10.30843906, 12.61516826, 12.67704782,  0.96826888],
       [10.33989355, 12.62697726, 12.7312375 ,  0.96824818],
       [10.32086251, 12.65109107, 12.71694892,  0.9681219 ],
       [10.35721493, 12.6673005 , 12.71473986,  0.96798332],
       [10.33973879, 12.61913049, 12.69705027,  0.96826056],
       [10.35732446, 12.64910193, 12.72302641,  0.96816994],
       [10.35598136, 12.68469268, 12.73404263,  0.96794306],
       [10.3657521 , 12.71902567, 12.7772913 ,  0.96772724],
       [10.33320945, 12.66198054, 12.73265444,  0.96809913],
       [10.36052839, 12.66301649, 12.74723794,  0.96805692],
       [10.3123423 , 12.63285127, 12.6896689 ,  0.9681747 ],
       [10.36518771, 12.

In [9]:
t1 = time()
Cs = [10, 100, 1000]
gammas = [0.001, 0.01, 0.1]
# Cs = [1000]
# gammas = [0.001]
svms = np.zeros((epochs,4))
for i in range(len(Cs)):
    for j in range(len(gammas)):
        lc = len(Cs)
        lg = len(gammas)
        for epoch in range(epochs):
            data = shuffle(data)
            model = SVR(kernel = 'rbf', C = Cs[i], gamma = gammas[j])
            result = KFoldCV(model, data)
            svms[epoch,:] = np.mean(result, axis=0)
        print(np.mean(svms, axis=0))
#         [0.90084585 1.28205627 1.30138029 0.97616123]
print(time()-t1)

[10.7059298  13.26808088 13.44343796  0.96468684]
[10.39411257 12.73392117 12.87601446  0.96756996]
[10.43637676 12.70179188 12.79782653  0.96772278]
[10.37252971 12.76518032 12.86340246  0.9674794 ]
[10.29172539 12.63748023 12.66974327  0.96818982]
[10.50644576 12.86039594 12.92228499  0.96693278]
[10.31074723 12.66260214 12.69446088  0.96810456]
[10.3161436  12.67874579 12.72729811  0.96793916]
[10.64052574 13.06940003 13.14410508  0.96603151]
5.3331382274627686


In [6]:
t1 = time()
Cs = [10, 100, 1000]
gammas = [0.001, 0.01, 0.1]
Cs = [100]
gammas = [0.01]
svms = np.zeros((epochs,4))
for i in range(len(Cs)):
    for j in range(len(gammas)):
        lc = len(Cs)
        lg = len(gammas)
        for epoch in range(epochs):
            data = shuffle(data)
            model = SVR(kernel = 'rbf', C = Cs[i], gamma = gammas[j])
            result = KFoldCV(model, data)
            svms[epoch,:] = np.mean(result, axis=0)
        print(np.mean(svms, axis=0))
#         [0.90084585 1.28205627 1.30138029 0.97616123]
print(time()-t1)

[10.32145804 12.61883208 12.69638269  0.96821037]
11.311320543289185


In [7]:
svms = np.array([[10.31502399, 12.60965195, 12.70234336,  0.96809039],
       [10.29528714, 12.60122263, 12.68653493,  0.96816691],
       [10.33741548, 12.61042373, 12.70851649,  0.96816076],
       [10.35669184, 12.6830911 , 12.73194972,  0.96801522],
       [10.34874295, 12.54725052, 12.71256792,  0.96853742],
       [10.30265705, 12.62792342, 12.67876883,  0.96814528],
       [10.31309369, 12.60471122, 12.68130234,  0.96827801],
       [10.29526308, 12.60315796, 12.64913135,  0.96821252],
       [10.34298771, 12.5951941 , 12.71161038,  0.96829797],
       [10.29506639, 12.64356901, 12.66682028,  0.96815279],
       [10.30335878, 12.58787403, 12.685352  ,  0.96844832],
       [10.3244898 , 12.54553981, 12.71662034,  0.96857975],
       [10.36272844, 12.71695656, 12.73053412,  0.96780353],
       [10.32824891, 12.67242857, 12.70603662,  0.96801742],
       [10.29422907, 12.55920334, 12.67837294,  0.96849163],
       [10.33788268, 12.63522053, 12.70225725,  0.96817008],
       [10.31079704, 12.62893514, 12.69035966,  0.96816154],
       [10.29870255, 12.62271341, 12.66500276,  0.96812105],
       [10.33950805, 12.6297841 , 12.71761838,  0.96818278],
       [10.32698621, 12.65179038, 12.70595414,  0.96817392]])

array([[10.31502399, 12.60965195, 12.70234336,  0.96809039],
       [10.29528714, 12.60122263, 12.68653493,  0.96816691],
       [10.33741548, 12.61042373, 12.70851649,  0.96816076],
       [10.35669184, 12.6830911 , 12.73194972,  0.96801522],
       [10.34874295, 12.54725052, 12.71256792,  0.96853742],
       [10.30265705, 12.62792342, 12.67876883,  0.96814528],
       [10.31309369, 12.60471122, 12.68130234,  0.96827801],
       [10.29526308, 12.60315796, 12.64913135,  0.96821252],
       [10.34298771, 12.5951941 , 12.71161038,  0.96829797],
       [10.29506639, 12.64356901, 12.66682028,  0.96815279],
       [10.30335878, 12.58787403, 12.685352  ,  0.96844832],
       [10.3244898 , 12.54553981, 12.71662034,  0.96857975],
       [10.36272844, 12.71695656, 12.73053412,  0.96780353],
       [10.32824891, 12.67242857, 12.70603662,  0.96801742],
       [10.29422907, 12.55920334, 12.67837294,  0.96849163],
       [10.33788268, 12.63522053, 12.70225725,  0.96817008],
       [10.31079704, 12.

In [10]:
t1 = time()
n_estimators  = [100, 300, 500]
max_depth = [3, 4, 5]
# n_estimators  = [500]
# max_depth = [4]
rfs = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = RandomForestRegressor(n_estimators=i, max_depth=j)
            result = KFoldCV(model, data)
            rfs[epoch,:] = np.mean(result, axis=0)
        print(np.mean(rfs, axis=0))
#         [0.82798772 1.18901865 1.20639261 0.97973146]
print(time()-t1)

[10.42826064 12.82115198 12.91350725  0.96717578]
[10.33780795 12.68141196 12.71912641  0.96785328]
[10.36736764 12.73999589 12.81548061  0.96759772]
[10.40366524 12.78840945 12.85291152  0.96726573]
[10.33114041 12.64983358 12.7122126   0.96805725]
[10.31377922 12.63310013 12.7195769   0.96815005]
[10.41435344 12.82898293 12.89048596  0.96721018]
[10.37819713 12.6653173  12.77533693  0.96803244]
[10.34503117 12.62519419 12.76005913  0.96811247]
26.165308952331543


In [8]:
t1 = time()
n_estimators  = [100, 300, 500]
max_depth = [3, 4, 5]
n_estimators  = [300]
max_depth = [4]
rfs = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = RandomForestRegressor(n_estimators=i, max_depth=j)
            result = KFoldCV(model, data)
            rfs[epoch,:] = np.mean(result, axis=0)
        print(np.mean(rfs, axis=0))
#         [0.82798772 1.18901865 1.20639261 0.97973146]
print(time()-t1)

[10.33988394 12.66760842 12.74221324  0.96797911]
62.22170686721802


In [9]:
rfs = np.array([[10.32256625, 12.61982707, 12.7421031 ,  0.96810854],
       [10.36324125, 12.71659334, 12.77970894,  0.96775257],
       [10.34759433, 12.67391293, 12.75932332,  0.96802543],
       [10.3084755 , 12.64903314, 12.73397049,  0.96810231],
       [10.36590579, 12.66922857, 12.76524788,  0.96809254],
       [10.32250406, 12.65041227, 12.73724824,  0.96806253],
       [10.34138856, 12.66382882, 12.74483943,  0.96791368],
       [10.33608809, 12.64714317, 12.72208457,  0.96808706],
       [10.38443037, 12.7135082 , 12.7564205 ,  0.96770674],
       [10.36623186, 12.67446174, 12.74047479,  0.9678949 ],
       [10.35470672, 12.6861324 , 12.76281249,  0.96784102],
       [10.31290868, 12.62360549, 12.72020505,  0.9682553 ],
       [10.31166265, 12.64166848, 12.69890393,  0.96809927],
       [10.31476079, 12.624622  , 12.69033632,  0.96821124],
       [10.35172122, 12.75443723, 12.7797969 ,  0.96761403],
       [10.35432862, 12.65085233, 12.74174059,  0.96804758],
       [10.32825046, 12.70522531, 12.74304665,  0.96778647],
       [10.34527801, 12.64221575, 12.74668749,  0.96817599],
       [10.30958474, 12.62320986, 12.6875007 ,  0.96811914],
       [10.3560508 , 12.72225032, 12.7918134 ,  0.96768584]])

array([[10.32256625, 12.61982707, 12.7421031 ,  0.96810854],
       [10.36324125, 12.71659334, 12.77970894,  0.96775257],
       [10.34759433, 12.67391293, 12.75932332,  0.96802543],
       [10.3084755 , 12.64903314, 12.73397049,  0.96810231],
       [10.36590579, 12.66922857, 12.76524788,  0.96809254],
       [10.32250406, 12.65041227, 12.73724824,  0.96806253],
       [10.34138856, 12.66382882, 12.74483943,  0.96791368],
       [10.33608809, 12.64714317, 12.72208457,  0.96808706],
       [10.38443037, 12.7135082 , 12.7564205 ,  0.96770674],
       [10.36623186, 12.67446174, 12.74047479,  0.9678949 ],
       [10.35470672, 12.6861324 , 12.76281249,  0.96784102],
       [10.31290868, 12.62360549, 12.72020505,  0.9682553 ],
       [10.31166265, 12.64166848, 12.69890393,  0.96809927],
       [10.31476079, 12.624622  , 12.69033632,  0.96821124],
       [10.35172122, 12.75443723, 12.7797969 ,  0.96761403],
       [10.35432862, 12.65085233, 12.74174059,  0.96804758],
       [10.32825046, 12.

In [11]:
t1 = time()
n_estimators= [100, 300, 500]
max_depth = [3, 4, 5]
# n_estimators  = [100]
# max_depth = [3]
xgbs = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = XGBRegressor(n_estimators=i, max_depth=j)
            result = KFoldCV(model, data)
            xgbs[epoch,:] = np.mean(result, axis=0)
        print(np.mean(xgbs, axis=0))
#         [0.89059432 1.2367164  1.25472816 0.97802632]
print(time()-t1)

[10.37755486 12.71330239 12.80985509  0.96754499]
[10.51694379 12.95948716 13.11131204  0.96652675]
[10.84604918 13.46542799 13.53062149  0.96383114]
[10.5664884  13.01628766 13.09069323  0.9663278 ]
[10.69578739 13.22991429 13.28065605  0.96508923]
[11.08970062 13.75260138 13.89109194  0.96245264]
[10.53177464 12.90223252 13.02338487  0.96669768]
[10.96275043 13.52051245 13.61261729  0.96355415]
[11.06185763 13.68447422 13.8044166   0.96257601]
21.670836925506592


In [10]:
t1 = time()
n_estimators= [100, 300, 500]
max_depth = [3, 4, 5]
n_estimators  = [100]
max_depth = [3]
xgbs = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = XGBRegressor(n_estimators=i, max_depth=j)
            result = KFoldCV(model, data)
            xgbs[epoch,:] = np.mean(result, axis=0)
        print(np.mean(xgbs, axis=0))
#         [0.89059432 1.2367164  1.25472816 0.97802632]
print(time()-t1)

[10.35633515 12.75561159 12.83782279  0.96754887]
17.183526039123535


In [11]:
xgbs = np.array([[10.25779176, 12.69624405, 12.74631292,  0.96780137],
       [10.33098093, 12.72891122, 12.84286545,  0.9675998 ],
       [10.36792937, 12.81221264, 12.89186481,  0.96732048],
       [10.35353653, 12.77790998, 12.84935438,  0.96746365],
       [10.33350623, 12.69695194, 12.77123261,  0.967803  ],
       [10.36631676, 12.82057727, 12.84480066,  0.96724663],
       [10.41405458, 12.79921299, 12.91141796,  0.96731082],
       [10.3028325 , 12.67138604, 12.77527883,  0.9679224 ],
       [10.35096022, 12.8046974 , 12.85038317,  0.96732309],
       [10.30497141, 12.72820635, 12.80182272,  0.96772157],
       [10.35500245, 12.71951044, 12.79153477,  0.96774658],
       [10.27488227, 12.62898442, 12.75892269,  0.96819504],
       [10.33621259, 12.72781413, 12.79598891,  0.96766257],
       [10.40768904, 12.78751901, 12.86311637,  0.96738576],
       [10.43577154, 12.83481552, 12.90512511,  0.96719407],
       [10.32985087, 12.66490575, 12.79973768,  0.96791358],
       [10.41086992, 12.78904671, 12.88051065,  0.96738466],
       [10.4425563 , 12.84183029, 12.93265525,  0.96713218],
       [10.35687946, 12.73057554, 12.85819238,  0.96769736],
       [10.3941082 , 12.85092004, 12.88533856,  0.96715287]])

array([[10.25779176, 12.69624405, 12.74631292,  0.96780137],
       [10.33098093, 12.72891122, 12.84286545,  0.9675998 ],
       [10.36792937, 12.81221264, 12.89186481,  0.96732048],
       [10.35353653, 12.77790998, 12.84935438,  0.96746365],
       [10.33350623, 12.69695194, 12.77123261,  0.967803  ],
       [10.36631676, 12.82057727, 12.84480066,  0.96724663],
       [10.41405458, 12.79921299, 12.91141796,  0.96731082],
       [10.3028325 , 12.67138604, 12.77527883,  0.9679224 ],
       [10.35096022, 12.8046974 , 12.85038317,  0.96732309],
       [10.30497141, 12.72820635, 12.80182272,  0.96772157],
       [10.35500245, 12.71951044, 12.79153477,  0.96774658],
       [10.27488227, 12.62898442, 12.75892269,  0.96819504],
       [10.33621259, 12.72781413, 12.79598891,  0.96766257],
       [10.40768904, 12.78751901, 12.86311637,  0.96738576],
       [10.43577154, 12.83481552, 12.90512511,  0.96719407],
       [10.32985087, 12.66490575, 12.79973768,  0.96791358],
       [10.41086992, 12.

In [12]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
# n_estimators  = [500]
# max_depth = [3]
# epochs=20
cats = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False)
            result = KFoldCV(model, data)
            cats[epoch,:] = np.mean(result, axis=0)
#             print(np.mean(result, axis=0))
        print(np.mean(cats, axis=0))
print(time()-t1)

[10.25859267 12.69313053 12.71464426  0.96793142]
[10.35277855 12.7210552  12.81319492  0.96770929]
[10.49759008 13.03984558 13.06862095  0.96599241]
[10.30071329 12.65095582 12.74738968  0.96809921]
[10.43321546 12.86121314 12.89019639  0.96706516]
[10.49103615 12.94625618 13.00432286  0.96664666]
[10.30720243 12.70663231 12.76382598  0.96790672]
[10.47894874 12.87413593 12.90509605  0.96697704]
[10.54846694 13.09228738 13.1331273   0.96585025]
12.621881484985352


In [12]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
n_estimators  = [100]
max_depth = [3]
# epochs=20
cats = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False)
            result = KFoldCV(model, data)
            cats[epoch,:] = np.mean(result, axis=0)
#             print(np.mean(result, axis=0))
        print(np.mean(cats, axis=0))
print(time()-t1)

[10.25143786 12.63925939 12.71548354  0.96813783]
14.638161182403564


In [13]:
cats = np.array([[10.37768828, 12.75518778, 12.85268199,  0.96763925],
       [10.28629027, 12.70814963, 12.75741842,  0.96779405],
       [10.28846453, 12.61157366, 12.75612958,  0.96822424],
       [10.26291826, 12.67328712, 12.72697849,  0.96803176],
       [10.20737047, 12.61829216, 12.66953496,  0.96831695],
       [10.1946487 , 12.60367969, 12.64429181,  0.96831283],
       [10.28032235, 12.65562592, 12.72826828,  0.96810326],
       [10.1868521 , 12.59690015, 12.67531244,  0.96834741],
       [10.2636554 , 12.61870867, 12.72485244,  0.96826005],
       [10.22513427, 12.66802107, 12.70719096,  0.96802131],
       [10.25509684, 12.6357489 , 12.74546813,  0.9681497 ],
       [10.22525486, 12.58046481, 12.64321274,  0.96845513],
       [10.27841704, 12.70765268, 12.75463206,  0.96782623],
       [10.22109582, 12.63456768, 12.68046439,  0.96814172],
       [10.25238082, 12.61450912, 12.71370603,  0.96819329],
       [10.26975231, 12.62762262, 12.72613944,  0.96811967],
       [10.26143787, 12.6137239 , 12.74003693,  0.96816986],
       [10.18585822, 12.56416408, 12.61717576,  0.96854948],
       [10.24750641, 12.59980365, 12.71627781,  0.96820034],
       [10.25861246, 12.69750453, 12.72989807,  0.96790004]])

array([[10.37768828, 12.75518778, 12.85268199,  0.96763925],
       [10.28629027, 12.70814963, 12.75741842,  0.96779405],
       [10.28846453, 12.61157366, 12.75612958,  0.96822424],
       [10.26291826, 12.67328712, 12.72697849,  0.96803176],
       [10.20737047, 12.61829216, 12.66953496,  0.96831695],
       [10.1946487 , 12.60367969, 12.64429181,  0.96831283],
       [10.28032235, 12.65562592, 12.72826828,  0.96810326],
       [10.1868521 , 12.59690015, 12.67531244,  0.96834741],
       [10.2636554 , 12.61870867, 12.72485244,  0.96826005],
       [10.22513427, 12.66802107, 12.70719096,  0.96802131],
       [10.25509684, 12.6357489 , 12.74546813,  0.9681497 ],
       [10.22525486, 12.58046481, 12.64321274,  0.96845513],
       [10.27841704, 12.70765268, 12.75463206,  0.96782623],
       [10.22109582, 12.63456768, 12.68046439,  0.96814172],
       [10.25238082, 12.61450912, 12.71370603,  0.96819329],
       [10.26975231, 12.62762262, 12.72613944,  0.96811967],
       [10.26143787, 12.

# hierarchical CatBoost

In [14]:
from sklearn.model_selection import GridSearchCV


def deepCatBoostBackward(train, test,importances,ind,n_estimators, max_depth):
    s = 0
    inds = []
    inds2 = []
    inds3 = []
    for i in ind:
        s = s + importances[i]
        inds.append(i)
#     70% importance for the first layer
        if (s > 0.9):
            model = gridSearch4CatBoost(train[:,inds], train[:,-1],n_estimators, max_depth)
            predY = model.fit(train[:,inds], train[:,-1]).predict(train[:,inds])
            predY2 = model.predict(test[:,inds])
        else:
            inds2.append(i)
#     20% importance for the second layer
            if (s > 0.95):
                tempTrain = np.array([train[:,inds2],predY])
                tempTest = np.array([test[:,inds2],predY2])
                model = gridSearch4CatBoost(tempTrain, train[:,-1],n_estimators, max_depth)
                predYY = model.fit(tempTrain, train[:,-1]).predict(tempTrain)
                predYY2 = model.predict(tempTest)
            else:
                inds3.append(i)
#     10% importance for the third layer
                if (s == 1):
                    tempTrain = np.array([train[:,inds3],predY,predYY])
                    tempTest = np.array([test[:,inds3],predY2,predYY2])
                    model = gridSearch4CatBoost(tempTrain, train[:,-1],n_estimators, max_depth)
#                     predY = model.fit(tempTrain, train[:,-1]).predict(tempTrain)
                    predY2 = model.predict(tempTest)
    return predY2

def deepCatBoostForward(train, test,importances,ind,n_estimators, max_depth):
#     ind = ind[::-1]
    s = 0
    inds = []
    inds2 = []
    inds3 = []
    for i in ind:
        s = s + importances[i]
        inds.append(i)
#     10% importance for the first layer
        if (s > 0.05):
            model = gridSearch4CatBoost(train[:,inds], train[:,-1],n_estimators, max_depth)
            predY = model.fit(train[:,inds], train[:,-1]).predict(train[:,inds])
            predY2 = model.predict(test[:,inds])
        else:
            inds2.append(i)
#     20% importance for the second layer
            if (s > 0.1):
                tempTrain = np.array([train[:,inds2],predY])
                tempTest = np.array([test[:,inds2],predY2])
                model = gridSearch4CatBoost(tempTrain, train[:,-1],n_estimators, max_depth)
                predYY = model.fit(tempTrain, train[:,-1]).predict(tempTrain)
                predYY2 = model.predict(tempTest)
            else:
                inds3.append(i)
#     10% importance for the third layer
                if (s == 1):
                    tempTrain = np.array([train[:,inds3],predY,predYY])
                    tempTest = np.array([test[:,inds3],predY2,predYY2])
                    model = gridSearch4CatBoost(tempTrain, train[:,-1],n_estimators, max_depth)
#                     predY = model.fit(tempTrain, train[:,-1]).predict(tempTrain)
                    predY2 = model.predict(tempTest)
    return predY2

def gridSearch4CatBoost(X,y,n_estimators, max_depth):
#     cbc = CatBoostRegressor(verbose=False,task_type="GPU",devices='0:1')
# #     cbc = RandomForestRegressor()
#     grid = {'max_depth': [3,4,5],'n_estimators':[100,300,500]}
#     gscv = GridSearchCV (estimator = cbc, param_grid = grid, cv = 10)
#     gscv.fit(X,y)
#     return gscv.best_estimator_
    cbc = CatBoostRegressor(n_estimators=n_estimators, max_depth=max_depth,verbose=False,task_type="GPU",devices='0:1').fit(X,y)
    return cbc
#     return model.fit(X,y).predict(testX)

def KFoldForward(data, ind,n_estimators, max_depth, n_fold=10):
#     num = len(data)
    diff = int(len(data)/n_fold)
    results = np.zeros((n_fold, 4))
    for i in range(n_fold):
        begin = diff*i
        end = diff*(i+1)
#         if i == n_fold-1:
#             end = -1
        test = data[begin:end]
        train = deepcopy(data)
        train = np.delete(train, range(begin, end),axis=0)
        X_train, y_train = train[:,:-1], train[:,-1]
        X_test, y = test[:,:-1], test[:,-1]
#         predictY = model.fit(X_train, y_train).predict(X_test)
        predictY = deepCatBoostForward(train, test,importances,ind,n_estimators, max_depth)
        mae = np.mean(abs((y-predictY)))
        stdErr = np.std(((y-predictY)))
        error=sum((y-predictY)**2)
        RMSE=np.sqrt(error/len(y))
        MAC = np.dot(y,predictY)**2/(np.dot(y, y)*np.dot(predictY, predictY))
#         print(mae, stdErr, RMSE, MAC)
        results[i,:] = [mae, stdErr, RMSE, MAC]
    return results

def KFoldBackward(data, ind,n_estimators, max_depth, n_fold=10):
#     num = len(data)
    diff = int(len(data)/n_fold)
    results = np.zeros((n_fold, 4))
    for i in range(n_fold):
        begin = diff*i
        end = diff*(i+1)
#         if i == n_fold-1:
#             end = -1
        test = data[begin:end]
        train = deepcopy(data)
        train = np.delete(train, range(begin, end),axis=0)
        X_train, y_train = train[:,:-1], train[:,-1]
        X_test, y = test[:,:-1], test[:,-1]
#         predictY = model.fit(X_train, y_train).predict(X_test)
        predictY = deepCatBoostBackward(train, test,importances,ind,n_estimators, max_depth)
        mae = np.mean(abs((y-predictY)))
        stdErr = np.std(((y-predictY)))
        error=sum((y-predictY)**2)
        RMSE=np.sqrt(error/len(y))
        MAC = np.dot(y,predictY)**2/(np.dot(y, y)*np.dot(predictY, predictY))
#         print(mae, stdErr, RMSE, MAC)
        results[i,:] = [mae, stdErr, RMSE, MAC]
    return results

In [20]:
t1 = time()
model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])
print((time()-t1)*8)

0.9647693634033203


In [None]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
# n_estimators  = [500]
# max_depth = [5]
epochs=1
cats = np.zeros((epochs,4))
model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])
importances = model.feature_importances_
ind = np.argsort(importances)
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
#             model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])
# #             model = CatBoostRegressor(n_estimators=300, max_depth=3,verbose=False,task_type="GPU",devices='0:1').fit(data[:,:-1], data[:,-1])#n_estimators=i, max_depth=j,
#             importances = model.feature_importances_
#             ind = np.argsort(importances)
        #     model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False,task_type="GPU",devices='0:1')
            result = KFoldForward(data, ind,n_estimators=i, max_depth=j)
            cats[epoch,:] = np.mean(result, axis=0)
        print(np.mean(cats, axis=0))
print(time()-t1)

In [18]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
# n_estimators  = [500]
# max_depth = [5]
epochs=1
cats = np.zeros((epochs,4))
model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])
importances = model.feature_importances_
ind = np.argsort(importances)
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
#             model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])
# #             model = CatBoostRegressor(n_estimators=300, max_depth=3,verbose=False,task_type="GPU",devices='0:1').fit(data[:,:-1], data[:,-1])#n_estimators=i, max_depth=j,
#             importances = model.feature_importances_
#             ind = np.argsort(importances)
        #     model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False,task_type="GPU",devices='0:1')
            result = KFoldForward(data, ind,n_estimators=i, max_depth=j)
            cats[epoch,:] = np.mean(result, axis=0)
        print(np.mean(cats, axis=0))
print(time()-t1)

[10.14728343 12.54247857 12.57556117  0.96861837]
[10.26718204 12.6177972  12.69336673  0.96823571]
[10.46918917 12.80974676 12.93744636  0.96721618]
[10.19210737 12.55887986 12.60713841  0.96855284]
[10.22578583 12.62205091 12.66954586  0.96816748]
[10.45136444 12.81745249 12.96188407  0.96707092]
[10.01436665 12.30248227 12.3938405   0.96980193]
[10.18529858 12.55766728 12.61871678  0.9685615 ]
[10.03746159 12.46854883 12.49820786  0.96908512]
1451.6139676570892


In [15]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
n_estimators  = [500]
max_depth = [4]
epochs=20
cats = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])#n_estimators=500, max_depth=5
#             model = CatBoostRegressor(n_estimators=300, max_depth=3,verbose=False,task_type="GPU",devices='0:1').fit(data[:,:-1], data[:,-1])#n_estimators=i, max_depth=j,
            importances = model.feature_importances_
            ind = np.argsort(importances)
        #     model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False,task_type="GPU",devices='0:1')
            result = KFoldForward(data, ind,n_estimators=i, max_depth=j)
            cats[epoch,:] = np.mean(result, axis=0)
        print(np.mean(cats, axis=0))
print(time()-t1)



[10.10815258 12.45726782 12.52046062  0.96907876]
16764.305967092514


In [21]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
n_estimators  = [500]
max_depth = [3]
epochs=20
cats = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])#n_estimators=500, max_depth=5
#             model = CatBoostRegressor(n_estimators=300, max_depth=3,verbose=False,task_type="GPU",devices='0:1').fit(data[:,:-1], data[:,-1])#n_estimators=i, max_depth=j,
            importances = model.feature_importances_
            ind = np.argsort(importances)
        #     model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False,task_type="GPU",devices='0:1')
            result = KFoldForward(data, ind,n_estimators=i, max_depth=j)
            cats[epoch,:] = np.mean(result, axis=0)
        print(np.mean(cats, axis=0))
print(time()-t1)





[10.08124643 12.41915303 12.48572991  0.96922269]
17013.10431241989


In [None]:
catsf = np.array([[10.09878837, 12.41553323, 12.49560124,  0.969262  ],
       [10.11962392, 12.41233262, 12.49401796,  0.96907576],
       [10.06275084, 12.39462215, 12.43594042,  0.96919346],
       [10.10046857, 12.38029852, 12.49201242,  0.96949448],
       [10.08940891, 12.44357236, 12.51259939,  0.96920998],
       [10.07858261, 12.42263207, 12.51927705,  0.96922953],
       [10.06361071, 12.37677864, 12.47582953,  0.96953568],
       [10.11230769, 12.39720151, 12.53181201,  0.9692203 ],
       [10.01163555, 12.36056672, 12.41258104,  0.96957136],
       [10.07121168, 12.4134961 , 12.4884813 ,  0.96924027],
       [10.09956638, 12.45274712, 12.48578031,  0.96898993],
       [10.06041186, 12.40917716, 12.48302379,  0.9692543 ],
       [10.09565417, 12.49193886, 12.51689932,  0.96897822],
       [10.01535136, 12.36551016, 12.44865885,  0.96952809],
       [10.07152558, 12.4178285 , 12.48414386,  0.96916646],
       [10.07556061, 12.40682223, 12.48808095,  0.96937213],
       [10.12493774, 12.50849555, 12.52719512,  0.9687676 ],
       [10.02914908, 12.41435847, 12.44337056,  0.96931097],
       [10.11527019, 12.41500563, 12.46781261,  0.96925335],
       [10.12911278, 12.48414292, 12.51148055,  0.96879996]])

In [23]:
cats

array([[10.09878837, 12.41553323, 12.49560124,  0.969262  ],
       [10.11962392, 12.41233262, 12.49401796,  0.96907576],
       [10.06275084, 12.39462215, 12.43594042,  0.96919346],
       [10.10046857, 12.38029852, 12.49201242,  0.96949448],
       [10.08940891, 12.44357236, 12.51259939,  0.96920998],
       [10.07858261, 12.42263207, 12.51927705,  0.96922953],
       [10.06361071, 12.37677864, 12.47582953,  0.96953568],
       [10.11230769, 12.39720151, 12.53181201,  0.9692203 ],
       [10.01163555, 12.36056672, 12.41258104,  0.96957136],
       [10.07121168, 12.4134961 , 12.4884813 ,  0.96924027],
       [10.09956638, 12.45274712, 12.48578031,  0.96898993],
       [10.06041186, 12.40917716, 12.48302379,  0.9692543 ],
       [10.09565417, 12.49193886, 12.51689932,  0.96897822],
       [10.01535136, 12.36551016, 12.44865885,  0.96952809],
       [10.07152558, 12.4178285 , 12.48414386,  0.96916646],
       [10.07556061, 12.40682223, 12.48808095,  0.96937213],
       [10.12493774, 12.

In [13]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
# n_estimators  = [500]
# max_depth = [3]
# epochs=20
cats = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = RandomForestRegressor(n_estimators=100, max_depth=3).fit(data[:,:-1], data[:,-1])
#             model = CatBoostRegressor(n_estimators=300, max_depth=3,verbose=False,task_type="GPU",devices='0:1').fit(data[:,:-1], data[:,-1])#n_estimators=i, max_depth=j,
            importances = model.feature_importances_
            ind = np.argsort(importances)
        #     model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False,task_type="GPU",devices='0:1')
            result = KFoldBackward(data, ind[::-1],n_estimators=i, max_depth=j)
            cats[epoch,:] = np.mean(result, axis=0)
        print(np.mean(cats, axis=0))
print(time()-t1)

[10.36614959 12.69420403 12.79676066  0.96565374]
[10.47310438 12.91071287 12.9638704   0.96450822]
[10.62711653 13.06638053 13.08613655  0.96356736]
[10.42653544 12.74791228 12.78826384  0.9652562 ]
[10.62697537 13.02775326 13.09210136  0.9638165 ]
[10.70271048 13.04682205 13.17844813  0.96358623]
[10.35925177 12.69167011 12.73485574  0.9656374 ]
[10.2762919  12.55890107 12.63178806  0.96627053]
[10.28438036 12.6139164  12.69082185  0.9660747 ]
756.545571565628


In [9]:
mlps = np.array([[10.33844826, 12.54313348, 12.7262093 ,  0.96862652],
       [10.32582301, 12.65637348, 12.70912657,  0.96804509],
       [10.37354187, 12.66155571, 12.76277178,  0.96806129],
       [10.31935821, 12.66241199, 12.69265866,  0.96798035],
       [10.31813866, 12.65630203, 12.71218706,  0.96809453],
       [10.30843906, 12.61516826, 12.67704782,  0.96826888],
       [10.33989355, 12.62697726, 12.7312375 ,  0.96824818],
       [10.32086251, 12.65109107, 12.71694892,  0.9681219 ],
       [10.35721493, 12.6673005 , 12.71473986,  0.96798332],
       [10.33973879, 12.61913049, 12.69705027,  0.96826056],
       [10.35732446, 12.64910193, 12.72302641,  0.96816994],
       [10.35598136, 12.68469268, 12.73404263,  0.96794306],
       [10.3657521 , 12.71902567, 12.7772913 ,  0.96772724],
       [10.33320945, 12.66198054, 12.73265444,  0.96809913],
       [10.36052839, 12.66301649, 12.74723794,  0.96805692],
       [10.3123423 , 12.63285127, 12.6896689 ,  0.9681747 ],
       [10.36518771, 12.65933583, 12.72378723,  0.96793156],
       [10.3192333 , 12.62615195, 12.71158864,  0.96831801],
       [10.32329558, 12.69390125, 12.70985159,  0.96790072],
       [10.33209071, 12.62427159, 12.68580677,  0.96809862]])
svms = np.array([[10.31502399, 12.60965195, 12.70234336,  0.96809039],
       [10.29528714, 12.60122263, 12.68653493,  0.96816691],
       [10.33741548, 12.61042373, 12.70851649,  0.96816076],
       [10.35669184, 12.6830911 , 12.73194972,  0.96801522],
       [10.34874295, 12.54725052, 12.71256792,  0.96853742],
       [10.30265705, 12.62792342, 12.67876883,  0.96814528],
       [10.31309369, 12.60471122, 12.68130234,  0.96827801],
       [10.29526308, 12.60315796, 12.64913135,  0.96821252],
       [10.34298771, 12.5951941 , 12.71161038,  0.96829797],
       [10.29506639, 12.64356901, 12.66682028,  0.96815279],
       [10.30335878, 12.58787403, 12.685352  ,  0.96844832],
       [10.3244898 , 12.54553981, 12.71662034,  0.96857975],
       [10.36272844, 12.71695656, 12.73053412,  0.96780353],
       [10.32824891, 12.67242857, 12.70603662,  0.96801742],
       [10.29422907, 12.55920334, 12.67837294,  0.96849163],
       [10.33788268, 12.63522053, 12.70225725,  0.96817008],
       [10.31079704, 12.62893514, 12.69035966,  0.96816154],
       [10.29870255, 12.62271341, 12.66500276,  0.96812105],
       [10.33950805, 12.6297841 , 12.71761838,  0.96818278],
       [10.32698621, 12.65179038, 12.70595414,  0.96817392]])
rfs = np.array([[10.32256625, 12.61982707, 12.7421031 ,  0.96810854],
       [10.36324125, 12.71659334, 12.77970894,  0.96775257],
       [10.34759433, 12.67391293, 12.75932332,  0.96802543],
       [10.3084755 , 12.64903314, 12.73397049,  0.96810231],
       [10.36590579, 12.66922857, 12.76524788,  0.96809254],
       [10.32250406, 12.65041227, 12.73724824,  0.96806253],
       [10.34138856, 12.66382882, 12.74483943,  0.96791368],
       [10.33608809, 12.64714317, 12.72208457,  0.96808706],
       [10.38443037, 12.7135082 , 12.7564205 ,  0.96770674],
       [10.36623186, 12.67446174, 12.74047479,  0.9678949 ],
       [10.35470672, 12.6861324 , 12.76281249,  0.96784102],
       [10.31290868, 12.62360549, 12.72020505,  0.9682553 ],
       [10.31166265, 12.64166848, 12.69890393,  0.96809927],
       [10.31476079, 12.624622  , 12.69033632,  0.96821124],
       [10.35172122, 12.75443723, 12.7797969 ,  0.96761403],
       [10.35432862, 12.65085233, 12.74174059,  0.96804758],
       [10.32825046, 12.70522531, 12.74304665,  0.96778647],
       [10.34527801, 12.64221575, 12.74668749,  0.96817599],
       [10.30958474, 12.62320986, 12.6875007 ,  0.96811914],
       [10.3560508 , 12.72225032, 12.7918134 ,  0.96768584]])
xgbs = np.array([[10.25779176, 12.69624405, 12.74631292,  0.96780137],
       [10.33098093, 12.72891122, 12.84286545,  0.9675998 ],
       [10.36792937, 12.81221264, 12.89186481,  0.96732048],
       [10.35353653, 12.77790998, 12.84935438,  0.96746365],
       [10.33350623, 12.69695194, 12.77123261,  0.967803  ],
       [10.36631676, 12.82057727, 12.84480066,  0.96724663],
       [10.41405458, 12.79921299, 12.91141796,  0.96731082],
       [10.3028325 , 12.67138604, 12.77527883,  0.9679224 ],
       [10.35096022, 12.8046974 , 12.85038317,  0.96732309],
       [10.30497141, 12.72820635, 12.80182272,  0.96772157],
       [10.35500245, 12.71951044, 12.79153477,  0.96774658],
       [10.27488227, 12.62898442, 12.75892269,  0.96819504],
       [10.33621259, 12.72781413, 12.79598891,  0.96766257],
       [10.40768904, 12.78751901, 12.86311637,  0.96738576],
       [10.43577154, 12.83481552, 12.90512511,  0.96719407],
       [10.32985087, 12.66490575, 12.79973768,  0.96791358],
       [10.41086992, 12.78904671, 12.88051065,  0.96738466],
       [10.4425563 , 12.84183029, 12.93265525,  0.96713218],
       [10.35687946, 12.73057554, 12.85819238,  0.96769736],
       [10.3941082 , 12.85092004, 12.88533856,  0.96715287]])
cats = np.array([[10.37768828, 12.75518778, 12.85268199,  0.96763925],
       [10.28629027, 12.70814963, 12.75741842,  0.96779405],
       [10.28846453, 12.61157366, 12.75612958,  0.96822424],
       [10.26291826, 12.67328712, 12.72697849,  0.96803176],
       [10.20737047, 12.61829216, 12.66953496,  0.96831695],
       [10.1946487 , 12.60367969, 12.64429181,  0.96831283],
       [10.28032235, 12.65562592, 12.72826828,  0.96810326],
       [10.1868521 , 12.59690015, 12.67531244,  0.96834741],
       [10.2636554 , 12.61870867, 12.72485244,  0.96826005],
       [10.22513427, 12.66802107, 12.70719096,  0.96802131],
       [10.25509684, 12.6357489 , 12.74546813,  0.9681497 ],
       [10.22525486, 12.58046481, 12.64321274,  0.96845513],
       [10.27841704, 12.70765268, 12.75463206,  0.96782623],
       [10.22109582, 12.63456768, 12.68046439,  0.96814172],
       [10.25238082, 12.61450912, 12.71370603,  0.96819329],
       [10.26975231, 12.62762262, 12.72613944,  0.96811967],
       [10.26143787, 12.6137239 , 12.74003693,  0.96816986],
       [10.18585822, 12.56416408, 12.61717576,  0.96854948],
       [10.24750641, 12.59980365, 12.71627781,  0.96820034],
       [10.25861246, 12.69750453, 12.72989807,  0.96790004]])
catsf = np.array([[10.09878837, 12.41553323, 12.49560124,  0.969262  ],
       [10.11962392, 12.41233262, 12.49401796,  0.96907576],
       [10.06275084, 12.39462215, 12.43594042,  0.96919346],
       [10.10046857, 12.38029852, 12.49201242,  0.96949448],
       [10.08940891, 12.44357236, 12.51259939,  0.96920998],
       [10.07858261, 12.42263207, 12.51927705,  0.96922953],
       [10.06361071, 12.37677864, 12.47582953,  0.96953568],
       [10.11230769, 12.39720151, 12.53181201,  0.9692203 ],
       [10.01163555, 12.36056672, 12.41258104,  0.96957136],
       [10.07121168, 12.4134961 , 12.4884813 ,  0.96924027],
       [10.09956638, 12.45274712, 12.48578031,  0.96898993],
       [10.06041186, 12.40917716, 12.48302379,  0.9692543 ],
       [10.09565417, 12.49193886, 12.51689932,  0.96897822],
       [10.01535136, 12.36551016, 12.44865885,  0.96952809],
       [10.07152558, 12.4178285 , 12.48414386,  0.96916646],
       [10.07556061, 12.40682223, 12.48808095,  0.96937213],
       [10.01535136, 12.36551016, 12.44865885,  0.96952809],
       [10.02914908, 12.41435847, 12.44337056,  0.96931097],
       [10.11527019, 12.41500563, 12.46781261,  0.96925335],
       [10.01163555, 12.36056672, 12.41258104,  0.96957136]])

In [10]:
print(mlps.std(axis=0))
print(svms.std(axis=0))
print(rfs.std(axis=0))
print(xgbs.std(axis=0))
print(cats.std(axis=0))
print(catsf.std(axis=0))

[0.01961281 0.03505128 0.02446033 0.00018403]
[0.02158895 0.04206855 0.02167493 0.00018338]
[0.02194707 0.03714127 0.02766001 0.00018514]
[0.04924589 0.06310574 0.05335895 0.00029097]
[0.04290789 0.04758377 0.05047421 0.00021841]
[0.03515395 0.03230777 0.03296911 0.00018136]


In [11]:
print(np.mean(catsf, axis=0))

[10.06989325 12.40582495 12.47685812  0.96929929]


In [12]:
def getTestResult(f1,f2,i):
    print(ranksums(np.asarray(f1[:,i]), np.asarray(f2[:,i])).pvalue)

i = 0
print(getTestResult(mlps, catsf,i))
print(getTestResult(svms, catsf,i))
print(getTestResult(rfs, catsf,i))
print(getTestResult(xgbs, catsf,i))
print(getTestResult(cats, catsf,i))

i = 2
print(getTestResult(mlps, catsf,i))
print(getTestResult(svms, catsf,i))
print(getTestResult(rfs, catsf,i))
print(getTestResult(xgbs, catsf,i))
print(getTestResult(cats, catsf,i))

6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
