https://archive.ics.uci.edu/ml/datasets/student+performance

In [1]:
import numpy as np
import pandas as pd
from copy import deepcopy
from sklearn.utils import shuffle
from scipy.stats import ranksums
from catboost import CatBoostRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor,BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
# from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder,OrdinalEncoder
import warnings
from math import log2
from time import time

def KFoldCV(model, data, n_fold=10):
#     num = len(data)
    diff = int(len(data)/n_fold)
    results = np.zeros((n_fold, 4))
    for i in range(n_fold):
        begin = diff*i
        end = diff*(i+1)
#         if i == n_fold-1:
#             end = -1
        test = data[begin:end]
        train = deepcopy(data)
        train = np.delete(train, range(begin, end),axis=0)
        X_train, y_train = train[:,:-1], train[:,-1]
        X_test, y = test[:,:-1], test[:,-1]
        predictY = model.fit(X_train, y_train).predict(X_test)
        mae = np.mean(abs((y-predictY)))
        stdErr = np.std(((y-predictY)))
        error=sum((y-predictY)**2)
        RMSE=np.sqrt(error/len(y))
        MAC = np.dot(y,predictY)**2/(np.dot(y, y)*np.dot(predictY, predictY))
#         print(mae, stdErr, RMSE, MAC)
        results[i,:] = [mae, stdErr, RMSE, MAC]
    return results

warnings.filterwarnings('ignore')

# data = pd.read_csv('./student-mat.csv', sep=';')
# por = pd.read_csv('./student-por.csv', sep=';')
# combine = pd.merge(math, por, on=["school","sex","age","address","famsize","Pstatus","Medu","Fedu","Mjob","Fjob","reason","nursery","internet"])
# data = data.values
# data = shuffle(data)

In [2]:
data=pd.read_csv('./exams.csv')
data['writing_score']=data['writing score']
data.drop(columns=['math score'],inplace=True)
data.drop(columns=['reading score'],inplace=True)
data.drop(columns=['writing score'],inplace=True)

cats = ['gender', 'race/ethnicity', 'parental level of education', 'lunch',
       'test preparation course']
oe = OrdinalEncoder()
for c in cats:
    oe.fit(data[c].values.reshape(-1,1))
    data[c] = np.asarray(oe.transform(data[c].values.reshape(-1,1)),dtype=int)
#     np.asarray(data['gender'],dtype=int)
data = data.values
data = shuffle(data)

In [3]:
# data.shape (395, 33)
# np.savetxt('math.txt',data)
data.shape

(1000, 6)

In [7]:
t1 = time()
hidden_layer_sizes  = [100, 300, 500]
max_iter = [500, 1000,1500]
# hidden_layer_sizes  = [500]
# max_iter = [1000]
epochs=1
mlps = np.zeros((epochs,4))
for i in hidden_layer_sizes:
    for j in max_iter:
        for epoch in range(epochs):
            data = shuffle(data)
            model = MLPRegressor(hidden_layer_sizes=i, max_iter=j)
            result = KFoldCV(model, data)
            mlps[epoch,:] = np.mean(result, axis=0)
#             print(np.mean(result, axis=0))
        print(np.mean(mlps, axis=0))
#     [0.96503638 1.28823634 1.30661604 0.97660302]
print(time()-t1)

[10.46674332 12.67748182 12.75028512  0.96730099]
[10.26630024 12.50294124 12.54406165  0.96829227]
[10.22532893 12.43550828 12.51408295  0.96858856]
[10.25379238 12.4513434  12.5483246   0.96861692]
[10.3071357  12.33315764 12.58051905  0.96881061]
[10.27273724 12.42879647 12.55490995  0.96854997]
[10.2352059  12.47832985 12.51843867  0.96835682]
[10.28202781 12.50818582 12.58490825  0.96820731]
[10.24722718 12.35639215 12.47803149  0.9684611 ]
216.37790489196777


In [4]:
t1 = time()
hidden_layer_sizes  = [100, 300, 500]
max_iter = [500, 1000,1500]
hidden_layer_sizes  = [500]
max_iter = [1500]
epochs=20
mlps = np.zeros((epochs,4))
for i in hidden_layer_sizes:
    for j in max_iter:
        for epoch in range(epochs):
            data = shuffle(data)
            model = MLPRegressor(hidden_layer_sizes=i, max_iter=j)
            result = KFoldCV(model, data)
            mlps[epoch,:] = np.mean(result, axis=0)
#             print(np.mean(result, axis=0))
        print(np.mean(mlps, axis=0))
#     [0.96503638 1.28823634 1.30661604 0.97660302]
print(time()-t1)

[10.24708941 12.4583472  12.52814562  0.96846481]
613.320728302002


In [5]:
mlps = np.array([[10.24702922, 12.4247865 , 12.51196346,  0.96868107],
       [10.23508496, 12.48977467, 12.54664674,  0.96837363],
       [10.27581518, 12.51125036, 12.56710662,  0.96815556],
       [10.20434426, 12.46428498, 12.49276779,  0.96844304],
       [10.21449042, 12.45770235, 12.50272909,  0.96849756],
       [10.25645146, 12.46623805, 12.5393194 ,  0.9685545 ],
       [10.25293692, 12.43832785, 12.53255566,  0.96856773],
       [10.24015863, 12.436593  , 12.52160923,  0.96859482],
       [10.2480341 , 12.470938  , 12.53053366,  0.96842864],
       [10.23634332, 12.40500003, 12.52486652,  0.96858968],
       [10.25203841, 12.4111212 , 12.51169896,  0.96865791],
       [10.2657211 , 12.48702409, 12.52070581,  0.96825708],
       [10.2297428 , 12.46844311, 12.50788628,  0.96838222],
       [10.27715139, 12.53746928, 12.5668944 ,  0.96816509],
       [10.23211151, 12.48228443, 12.51443667,  0.96832019],
       [10.26042647, 12.36532915, 12.54515646,  0.96866462],
       [10.26542217, 12.45723524, 12.53552954,  0.96854895],
       [10.26909359, 12.47013094, 12.55662755,  0.96848665],
       [10.24235693, 12.44073885, 12.52255106,  0.96863832],
       [10.2370353 , 12.48227198, 12.51132745,  0.96828901]])

array([[10.24702922, 12.4247865 , 12.51196346,  0.96868107],
       [10.23508496, 12.48977467, 12.54664674,  0.96837363],
       [10.27581518, 12.51125036, 12.56710662,  0.96815556],
       [10.20434426, 12.46428498, 12.49276779,  0.96844304],
       [10.21449042, 12.45770235, 12.50272909,  0.96849756],
       [10.25645146, 12.46623805, 12.5393194 ,  0.9685545 ],
       [10.25293692, 12.43832785, 12.53255566,  0.96856773],
       [10.24015863, 12.436593  , 12.52160923,  0.96859482],
       [10.2480341 , 12.470938  , 12.53053366,  0.96842864],
       [10.23634332, 12.40500003, 12.52486652,  0.96858968],
       [10.25203841, 12.4111212 , 12.51169896,  0.96865791],
       [10.2657211 , 12.48702409, 12.52070581,  0.96825708],
       [10.2297428 , 12.46844311, 12.50788628,  0.96838222],
       [10.27715139, 12.53746928, 12.5668944 ,  0.96816509],
       [10.23211151, 12.48228443, 12.51443667,  0.96832019],
       [10.26042647, 12.36532915, 12.54515646,  0.96866462],
       [10.26542217, 12.

In [8]:
t1 = time()
Cs = [10, 100, 1000]
gammas = [0.001, 0.01, 0.1]
# Cs = [1000]
# gammas = [0.001]
svms = np.zeros((epochs,4))
for i in range(len(Cs)):
    for j in range(len(gammas)):
        lc = len(Cs)
        lg = len(gammas)
        for epoch in range(epochs):
            data = shuffle(data)
            model = SVR(kernel = 'rbf', C = Cs[i], gamma = gammas[j])
            result = KFoldCV(model, data)
            svms[epoch,:] = np.mean(result, axis=0)
        print(np.mean(svms, axis=0))
#         [0.90084585 1.28205627 1.30138029 0.97616123]
print(time()-t1)

[10.95584869 13.56224512 13.70816076  0.9624667 ]
[10.31220197 12.69834486 12.7640979   0.9671862 ]
[10.20807857 12.5389712  12.57997339  0.96815519]
[10.24390014 12.66017076 12.6767062   0.96739672]
[10.2359687  12.44841941 12.51913439  0.96845208]
[10.37558956 12.68949146 12.72394532  0.96737747]
[10.1731565  12.38540029 12.51920731  0.96879517]
[10.25492607 12.52844134 12.55345872  0.96819867]
[10.43058336 12.74981034 12.87608382  0.96699848]
6.062178373336792


In [6]:
t1 = time()
Cs = [10, 100, 1000]
gammas = [0.001, 0.01, 0.1]
Cs = [100]
gammas = [0.01]
svms = np.zeros((epochs,4))
for i in range(len(Cs)):
    for j in range(len(gammas)):
        lc = len(Cs)
        lg = len(gammas)
        for epoch in range(epochs):
            data = shuffle(data)
            model = SVR(kernel = 'rbf', C = Cs[i], gamma = gammas[j])
            result = KFoldCV(model, data)
            svms[epoch,:] = np.mean(result, axis=0)
        print(np.mean(svms, axis=0))
#         [0.90084585 1.28205627 1.30138029 0.97616123]
print(time()-t1)

[10.25072662 12.48540406 12.5551505   0.96833906]
9.745406150817871


In [7]:
svms = np.array([[10.24576414, 12.49357116, 12.55514368,  0.96833862],
       [10.2491687 , 12.47751752, 12.55823731,  0.96835725],
       [10.25846759, 12.52283016, 12.5691685 ,  0.96817818],
       [10.2676875 , 12.48046173, 12.58049292,  0.96834835],
       [10.24327115, 12.45105008, 12.54003333,  0.96848824],
       [10.22403406, 12.47737767, 12.54654033,  0.9683862 ],
       [10.25691613, 12.50592963, 12.57059028,  0.9681622 ],
       [10.21143418, 12.47669542, 12.50463541,  0.96841276],
       [10.28658116, 12.46853705, 12.58800699,  0.96837609],
       [10.26027499, 12.54023721, 12.55535575,  0.96813417],
       [10.25265996, 12.45951932, 12.56551005,  0.96837222],
       [10.21771729, 12.48513817, 12.53065884,  0.96844134],
       [10.28842344, 12.44818886, 12.57823585,  0.96853768],
       [10.25590049, 12.52741537, 12.54361508,  0.96818803],
       [10.22292424, 12.45289984, 12.53401607,  0.96845975],
       [10.26644024, 12.49279487, 12.55935877,  0.96831544],
       [10.25788975, 12.46979611, 12.55379284,  0.96846172],
       [10.25631993, 12.46823251, 12.55237473,  0.96842887],
       [10.2587265 , 12.50359809, 12.58542461,  0.96822413],
       [10.23393093, 12.50629033, 12.53181869,  0.96816989]])

array([[10.24576414, 12.49357116, 12.55514368,  0.96833862],
       [10.2491687 , 12.47751752, 12.55823731,  0.96835725],
       [10.25846759, 12.52283016, 12.5691685 ,  0.96817818],
       [10.2676875 , 12.48046173, 12.58049292,  0.96834835],
       [10.24327115, 12.45105008, 12.54003333,  0.96848824],
       [10.22403406, 12.47737767, 12.54654033,  0.9683862 ],
       [10.25691613, 12.50592963, 12.57059028,  0.9681622 ],
       [10.21143418, 12.47669542, 12.50463541,  0.96841276],
       [10.28658116, 12.46853705, 12.58800699,  0.96837609],
       [10.26027499, 12.54023721, 12.55535575,  0.96813417],
       [10.25265996, 12.45951932, 12.56551005,  0.96837222],
       [10.21771729, 12.48513817, 12.53065884,  0.96844134],
       [10.28842344, 12.44818886, 12.57823585,  0.96853768],
       [10.25590049, 12.52741537, 12.54361508,  0.96818803],
       [10.22292424, 12.45289984, 12.53401607,  0.96845975],
       [10.26644024, 12.49279487, 12.55935877,  0.96831544],
       [10.25788975, 12.

In [9]:
t1 = time()
n_estimators  = [100, 300, 500]
max_depth = [3, 4, 5]
# n_estimators  = [500]
# max_depth = [4]
rfs = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = RandomForestRegressor(n_estimators=i, max_depth=j)
            result = KFoldCV(model, data)
            rfs[epoch,:] = np.mean(result, axis=0)
        print(np.mean(rfs, axis=0))
#         [0.82798772 1.18901865 1.20639261 0.97973146]
print(time()-t1)

[10.39582023 12.79976995 12.83814857  0.96678902]
[10.27098497 12.52570622 12.59539342  0.96805658]
[10.25075856 12.46716246 12.55912967  0.96843136]
[10.37539107 12.77348649 12.79440202  0.96683434]
[10.29567634 12.59382431 12.65238367  0.96790661]
[10.31212059 12.54383304 12.6225909   0.96814901]
[10.38215911 12.7664999  12.80878175  0.96685994]
[10.22898832 12.49147614 12.57463535  0.96828482]
[10.2623195  12.4629227  12.58350419  0.96831093]
31.558969974517822


In [8]:
t1 = time()
n_estimators  = [100, 300, 500]
max_depth = [3, 4, 5]
n_estimators  = [100]
max_depth = [5]
rfs = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = RandomForestRegressor(n_estimators=i, max_depth=j)
            result = KFoldCV(model, data)
            rfs[epoch,:] = np.mean(result, axis=0)
        print(np.mean(rfs, axis=0))
#         [0.82798772 1.18901865 1.20639261 0.97973146]
print(time()-t1)

[10.29826761 12.55181773 12.62296383  0.96798247]
19.781859397888184


In [9]:
rfs = np.array([[10.29584987, 12.45924653, 12.61002881,  0.96839795],
       [10.24455504, 12.49253891, 12.57532794,  0.96829881],
       [10.27272134, 12.56972272, 12.63197613,  0.96793714],
       [10.29415792, 12.57987838, 12.64072529,  0.96783943],
       [10.36607208, 12.55882154, 12.70298689,  0.96788023],
       [10.33880184, 12.54029621, 12.62196298,  0.96777525],
       [10.21252659, 12.48420344, 12.53068994,  0.96839397],
       [10.38396127, 12.68308583, 12.7259957 ,  0.96738905],
       [10.28169394, 12.55368799, 12.62808287,  0.96809904],
       [10.35976357, 12.59073452, 12.68311803,  0.96793838],
       [10.28821046, 12.57053378, 12.63275849,  0.96793685],
       [10.28914472, 12.58758886, 12.62673155,  0.96779746],
       [10.32236837, 12.55668605, 12.62873629,  0.96784441],
       [10.29381964, 12.5330877 , 12.59971915,  0.96812762],
       [10.32538291, 12.50460568, 12.6208809 ,  0.96805346],
       [10.26123903, 12.4840676 , 12.53207284,  0.96817104],
       [10.39138504, 12.71374584, 12.7279281 ,  0.96726438],
       [10.29961192, 12.59227285, 12.63769819,  0.96780373],
       [10.21825561, 12.46898925, 12.55412443,  0.96840277],
       [10.22583113, 12.512561  , 12.54773211,  0.96829837]])

array([[10.29584987, 12.45924653, 12.61002881,  0.96839795],
       [10.24455504, 12.49253891, 12.57532794,  0.96829881],
       [10.27272134, 12.56972272, 12.63197613,  0.96793714],
       [10.29415792, 12.57987838, 12.64072529,  0.96783943],
       [10.36607208, 12.55882154, 12.70298689,  0.96788023],
       [10.33880184, 12.54029621, 12.62196298,  0.96777525],
       [10.21252659, 12.48420344, 12.53068994,  0.96839397],
       [10.38396127, 12.68308583, 12.7259957 ,  0.96738905],
       [10.28169394, 12.55368799, 12.62808287,  0.96809904],
       [10.35976357, 12.59073452, 12.68311803,  0.96793838],
       [10.28821046, 12.57053378, 12.63275849,  0.96793685],
       [10.28914472, 12.58758886, 12.62673155,  0.96779746],
       [10.32236837, 12.55668605, 12.62873629,  0.96784441],
       [10.29381964, 12.5330877 , 12.59971915,  0.96812762],
       [10.32538291, 12.50460568, 12.6208809 ,  0.96805346],
       [10.26123903, 12.4840676 , 12.53207284,  0.96817104],
       [10.39138504, 12.

In [10]:
t1 = time()
n_estimators= [100, 300, 500]
max_depth = [3, 4, 5]
# n_estimators  = [100]
# max_depth = [3]
xgbs = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = XGBRegressor(n_estimators=i, max_depth=j)
            result = KFoldCV(model, data)
            xgbs[epoch,:] = np.mean(result, axis=0)
        print(np.mean(xgbs, axis=0))
#         [0.89059432 1.2367164  1.25472816 0.97802632]
print(time()-t1)

[10.1976924  12.52910867 12.54869206  0.96818468]
[10.45958146 12.8250128  12.88800913  0.966295  ]
[10.67588148 13.18553698 13.25108796  0.96479998]
[10.32028933 12.606721   12.69421858  0.96779335]
[10.74357015 13.18737727 13.30513336  0.96474696]
[11.03551589 13.54072961 13.61944825  0.96265492]
[10.4863853  12.7957437  12.90933686  0.96680902]
[10.72258972 13.2482558  13.27435844  0.96422877]
[11.16682899 13.68296789 13.73806198  0.96207316]
22.90143322944641


In [10]:
t1 = time()
n_estimators= [100, 300, 500]
max_depth = [3, 4, 5]
n_estimators  = [100]
max_depth = [3]
xgbs = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = XGBRegressor(n_estimators=i, max_depth=j)
            result = KFoldCV(model, data)
            xgbs[epoch,:] = np.mean(result, axis=0)
        print(np.mean(xgbs, axis=0))
#         [0.89059432 1.2367164  1.25472816 0.97802632]
print(time()-t1)

[10.20065798 12.46367574 12.54224802  0.96843639]
18.765238046646118


In [11]:
xgbs = np.array([[10.17236014, 12.47224944, 12.52692333,  0.96836986],
       [10.25268184, 12.57112088, 12.60946838,  0.96794548],
       [10.22945911, 12.54412072, 12.59088005,  0.96805575],
       [10.1825717 , 12.4952832 , 12.5481748 ,  0.96827154],
       [10.26881711, 12.48951399, 12.61018107,  0.96831292],
       [10.16997033, 12.43585955, 12.53119892,  0.96858836],
       [10.20721584, 12.47866681, 12.54596335,  0.96838111],
       [10.27591428, 12.59465086, 12.63968254,  0.96766674],
       [10.17153807, 12.37616913, 12.49048956,  0.96862278],
       [10.13954746, 12.34127814, 12.48297606,  0.96896768],
       [10.17891239, 12.43703647, 12.52410432,  0.96858669],
       [10.23626408, 12.4275139 , 12.56417387,  0.9685997 ],
       [10.24604296, 12.5414683 , 12.59780891,  0.96814858],
       [10.16829383, 12.37999568, 12.4860957 ,  0.96892873],
       [10.12531774, 12.417264  , 12.46898974,  0.96875531],
       [10.24646563, 12.48205608, 12.56506835,  0.96837264],
       [10.13384484, 12.45981684, 12.50533259,  0.96849657],
       [10.23285533, 12.51173097, 12.54563005,  0.96828896],
       [10.19589955, 12.36876242, 12.49281216,  0.96877406],
       [10.17918739, 12.4489574 , 12.51900663,  0.96859424]])

array([[10.17236014, 12.47224944, 12.52692333,  0.96836986],
       [10.25268184, 12.57112088, 12.60946838,  0.96794548],
       [10.22945911, 12.54412072, 12.59088005,  0.96805575],
       [10.1825717 , 12.4952832 , 12.5481748 ,  0.96827154],
       [10.26881711, 12.48951399, 12.61018107,  0.96831292],
       [10.16997033, 12.43585955, 12.53119892,  0.96858836],
       [10.20721584, 12.47866681, 12.54596335,  0.96838111],
       [10.27591428, 12.59465086, 12.63968254,  0.96766674],
       [10.17153807, 12.37616913, 12.49048956,  0.96862278],
       [10.13954746, 12.34127814, 12.48297606,  0.96896768],
       [10.17891239, 12.43703647, 12.52410432,  0.96858669],
       [10.23626408, 12.4275139 , 12.56417387,  0.9685997 ],
       [10.24604296, 12.5414683 , 12.59780891,  0.96814858],
       [10.16829383, 12.37999568, 12.4860957 ,  0.96892873],
       [10.12531774, 12.417264  , 12.46898974,  0.96875531],
       [10.24646563, 12.48205608, 12.56506835,  0.96837264],
       [10.13384484, 12.

In [11]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
# n_estimators  = [500]
# max_depth = [3]
# epochs=20
cats = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False)
            result = KFoldCV(model, data)
            cats[epoch,:] = np.mean(result, axis=0)
#             print(np.mean(result, axis=0))
        print(np.mean(cats, axis=0))
print(time()-t1)

[10.03128582 12.23084286 12.31656778  0.96967487]
[10.20071984 12.50268749 12.58766614  0.96825408]
[10.50288989 12.88331735 12.94861279  0.96637792]
[10.06908387 12.32930977 12.39801539  0.96925174]
[10.19309243 12.56086907 12.62399138  0.96795613]
[10.54424202 12.85164587 12.92672999  0.96624063]
[10.05442427 12.29365933 12.35745752  0.96930783]
[10.35902804 12.68041054 12.74804349  0.96721363]
[10.4801428  12.86042571 12.91001186  0.96637646]
13.876201629638672


In [12]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
n_estimators  = [100]
max_depth = [3]
# epochs=20
cats = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False)
            result = KFoldCV(model, data)
            cats[epoch,:] = np.mean(result, axis=0)
#             print(np.mean(result, axis=0))
        print(np.mean(cats, axis=0))
print(time()-t1)

[10.05492771 12.28218953 12.35573943  0.96936758]
14.472714900970459


In [13]:
cats = np.array([[10.03374446, 12.27614294, 12.34044757,  0.9693041 ],
       [10.04587756, 12.26567573, 12.34706646,  0.96948862],
       [10.04502916, 12.22580058, 12.34249277,  0.96970859],
       [10.03753257, 12.28656396, 12.33982192,  0.96927433],
       [10.04205342, 12.27344787, 12.34609054,  0.96945708],
       [10.08868391, 12.27626064, 12.39598836,  0.96947063],
       [10.116552  , 12.41101843, 12.44852049,  0.96874994],
       [10.07480935, 12.29901086, 12.37679264,  0.96928802],
       [10.04104189, 12.26511336, 12.34234951,  0.96953258],
       [10.07176815, 12.25110702, 12.36209175,  0.96936625],
       [10.03275069, 12.2945381 , 12.35806322,  0.96937684],
       [ 9.9938698 , 12.25782531, 12.30746019,  0.96956656],
       [10.04232959, 12.2440261 , 12.3214168 ,  0.96941181],
       [10.06774758, 12.30907938, 12.37620875,  0.96922782],
       [10.04792167, 12.24992286, 12.32090525,  0.96958147],
       [10.09741492, 12.31606655, 12.38112575,  0.96912508],
       [10.1093944 , 12.38476963, 12.4272966 ,  0.96889634],
       [10.00203101, 12.2295399 , 12.31762703,  0.96948157],
       [10.04324076, 12.27765625, 12.31817313,  0.96945299],
       [10.06476125, 12.25022514, 12.34484986,  0.969591  ]])

array([[10.03374446, 12.27614294, 12.34044757,  0.9693041 ],
       [10.04587756, 12.26567573, 12.34706646,  0.96948862],
       [10.04502916, 12.22580058, 12.34249277,  0.96970859],
       [10.03753257, 12.28656396, 12.33982192,  0.96927433],
       [10.04205342, 12.27344787, 12.34609054,  0.96945708],
       [10.08868391, 12.27626064, 12.39598836,  0.96947063],
       [10.116552  , 12.41101843, 12.44852049,  0.96874994],
       [10.07480935, 12.29901086, 12.37679264,  0.96928802],
       [10.04104189, 12.26511336, 12.34234951,  0.96953258],
       [10.07176815, 12.25110702, 12.36209175,  0.96936625],
       [10.03275069, 12.2945381 , 12.35806322,  0.96937684],
       [ 9.9938698 , 12.25782531, 12.30746019,  0.96956656],
       [10.04232959, 12.2440261 , 12.3214168 ,  0.96941181],
       [10.06774758, 12.30907938, 12.37620875,  0.96922782],
       [10.04792167, 12.24992286, 12.32090525,  0.96958147],
       [10.09741492, 12.31606655, 12.38112575,  0.96912508],
       [10.1093944 , 12.

# hierarchical CatBoost

In [14]:
from sklearn.model_selection import GridSearchCV


def deepCatBoostBackward(train, test,importances,ind,n_estimators, max_depth):
    s = 0
    inds = []
    inds2 = []
    inds3 = []
    for i in ind:
        s = s + importances[i]
        inds.append(i)
#     70% importance for the first layer
        if (s > 0.9):
            model = gridSearch4CatBoost(train[:,inds], train[:,-1],n_estimators, max_depth)
            predY = model.fit(train[:,inds], train[:,-1]).predict(train[:,inds])
            predY2 = model.predict(test[:,inds])
        else:
            inds2.append(i)
#     20% importance for the second layer
            if (s > 0.95):
                tempTrain = np.array([train[:,inds2],predY])
                tempTest = np.array([test[:,inds2],predY2])
                model = gridSearch4CatBoost(tempTrain, train[:,-1],n_estimators, max_depth)
                predYY = model.fit(tempTrain, train[:,-1]).predict(tempTrain)
                predYY2 = model.predict(tempTest)
            else:
                inds3.append(i)
#     10% importance for the third layer
                if (s == 1):
                    tempTrain = np.array([train[:,inds3],predY,predYY])
                    tempTest = np.array([test[:,inds3],predY2,predYY2])
                    model = gridSearch4CatBoost(tempTrain, train[:,-1],n_estimators, max_depth)
#                     predY = model.fit(tempTrain, train[:,-1]).predict(tempTrain)
                    predY2 = model.predict(tempTest)
    return predY2

def deepCatBoostForward(train, test,importances,ind,n_estimators, max_depth):
#     ind = ind[::-1]
    s = 0
    inds = []
    inds2 = []
    inds3 = []
    for i in ind:
        s = s + importances[i]
        inds.append(i)
#     10% importance for the first layer
        if (s > 0.05):
            model = gridSearch4CatBoost(train[:,inds], train[:,-1],n_estimators, max_depth)
            predY = model.fit(train[:,inds], train[:,-1]).predict(train[:,inds])
            predY2 = model.predict(test[:,inds])
        else:
            inds2.append(i)
#     20% importance for the second layer
            if (s > 0.1):
                tempTrain = np.array([train[:,inds2],predY])
                tempTest = np.array([test[:,inds2],predY2])
                model = gridSearch4CatBoost(tempTrain, train[:,-1],n_estimators, max_depth)
                predYY = model.fit(tempTrain, train[:,-1]).predict(tempTrain)
                predYY2 = model.predict(tempTest)
            else:
                inds3.append(i)
#     10% importance for the third layer
                if (s == 1):
                    tempTrain = np.array([train[:,inds3],predY,predYY])
                    tempTest = np.array([test[:,inds3],predY2,predYY2])
                    model = gridSearch4CatBoost(tempTrain, train[:,-1],n_estimators, max_depth)
#                     predY = model.fit(tempTrain, train[:,-1]).predict(tempTrain)
                    predY2 = model.predict(tempTest)
    return predY2

def gridSearch4CatBoost(X,y,n_estimators, max_depth):
#     cbc = CatBoostRegressor(verbose=False,task_type="GPU",devices='0:1')
# #     cbc = RandomForestRegressor()
#     grid = {'max_depth': [3,4,5],'n_estimators':[100,300,500]}
#     gscv = GridSearchCV (estimator = cbc, param_grid = grid, cv = 10)
#     gscv.fit(X,y)
#     return gscv.best_estimator_
    cbc = CatBoostRegressor(n_estimators=n_estimators, max_depth=max_depth,verbose=False,task_type="GPU",devices='0:1').fit(X,y)
    return cbc
#     return model.fit(X,y).predict(testX)

def KFoldForward(data, ind,n_estimators, max_depth, n_fold=10):
#     num = len(data)
    diff = int(len(data)/n_fold)
    results = np.zeros((n_fold, 4))
    for i in range(n_fold):
        begin = diff*i
        end = diff*(i+1)
#         if i == n_fold-1:
#             end = -1
        test = data[begin:end]
        train = deepcopy(data)
        train = np.delete(train, range(begin, end),axis=0)
        X_train, y_train = train[:,:-1], train[:,-1]
        X_test, y = test[:,:-1], test[:,-1]
#         predictY = model.fit(X_train, y_train).predict(X_test)
        predictY = deepCatBoostForward(train, test,importances,ind,n_estimators, max_depth)
        mae = np.mean(abs((y-predictY)))
        stdErr = np.std(((y-predictY)))
        error=sum((y-predictY)**2)
        RMSE=np.sqrt(error/len(y))
        MAC = np.dot(y,predictY)**2/(np.dot(y, y)*np.dot(predictY, predictY))
#         print(mae, stdErr, RMSE, MAC)
        results[i,:] = [mae, stdErr, RMSE, MAC]
    return results

def KFoldBackward(data, ind,n_estimators, max_depth, n_fold=10):
#     num = len(data)
    diff = int(len(data)/n_fold)
    results = np.zeros((n_fold, 4))
    for i in range(n_fold):
        begin = diff*i
        end = diff*(i+1)
#         if i == n_fold-1:
#             end = -1
        test = data[begin:end]
        train = deepcopy(data)
        train = np.delete(train, range(begin, end),axis=0)
        X_train, y_train = train[:,:-1], train[:,-1]
        X_test, y = test[:,:-1], test[:,-1]
#         predictY = model.fit(X_train, y_train).predict(X_test)
        predictY = deepCatBoostBackward(train, test,importances,ind,n_estimators, max_depth)
        mae = np.mean(abs((y-predictY)))
        stdErr = np.std(((y-predictY)))
        error=sum((y-predictY)**2)
        RMSE=np.sqrt(error/len(y))
        MAC = np.dot(y,predictY)**2/(np.dot(y, y)*np.dot(predictY, predictY))
#         print(mae, stdErr, RMSE, MAC)
        results[i,:] = [mae, stdErr, RMSE, MAC]
    return results

In [19]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
# n_estimators  = [500]
# max_depth = [5]
epochs=1
cats = np.zeros((epochs,4))
model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])
importances = model.feature_importances_
ind = np.argsort(importances)
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
#             model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])
# #             model = CatBoostRegressor(n_estimators=300, max_depth=3,verbose=False,task_type="GPU",devices='0:1').fit(data[:,:-1], data[:,-1])#n_estimators=i, max_depth=j,
#             importances = model.feature_importances_
#             ind = np.argsort(importances)
        #     model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False,task_type="GPU",devices='0:1')
            result = KFoldForward(data, ind,n_estimators=i, max_depth=j)
            cats[epoch,:] = np.mean(result, axis=0)
        print(np.mean(cats, axis=0))
print(time()-t1)

[10.02599628 12.19771749 12.31747506  0.96982512]
[10.09697248 12.3664927  12.4633498   0.96891066]
[10.16656791 12.48160174 12.54633739  0.96835771]
[10.02427444 12.27954778 12.32675205  0.96942658]
[10.24930856 12.56558843 12.59262017  0.96798175]
[10.47714597 12.74954193 12.90193104  0.9669361 ]
[ 9.91851098 12.16676187 12.19188725  0.96998233]
[ 9.92389219 12.08714154 12.19196674  0.97025686]
[ 9.93044594 12.05314709 12.17025437  0.97060726]
1460.5511026382446


In [22]:
t1 = time()
model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])
print((time()-t1)*8)

1.0122547149658203


In [23]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
n_estimators  = [500]
max_depth = [3]
epochs=20
cats = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])
#             model = CatBoostRegressor(n_estimators=300, max_depth=3,verbose=False,task_type="GPU",devices='0:1').fit(data[:,:-1], data[:,-1])#n_estimators=i, max_depth=j,
            importances = model.feature_importances_
            ind = np.argsort(importances)
        #     model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False,task_type="GPU",devices='0:1')
            result = KFoldForward(data, ind,n_estimators=i, max_depth=j)
            cats[epoch,:] = np.mean(result, axis=0)
        print(np.mean(cats, axis=0))
print(time()-t1)













[ 9.92262237 12.12626734 12.1864917   0.97014444]
17018.216717481613


In [24]:
cats

array([[ 9.9457778 , 12.15410695, 12.20718685,  0.96998426],
       [ 9.87816565, 12.08379659, 12.131561  ,  0.97036324],
       [ 9.96333035, 12.13892254, 12.21191774,  0.97006691],
       [ 9.94629249, 12.16525817, 12.22826915,  0.97002917],
       [ 9.89597693, 12.11384485, 12.14679461,  0.97008102],
       [ 9.92582524, 12.11782896, 12.18720789,  0.97016385],
       [ 9.89964746, 12.14149588, 12.20102104,  0.97010568],
       [ 9.94273662, 12.15838756, 12.22201859,  0.96996388],
       [ 9.90955944, 12.08682837, 12.17070039,  0.97035413],
       [ 9.90741985, 12.12839941, 12.17421221,  0.97009194],
       [ 9.94988155, 12.15454159, 12.22451586,  0.97007532],
       [ 9.89524986, 12.10234862, 12.15339259,  0.97037224],
       [ 9.96307217, 12.12478436, 12.23264847,  0.97011814],
       [ 9.85745772, 12.04636952, 12.1004503 ,  0.97048255],
       [ 9.92865824, 12.12240626, 12.1751921 ,  0.97017626],
       [ 9.9398465 , 12.16566184, 12.23270255,  0.97004039],
       [ 9.91971528, 12.

In [15]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
n_estimators  = [500]
max_depth = [4]
# epochs=1
cats = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])
#             model = CatBoostRegressor(n_estimators=300, max_depth=3,verbose=False,task_type="GPU",devices='0:1').fit(data[:,:-1], data[:,-1])#n_estimators=i, max_depth=j,
            importances = model.feature_importances_
            ind = np.argsort(importances)
        #     model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False,task_type="GPU",devices='0:1')
            result = KFoldForward(data, ind,n_estimators=i, max_depth=j)
            cats[epoch,:] = np.mean(result, axis=0)
        print(np.mean(cats, axis=0))
print(time()-t1)













[ 9.94254354 12.1419744  12.2103823   0.9700465 ]
16763.375135183334


In [25]:
catsf = np.array([[ 9.9457778 , 12.15410695, 12.20718685,  0.96998426],
       [ 9.87816565, 12.08379659, 12.131561  ,  0.97036324],
       [ 9.96333035, 12.13892254, 12.21191774,  0.97006691],
       [ 9.94629249, 12.16525817, 12.22826915,  0.97002917],
       [ 9.89597693, 12.11384485, 12.14679461,  0.97008102],
       [ 9.92582524, 12.11782896, 12.18720789,  0.97016385],
       [ 9.89964746, 12.14149588, 12.20102104,  0.97010568],
       [ 9.94273662, 12.15838756, 12.22201859,  0.96996388],
       [ 9.90955944, 12.08682837, 12.17070039,  0.97035413],
       [ 9.90741985, 12.12839941, 12.17421221,  0.97009194],
       [ 9.94988155, 12.15454159, 12.22451586,  0.97007532],
       [ 9.89524986, 12.10234862, 12.15339259,  0.97037224],
       [ 9.96307217, 12.12478436, 12.23264847,  0.97011814],
       [ 9.85745772, 12.04636952, 12.1004503 ,  0.97048255],
       [ 9.92865824, 12.12240626, 12.1751921 ,  0.97017626],
       [ 9.9398465 , 12.16566184, 12.23270255,  0.97004039],
       [ 9.91971528, 12.13818935, 12.18086974,  0.97005423],
       [ 9.91304142, 12.11889441, 12.15965609,  0.97020866],
       [ 9.94313091, 12.17548149, 12.22020217,  0.96991667],
       [ 9.92766182, 12.08780013, 12.16931457,  0.97024024]])

array([[ 9.9457778 , 12.15410695, 12.20718685,  0.96998426],
       [ 9.87816565, 12.08379659, 12.131561  ,  0.97036324],
       [ 9.96333035, 12.13892254, 12.21191774,  0.97006691],
       [ 9.94629249, 12.16525817, 12.22826915,  0.97002917],
       [ 9.89597693, 12.11384485, 12.14679461,  0.97008102],
       [ 9.92582524, 12.11782896, 12.18720789,  0.97016385],
       [ 9.89964746, 12.14149588, 12.20102104,  0.97010568],
       [ 9.94273662, 12.15838756, 12.22201859,  0.96996388],
       [ 9.90955944, 12.08682837, 12.17070039,  0.97035413],
       [ 9.90741985, 12.12839941, 12.17421221,  0.97009194],
       [ 9.94988155, 12.15454159, 12.22451586,  0.97007532],
       [ 9.89524986, 12.10234862, 12.15339259,  0.97037224],
       [ 9.96307217, 12.12478436, 12.23264847,  0.97011814],
       [ 9.85745772, 12.04636952, 12.1004503 ,  0.97048255],
       [ 9.92865824, 12.12240626, 12.1751921 ,  0.97017626],
       [ 9.9398465 , 12.16566184, 12.23270255,  0.97004039],
       [ 9.91971528, 12.

In [8]:
t1 = time()
n_estimators= [100,300,500]
max_depth = [3,4, 5]
n_estimators  = [500]
max_depth = [3]
epochs=20
cats = np.zeros((epochs,4))
for i in n_estimators:
    for j in max_depth:
        for epoch in range(epochs):
            data = shuffle(data)
            model = RandomForestRegressor().fit(data[:,:-1], data[:,-1])#n_estimators=500, max_depth=5
#             model = CatBoostRegressor(n_estimators=300, max_depth=3,verbose=False,task_type="GPU",devices='0:1').fit(data[:,:-1], data[:,-1])#n_estimators=i, max_depth=j,
            importances = model.feature_importances_
            ind = np.argsort(importances)
        #     model = CatBoostRegressor(n_estimators=i, max_depth=j,verbose=False,task_type="GPU",devices='0:1')
            result = KFoldForward(data, ind,n_estimators=i, max_depth=j)
            cats[epoch,:] = np.mean(result, axis=0)
        print(np.mean(cats, axis=0))
print(time()-t1)
# cats = np.array([[10.28976097, 12.59481721, 12.65070932,  0.96624157],
#        [10.30024397, 12.63062878, 12.69241147,  0.96602366],
#        [10.33083445, 12.59531248, 12.69207518,  0.96621038],
#        [10.24792874, 12.58840255, 12.61996367,  0.96629239],
#        [10.30794189, 12.6137253 , 12.69108725,  0.96614657],
#        [10.29798972, 12.62405592, 12.67647327,  0.96602566],
#        [10.32873924, 12.61653428, 12.71578044,  0.9661013 ],
#        [10.28083714, 12.57314053, 12.62959809,  0.96616244],
#        [10.30403658, 12.63575339, 12.68464483,  0.9660592 ],
#        [10.26307977, 12.59914586, 12.64763497,  0.96622729],
#        [10.32173914, 12.6809491 , 12.72575812,  0.96570388],
#        [10.28004193, 12.61886929, 12.65076916,  0.96597413],
#        [10.3444059 , 12.61383646, 12.74207457,  0.96609326],
#        [10.3266248 , 12.59553247, 12.74308068,  0.96601364],
#        [10.34485756, 12.65096954, 12.7355711 ,  0.96579763],
#        [10.30052081, 12.5984135 , 12.68527344,  0.96601561],
#        [10.26237152, 12.56257937, 12.61511786,  0.96626345],
#        [10.36245171, 12.62669499, 12.71238474,  0.96595044],
#        [10.32223432, 12.68663689, 12.72457832,  0.96568758],
#        [10.28316186, 12.56110305, 12.67896506,  0.96627807]])



[10.3049901  12.61335505 12.68569758  0.96606341]
9377.573452949524


In [2]:
catsf = np.array([[ 9.9457778 , 12.15410695, 12.20718685,  0.96998426],
       [ 9.87816565, 12.08379659, 12.131561  ,  0.97036324],
       [ 9.89597693, 12.11384485, 12.14679461,  0.97008102],
       [ 9.94629249, 12.16525817, 12.22826915,  0.97002917],
       [ 9.89597693, 12.11384485, 12.14679461,  0.97008102],
       [ 9.92582524, 12.11782896, 12.18720789,  0.97016385],
       [ 9.89964746, 12.14149588, 12.20102104,  0.97010568],
       [ 9.94273662, 12.15838756, 12.22201859,  0.96996388],
       [ 9.90955944, 12.08682837, 12.17070039,  0.97035413],
       [ 9.90741985, 12.12839941, 12.17421221,  0.97009194],
       [ 9.94988155, 12.15454159, 12.22451586,  0.97007532],
       [ 9.89524986, 12.10234862, 12.15339259,  0.97037224],
       [ 9.85745772, 12.04636952, 12.1004503 ,  0.97048255],
       [ 9.85745772, 12.04636952, 12.1004503 ,  0.97048255],
       [ 9.92865824, 12.12240626, 12.1751921 ,  0.97017626],
       [ 9.9398465 , 12.16566184, 12.23270255,  0.97004039],
       [ 9.91971528, 12.13818935, 12.18086974,  0.97005423],
       [ 9.91304142, 12.11889441, 12.15965609,  0.97020866],
       [ 9.94313091, 12.17548149, 12.22020217,  0.96991667],
       [ 9.92766182, 12.08780013, 12.16931457,  0.97024024]])

In [7]:
mlps = np.array([[10.24702922, 12.4247865 , 12.51196346,  0.96868107],
       [10.23508496, 12.48977467, 12.54664674,  0.96837363],
       [10.27581518, 12.51125036, 12.56710662,  0.96815556],
       [10.20434426, 12.46428498, 12.49276779,  0.96844304],
       [10.21449042, 12.45770235, 12.50272909,  0.96849756],
       [10.25645146, 12.46623805, 12.5393194 ,  0.9685545 ],
       [10.25293692, 12.43832785, 12.53255566,  0.96856773],
       [10.24015863, 12.436593  , 12.52160923,  0.96859482],
       [10.2480341 , 12.470938  , 12.53053366,  0.96842864],
       [10.23634332, 12.40500003, 12.52486652,  0.96858968],
       [10.25203841, 12.4111212 , 12.51169896,  0.96865791],
       [10.2657211 , 12.48702409, 12.52070581,  0.96825708],
       [10.2297428 , 12.46844311, 12.50788628,  0.96838222],
       [10.27715139, 12.53746928, 12.5668944 ,  0.96816509],
       [10.23211151, 12.48228443, 12.51443667,  0.96832019],
       [10.26042647, 12.36532915, 12.54515646,  0.96866462],
       [10.26542217, 12.45723524, 12.53552954,  0.96854895],
       [10.26909359, 12.47013094, 12.55662755,  0.96848665],
       [10.24235693, 12.44073885, 12.52255106,  0.96863832],
       [10.2370353 , 12.48227198, 12.51132745,  0.96828901]])
svms = np.array([[10.24576414, 12.49357116, 12.55514368,  0.96833862],
       [10.2491687 , 12.47751752, 12.55823731,  0.96835725],
       [10.25846759, 12.52283016, 12.5691685 ,  0.96817818],
       [10.2676875 , 12.48046173, 12.58049292,  0.96834835],
       [10.24327115, 12.45105008, 12.54003333,  0.96848824],
       [10.22403406, 12.47737767, 12.54654033,  0.9683862 ],
       [10.25691613, 12.50592963, 12.57059028,  0.9681622 ],
       [10.21143418, 12.47669542, 12.50463541,  0.96841276],
       [10.28658116, 12.46853705, 12.58800699,  0.96837609],
       [10.26027499, 12.54023721, 12.55535575,  0.96813417],
       [10.25265996, 12.45951932, 12.56551005,  0.96837222],
       [10.21771729, 12.48513817, 12.53065884,  0.96844134],
       [10.28842344, 12.44818886, 12.57823585,  0.96853768],
       [10.25590049, 12.52741537, 12.54361508,  0.96818803],
       [10.22292424, 12.45289984, 12.53401607,  0.96845975],
       [10.26644024, 12.49279487, 12.55935877,  0.96831544],
       [10.25788975, 12.46979611, 12.55379284,  0.96846172],
       [10.25631993, 12.46823251, 12.55237473,  0.96842887],
       [10.2587265 , 12.50359809, 12.58542461,  0.96822413],
       [10.23393093, 12.50629033, 12.53181869,  0.96816989]])
rfs = np.array([[10.29584987, 12.45924653, 12.61002881,  0.96839795],
       [10.24455504, 12.49253891, 12.57532794,  0.96829881],
       [10.27272134, 12.56972272, 12.63197613,  0.96793714],
       [10.29415792, 12.57987838, 12.64072529,  0.96783943],
       [10.36607208, 12.55882154, 12.70298689,  0.96788023],
       [10.33880184, 12.54029621, 12.62196298,  0.96777525],
       [10.21252659, 12.48420344, 12.53068994,  0.96839397],
       [10.38396127, 12.68308583, 12.7259957 ,  0.96738905],
       [10.28169394, 12.55368799, 12.62808287,  0.96809904],
       [10.35976357, 12.59073452, 12.68311803,  0.96793838],
       [10.28821046, 12.57053378, 12.63275849,  0.96793685],
       [10.28914472, 12.58758886, 12.62673155,  0.96779746],
       [10.32236837, 12.55668605, 12.62873629,  0.96784441],
       [10.29381964, 12.5330877 , 12.59971915,  0.96812762],
       [10.32538291, 12.50460568, 12.6208809 ,  0.96805346],
       [10.26123903, 12.4840676 , 12.53207284,  0.96817104],
       [10.39138504, 12.71374584, 12.7279281 ,  0.96726438],
       [10.29961192, 12.59227285, 12.63769819,  0.96780373],
       [10.21825561, 12.46898925, 12.55412443,  0.96840277],
       [10.22583113, 12.512561  , 12.54773211,  0.96829837]])
xgbs = np.array([[10.17236014, 12.47224944, 12.52692333,  0.96836986],
       [10.25268184, 12.57112088, 12.60946838,  0.96794548],
       [10.22945911, 12.54412072, 12.59088005,  0.96805575],
       [10.1825717 , 12.4952832 , 12.5481748 ,  0.96827154],
       [10.26881711, 12.48951399, 12.61018107,  0.96831292],
       [10.16997033, 12.43585955, 12.53119892,  0.96858836],
       [10.20721584, 12.47866681, 12.54596335,  0.96838111],
       [10.27591428, 12.59465086, 12.63968254,  0.96766674],
       [10.17153807, 12.37616913, 12.49048956,  0.96862278],
       [10.13954746, 12.34127814, 12.48297606,  0.96896768],
       [10.17891239, 12.43703647, 12.52410432,  0.96858669],
       [10.23626408, 12.4275139 , 12.56417387,  0.9685997 ],
       [10.24604296, 12.5414683 , 12.59780891,  0.96814858],
       [10.16829383, 12.37999568, 12.4860957 ,  0.96892873],
       [10.12531774, 12.417264  , 12.46898974,  0.96875531],
       [10.24646563, 12.48205608, 12.56506835,  0.96837264],
       [10.13384484, 12.45981684, 12.50533259,  0.96849657],
       [10.23285533, 12.51173097, 12.54563005,  0.96828896],
       [10.19589955, 12.36876242, 12.49281216,  0.96877406],
       [10.17918739, 12.4489574 , 12.51900663,  0.96859424]])
cats = np.array([[10.03374446, 12.27614294, 12.34044757,  0.9693041 ],
       [10.04587756, 12.26567573, 12.34706646,  0.96948862],
       [10.04502916, 12.22580058, 12.34249277,  0.96970859],
       [10.03753257, 12.28656396, 12.33982192,  0.96927433],
       [10.04205342, 12.27344787, 12.34609054,  0.96945708],
       [10.08868391, 12.27626064, 12.39598836,  0.96947063],
       [10.116552  , 12.41101843, 12.44852049,  0.96874994],
       [10.07480935, 12.29901086, 12.37679264,  0.96928802],
       [10.04104189, 12.26511336, 12.34234951,  0.96953258],
       [10.07176815, 12.25110702, 12.36209175,  0.96936625],
       [10.03275069, 12.2945381 , 12.35806322,  0.96937684],
       [ 9.9938698 , 12.25782531, 12.30746019,  0.96956656],
       [10.04232959, 12.2440261 , 12.3214168 ,  0.96941181],
       [10.06774758, 12.30907938, 12.37620875,  0.96922782],
       [10.04792167, 12.24992286, 12.32090525,  0.96958147],
       [10.09741492, 12.31606655, 12.38112575,  0.96912508],
       [10.1093944 , 12.38476963, 12.4272966 ,  0.96889634],
       [10.00203101, 12.2295399 , 12.31762703,  0.96948157],
       [10.04324076, 12.27765625, 12.31817313,  0.96945299],
       [10.06476125, 12.25022514, 12.34484986,  0.969591  ]])
catsf = np.array([[ 9.90830935, 12.10753229, 12.18383373,  0.97026204],
       [ 9.92641942, 12.22124249, 12.26658453,  0.96974407],
       [ 9.91847887, 12.13001258, 12.17311815,  0.97008284],
       [ 9.93779175, 12.12516811, 12.19859709,  0.96996375],
       [ 9.92540993, 12.07721524, 12.18813348,  0.9704428 ],
       [ 9.95602754, 12.19832287, 12.21377384,  0.96979457],
       [ 9.99120175, 12.22682147, 12.26351748,  0.96967742],
       [10.03281195, 12.26705365, 12.32091854,  0.96950479],
       [ 9.95093558, 12.12572228, 12.20599164,  0.97001858],
       [ 9.96984203, 12.09127016, 12.20229892,  0.97011115],
       [ 9.94470872, 12.09708265, 12.21460167,  0.97008893],
       [ 9.9615292 , 12.12986669, 12.21678923,  0.97016799],
       [ 9.91669683, 12.17702889, 12.21861481,  0.96998126],
       [ 9.98092611, 12.18506859, 12.24662308,  0.96990498],
       [ 9.92028437, 12.14882238, 12.18286739,  0.97008396],
       [ 9.90959297, 12.09663598, 12.14543957,  0.970282  ],
       [ 9.89626898, 12.04813558, 12.1605923 ,  0.97050326],
       [10.00449945, 12.220897  , 12.27287276,  0.96970561],
       [ 9.88100298, 12.06438873, 12.15770816,  0.97050562],
       [ 9.91813291, 12.10120041, 12.17476972,  0.97010441]])
catsf = np.array([[ 9.9457778 , 12.15410695, 12.20718685,  0.96998426],
       [ 9.87816565, 12.08379659, 12.131561  ,  0.97036324],
       [ 9.89597693, 12.11384485, 12.14679461,  0.97008102],
       [ 9.94629249, 12.16525817, 12.22826915,  0.97002917],
       [ 9.89597693, 12.11384485, 12.14679461,  0.97008102],
       [ 9.92582524, 12.11782896, 12.18720789,  0.97016385],
       [ 9.89964746, 12.14149588, 12.20102104,  0.97010568],
       [ 9.94273662, 12.15838756, 12.22201859,  0.96996388],
       [ 9.90955944, 12.08682837, 12.17070039,  0.97035413],
       [ 9.90741985, 12.12839941, 12.17421221,  0.97009194],
       [ 9.94988155, 12.15454159, 12.22451586,  0.97007532],
       [ 9.89524986, 12.10234862, 12.15339259,  0.97037224],
       [ 9.85745772, 12.04636952, 12.1004503 ,  0.97048255],
       [ 9.85745772, 12.04636952, 12.1004503 ,  0.97048255],
       [ 9.92865824, 12.12240626, 12.1751921 ,  0.97017626],
       [ 9.9398465 , 12.16566184, 12.23270255,  0.97004039],
       [ 9.91971528, 12.13818935, 12.18086974,  0.97005423],
       [ 9.91304142, 12.11889441, 12.15965609,  0.97020866],
       [ 9.94313091, 12.17548149, 12.22020217,  0.96991667],
       [ 9.92766182, 12.08780013, 12.16931457,  0.97024024]])

In [8]:
print(mlps.std(axis=0))
print(svms.std(axis=0))
print(rfs.std(axis=0))
print(xgbs.std(axis=0))
print(cats.std(axis=0))
print(catsf.std(axis=0))

[0.0187984  0.03778355 0.02014713 0.00015988]
[0.02011393 0.02532179 0.0204067  0.00011932]
[0.05087495 0.06394397 0.05594426 0.00030222]
[0.04424191 0.06695527 0.04737395 0.00031724]
[0.03106513 0.04539195 0.03578839 0.00022752]
[0.02757343 0.03644924 0.03877102 0.00016422]


In [9]:
print(np.mean(catsf, axis=0))

[ 9.91397397 12.12109272 12.17662563  0.97016336]


In [10]:
def getTestResult(f1,f2,i):
    print(ranksums(np.asarray(f1[:,i]), np.asarray(f2[:,i])).pvalue)

i = 0
print(getTestResult(mlps, catsf,i))
print(getTestResult(svms, catsf,i))
print(getTestResult(rfs, catsf,i))
print(getTestResult(xgbs, catsf,i))
print(getTestResult(cats, catsf,i))

i = 2
print(getTestResult(mlps, catsf,i))
print(getTestResult(svms, catsf,i))
print(getTestResult(rfs, catsf,i))
print(getTestResult(xgbs, catsf,i))
print(getTestResult(cats, catsf,i))

6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
6.301848221392269e-08
None
