In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import numpy as np
import pandas as pd

In [4]:
from xgboost import XGBClassifier
from lightgbm.sklearn import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [5]:
X = np.load("./tatanic_X_train.npy")
y = np.load("./tatanic_y_train.npy")

In [6]:
estimator1 = XGBClassifier(max_depth=3, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator2 = LGBMClassifier(max_depth=2, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator3 = RandomForestClassifier(n_estimators=500, max_depth=3, n_jobs=-1)
estimator4 = SVC(probability=True)
estimator5 = MLPClassifier(hidden_layer_sizes=(512,256, 32))

In [7]:
base_estimators = [estimator1, estimator2, estimator3, estimator4, estimator5]

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) # subset1 = train, 2 = test

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((533, 27), (356, 27), (533,), (356,))

In [10]:
for estimator in base_estimators:
            estimator.fit(X_train, y_train)

In [11]:
base_estimators[0].predict_proba(X_test)

array([[8.96916449e-01, 1.03083551e-01],
       [3.50505114e-02, 9.64949489e-01],
       [8.86556208e-01, 1.13443799e-01],
       [7.00740099e-01, 2.99259871e-01],
       [6.82833791e-01, 3.17166179e-01],
       [9.70499158e-01, 2.95008402e-02],
       [9.69199181e-01, 3.08007989e-02],
       [4.83441949e-02, 9.51655805e-01],
       [9.87133384e-01, 1.28665995e-02],
       [9.70299006e-01, 2.97009796e-02],
       [9.56243992e-01, 4.37559858e-02],
       [1.65725946e-01, 8.34274054e-01],
       [9.49824274e-01, 5.01757152e-02],
       [8.81482542e-01, 1.18517473e-01],
       [9.89593089e-01, 1.04069291e-02],
       [9.53805149e-01, 4.61948626e-02],
       [8.21174085e-01, 1.78825900e-01],
       [5.44881225e-02, 9.45511878e-01],
       [4.18843806e-01, 5.81156194e-01],
       [8.68553579e-01, 1.31446406e-01],
       [8.36073816e-01, 1.63926184e-01],
       [8.22206497e-01, 1.77793518e-01],
       [9.12549913e-01, 8.74500722e-02],
       [3.72189283e-03, 9.96278107e-01],
       [9.433577

In [12]:
meta_train_set = np.array([estimator.predict(X_test) for estimator in base_estimators]).T

In [13]:
meta_train_set # 356 x 5 (0, 1 값)

array([[0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1.],
       [0., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [14]:
from sklearn.model_selection import cross_val_score

In [15]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.8459523809523809
0.8571428571428571
0.8346825396825397
0.8403174603174601
0.8515079365079365


In [16]:
meta_train_set2 = np.array([estimator.predict_proba(X_test)[:,1] for estimator in base_estimators]).T

In [17]:
meta_train_set2 # 확률값

array([[0.10308355, 0.09543753, 0.16146157, 0.15797022, 0.05072441],
       [0.96494949, 0.96364846, 0.84243441, 0.82233538, 0.99999957],
       [0.1134438 , 0.13207876, 0.19234638, 0.15014122, 0.16712905],
       ...,
       [0.193468  , 0.19001659, 0.21429586, 0.15293937, 0.0407104 ],
       [0.11788797, 0.16160413, 0.16710508, 0.16171437, 0.30355288],
       [0.05863315, 0.0754453 , 0.13984346, 0.15731554, 0.15004984]])

In [18]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set2, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.7925396825396825
0.8038095238095238
0.8261904761904761
0.8401587301587302
0.8402380952380952


In [19]:
new_X_test = np.concatenate([X_test, meta_train_set2], axis = 1)
new_X_test.shape

(356, 32)

In [20]:
for estimator in base_estimators:
    result = cross_val_score(estimator, new_X_test, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.7756349206349207
0.7815079365079365
0.8316666666666667
0.8374603174603175
0.7838888888888889
