In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import HistGradientBoostingClassifier

In [2]:
# Read the fetal-health table; every column except the target is a feature.
fetal = pd.read_csv("../fetal_health.csv")
X = fetal.drop(columns=["fetal_health"]).values
# Cast the target to int and shift it down by one
# (presumably the CSV labels are 1-based, giving 0-based classes -- TODO confirm).
y = fetal["fetal_health"].values.astype(int) - 1

In [3]:
# Select rows that repeat an earlier row. The result is neither stored nor
# displayed, because it is not the last expression of the cell.
fetal.loc[fetal.duplicated()]
# De-duplicated copy (first occurrence kept); `fetal` itself is left unchanged.
# NOTE(review): X and y are built from `fetal`, not `fetal_dup` -- confirm
# whether the de-duplicated frame was meant to feed the models.
fetal_dup = fetal.drop_duplicates()
# Pairwise correlation of the original (non-de-duplicated) frame.
corr = fetal.corr()

In [4]:
# Disabled experiment (a): keep only the columns whose absolute correlation
# with the target exceeds 0.10.
#X_remove = fetal[corr[abs(corr['fetal_health']) > 0.10]['fetal_health'].index]
#X_cor = X_remove.drop(['fetal_health'], axis=1)
#y_cor = fetal['fetal_health']

# Disabled experiment (b): standardise the features before splitting.
#s_scaler = preprocessing.StandardScaler()
#X_trans= s_scaler.fit_transform(X)

# 70/30 hold-out split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
    stratify=y,
)

In [5]:
def get_results_simple(model, prediction, y_true=None):
    """Return a dict of summary classification metrics for one set of predictions.

    Parameters
    ----------
    model : object
        Not used by the computation; kept so existing
        ``get_results_simple(model, prediction)`` calls keep working.
    prediction : array-like
        Predicted class labels, aligned element-wise with ``y_true``.
    y_true : array-like, optional
        Ground-truth labels. When omitted, the notebook-level ``y_test``
        is used, which matches the previous behaviour.

    Returns
    -------
    dict
        Keys ``test_accuracy`` (rounded to 4 decimals), ``recall``,
        ``f1_score`` and ``precision`` (each rounded to 3 decimals).
    """
    if y_true is None:
        y_true = y_test

    # Use the same averaging for recall, F1 and precision so the three numbers
    # are comparable (F1 was previously micro-averaged while the other two
    # were weighted).
    # Score over every class present instead of only the predicted ones: a
    # class the model never predicts must count against it rather than be
    # dropped from the average. zero_division=0 scores such a class as 0
    # without emitting a warning.
    score_kwargs = {"average": "weighted", "zero_division": 0}

    return {
        "test_accuracy": round(accuracy_score(y_true, prediction), 4),
        "recall": round(recall_score(y_true, prediction, **score_kwargs), 3),
        "f1_score": round(f1_score(y_true, prediction, **score_kwargs), 3),
        "precision": round(precision_score(y_true, prediction, **score_kwargs), 3),
    }

## Stacking (2 models)

In [6]:
# Baseline: a single XGBoost classifier with a fixed seed.
boost = XGBClassifier(
    random_state=123,
    verbosity=0,
    use_label_encoder=False,
)
boost.fit(X_train, y_train)

# Report mean accuracy on the training and held-out splits.
print(
    "Training Accuracy: %0.3f" % boost.score(X_train, y_train),
    "Test Accuracy: %0.3f" % boost.score(X_test, y_test),
    sep="\n",
)

Training Accuracy: 1.000
Test Accuracy: 0.958


In [7]:
# Test-set metrics for the XGBoost baseline, plus its training accuracy.
prediction = boost.predict(X_test)
gb_result = {
    **get_results_simple(boost, prediction),
    'train_accuracy': round(boost.score(X_train, y_train), 3),
}
gb_result

{'test_accuracy': 0.9577,
 'recall': 0.958,
 'f1_score': 0.958,
 'precision': 0.957,
 'train_accuracy': 1.0}

In [7]:
# Base learners: a random forest and the XGBoost model defined earlier.
clf1, clf2 = RandomForestClassifier(random_state=123), boost

# Meta-learner that combines the base learners' outputs.
lr = LogisticRegression(random_state=123)

estimators = list(zip(('clf1', 'clf2'), (clf1, clf2)))

# cv=10 is passed to the stacker as its cross-validation setting.
sclf_2 = StackingClassifier(
    estimators=estimators,
    final_estimator=lr,
    cv=10,
)

sclf_2.fit(X_train, y_train)

StackingClassifier(cv=10,
                   estimators=[('clf1',
                                RandomForestClassifier(random_state=123)),
                               ('clf2',
                                XGBClassifier(base_score=0.5, booster='gbtree',
                                              colsample_bylevel=1,
                                              colsample_bynode=1,
                                              colsample_bytree=1,
                                              enable_categorical=False, gamma=0,
                                              gpu_id=-1, importance_type=None,
                                              interaction_constraints='',
                                              learning_rate=0.300000012,
                                              max_delta_step=0, max_depth=6,
                                              min_child_weight=1, missing=nan,
                                              monotone_constraints='()',
    

In [8]:
# Mean accuracy of the two-model stack on the training and held-out splits.
print(
    f"Training Accuracy: {sclf_2.score(X_train, y_train):0.3f}",
    f"Test Accuracy: {sclf_2.score(X_test, y_test):0.3f}",
    sep="\n",
)

Training Accuracy: 1.000
Test Accuracy: 0.956


In [9]:
# Test-set metrics for the two-model stack, plus its training accuracy.
prediction = sclf_2.predict(X_test)
stacking_2_result = {
    **get_results_simple(sclf_2, prediction),
    'train_accuracy': round(sclf_2.score(X_train, y_train), 3),
}
stacking_2_result

{'test_accuracy': 0.9561,
 'recall': 0.956,
 'f1_score': 0.956,
 'precision': 0.955,
 'train_accuracy': 1.0}

## Stacking (5 models)

In [10]:
# Five base learners; every seedable one uses the same fixed seed.
clf1 = KNeighborsClassifier(n_neighbors=5)
clf2 = RandomForestClassifier(random_state=123)
clf3 = AdaBoostClassifier(random_state=123)
clf4 = DecisionTreeClassifier(random_state=123, max_depth=None)
clf5 = HistGradientBoostingClassifier(random_state=123)

# Meta-learner that combines the base learners' outputs.
lr = LogisticRegression(random_state=123)

# (name, estimator) pairs in the order the stacker receives them.
estimators = list({
    'clf1': clf1,
    'clf2': clf2,
    'clf3': clf3,
    'clf4': clf4,
    'clf5': clf5,
}.items())

sclf_5 = StackingClassifier(
    estimators=estimators,
    final_estimator=lr,
    cv=10,
)

sclf_5.fit(X_train, y_train)

StackingClassifier(cv=10,
                   estimators=[('clf1', KNeighborsClassifier()),
                               ('clf2',
                                RandomForestClassifier(random_state=123)),
                               ('clf3', AdaBoostClassifier(random_state=123)),
                               ('clf4',
                                DecisionTreeClassifier(random_state=123)),
                               ('clf5',
                                HistGradientBoostingClassifier(random_state=123))],
                   final_estimator=LogisticRegression(random_state=123))

In [11]:
# Mean accuracy of the five-model stack on the training and held-out splits.
print("\n".join((
    "Training Accuracy: %0.3f" % sclf_5.score(X_train, y_train),
    "Test Accuracy: %0.3f" % sclf_5.score(X_test, y_test),
)))

Training Accuracy: 1.000
Test Accuracy: 0.956


In [12]:
# Test-set metrics for the five-model stack, plus its training accuracy.
prediction = sclf_5.predict(X_test)
stacking_5_result = get_results_simple(sclf_5, prediction)
stacking_5_result.update({'train_accuracy': round(sclf_5.score(X_train, y_train), 3)})
stacking_5_result

{'test_accuracy': 0.9561,
 'recall': 0.956,
 'f1_score': 0.956,
 'precision': 0.955,
 'train_accuracy': 1.0}