In [2]:
import warnings
# Silence every warning for the rest of the session so cell outputs stay clean.
# NOTE(review): a blanket 'ignore' filter also hides library deprecation
# warnings (e.g. FutureWarning) — consider narrowing it to a category/module.
warnings.filterwarnings('ignore')

In [3]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, accuracy_score

from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

import numpy as np
import pandas as pd

## Regression

In [60]:
# Display the `boston` object as the cell output.
# NOTE(review): the only visible assignment to `boston` is in the next cell, so
# on a fresh top-to-bottom run this line would raise NameError. The execution
# counters in this notebook are also out of order — confirm, and move this
# display after the load if so.
boston

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


In [61]:
# Boston dataset: read the CSV, then min-max scale every column.
boston = pd.read_csv("data/Boston.csv")

# NOTE(review): the scaler is fitted on all rows, and on the last (target)
# column too, before the train/test split that follows — confirm this is intended.
scaler = MinMaxScaler()
boston = scaler.fit_transform(boston)

# The last column is the target; every column before it is a feature.
X_boston, y_boston = boston[:, :-1], boston[:, -1]
print(X_boston.shape, y_boston.shape)

(506, 13) (506,)


In [62]:
# Split the dataset: 30% of the rows are held out for testing (fixed seed).
(X_train_b, X_test_b,
 y_train_b, y_test_b) = train_test_split(
    X_boston,
    y_boston,
    test_size=0.3,
    random_state=42,
)

In [74]:
# Fit four tree-based ensemble regressors on the same train split and compare
# their mean squared error on the held-out test split.
#
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, so `base_estimator=` raises TypeError on current releases.
# The first positional slot is the base learner under either name.

# Bagging Regressor
bagging_reg = BaggingRegressor(DecisionTreeRegressor(random_state=42), random_state=42)
bagging_reg.fit(X_train_b, y_train_b)
y_pred_bagging = bagging_reg.predict(X_test_b)
mse_bagging = mean_squared_error(y_test_b, y_pred_bagging)

# Random Forest Regressor
rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(X_train_b, y_train_b)
y_pred_rf = rf_reg.predict(X_test_b)
mse_rf = mean_squared_error(y_test_b, y_pred_rf)

# Ada Boosting Regressor
ab_reg = AdaBoostRegressor(DecisionTreeRegressor(random_state=42), random_state=42)
ab_reg.fit(X_train_b, y_train_b)
y_pred_ab = ab_reg.predict(X_test_b)
mse_ab = mean_squared_error(y_test_b, y_pred_ab)

# Gradient Boosting Regressor
gb_reg = GradientBoostingRegressor(random_state=42)
gb_reg.fit(X_train_b, y_train_b)
y_pred_gb = gb_reg.predict(X_test_b)
mse_gb = mean_squared_error(y_test_b, y_pred_gb)

# Report the test-set MSE of each model, rounded to 5 decimals (lower is better).
print("Bagging:\t  ", round(mse_bagging, 5))
print("Random Forest:\t  ", round(mse_rf, 5))
print("Ada Boosting:\t  ", round(mse_ab, 5))
print("Gradient Boosting:", round(mse_gb, 5))

Bagging:	   0.00509
Random Forest:	   0.00461
Ada Boosting:	   0.0056
Gradient Boosting: 0.00388


## Classification

In [65]:
# Pima Indians diabetes dataset: "Outcome" is the label, all other columns are features.
pima = pd.read_csv("data/pima.csv")
y_pima = pima["Outcome"]
X_pima = pima.drop(columns=["Outcome"])
print(X_pima.shape, y_pima.shape)

(768, 8) (768,)


In [66]:
# Split the dataset: 30% of the rows are held out for testing (fixed seed).
(X_train_p, X_test_p,
 y_train_p, y_test_p) = train_test_split(
    X_pima,
    y_pima,
    test_size=0.3,
    random_state=42,
)

In [71]:
# Fit four tree-based ensemble classifiers on the same train split and compare
# their accuracy on the held-out test split.
#
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, so `base_estimator=` raises TypeError on current releases.
# The first positional slot is the base learner under either name.

# Bagging Classifier
bagging_clf = BaggingClassifier(DecisionTreeClassifier(random_state=42), random_state=42)
bagging_clf.fit(X_train_p, y_train_p)
y_pred_bagging_clf = bagging_clf.predict(X_test_p)
acc_bagging = accuracy_score(y_test_p, y_pred_bagging_clf)

# Random Forest Classifier
rf_clf = RandomForestClassifier(random_state=42)
rf_clf.fit(X_train_p, y_train_p)
y_pred_rf_clf = rf_clf.predict(X_test_p)
acc_rf = accuracy_score(y_test_p, y_pred_rf_clf)

# Ada Boosting Classifier
ab_clf = AdaBoostClassifier(DecisionTreeClassifier(random_state=42), random_state=42)
ab_clf.fit(X_train_p, y_train_p)
y_pred_ab_clf = ab_clf.predict(X_test_p)
acc_ab = accuracy_score(y_test_p, y_pred_ab_clf)

# Gradient Boosting Classifier
gb_clf = GradientBoostingClassifier(random_state=42)
gb_clf.fit(X_train_p, y_train_p)
y_pred_gb_clf = gb_clf.predict(X_test_p)
acc_gb = accuracy_score(y_test_p, y_pred_gb_clf)

# Report the test-set accuracy of each model as a percentage, rounded to 3 decimals.
print("Bagging:\t  ", round(acc_bagging*100, 3))
print("Random Forest:\t  ", round(acc_rf*100, 3))
print("Ada Boosting:\t  ", round(acc_ab*100, 3))
print("Gradient Boosting:", round(acc_gb*100, 3))

Bagging:	   72.727
Random Forest:	   75.325
Ada Boosting:	   71.861
Gradient Boosting: 74.892
