# Boosting 
是一種常用的機器學習集成方法，旨在通過將多個弱學習器（通常是決策樹）結合起來，以提高模型的準確度。Boosting 通常是以序列化的方式進行訓練，這意味著每個後續模型都會關注前一個模型的錯誤，並試圖將這些錯誤進行修正。Boosting 的代表算法有 AdaBoost、Gradient Boosting 和 XGBoost 等

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# 載入資料集並建立 pandas DataFrame
path_to_data = 'https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data'
column_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'accep']

df = pd.read_csv(path_to_data, names=column_names)
target_column = 'accep'
raw_feature_columns = [col for col in column_names if col != target_column]

# 將特徵欄位轉換為虛擬變數（dummy variables）
X = pd.get_dummies(df[raw_feature_columns], drop_first=True)

# 將目標欄位轉換為二元變數；如果是 'unacc'，設為 0，否則設為 1
df[target_column] = np.where(df[target_column] == 'unacc', 0, 1)
y = df[target_column]

# 將資料集分為訓練集和測試集
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123, test_size=0.3)

# 1. 使用 DecisionTreeClassifier 建立 AdaBoost 基本模型，並顯示其參數
decision_stump = DecisionTreeClassifier(max_depth=1)
ada_classifier = AdaBoostClassifier(estimator=decision_stump, n_estimators=5)

print("AdaBoost Classifier parameters:")
print(ada_classifier.get_params())

# 訓練 AdaBoost 模型並預測測試集
ada_classifier.fit(X_train, y_train)
y_pred_ada = ada_classifier.predict(X_test)

# 2. 使用 GradientBoostingClassifier 建立 Gradient Boosting 模型
grad_classifier = GradientBoostingClassifier(n_estimators=15)

print("\nGradient Boosting Classifier parameters:")
print(grad_classifier.get_params())

# 訓練 Gradient Boosting 模型並預測測試集
grad_classifier.fit(X_train, y_train)
y_pred_grad = grad_classifier.predict(X_test)

# 3. 計算 AdaBoost 模型的準確度、精確度、召回率和 F1 分數
accuracy_ada = accuracy_score(y_test, y_pred_ada)
precision_ada = precision_score(y_test, y_pred_ada)
recall_ada = recall_score(y_test, y_pred_ada)
f1_ada = f1_score(y_test, y_pred_ada)

print(f'\nAdaBoost - Test set accuracy: {accuracy_ada}')
print(f'AdaBoost - Test set precision: {precision_ada}')
print(f'AdaBoost - Test set recall: {recall_ada}')
print(f'AdaBoost - Test set f1-score: {f1_ada}')

# 計算 Gradient Boosting 模型的準確度、精確度、召回率和 F1 分數
accuracy_grad = accuracy_score(y_test, y_pred_grad)
precision_grad = precision_score(y_test, y_pred_grad)
recall_grad = recall_score(y_test, y_pred_grad)
f1_grad = f1_score(y_test, y_pred_grad)

print(f'\nGradient Boosting - Test set accuracy: {accuracy_grad}')
print(f'Gradient Boosting - Test set precision: {precision_grad}')
print(f'Gradient Boosting - Test set recall: {recall_grad}')
print(f'Gradient Boosting - Test set f1-score: {f1_grad}')

# 4. 輸出混淆矩陣
test_conf_matrix = pd.DataFrame(
    confusion_matrix(y_test, y_pred_ada, labels=[1, 0]), 
    index=['actual yes', 'actual no'], 
    columns=['predicted yes', 'predicted no']
)

print(f'\nConfusion Matrix:\n{test_conf_matrix.to_string()}')


AdaBoost Classifier parameters:
{'algorithm': 'SAMME.R', 'base_estimator': 'deprecated', 'estimator__ccp_alpha': 0.0, 'estimator__class_weight': None, 'estimator__criterion': 'gini', 'estimator__max_depth': 1, 'estimator__max_features': None, 'estimator__max_leaf_nodes': None, 'estimator__min_impurity_decrease': 0.0, 'estimator__min_samples_leaf': 1, 'estimator__min_samples_split': 2, 'estimator__min_weight_fraction_leaf': 0.0, 'estimator__random_state': None, 'estimator__splitter': 'best', 'estimator': DecisionTreeClassifier(max_depth=1), 'learning_rate': 1.0, 'n_estimators': 5, 'random_state': None}

Gradient Boosting Classifier parameters:
{'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.1, 'loss': 'log_loss', 'max_depth': 3, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 15, 'n_iter_no_change': None, 'random_state': None, 'subs

In [3]:
import datetime

current_date = datetime.datetime.now().strftime("%Y年%m月%d日")
print(f"更新日期: {current_date}")

更新日期: 2024年12月01日
