# 模型演算者第五章（進階）：分類模型與混淆矩陣
使用 scikit-learn 的隨機森林（`RandomForestClassifier`）訓練客戶流失分類模型，並以分類報告與混淆矩陣進行評估。

## 1. 匯入資料與套件

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Read customer_churn.csv, then separate the model inputs (X) from the label (y).
FEATURE_COLUMNS = ['monthly_fee', 'months_with_company']
TARGET_COLUMN = 'churn'

df = pd.read_csv('customer_churn.csv')
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

## 2. 資料分割

In [None]:
# Hold out a fraction of the rows as the test set; the fixed seed makes the
# shuffle (and therefore the split) repeatable across runs.
# NOTE(review): the split is not stratified — if the churn classes are
# imbalanced, consider passing stratify=y; confirm against the data.
TEST_FRACTION = 0.3
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

## 3. 建立並訓練模型

In [None]:
# Build a random-forest classifier with a fixed seed so training is repeatable,
# then fit it on the training split. All other hyperparameters are left at the
# library defaults.
MODEL_SEED = 42

clf = RandomForestClassifier(random_state=MODEL_SEED)
clf.fit(X=X_train, y=y_train)

## 4. 預測與評估

In [None]:
# Predict labels for the held-out rows and print the per-class
# precision / recall / F1 report.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# Matplotlib's bundled default font (DejaVu Sans) has no CJK glyphs, so the
# Chinese title below would be drawn as empty boxes with "Glyph ... missing
# from font" warnings. Put a CJK-capable font at the front of the sans-serif
# list when one is installed. Only installed names are added, so no
# "font family not found" warnings are triggered by this lookup.
from matplotlib import font_manager

_cjk_candidates = [
    'Microsoft JhengHei',   # Windows (Traditional Chinese)
    'PingFang TC',          # macOS
    'Heiti TC',             # macOS
    'Arial Unicode MS',     # macOS / Office
    'Noto Sans CJK TC',     # Linux
    'Noto Sans CJK JP',     # Linux (name often reported for the Noto CJK .ttc)
    'Noto Sans TC',
    'WenQuanYi Zen Hei',
    'WenQuanYi Micro Hei',
    'Microsoft YaHei',
    'SimHei',
]
_installed_fonts = {entry.name for entry in font_manager.fontManager.ttflist}
_cjk_fonts = [name for name in _cjk_candidates if name in _installed_fonts]
if _cjk_fonts:
    _other_fonts = [
        name for name in plt.rcParams['font.sans-serif'] if name not in _cjk_fonts
    ]
    plt.rcParams['font.sans-serif'] = _cjk_fonts + _other_fonts
# NOTE(review): if none of the candidates is installed the font settings are
# left untouched and the title may still render as boxes — install a CJK font
# or extend the candidate list for your platform.

# Index the matrix by clf.classes_ so its rows/columns always line up with
# display_labels, even when a class happens to be absent from the test split.
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_)
disp.plot(cmap='Blues')
plt.title('混淆矩陣')
plt.tight_layout()
# Save before show(): some backends release the figure once show() returns.
plt.savefig('confusion_matrix.png')
plt.show()