In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons, make_circles, make_blobs
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import argparse

## 1. `generate_dataset` — 多種二維分類資料產生器

**功能**  
- 依照使用者指定的 `dataset_type`，動態產生常見的二維 toy data：  
  - `moons`、`circles`、`blobs`、`linear`、`xor`

**重要參數**  
- `dataset_type`：資料集類型  
- `n_samples`：產生的總樣本數（`moons`、`circles` 亦可傳入二元 tuple，分別指定兩類的樣本數）  
- `noise`：雜訊標準差（僅對 `moons`、`circles`、`xor` 有效）  
- `centers`：`blobs` 中群聚中心數量  
- `factor`：`circles` 內外圈半徑比例  

In [None]:
def generate_dataset(dataset_type='moons',
                     n_samples=200,
                     noise=0.2,
                     centers=3,
                     factor=0.5,
                     random_state=42):
    """Generate a two-dimensional toy classification dataset.

    Parameters
    ----------
    dataset_type : str
        One of ``'moons'``, ``'circles'``, ``'blobs'``, ``'linear'``, ``'xor'``.
    n_samples : int or tuple
        Forwarded unchanged to the scikit-learn generators. For ``'xor'`` it
        is the total number of points (a tuple is summed to a total).
    noise : float
        Standard deviation of the Gaussian noise. Only used by ``'moons'``,
        ``'circles'`` and ``'xor'``.
    centers : int
        Number of cluster centres; only used by ``'blobs'``.
    factor : float
        Forwarded to ``make_circles``; only used by ``'circles'``.
    random_state : int or None
        Seed forwarded to the generator of the selected dataset.

    Returns
    -------
    X, y
        Features and labels. For ``'xor'`` X has shape ``(n_samples, 2)`` and
        y has shape ``(n_samples,)`` with labels in ``{0, 1}``; for the other
        types they are whatever the scikit-learn generator returns.

    Raises
    ------
    ValueError
        If ``dataset_type`` is not one of the supported names.
    """
    if dataset_type == 'moons':
        X, y = make_moons(n_samples=n_samples,
                          noise=noise,
                          random_state=random_state)
    elif dataset_type == 'circles':
        X, y = make_circles(n_samples=n_samples,
                            noise=noise,
                            factor=factor,
                            random_state=random_state)
    elif dataset_type == 'blobs':
        X, y = make_blobs(n_samples=n_samples,
                          centers=centers,
                          random_state=random_state)
    elif dataset_type == 'linear':
        # Two tight blobs stand in for a linearly separable problem.
        X, y = make_blobs(n_samples=n_samples,
                          centers=2,
                          cluster_std=0.5,
                          random_state=random_state)
    elif dataset_type == 'xor':
        # A private RandomState keeps the global NumPy RNG untouched while
        # producing the same stream that np.random.seed(random_state) would.
        rng = np.random.RandomState(random_state)
        corners = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
        corner_labels = np.array([0, 1, 1, 0])
        total = int(np.sum(n_samples))
        # Cycle through the four corners so that n_samples is honoured
        # (previously exactly four points were returned regardless).
        which = np.arange(total) % len(corners)
        X = corners[which] + noise * rng.randn(total, 2)
        y = corner_labels[which]
    else:
        raise ValueError(f"不支援的 dataset_type: {dataset_type}")
    return X, y

## 2. `plot_dataset` — 資料分佈視覺化

**功能**  
- 將 2D 特徵 `X` 及標籤 `y` 繪製成散佈圖，方便觀察資料分佈形狀。  

**重要參數**  
- `X` (array): 特徵矩陣，shape=(n_samples, 2)  
- `y` (array): 標籤向量，shape=(n_samples,)  
- `title` (str): 圖片標題  


In [None]:
def plot_dataset(X, y, title='Dataset'):
    """Show a scatter plot of the samples, coloured by label.

    Parameters
    ----------
    X : array
        Feature matrix; columns 0 and 1 are used as the plot axes.
    y : array
        Values passed as the per-point colour to ``plt.scatter``.
    title : str
        Figure title.
    """
    first_feature = X[:, 0]
    second_feature = X[:, 1]
    marker_style = {'c': y, 'cmap': 'bwr', 'edgecolors': 'k', 's': 50}

    plt.figure(figsize=(5, 5))
    plt.scatter(first_feature, second_feature, **marker_style)
    plt.title(title)
    plt.xlabel('$x_1$')
    plt.ylabel('$x_2$')
    plt.grid(alpha=0.3)
    plt.show()

## 3a. `train_mlp_sklearn` — 使用 Scikit-learn 訓練 MLP

**功能**  
- 利用 `sklearn.neural_network.MLPClassifier` 快速定義並訓練多層感知器 (MLP)。  

**重要參數**  
- `hidden_layers` (tuple): 隱藏層神經元數量，例如 `(10, 10)`  
- `activation` (str): 隱藏層激活函數 (`'identity'`,`'logistic'`,`'tanh'`,`'relu'`)  
- `solver` (str): 最佳化演算法 (`'lbfgs'`,`'sgd'`,`'adam'`)  
- `lr` (float): 初始學習率 (`learning_rate_init`)  
- `max_iter` (int): 最大迭代次數  
- `verbose` (bool): 是否顯示每次迭代的 loss  


In [None]:
def train_mlp_sklearn(X_train, y_train,
                      hidden_layers=(10,10),
                      activation='relu',
                      solver='adam',
                      lr=0.01,
                      max_iter=200,
                      random_state=42,
                      verbose=True):
    """Build an ``MLPClassifier`` from the given settings and fit it.

    Parameters
    ----------
    X_train, y_train
        Training features and labels handed to ``fit``.
    hidden_layers : tuple
        Passed as ``hidden_layer_sizes``.
    activation, solver : str
        Passed through under the same names.
    lr : float
        Passed as ``learning_rate_init``.
    max_iter : int
        Passed through as ``max_iter``.
    random_state : int
        Passed through as ``random_state``.
    verbose : bool
        Passed through as ``verbose``.

    Returns
    -------
    The classifier instance after ``fit`` has been called on it.
    """
    settings = {
        'hidden_layer_sizes': hidden_layers,
        'activation': activation,
        'solver': solver,
        'learning_rate_init': lr,
        'max_iter': max_iter,
        'random_state': random_state,
        'verbose': verbose,
    }
    model = MLPClassifier(**settings)
    model.fit(X_train, y_train)
    return model

## 3b. `train_mlp_numpy` — 純 NumPy 手寫 MLP

**功能**  
- 從零實作 MLP forward/backward，使用 MSE loss 及全批次梯度下降（每次迭代使用全部訓練資料）更新，適合作為教學示範。  

**重要參數**  
- `hidden_layers` (tuple): 隱藏層神經元數量  
- `activation` (str): 隱藏層激活函數 (`'tanh'` 或 `'relu'`)  
- `lr` (float): 學習率  
- `max_iter` (int): 最大訓練迭代次數  
- `verbose` (bool): 是否每 20 次迭代印出一次 loss  

In [None]:
def train_mlp_numpy(X_train, y_train,
                    hidden_layers=(10,10),
                    activation='tanh',
                    solver='sgd',
                    lr=0.01,
                    max_iter=200,
                    random_state=42,
                    verbose=True):
    """Train a small binary-classification MLP written in plain NumPy.

    The network has ``len(hidden_layers)`` hidden layers followed by a single
    sigmoid output unit and is trained on a mean-squared-error loss. Every
    iteration uses the whole training set (full-batch gradient descent).

    Parameters
    ----------
    X_train : array-like, shape (n_samples, n_features)
        Training features.
    y_train : array-like, shape (n_samples,)
        Training labels; expected to be 0/1 (a warning is issued otherwise).
    hidden_layers : tuple of int
        Number of units in each hidden layer.
    activation : str
        Hidden-layer activation, ``'tanh'`` or ``'relu'``.
    solver : str
        Accepted but never read by this implementation.
    lr : float
        Learning rate.
    max_iter : int
        Number of gradient-descent iterations.
    random_state : int or None
        Seed for weight initialisation.
    verbose : bool
        If true, print the loss every 20 iterations.

    Returns
    -------
    dict
        ``'weights'`` and ``'biases'`` (lists of arrays, one per layer),
        ``'loss_history'`` (loss per iteration) and ``'predict'`` (a function
        mapping features to 0/1 predictions of shape ``(n,)``).

    Raises
    ------
    ValueError
        If ``activation`` is neither ``'tanh'`` nor ``'relu'``.
    """
    import warnings  # local import: only needed for the label sanity check

    # Resolve the activation first so a bad name fails before any work is done.
    if activation == 'tanh':
        act = np.tanh

        def act_grad(z):
            return 1 - np.tanh(z) ** 2
    elif activation == 'relu':
        def act(z):
            return np.maximum(0, z)

        def act_grad(z):
            return (z > 0).astype(float)
    else:
        raise ValueError("activation 只支援 'tanh' 或 'relu'")

    def sigmoid(z):
        # exp() is only ever taken of non-positive values, so large |z|
        # cannot overflow (the naive 1/(1+exp(-z)) does for very negative z).
        e = np.exp(-np.abs(z))
        return np.where(z >= 0, 1 / (1 + e), e / (1 + e))

    X_train = np.asarray(X_train, dtype=float)
    y = np.asarray(y_train, dtype=float).reshape(-1, 1)
    n_samples, n_features = X_train.shape

    if not np.isin(y, (0, 1)).all():
        warnings.warn(
            "train_mlp_numpy is a binary classifier; labels other than 0/1 "
            "were found and cannot be represented by the sigmoid output.")

    # A private RandomState yields the same numbers np.random.seed() would,
    # without resetting the caller's global random stream.
    rng = np.random.RandomState(random_state)

    dims = [n_features] + list(hidden_layers) + [1]
    n_layers = len(dims) - 1

    # He-style initialisation: standard normal scaled by sqrt(2 / fan_in).
    weights = [rng.randn(dims[i], dims[i + 1]) * np.sqrt(2 / dims[i])
               for i in range(n_layers)]
    biases = [np.zeros((1, dims[i + 1])) for i in range(n_layers)]

    loss_history = []

    for it in range(1, max_iter + 1):
        # ---- forward pass ----
        activations = [X_train]   # activations[l] is the input of layer l
        pre_acts = []             # pre_acts[l] is hidden layer l before activation
        for W, b in zip(weights[:-1], biases[:-1]):
            z = activations[-1] @ W + b
            pre_acts.append(z)
            activations.append(act(z))
        a_out = sigmoid(activations[-1] @ weights[-1] + biases[-1])

        # ---- loss (MSE) ----
        loss = np.mean((a_out - y) ** 2)
        loss_history.append(loss)
        if verbose and it % 20 == 0:
            print(f"[NumPy] Iter {it:3d}, loss = {loss:.6f}")

        # ---- backward pass ----
        grads_W = [None] * n_layers
        grads_b = [None] * n_layers
        # The constant factor 2 of the MSE derivative is left out, i.e. it is
        # absorbed into the learning rate.
        delta = (a_out - y) * a_out * (1 - a_out)
        grads_W[-1] = activations[-1].T @ delta / n_samples
        grads_b[-1] = np.sum(delta, axis=0, keepdims=True) / n_samples
        for l in range(n_layers - 2, -1, -1):
            # weights are only updated after the loop, so weights[l + 1]
            # is still the value used in the forward pass.
            delta = (delta @ weights[l + 1].T) * act_grad(pre_acts[l])
            grads_W[l] = activations[l].T @ delta / n_samples
            grads_b[l] = np.sum(delta, axis=0, keepdims=True) / n_samples

        # ---- parameter update ----
        for idx in range(n_layers):
            weights[idx] -= lr * grads_W[idx]
            biases[idx] -= lr * grads_b[idx]

    def predict(X):
        """Return 0/1 predictions (threshold 0.5) for the rows of X."""
        h = np.asarray(X, dtype=float)
        for W, b in zip(weights[:-1], biases[:-1]):
            h = act(h @ W + b)
        out = sigmoid(h @ weights[-1] + biases[-1])
        return (out > 0.5).astype(int).reshape(-1)

    return {
        'weights': weights,
        'biases': biases,
        'loss_history': loss_history,
        'predict': predict
    }

## 4. `plot_loss_curve` — 損失曲線繪製

**功能**  
- 自動判別 Sklearn 或 NumPy MLP，畫出每次迭代的 loss 變化。  

In [None]:
def plot_loss_curve(clf):
    """Plot the per-iteration training loss of a fitted model.

    Parameters
    ----------
    clf : object or mapping
        Either an object exposing a ``loss_curve_`` attribute (plotted with
        the label 'Sklearn Loss') or something indexable with the key
        ``'loss_history'`` (plotted with the label 'NumPy Loss').

    Raises
    ------
    TypeError
        If ``clf`` has no ``loss_curve_`` attribute and does not support
        item access. NOTE(review): a scikit-learn estimator fitted with a
        solver that records no loss curve would land here — confirm which
        solvers that applies to for the installed version.
    KeyError
        If ``clf`` is a mapping without a ``'loss_history'`` entry.
    """
    # Resolve the curve before opening a figure so that an unsupported
    # object fails with a clear message instead of leaving an empty plot.
    if hasattr(clf, 'loss_curve_'):
        curve, label = clf.loss_curve_, 'Sklearn Loss'
    else:
        try:
            curve = clf['loss_history']
        except TypeError as exc:
            raise TypeError(
                "plot_loss_curve: clf 沒有 loss_curve_ 屬性，"
                "也不是含有 'loss_history' 的 dict，無法繪製 loss 曲線"
            ) from exc
        label = 'NumPy Loss'

    plt.figure(figsize=(6,4))
    plt.plot(curve, label=label)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training Loss Curve')
    plt.legend()
    plt.grid(alpha=0.3)
    plt.show()

## 5. `plot_decision_boundary` — 決策邊界示意

**功能**  
- 在整體輸入空間網格上用模型 `predict` 填色，並疊加訓練/測試點。  

In [None]:
def plot_decision_boundary(clf, X, y, X_train, y_train, X_test, y_test):
    """Shade the model's predictions over a grid and overlay the data points.

    Parameters
    ----------
    clf : object or mapping
        Either an object with a ``predict`` method or something indexable
        with the key ``'predict'`` that yields a prediction function.
    X : array
        Columns 0 and 1 define the plotted area (their range padded by 0.5).
    y : array
        Not used by this function.
    X_train, y_train : array
        Points drawn as filled markers, coloured by ``y_train``.
    X_test, y_test : array
        Points drawn as 'x' markers, coloured by ``y_test``.
    """
    margin, resolution = 0.5, 300
    x1_axis = np.linspace(X[:, 0].min() - margin, X[:, 0].max() + margin,
                          resolution)
    x2_axis = np.linspace(X[:, 1].min() - margin, X[:, 1].max() + margin,
                          resolution)
    xx, yy = np.meshgrid(x1_axis, x2_axis)
    grid_points = np.column_stack([xx.ravel(), yy.ravel()])

    # Objects with a predict attribute are called directly; anything else is
    # treated as a mapping holding the prediction function.
    predict_fn = clf.predict if hasattr(clf, 'predict') else clf['predict']
    region = predict_fn(grid_points).reshape(xx.shape)

    plt.figure(figsize=(6, 6))
    plt.contourf(xx, yy, region, alpha=0.3, cmap='bwr')
    train_style = {'c': y_train, 'cmap': 'bwr', 'edgecolors': 'k',
                   's': 50, 'label': 'Train'}
    test_style = {'c': y_test, 'cmap': 'cool', 'marker': 'x',
                  's': 80, 'label': 'Test'}
    plt.scatter(X_train[:, 0], X_train[:, 1], **train_style)
    plt.scatter(X_test[:, 0], X_test[:, 1], **test_style)
    plt.title("Decision Boundary")
    plt.xlabel('$x_1$')
    plt.ylabel('$x_2$')
    plt.legend()
    plt.show()

def evaluate_model(clf, X_train, y_train, X_test, y_test):
    """Print the accuracy of a model on the train and the test split.

    Parameters
    ----------
    clf : object or mapping
        If it has a ``score`` attribute, ``clf.score(X, y)`` is used for
        both splits. Otherwise ``clf['predict']`` is called and the accuracy
        is the mean of the element-wise comparison with the labels.
    X_train, y_train, X_test, y_test
        Features and labels of the two splits.

    Returns
    -------
    None
        The two accuracies are printed with three decimals.
    """
    if not hasattr(clf, 'score'):
        predict_fn = clf['predict']
        predicted = [predict_fn(X_train), predict_fn(X_test)]
        targets = [y_train, y_test]
        train_acc, test_acc = [np.mean(p == t)
                               for p, t in zip(predicted, targets)]
    else:
        train_acc = clf.score(X_train, y_train)
        test_acc = clf.score(X_test, y_test)

    print("Train accuracy: {:.3f}".format(train_acc))
    print("Test accuracy:  {:.3f}".format(test_acc))

## 7. `__main__` — 主程式流程

1. **命令列參數解析**  
2. **資料產生與可視化**  
3. **資料切分 (Train/Test)**  
4. **Sklearn MLP 訓練、繪圖、評估**  
5. **NumPy MLP 訓練、繪圖、評估**  

In [1]:
if __name__ == '__main__':
    # End-to-end demo: build a dataset, split it, then train / plot /
    # evaluate the scikit-learn MLP and (for binary labels) the NumPy MLP.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='blobs',
                        choices=['moons','circles','blobs','linear','xor'],
                        help='選擇資料類型')
    parser.add_argument('--n_samples', type=int, default=200)
    parser.add_argument('--noise', type=float, default=0.2)
    parser.add_argument('--centers', type=int, default=3)
    parser.add_argument('--factor', type=float, default=0.5)
    # parse_known_args tolerates unrecognised command-line entries instead of
    # aborting (presumably chosen so the cell also runs inside a notebook
    # kernel, whose argv carries extra arguments).
    args, _ = parser.parse_known_args()

    # 1. Generate and visualise the data
    X, y = generate_dataset(dataset_type=args.dataset,
                            n_samples=args.n_samples,
                            noise=args.noise,
                            centers=args.centers,
                            factor=args.factor)
    plot_dataset(X, y, title=f"data: {args.dataset}")

    # 2. Train / test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)

    # 3a. scikit-learn MLP
    print("=== Sklearn MLP Training ===")
    clf_sk = train_mlp_sklearn(X_train, y_train)
    plot_loss_curve(clf_sk)
    plot_decision_boundary(clf_sk, X, y, X_train, y_train, X_test, y_test)
    evaluate_model(clf_sk, X_train, y_train, X_test, y_test)

    # 3b. NumPy MLP — it has a single sigmoid output, so it can only model
    # 0/1 labels. Datasets with more classes (e.g. the default 'blobs' with
    # 3 centres) are skipped rather than trained on meaningless targets.
    print("\n=== NumPy MLP Training ===")
    if set(np.unique(y).tolist()) <= {0, 1}:
        clf_np = train_mlp_numpy(X_train, y_train,
                                 hidden_layers=(10,10),
                                 activation='tanh',
                                 lr=0.05,
                                 max_iter=200,
                                 verbose=True)
        plot_loss_curve(clf_np)
        evaluate_model(clf_np, X_train, y_train, X_test, y_test)
    else:
        print("[NumPy] Skipped: the NumPy MLP only supports binary (0/1) "
              f"labels, but this dataset has {np.unique(y).size} classes.")

NameError: name 'argparse' is not defined