# MNIST 手寫辨識：從零實作 CNN（NumPy）與 Keras CNN 版本

這份 Notebook 包含兩個主要部分：
1. **只使用 pandas + numpy + 標準函式庫，從零實作簡易 CNN**（教學示範，效能不佳但可理解原理）。
2. **使用 TensorFlow Keras 的標準 CNN 模型**，適合實際訓練與應用。

Part 1 讀取 MNIST 的 CSV 檔（上傳到 Colab，或放在 Google Drive 後掛載）；Part 2 則直接使用 Keras 內建的 MNIST 資料集，不需要 CSV 檔。

## 0. 基本環境設定

- 建議在 Google Colab 執行。
- 如果你使用 CSV 版本的 MNIST（Part 1），請先把 `mnist_train.csv`、`mnist_test.csv` 上傳到 Colab 或放到 Google Drive，並在 1.1 節把 `train_path`、`test_path` 改成實際路徑。

In [None]:
# If running on Colab, you can optionally mount Google Drive first:
# from google.colab import drive
# drive.mount('/content/drive')

import numpy as np
import pandas as pd
import math

# Seed NumPy's legacy global RNG; np.random.permutation / np.random.choice
# in later cells draw from it.
np.random.seed(42)
print("NumPy / Pandas 已載入完成")

NumPy / Pandas 已載入完成


## Part 1. 只用 pandas + numpy 的簡易 CNN

這一部分示範：
- 從 CSV 載入 MNIST
- 自行實作：卷積層、ReLU、MaxPooling、全連接層、Softmax + Cross-Entropy
- 使用簡單 SGD 訓練

> **提醒：** 這是教學示範，沒有任何最佳化，速度會比 Keras 版慢很多。

### 1.1 載入與前處理 MNIST CSV

假設：
- 檔案為 `mnist_train.csv`、`mnist_test.csv`，且第一列是欄位名稱（`pd.read_csv` 預設會把第一列當成表頭）
- 第一欄為 `label` (0–9)
- 其餘 784 欄為像素值（0–255）

In [None]:
# Mount Google Drive at /content/drive; the CSV paths in the next cell live under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# === Check that the file names and paths below match your environment ===
train_path = "/content/drive/MyDrive/Colab Notebooks/Vibe Coding/Data/mnist_train.csv"  # e.g. "/content/mnist_train.csv"
test_path  = "/content/drive/MyDrive/Colab Notebooks/Vibe Coding/Data/mnist_test.csv"   # e.g. "/content/mnist_test.csv"

# read_csv is called with defaults, so the first line of each file is taken as the header row.
train_df = pd.read_csv(train_path)
test_df  = pd.read_csv(test_path)

print("Train shape:", train_df.shape)
print("Test shape:", test_df.shape)

# Split into labels y (first column) and pixels X (remaining columns)
y_train = train_df.iloc[:, 0].values
X_train = train_df.iloc[:, 1:].values.astype(np.float32)

y_test = test_df.iloc[:, 0].values
X_test = test_df.iloc[:, 1:].values.astype(np.float32)

# Scale pixel values into the 0~1 range (in place)
X_train /= 255.0
X_test  /= 255.0

# Reshape flat rows to (N, 1, 28, 28): channel-first layout expected by conv_forward
X_train = X_train.reshape(-1, 1, 28, 28)
X_test  = X_test.reshape(-1, 1, 28, 28)

num_classes = 10

def one_hot(y, num_classes=10):
    """Encode integer class labels as one-hot rows.

    Args:
        y: Array-like of integer labels in ``[0, num_classes)``. Any shape is
            accepted and flattened, so a column vector of shape (N, 1) is
            treated the same as a 1-D vector of length N.
        num_classes: Width of the encoding (number of classes).

    Returns:
        float32 array of shape ``(y.size, num_classes)`` with a single 1.0 in
        each row, at the column given by that row's label.

    Raises:
        IndexError: If any label lies outside ``[0, num_classes)``.
    """
    # Flatten so that fancy indexing below pairs row k with label k; an
    # (N, 1) input would otherwise broadcast into an N x N index grid.
    labels = np.asarray(y).reshape(-1)
    N = labels.shape[0]
    oh = np.zeros((N, num_classes), dtype=np.float32)
    if N == 0:
        return oh

    # Reject negatives explicitly: NumPy would silently wrap them around to
    # the last classes instead of failing.
    if labels.min() < 0 or labels.max() >= num_classes:
        raise IndexError(
            f"labels must be in [0, {num_classes}); "
            f"got min={labels.min()}, max={labels.max()}"
        )

    oh[np.arange(N), labels] = 1.0
    return oh

# Pre-compute one-hot targets for both splits; softmax_cross_entropy_loss
# takes its labels in one-hot form.
y_train_oh = one_hot(y_train, num_classes)
y_test_oh  = one_hot(y_test, num_classes)

print("X_train:", X_train.shape, "y_train:", y_train.shape)

Train shape: (60000, 785)
Test shape: (10000, 785)
X_train: (60000, 1, 28, 28) y_train: (60000,)


### 1.2 自行實作 CNN 各層

包含：
- `conv_forward` / `conv_backward`
- `relu_forward` / `relu_backward`
- `maxpool_forward` / `maxpool_backward`
- `linear_forward` / `linear_backward`
- `softmax_cross_entropy_loss`

In [18]:
### Convolution layer
def conv_forward(X, W, b, pad=1, stride=1):
    """Compute a zero-padded 2-D cross-correlation (the CNN "convolution").

    Args:
        X: Input batch, shape (N, C_in, H, W).
        W: Filters, shape (C_out, C_in, KH, KW).
        b: Per-filter bias, shape (C_out,).
        pad: Number of zeros added on every side of both spatial axes.
        stride: Step between neighbouring windows.

    Returns:
        out: float32 array of shape (N, C_out, H_out, W_out) where
            H_out = (H + 2*pad - KH) // stride + 1 (and likewise W_out).
        cache: Tuple (X_pad, W, b, pad, stride) consumed by conv_backward.
    """
    N, _, _, _ = X.shape
    C_out, _, KH, KW = W.shape

    X_pad = np.pad(X, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    _, _, H_pad, W_pad = X_pad.shape

    H_out = (H_pad - KH) // stride + 1
    W_out = (W_pad - KW) // stride + 1

    out = np.zeros((N, C_out, H_out, W_out), dtype=np.float32)

    # Keep the explicit sliding-window loop over output positions, but handle
    # every sample and every filter at once for each position.
    for i in range(H_out):
        hs = i * stride
        for j in range(W_out):
            ws = j * stride
            region = X_pad[:, :, hs:hs + KH, ws:ws + KW]  # (N, C_in, KH, KW)
            # Contract (C_in, KH, KW) of the window against each filter -> (N, C_out)
            out[:, :, i, j] = np.tensordot(region, W, axes=([1, 2, 3], [1, 2, 3])) + b

    cache = (X_pad, W, b, pad, stride)
    return out, cache

def conv_backward(dout, cache):
    """Back-propagate through conv_forward.

    Args:
        dout: Upstream gradient, shape (N, C_out, H_out, W_out).
        cache: Tuple (X_pad, W, b, pad, stride) returned by conv_forward.

    Returns:
        dX: Gradient w.r.t. the un-padded input, shape (N, C_in, H, W).
        dW: Gradient w.r.t. the filters, same shape as W (float32).
        db: Gradient w.r.t. the bias, shape (C_out,) (float32).
    """
    X_pad, W, b, pad, stride = cache
    _, _, H_pad, W_pad = X_pad.shape
    _, _, KH, KW = W.shape
    _, _, H_out, W_out = dout.shape

    dX_pad = np.zeros_like(X_pad, dtype=np.float32)
    dW = np.zeros_like(W, dtype=np.float32)
    # Each bias is added to every output position of its filter, so its
    # gradient is the sum of dout over batch and both spatial axes.
    db = np.sum(dout, axis=(0, 2, 3)).astype(np.float32)

    # Loop over output positions only; samples and filters are handled
    # together with tensordot.
    for i in range(H_out):
        hs = i * stride
        for j in range(W_out):
            ws = j * stride
            region = X_pad[:, :, hs:hs + KH, ws:ws + KW]  # (N, C_in, KH, KW)
            grad = dout[:, :, i, j]                        # (N, C_out)

            # Sum over the batch: (C_out, C_in, KH, KW)
            dW += np.tensordot(grad, region, axes=([0], [0]))
            # Sum over the filters: (N, C_in, KH, KW), scattered back onto the window
            dX_pad[:, :, hs:hs + KH, ws:ws + KW] += np.tensordot(grad, W, axes=([1], [0]))

    # Strip the padding; explicit end indices make this valid for pad == 0 too.
    dX = dX_pad[:, :, pad:H_pad - pad, pad:W_pad - pad]

    return dX, dW, db

### ReLU
def relu_forward(X):
    """Apply ReLU element-wise.

    Returns:
        (out, cache): the rectified activations and the untouched input X,
        which relu_backward uses to decide where the gradient flows.
    """
    return np.maximum(0, X), X

def relu_backward(dout, cache):
    """Back-propagate through ReLU.

    Args:
        dout: Upstream gradient, same shape as the forward input.
        cache: The forward input X saved by relu_forward.

    Returns:
        dout where the forward input was strictly positive, zero elsewhere.
    """
    passes_gradient = cache > 0
    return dout * passes_gradient

### MaxPool 2x2
def maxpool_forward(X, size=2, stride=2):
    """Max-pool each channel of X with a size x size window.

    Args:
        X: Input of shape (N, C, H, W).
        size: Side length of the pooling window.
        stride: Step between neighbouring windows.

    Returns:
        out: float32 array (N, C, H_out, W_out) holding each window's maximum.
        cache: (mask, size, stride); mask has X's shape with 1.0 at the
            position of the (first) maximum of every window and 0.0 elsewhere.
    """
    batch, channels, height, width = X.shape
    out_h = (height - size) // stride + 1
    out_w = (width - size) // stride + 1

    out = np.zeros((batch, channels, out_h, out_w), dtype=np.float32)
    mask = np.zeros_like(X, dtype=np.float32)

    # ndindex walks (n, c, i, j) in the same order as four nested loops.
    for n, c, i, j in np.ndindex(batch, channels, out_h, out_w):
        top, left = i * stride, j * stride
        window = X[n, c, top:top + size, left:left + size]
        out[n, c, i, j] = window.max()
        # argmax returns the first maximum, so ties mark a single position.
        row, col = np.unravel_index(window.argmax(), window.shape)
        mask[n, c, top + row, left + col] = 1.0

    return out, (mask, size, stride)

def maxpool_backward(dout, cache):
    """Back-propagate through maxpool_forward.

    Args:
        dout: Upstream gradient, shape (N, C, H_out, W_out).
        cache: (mask, size, stride) as returned by maxpool_forward.

    Returns:
        float32 array with the mask's shape: each window receives its
        upstream gradient multiplied by the mask values inside that window.
    """
    mask, size, stride = cache
    batch, channels, _, _ = mask.shape
    _, _, out_h, out_w = dout.shape

    grad_in = np.zeros_like(mask, dtype=np.float32)

    for n, c, i, j in np.ndindex(batch, channels, out_h, out_w):
        rows = slice(i * stride, i * stride + size)
        cols = slice(j * stride, j * stride + size)
        grad_in[n, c, rows, cols] += dout[n, c, i, j] * mask[n, c, rows, cols]

    return grad_in

### Fully connected layer
def linear_forward(X, W, b):
    """Affine map ``X @ W + b``.

    Returns:
        (out, cache) where cache is the tuple (X, W, b) needed by
        linear_backward.
    """
    projected = np.matmul(X, W)
    return projected + b, (X, W, b)

def linear_backward(dout, cache):
    """Back-propagate through linear_forward.

    Args:
        dout: Upstream gradient w.r.t. the layer output.
        cache: (X, W, b) saved by linear_forward.

    Returns:
        (dX, dW, db): gradients w.r.t. the input, the weights and the bias.
    """
    inputs, weights, _ = cache
    grad_inputs = np.matmul(dout, weights.T)
    grad_weights = np.matmul(inputs.T, dout)
    grad_bias = np.sum(dout, axis=0)  # bias is shared by every row of the batch
    return grad_inputs, grad_weights, grad_bias

### Softmax + Cross-Entropy
def softmax_cross_entropy_loss(logits, y_onehot):
    """Softmax followed by mean cross-entropy.

    Args:
        logits: Raw scores, shape (N, num_classes).
        y_onehot: One-hot targets, same shape as logits.

    Returns:
        loss: Cross-entropy averaged over the N rows.
        dlogits: Gradient of that mean loss w.r.t. logits.
        probs: Softmax probabilities, same shape as logits.
    """
    batch = logits.shape[0]

    # Subtracting the row maximum leaves softmax unchanged but keeps exp()
    # from overflowing.
    stable = logits - np.max(logits, axis=1, keepdims=True)
    numer = np.exp(stable)
    probs = numer / np.sum(numer, axis=1, keepdims=True)

    # The 1e-9 guards against log(0) when a probability underflows.
    log_lik = y_onehot * np.log(probs + 1e-9)
    loss = -np.sum(log_lik) / batch

    dlogits = (probs - y_onehot) / batch
    return loss, dlogits, probs

# Progress marker: printed once every layer function above has been defined.
print("CNN 各基本層函式定義完成")

CNN 各基本層函式定義完成


### 1.3 建立簡易 CNN 模型（1 層 Conv + MaxPool + FC）

架構：
- Conv2D：輸入 (1, 28, 28) → 8 個 3×3 filter (padding=1)
- ReLU
- MaxPool2D：2×2, stride=2 → 特徵圖大小 14×14
- Flatten
- 全連接層：8×14×14 → 10 類別
- Softmax + Cross-Entropy

In [19]:
# Model hyper-parameters and parameter initialisation
C_in = 1     # input channels (the images are reshaped to (N, 1, 28, 28))
C_out = 8    # number of convolution filters
K = 3        # kernel side length
pad = 1
stride = 1

IMG_SIZE = 28  # input height and width

# Derive the feature-map sizes from the hyper-parameters instead of
# hard-coding them, so changing K / pad / stride keeps D_flat (and therefore
# the shape of W2) consistent with what conv_forward actually produces.
# With K=3, pad=1, stride=1 this evaluates to 28, as before.
H_out_conv = (IMG_SIZE + 2 * pad - K) // stride + 1
W_out_conv = (IMG_SIZE + 2 * pad - K) // stride + 1
H_pool = H_out_conv // 2  # 2x2 max-pool with stride 2
W_pool = W_out_conv // 2
D_flat = C_out * H_pool * W_pool  # 8 * 14 * 14 = 1568 with the defaults

# W1 must be drawn before W2 so the seeded generator yields the same
# initial weights on every run.
rng = np.random.default_rng(42)
W1 = rng.normal(0, 0.1, size=(C_out, C_in, K, K)).astype(np.float32)
b1 = np.zeros(C_out, dtype=np.float32)
W2 = rng.normal(0, 0.1, size=(D_flat, num_classes)).astype(np.float32)
b2 = np.zeros(num_classes, dtype=np.float32)

def forward_pass(X):
    """Run Conv -> ReLU -> MaxPool(2x2) -> Flatten -> Linear on a batch.

    Reads the module-level parameters W1, b1, W2, b2 and the conv settings
    pad / stride.

    Returns:
        logits: Output of the linear layer, one row per sample.
        caches: (cache_conv, cache_relu, cache_pool, pooled_shape, cache_fc)
            in the order backward_pass unpacks them.
    """
    conv_out, conv_cache = conv_forward(X, W1, b1, pad=pad, stride=stride)
    activated, relu_cache = relu_forward(conv_out)
    pooled, pool_cache = maxpool_forward(activated, size=2, stride=2)

    # Flatten per sample; remember the pooled shape so the backward pass can
    # undo the reshape.
    pooled_shape = pooled.shape
    features = pooled.reshape(X.shape[0], -1)

    logits, fc_cache = linear_forward(features, W2, b2)
    return logits, (conv_cache, relu_cache, pool_cache, pooled_shape, fc_cache)

def backward_pass(dlogits, caches):
    """Back-propagate from the logits to the parameters.

    This function only computes gradients; it does not modify W1, b1, W2 or
    b2 (the caller applies the update).

    Args:
        dlogits: Gradient of the loss w.r.t. the logits.
        caches: Tuple returned by forward_pass.

    Returns:
        (dW1, db1_, dW2, db2_): gradients for the conv filters, conv bias,
        linear weights and linear bias.
    """
    cache_conv, cache_relu, cache_pool, cache_flat, cache_fc = caches

    dflat, dW2, db2_ = linear_backward(dlogits, cache_fc)
    dpool = dflat.reshape(cache_flat)  # undo the flatten
    da1 = maxpool_backward(dpool, cache_pool)
    dz1 = relu_backward(da1, cache_relu)
    # The gradient w.r.t. the input images is not needed for training.
    _, dW1, db1_ = conv_backward(dz1, cache_conv)

    return dW1, db1_, dW2, db2_

def accuracy(X, y_true, batch_size=256):
    """Return the fraction of samples whose arg-max logit equals the label.

    The data is evaluated in chunks of batch_size rows through forward_pass.

    Args:
        X: Input images, first axis is the sample axis.
        y_true: Integer labels aligned with X.
        batch_size: Number of samples per forward pass.
    """
    hits = 0
    seen = 0
    for start in range(0, X.shape[0], batch_size):
        stop = start + batch_size
        labels = y_true[start:stop]
        scores, _ = forward_pass(X[start:stop])
        hits += np.sum(np.argmax(scores, axis=1) == labels)
        seen += labels.shape[0]
    return hits / seen

# Progress marker: printed once the parameters and helper functions above exist.
print("簡易 CNN 模型初始化完成")

簡易 CNN 模型初始化完成


### 1.4 訓練迴圈（示範）

- 為了節省時間，先只用部分訓練資料（例如前 10000 筆）。
- 可以自行把 `N_train_use` 改為全部樣本數。

In [None]:
# Training configuration
N_train_use = 10000  # set to len(X_train) to train on the full training set
X_tr = X_train[:N_train_use]
y_tr = y_train[:N_train_use]
y_tr_oh = y_train_oh[:N_train_use]

learning_rate = 0.01
num_epochs = 3
batch_size = 64

for epoch in range(num_epochs):
    # Reshuffle every epoch; the same permutation is applied to the images,
    # the integer labels and the one-hot targets so they stay aligned.
    idx = np.random.permutation(N_train_use)
    X_tr = X_tr[idx]
    y_tr = y_tr[idx]
    y_tr_oh = y_tr_oh[idx]

    total_loss = 0.0
    num_batches = 0

    for i in range(0, N_train_use, batch_size):
        # Slicing past the end is safe, so the final batch may be smaller.
        X_batch = X_tr[i:i+batch_size]
        y_batch_oh = y_tr_oh[i:i+batch_size]

        logits, caches = forward_pass(X_batch)
        loss, dlogits, probs = softmax_cross_entropy_loss(logits, y_batch_oh)
        total_loss += loss
        num_batches += 1

        dW1, db1_, dW2, db2_ = backward_pass(dlogits, caches)

        # Plain SGD step, applied in place to the module-level parameters
        # that forward_pass reads.
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1_
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2_

    # Evaluate after each epoch on the training subset and the full test set.
    train_acc = accuracy(X_tr, y_tr)
    test_acc = accuracy(X_test, y_test)

    # The reported loss is the mean of the per-batch mean losses.
    print(f"Epoch {epoch+1}/{num_epochs} "
          f"Loss = {total_loss/num_batches:.4f}, "
          f"Train Acc = {train_acc:.4f}, Test Acc = {test_acc:.4f}")

---
## Part 2. 使用 TensorFlow Keras 的 CNN

這一部分改用 TensorFlow Keras：
- 直接載入 Keras 內建 MNIST
- 建立標準 CNN 模型
- 編譯與訓練
- 評估與簡單預測


### 2.1 載入與前處理 MNIST（Keras 內建）

In [13]:
import tensorflow as tf
from tensorflow.keras import layers, models

print("TensorFlow 版本：", tf.__version__)

# NOTE: this rebinds y_train / y_test (and num_classes below), which Part 1
# also defines. Re-running Part 1 cells after this one will use these
# Keras-loaded label arrays instead of the CSV ones.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Scale pixel values into the 0~1 range
x_train = x_train.astype('float32') / 255.0
x_test  = x_test.astype('float32') / 255.0

# Add a trailing channel axis -> (N, 28, 28, 1)
x_train = x_train[..., tf.newaxis]
x_test  = x_test[..., tf.newaxis]

num_classes = 10
print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)

TensorFlow 版本： 2.19.0
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
x_train shape: (60000, 28, 28, 1)
y_train shape: (60000,)


### 2.2 建立 CNN 模型

In [14]:
# Two Conv2D + MaxPooling2D stages followed by a small dense classifier.
model = models.Sequential([
    # Declare the input shape with an explicit Input layer. Passing
    # input_shape= to the first Conv2D makes Keras emit a UserWarning asking
    # for an Input object as the first layer of a Sequential model.
    layers.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # Softmax output: one probability per class.
    layers.Dense(num_classes, activation='softmax'),
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### 2.3 編譯與訓練

In [15]:
# The "sparse" cross-entropy variant takes integer class labels directly,
# so y_train / y_test are passed without one-hot encoding.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# validation_split=0.1 holds out 10% of the training data for the
# val_accuracy / val_loss reported after each epoch.
history = model.fit(
    x_train, y_train,
    epochs=5,
    batch_size=128,
    validation_split=0.1,
    verbose=1
)

# Final evaluation on the test set.
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(f"Test accuracy: {test_acc:.4f}")

Epoch 1/5
422/422 ━━━━━━━━━━━━━━━━━━━━ 45s 100ms/step - accuracy: 0.8089 - loss: 0.6057 - val_accuracy: 0.9785 - val_loss: 0.0725
Epoch 2/5
422/422 ━━━━━━━━━━━━━━━━━━━━ 80s 96ms/step - accuracy: 0.9782 - loss: 0.0681 - val_accuracy: 0.9848 - val_loss: 0.0538
Epoch 3/5
422/422 ━━━━━━━━━━━━━━━━━━━━ 41s 95ms/step - accuracy: 0.9856 - loss: 0.0457 - val_accuracy: 0.9872 - val_loss: 0.0440
Epoch 4/5
422/422 ━━━━━━━━━━━━━━━━━━━━ 42s 98ms/step - accuracy: 0.9891 - loss: 0.0356 - val_accuracy: 0.9847 - val_loss: 0.0495
Epoch 5/5
422/422 ━━━━━━━━━━━━━━━━━━━━ 81s 94ms/step - accuracy: 0.9920 - loss: 0.0270 - val_accuracy: 0.9902 - val_loss: 0.0358
Test accuracy: 0.9893


### 2.4 預測與範例輸出

In [17]:
import numpy as np

# Pick 5 distinct test images at random (drawn from NumPy's global RNG).
idx = np.random.choice(len(x_test), size=5, replace=False)
x_sample = x_test[idx]
y_true = y_test[idx]

# The model ends in a softmax layer, so each row holds per-class
# probabilities; argmax along axis 1 gives the predicted digit.
y_pred_prob = model.predict(x_sample)
y_pred = np.argmax(y_pred_prob, axis=1)

print("真實標籤：", y_true)
print("預測結果：", y_pred)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
真實標籤： [6 5 4 2 9]
預測結果： [6 5 4 2 9]
