In [None]:
# 1) 数据加载与向量化
import os, numpy as np
from pathlib import Path
from tensorflow import keras

# Vocabulary cap; also used as the width of the multi-hot input vectors.
NUM_WORDS = 10_000
# Optional offline copy of the dataset, resolved against the working directory.
LOCAL_CACHE = Path("./reuters.npz")

def load_reuters(num_words=NUM_WORDS):
    """Load the Reuters dataset from the local cache if present, else via Keras.

    Args:
        num_words: Vocabulary cap. Forwarded to Keras on the online path. On the
            cache path, word indices >= num_words are replaced by index 2 (the
            Keras default ``oov_char``) so that both paths honour the argument.
            ``None`` keeps every index.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. From the cache, the x values
        are lists of int lists and the y values are the arrays stored in the
        file; online, whatever ``keras.datasets.reuters.load_data`` returns.

    Raises:
        KeyError: If the cache file lacks any of the four expected arrays.
    """
    if not LOCAL_CACHE.exists():
        print("未找到离线 reuters.npz，尝试通过 Keras 在线下载（需要联网）...")
        return keras.datasets.reuters.load_data(num_words=num_words)

    print(f"检测到离线数据文件: {LOCAL_CACHE.resolve()}")
    expected_keys = ("x_train", "y_train", "x_test", "y_test")
    # NOTE(security): allow_pickle=True is needed to read ragged object arrays,
    # but unpickling can execute arbitrary code; only use a cache file you trust.
    with np.load(LOCAL_CACHE, allow_pickle=True) as f:
        # Fail with an actionable message instead of an opaque archive lookup error.
        missing = [key for key in expected_keys if key not in f.files]
        if missing:
            raise KeyError(
                f"{LOCAL_CACHE} is missing arrays {missing}; "
                f"expected {list(expected_keys)}, found {list(f.files)}"
            )
        x_train, y_train = f["x_train"], f["y_train"]
        x_test, y_test = f["x_test"], f["y_test"]

    # Assumes the cache uses the same index convention as Keras' loader
    # (TODO confirm for hand-made caches); 2 is Keras' default oov_char.
    oov_index = 2

    def _normalize(seq):
        # Stored sequences may be lists or arrays; coerce to plain Python ints.
        ids = np.array(seq, dtype="int64").tolist()
        if num_words is None:
            return ids
        # Mirror the online path: cap the vocabulary rather than ignoring num_words.
        return [w if w < num_words else oov_index for w in ids]

    x_train = [_normalize(seq) for seq in x_train]
    x_test = [_normalize(seq) for seq in x_test]
    return (x_train, y_train), (x_test, y_test)

def vectorize(seqs, dim=NUM_WORDS):
    """Multi-hot encode integer sequences into a dense float32 matrix.

    Args:
        seqs: Sized collection of iterables of word indices.
        dim: Number of columns in the output.

    Returns:
        Array of shape ``(len(seqs), dim)`` where row ``i`` holds 1.0 in every
        column whose index occurs in ``seqs[i]`` and 0.0 elsewhere. Indices
        outside ``[0, dim)`` are dropped rather than raising.
    """
    encoded = np.zeros((len(seqs), dim), dtype="float32")
    # Iterating the matrix yields row views, so writes land in `encoded`.
    for row, tokens in zip(encoded, seqs):
        in_range = [tok for tok in tokens if 0 <= tok < dim]
        row[in_range] = 1.0
    return encoded

# Fetch the raw index sequences, then multi-hot encode both splits.
(x_train_seq, y_train), (x_test_seq, y_test) = load_reuters(NUM_WORDS)
x_train, x_test = (
    vectorize(split, NUM_WORDS) for split in (x_train_seq, x_test_seq)
)

# Class count = largest label id seen in either split, plus one
# (assumes label ids are 0-based — TODO confirm).
num_classes = int(max(y_train.max(), y_test.max())) + 1
print("训练/测试样本与类别数：", x_train.shape, x_test.shape, num_classes)

# One-hot labels (the sparse alternative is sparse_categorical_crossentropy
# with the integer labels).
y_train_oh, y_test_oh = (
    keras.utils.to_categorical(labels, num_classes=num_classes)
    for labels in (y_train, y_test)
)

# Hold out the first 1000 training rows for validation; the rest is used to fit.
x_val, y_val = x_train[:1000], y_train_oh[:1000]
x_tr, y_tr = x_train[1000:], y_train_oh[1000:]

In [None]:
# 2) 模型构建
from tensorflow.keras import layers, regularizers

def build_model(units=64, hidden_layers=2, lr=1e-3, dropout=0.2, l2=0.0, input_dim=NUM_WORDS, num_classes=46):
    """Build and compile a fully connected softmax classifier.

    Args:
        units: Width of each hidden Dense layer.
        hidden_layers: Number of hidden Dense (+ optional Dropout) stages.
        lr: Learning rate passed to RMSprop.
        dropout: Dropout rate after each hidden layer; no Dropout layer is
            added when this is not positive.
        l2: L2 kernel penalty for hidden layers; no regularizer when not positive.
        input_dim: Length of each input vector.
        num_classes: Width of the softmax output layer.

    Returns:
        A compiled ``keras.Sequential`` model using categorical cross-entropy
        loss and the accuracy metric.
    """
    kernel_reg = None
    if l2 > 0:
        kernel_reg = regularizers.l2(l2)

    # Assemble the layer stack first, then hand it to Sequential in one go.
    stack = [layers.Input(shape=(input_dim,))]
    for _ in range(hidden_layers):
        stack.append(layers.Dense(units, activation="relu", kernel_regularizer=kernel_reg))
        if dropout > 0:
            stack.append(layers.Dropout(dropout))
    stack.append(layers.Dense(num_classes, activation="softmax"))

    net = keras.Sequential(stack)
    net.compile(
        optimizer=keras.optimizers.RMSprop(learning_rate=lr),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net

# Baseline network; the output width is read off the one-hot label matrix.
model = build_model(
    units=64,
    hidden_layers=2,
    lr=1e-3,
    dropout=0.2,
    l2=0.0,
    input_dim=NUM_WORDS,
    num_classes=y_train_oh.shape[1],
)
model.summary()

In [None]:
# 3) 回调：早停、保存最佳、降学习率、TensorBoard
import time
# One TensorBoard directory per run, keyed by the current Unix time in seconds.
log_dir = f"logs_ann1/{int(time.time())}"

# Stop once val_loss has not improved for 3 epochs and roll back to the best weights.
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=3, restore_best_weights=True
)
# Keep only the checkpoint with the lowest val_loss.
checkpoint = keras.callbacks.ModelCheckpoint(
    "ann1_best.keras", monitor="val_loss", save_best_only=True
)
# Halve the learning rate after 2 epochs without val_loss improvement, floored at 1e-6.
lr_decay = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.5, patience=2, min_lr=1e-6
)
# Write TensorBoard logs, including histograms every epoch.
tensorboard = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

callbacks = [early_stop, checkpoint, lr_decay, tensorboard]
print("TensorBoard 日志目录：", log_dir)

In [None]:
# 4) 训练
# Fit on the training portion, validating on the held-out rows each epoch;
# the callbacks may end training before the 30-epoch cap.
fit_options = dict(
    validation_data=(x_val, y_val),
    epochs=30,
    batch_size=512,
    callbacks=callbacks,
    verbose=2,
)
history = model.fit(x_tr, y_tr, **fit_options)

# 可视化训练曲线
import matplotlib.pyplot as plt
hist = history.history
print("History keys:", hist.keys())

# One figure per metric family; each entry is (title, ((history key, legend label), ...)).
curve_specs = (
    ("Loss", (("loss", "train_loss"), ("val_loss", "val_loss"))),
    ("Accuracy", (("accuracy", "train_acc"), ("val_accuracy", "val_acc"))),
)
for title, curves in curve_specs:
    plt.figure()
    for key, label in curves:
        # A missing key plots as an empty series instead of raising.
        plt.plot(hist.get(key, []), label=label)
    plt.title(title)
    plt.legend()
    plt.show()

In [None]:
# 5) 在训练+验证上短轮次重训，然后测试
# Fresh model trained on the full training set (validation rows included)
# for a fixed 9 epochs, then scored once on the test split.
final_model = build_model(
    units=64,
    hidden_layers=2,
    lr=1e-3,
    dropout=0.2,
    input_dim=NUM_WORDS,
    num_classes=y_train_oh.shape[1],
)
final_model.fit(x=x_train, y=y_train_oh, epochs=9, batch_size=512, verbose=2)
test_loss, test_acc = final_model.evaluate(x=x_test, y=y_test_oh, verbose=0)
print(f"[FINAL] test_acc={test_acc:.4f}, test_loss={test_loss:.4f}")

In [None]:
# 6) 评价：分类报告与混淆矩阵
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np, matplotlib.pyplot as plt

# Collapse one-hot targets and softmax outputs to integer class ids.
y_true = np.argmax(y_test_oh, axis=1)
y_pred = final_model.predict(x_test, verbose=0).argmax(axis=1)
print(classification_report(y_true, y_pred, digits=3))

# Confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(8, 8))
plt.imshow(cm, aspect="auto")
plt.title("Confusion Matrix")
plt.colorbar()
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()