In [None]:
import os
import shutil
import math
import random
import json

import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

import tensorflow as tf
print("TensorFlow version:", tf.__version__)

from keras.preprocessing.image import ImageDataGenerator
from keras import models, layers, optimizers
from keras.applications import VGG16, ResNet50
from keras.applications.vgg16 import preprocess_input as vgg16_preprocess
from keras.applications.resnet50 import preprocess_input as resnet50_preprocess

print("GPU devices:", tf.config.list_physical_devices("GPU"))


In [None]:
# Root directory for the raw downloaded images (one sub-folder per class name).
RAW_DATA_DIR = "data_raw"

# Target root directory for the train / validation / test splits
BASE_DIR = "data_small"
TRAIN_DIR = os.path.join(BASE_DIR, "train")
VAL_DIR   = os.path.join(BASE_DIR, "validation")
TEST_DIR  = os.path.join(BASE_DIR, "test")

# Image-search keywords, one per class; edit this list to change the classification topic.
RAW_CLASSES_KEYWORDS = ["husky dog", "ragdoll cat", "golden retriever"]

# Build a folder-safe class name from a search keyword
def keyword_to_classname(keyword: str) -> str:
    """Turn a search keyword into a lowercase, underscore-separated name.

    Surrounding whitespace is stripped, every non-alphanumeric character is
    replaced by ``_``, leading/trailing underscores are removed and the
    result is lower-cased.
    """
    sanitized_chars = []
    for ch in keyword.strip():
        if ch.isalnum():
            sanitized_chars.append(ch)
        else:
            sanitized_chars.append("_")
    joined = "".join(sanitized_chars)
    return joined.strip("_").lower()

# Folder-safe class names, derived from the keywords in the same order.
CLASSES = [keyword_to_classname(kw) for kw in RAW_CLASSES_KEYWORDS]

# Number of images requested from the downloader for each class
N_PER_CLASS = 80

# Dataset split ratios
# NOTE(review): TEST_RATIO is not read anywhere in the visible code; the test
# split receives whatever remains after the train and validation shares.
TRAIN_RATIO = 0.6
VAL_RATIO   = 0.2
TEST_RATIO  = 0.2

# Image size fed to the networks
IMG_HEIGHT = 150
IMG_WIDTH  = 150
IMG_CHANNELS = 3
INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

# Training hyper-parameters
BATCH_SIZE = 32
EPOCHS_BASELINE  = 30   # baseline CNN
EPOCHS_AUG       = 60   # CNN with data augmentation
EPOCHS_TRANSFER  = 30   # transfer learning (VGG16 / ResNet base stage)
EPOCHS_FINETUNE  = 60   # fine-tuning stage

NUM_CLASSES = len(CLASSES)
print("关键词：", RAW_CLASSES_KEYWORDS)
print("类别名：", CLASSES)
print("目标类别数:", NUM_CLASSES)

In [None]:

def count_images_in_dir(directory, classes):
    """Print the number of image files per class folder and return the total.

    Args:
        directory: Split root that contains one sub-folder per class.
        classes: Iterable of class (sub-folder) names to inspect.

    Returns:
        Total count of files ending in .jpg/.jpeg/.png/.bmp (case-insensitive)
        across all class folders that exist; missing folders are reported
        and contribute nothing.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".bmp")
    split_name = os.path.basename(directory)
    per_class_counts = []

    for cls in classes:
        class_path = os.path.join(directory, cls)
        if os.path.exists(class_path):
            matched = sum(
                1
                for entry in os.listdir(class_path)
                if entry.lower().endswith(image_suffixes)
            )
            print(f"[{split_name}] 类别 {cls}: {matched} 张")
            per_class_counts.append(matched)
        else:
            print(f"[{split_name}] 类别 {cls}: 目录不存在")

    grand_total = sum(per_class_counts)
    print(f"总计：{grand_total} 张\n")
    return grand_total


def plot_training_curves(history, title_prefix=""):
    """Plot training / validation accuracy and loss curves in two figures.

    Args:
        history: Object with a ``history`` dict mapping metric names to
            per-epoch lists (the value returned by ``model.fit``). Accuracy is
            read from ``"acc"`` or, failing that, ``"accuracy"``; validation
            accuracy from ``"val_acc"`` or ``"val_accuracy"``.
        title_prefix: Text placed in front of each figure title.

    Raises:
        ValueError: If the history holds no training accuracy or no training
            loss, since there would be nothing to plot.
    """
    metrics = history.history
    acc = metrics.get("acc") or metrics.get("accuracy")
    val_acc = metrics.get("val_acc") or metrics.get("val_accuracy")
    loss = metrics.get("loss")
    val_loss = metrics.get("val_loss")

    if not acc or not loss:
        raise ValueError(
            "history must contain 'loss' and either 'acc' or 'accuracy'; "
            f"found keys: {sorted(metrics)}"
        )

    epochs = range(1, len(acc) + 1)
    panels = (
        ("accuracy", "acc", acc, val_acc),
        ("loss", "loss", loss, val_loss),
    )

    for metric_name, short_name, train_values, val_values in panels:
        plt.figure()
        plt.plot(epochs, train_values, "bo", label=f"Training {short_name}")
        # Validation metrics are absent when fit() ran without validation
        # data; draw only the training curve in that case instead of failing.
        if val_values:
            plt.plot(epochs, val_values, "b", label=f"Validation {short_name}")
        plt.title(f"{title_prefix} Training and validation {metric_name}")
        plt.xlabel("Epoch")
        plt.ylabel(metric_name.capitalize())
        plt.legend()

    plt.show()


def build_simple_cnn(input_shape, num_classes):
    """Build and compile the small from-scratch CNN used in this notebook.

    The network stacks three Conv2D(3x3, relu) + MaxPooling2D(2x2) stages
    with 32, 64 and 128 filters, then Flatten, Dense(256, relu) and a
    softmax layer with ``num_classes`` units. It is compiled with
    categorical cross-entropy, RMSprop(learning_rate=1e-4) and the "acc"
    metric.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Conv2D layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled Sequential model.
    """
    network = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])

    rmsprop = optimizers.RMSprop(learning_rate=1e-4)
    network.compile(
        loss="categorical_crossentropy",
        optimizer=rmsprop,
        metrics=["acc"]
    )
    return network


## 一、数据准备

### 1. 自动爬取图像数据集（作业 2-(1)）

> 要求：自选一个图像分类主题，爬取 **≥ 3 类** 图片，每类 **≥ 50 张**。  

下面给出一个**可直接运行的爬虫实现**，基于第三方库 `simple_image_download`：

- 使用 `RAW_CLASSES_KEYWORDS` 里的关键词去搜索图片  
- 例如：`["husky dog", "ragdoll cat", "golden retriever"]`  
- 下载的图片（`.jpg` / `.png`）会自动整理到：`data_raw/<类别名>/` 目录下  

> 你需要做的只有两步：
> 1. 修改前面参数配置代码格中的 `RAW_CLASSES_KEYWORDS` 为你想要的类别关键词  
> 2. 运行下面这格代码（第一次运行前先 `pip install simple_image_download`）


In [None]:
from simple_image_download import simple_image_download as simp

def download_images_for_classes():
    """Download images for every keyword and collect them under RAW_DATA_DIR.

    For each (keyword, class name) pair the downloader is asked for
    N_PER_CLASS ``.jpg`` / ``.png`` images. Every image file found in the
    downloader's output folder is then copied to ``RAW_DATA_DIR/<class>/``
    with the class name prefixed to the file name. A per-class count of the
    raw images is printed at the end.

    A class whose download folder cannot be found is reported and skipped;
    the remaining classes are still processed.
    """
    os.makedirs(RAW_DATA_DIR, exist_ok=True)
    exts = (".jpg", ".jpeg", ".png", ".bmp")
    download_root = "simple_images"

    for keyword, cls in zip(RAW_CLASSES_KEYWORDS, CLASSES):
        print(f"=== 正在下载类别：{cls}（搜索关键词：{keyword}） ===")
        response = simp.simple_image_download()
        # Expected to download into ./simple_images/<keyword>/
        response.download(keyword, N_PER_CLASS, extensions=['.jpg', '.png'])

        dst_dir = os.path.join(RAW_DATA_DIR, cls)
        os.makedirs(dst_dir, exist_ok=True)

        expected_dir = os.path.join(download_root, keyword)
        # NOTE(review): some releases of simple_image_download appear to
        # replace spaces in the keyword with underscores when naming the
        # folder - confirm against the installed version. Checking both
        # spellings keeps multi-word keywords from silently yielding 0 images.
        underscored_dir = os.path.join(
            download_root, keyword.strip().replace(" ", "_")
        )
        src_dir = next(
            (path for path in (expected_dir, underscored_dir) if os.path.isdir(path)),
            None,
        )
        if src_dir is None:
            print(f"  [警告] 未找到目录 {expected_dir}，可能下载失败或关键词不匹配")
            continue

        n = 0
        for fname in os.listdir(src_dir):
            if not fname.lower().endswith(exts):
                continue
            # Prefix with the class name so files from different classes
            # can never collide.
            dst_path = os.path.join(dst_dir, f"{cls}_{fname}")
            shutil.copy(os.path.join(src_dir, fname), dst_path)
            n += 1

        print(f"  已复制 {n} 张图片到 {dst_dir}")

    print("\n全部类别下载与整理完成。")
    print("=== RAW_DATA_DIR 中图片数量统计 ===")
    count_images_in_dir(RAW_DATA_DIR, CLASSES)


# Run the download once. This calls the image downloader and copies the
# results into RAW_DATA_DIR, so it needs network access.
download_images_for_classes()


### 2. 划分训练 / 验证 / 测试集

将 `data_raw/<类别>` 中的图片划分为：  

- `data_small/train/<类别>`  
- `data_small/validation/<类别>`  
- `data_small/test/<类别>`  

默认按照 `TRAIN_RATIO : VAL_RATIO : TEST_RATIO = 0.6 : 0.2 : 0.2` 划分。


In [None]:
# ======================
# 4. 划分数据集并拷贝图片
# ======================

def _copy_images(filenames, src_dir, dst_dir):
    """Copy every file named in ``filenames`` from ``src_dir`` into ``dst_dir``."""
    for fname in filenames:
        shutil.copy(os.path.join(src_dir, fname), os.path.join(dst_dir, fname))


def prepare_dataset(seed=42):
    """Split the raw images of every class into train / validation / test folders.

    Images are read from ``RAW_DATA_DIR/<class>`` and copied into
    ``TRAIN_DIR``, ``VAL_DIR`` and ``TEST_DIR`` (one sub-folder per class).
    The train and validation sizes are ``int(total * ratio)``; the test split
    receives all remaining images, so no image is dropped by rounding.

    A class whose raw folder is missing is skipped with a warning. A class
    whose train folder already contains files is treated as already split
    and is left untouched.

    Args:
        seed: Seed for the per-class shuffle. With a fixed seed and an
            unchanged set of file names the split is identical on every run.
            Pass ``None`` for a different random split each time.
    """
    os.makedirs(RAW_DATA_DIR, exist_ok=True)

    # Create the split roots and their per-class sub-folders.
    for split_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR):
        os.makedirs(split_dir, exist_ok=True)
        for cls in CLASSES:
            os.makedirs(os.path.join(split_dir, cls), exist_ok=True)

    exts = (".jpg", ".jpeg", ".png", ".bmp")

    for cls in CLASSES:
        src_dir = os.path.join(RAW_DATA_DIR, cls)
        if not os.path.exists(src_dir):
            print(f"警告：{src_dir} 不存在，请确认爬虫是否成功下载该类图片。")
            continue

        # Files already present in the train folder mean this class was split
        # before; skip it so images are not copied twice.
        dst_train_cls_dir = os.path.join(TRAIN_DIR, cls)
        if len(os.listdir(dst_train_cls_dir)) > 0:
            print(f"{cls} 已经划分过数据集，如需重新划分请先手动清空 data_small 目录。")
            continue

        # os.listdir returns names in arbitrary order, so sort first; the
        # seeded shuffle then depends only on the file names and the seed.
        images = sorted(f for f in os.listdir(src_dir) if f.lower().endswith(exts))
        # A fresh generator per class keeps each class's split independent of
        # which other classes were skipped.
        random.Random(seed).shuffle(images)

        n_total = len(images)
        n_train = int(n_total * TRAIN_RATIO)
        n_val   = int(n_total * VAL_RATIO)
        n_test  = n_total - n_train - n_val

        print(f"{cls}: 总数 {n_total}, 训练 {n_train}, 验证 {n_val}, 测试 {n_test}")

        val_end = n_train + n_val
        _copy_images(images[:n_train], src_dir, os.path.join(TRAIN_DIR, cls))
        _copy_images(images[n_train:val_end], src_dir, os.path.join(VAL_DIR, cls))
        _copy_images(images[val_end:], src_dir, os.path.join(TEST_DIR, cls))

    print("数据划分完成。")


# Run the split once (classes that were already split are skipped inside).
prepare_dataset()

print("=== 统计划分后图片数量 ===")
# Count the images of each split, in train / validation / test order.
n_train, n_val, n_test = (
    count_images_in_dir(split_dir, CLASSES)
    for split_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR)
)

# Number of batches needed to see every image of a split once.
steps_per_epoch, val_steps, test_steps = (
    math.ceil(image_count / BATCH_SIZE)
    for image_count in (n_train, n_val, n_test)
)

print("steps_per_epoch:", steps_per_epoch)
print("val_steps:", val_steps)
print("test_steps:", test_steps)


## 二、基准 CNN 模型（作业 2-(2)）

本节在你爬取的数据集上，建立一个**不使用数据增强**的 CNN 基准模型，用于后续对比。


In [None]:
# ======================
# 5. Baseline CNN model (no data augmentation)
# ======================

# Three independent generators that only rescale pixel values by 1/255.
train_datagen_base, val_datagen_base, test_datagen_base = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

# Directory-iterator settings shared by all three splits.
base_flow_kwargs = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

train_generator_base = train_datagen_base.flow_from_directory(
    TRAIN_DIR, **base_flow_kwargs
)
val_generator_base = val_datagen_base.flow_from_directory(
    VAL_DIR, **base_flow_kwargs
)
# Shuffling is disabled for the test iterator.
test_generator_base = test_datagen_base.flow_from_directory(
    TEST_DIR, shuffle=False, **base_flow_kwargs
)

model_baseline = build_simple_cnn(INPUT_SHAPE, NUM_CLASSES)
model_baseline.summary()

history_baseline = model_baseline.fit(
    train_generator_base,
    epochs=EPOCHS_BASELINE,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator_base,
    validation_steps=val_steps,
)

plot_training_curves(history_baseline, title_prefix="[2] Baseline CNN")

# evaluate() yields the loss followed by the single compiled metric.
baseline_scores = model_baseline.evaluate(test_generator_base, steps=test_steps)
test_loss_baseline, test_acc_baseline = baseline_scores
print("Baseline CNN - 测试集准确率: {:.4f}".format(test_acc_baseline))


## 三、加入数据增强的 CNN（作业 2-(3)）

在基准 CNN 的基础上，对训练集使用随机旋转、平移、缩放、水平翻转等数据增强，以缓解过拟合。


In [None]:
# ======================
# 6. CNN model trained with data augmentation
# ======================

# Random transforms applied to the training images only.
aug_transform_kwargs = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)
train_datagen_aug = ImageDataGenerator(rescale=1./255, **aug_transform_kwargs)

# Validation and test images are only rescaled, never augmented.
val_datagen_aug, test_datagen_aug = (
    ImageDataGenerator(rescale=1./255) for _ in range(2)
)

# Directory-iterator settings shared by all three splits.
aug_flow_kwargs = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

train_generator_aug = train_datagen_aug.flow_from_directory(
    TRAIN_DIR, **aug_flow_kwargs
)
val_generator_aug = val_datagen_aug.flow_from_directory(
    VAL_DIR, **aug_flow_kwargs
)
# Shuffling is disabled for the test iterator.
test_generator_aug = test_datagen_aug.flow_from_directory(
    TEST_DIR, shuffle=False, **aug_flow_kwargs
)

# Same architecture as the baseline, trained from scratch on augmented data.
model_aug = build_simple_cnn(INPUT_SHAPE, NUM_CLASSES)
model_aug.summary()

history_aug = model_aug.fit(
    train_generator_aug,
    epochs=EPOCHS_AUG,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator_aug,
    validation_steps=val_steps,
)

plot_training_curves(history_aug, title_prefix="[3] CNN + Data Augmentation")

# evaluate() yields the loss followed by the single compiled metric.
aug_scores = model_aug.evaluate(test_generator_aug, steps=test_steps)
test_loss_aug, test_acc_aug = aug_scores
print("CNN + Data Augmentation - 测试集准确率: {:.4f}".format(test_acc_aug))


## 四、直接使用预训练 VGG16（冻结卷积基）（作业 2-(4)）

按 PPT 做法：

1. 载入在 ImageNet 上预训练好的 **VGG16 卷积基**（`include_top=False`）。  
2. **冻结卷积基参数**，只在其顶部添加新的全连接分类器，对自己的数据集进行训练。  
3. 使用 `ImageDataGenerator` + 数据增强进行端到端训练。


In [None]:
# ======================
# 7. VGG16 transfer learning (frozen convolutional base)
# ======================

# ImageNet-pretrained VGG16 without its dense classifier head.
conv_base_vgg = VGG16(weights="imagenet",
                      include_top=False,
                      input_shape=INPUT_SHAPE)
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
conv_base_vgg.summary()

# Freeze the base before compiling so only the new Dense head is trained.
conv_base_vgg.trainable = False

model_vgg_frozen = models.Sequential()
model_vgg_frozen.add(conv_base_vgg)
model_vgg_frozen.add(layers.Flatten())
model_vgg_frozen.add(layers.Dense(256, activation="relu"))
model_vgg_frozen.add(layers.Dropout(0.5))
model_vgg_frozen.add(layers.Dense(NUM_CLASSES, activation="softmax"))

model_vgg_frozen.compile(
    loss="categorical_crossentropy",
    optimizer=optimizers.RMSprop(learning_rate=2e-5),
    metrics=["acc"]
)
model_vgg_frozen.summary()

# NOTE(review): inputs are rescaled to [0, 1] here, while the ResNet50 cell
# uses the model-specific preprocess_input. vgg16_preprocess is imported but
# unused - confirm whether plain rescaling is intended for VGG16.
train_datagen_vgg = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest"
)
# Validation and test images are only rescaled, never augmented.
val_test_datagen_vgg = ImageDataGenerator(rescale=1./255)

train_generator_vgg = train_datagen_vgg.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

val_generator_vgg = val_test_datagen_vgg.flow_from_directory(
    VAL_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

# Shuffling is disabled for the test iterator.
test_generator_vgg = val_test_datagen_vgg.flow_from_directory(
    TEST_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)

history_vgg_frozen = model_vgg_frozen.fit(
    train_generator_vgg,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS_TRANSFER,
    validation_data=val_generator_vgg,
    validation_steps=val_steps
)

plot_training_curves(history_vgg_frozen, title_prefix="[4] VGG16 Frozen Conv Base")

test_loss_vgg_frozen, test_acc_vgg_frozen = model_vgg_frozen.evaluate(
    test_generator_vgg,
    steps=test_steps
)
print("VGG16 Frozen Conv Base - 测试集准确率: {:.4f}".format(test_acc_vgg_frozen))


## 五、VGG16 特征提取 + 全连接分类器（作业 2-(5)）

对应 PPT 中 **“卷积基用法 1：先提特征，再训练 Dense 分类器”**。


In [None]:
# ======================
# 8. Feature extraction with the VGG16 convolutional base
# ======================

# A separate ImageNet-pretrained VGG16 base used purely as a feature extractor.
conv_base_fe = VGG16(weights="imagenet",
                     include_top=False,
                     input_shape=INPUT_SHAPE)
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
conv_base_fe.summary()

# Only rescales pixel values; no augmentation is configured.
datagen_fe = ImageDataGenerator(rescale=1./255)

def extract_features(directory, sample_count, batch_size=BATCH_SIZE):
    """Run images through the VGG16 base and return its features and labels.

    Args:
        directory: Split root with one sub-folder per class.
        sample_count: Number of images to extract features for.
        batch_size: Number of images sent through the base per step.

    Returns:
        ``(features, labels)``. ``features`` is float32 with shape
        ``(sample_count, *conv_base_fe.output_shape[1:])``, which is
        ``(sample_count, 4, 4, 512)`` for the notebook's 150x150 inputs.
        ``labels`` is float32 one-hot with shape ``(sample_count, NUM_CLASSES)``.

    Raises:
        ValueError: If ``directory`` holds fewer images than ``sample_count``.
            Without this check the iterator would wrap around and produce
            duplicated or misaligned rows.
    """
    # Take the feature-map shape from the model instead of hard-coding
    # (4, 4, 512), which is only correct for 150x150 inputs.
    feature_shape = tuple(conv_base_fe.output_shape[1:])
    features = np.zeros(shape=(sample_count,) + feature_shape, dtype=np.float32)
    labels = np.zeros(shape=(sample_count, NUM_CLASSES), dtype=np.float32)

    generator = datagen_fe.flow_from_directory(
        directory,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=batch_size,
        class_mode="categorical",
        shuffle=False
    )

    if generator.samples < sample_count:
        raise ValueError(
            f"{directory} contains {generator.samples} images, "
            f"fewer than the requested sample_count={sample_count}"
        )
    if sample_count <= 0:
        return features, labels

    filled = 0
    for inputs_batch, labels_batch in generator:
        # verbose=0 avoids printing one progress bar per batch.
        features_batch = conv_base_fe.predict(inputs_batch, verbose=0)
        # Clip the final batch so a directory with more images than
        # sample_count cannot overrun the pre-allocated arrays.
        take = min(len(inputs_batch), sample_count - filled)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take
        # The directory iterator loops forever, so stop explicitly.
        if filled >= sample_count:
            break

    return features, labels

print("开始提取训练集特征...")
train_features, train_labels = extract_features(TRAIN_DIR, n_train, BATCH_SIZE)
print("开始提取验证集特征...")
val_features, val_labels = extract_features(VAL_DIR, n_val, BATCH_SIZE)
print("开始提取测试集特征...")
test_features, test_labels = extract_features(TEST_DIR, n_test, BATCH_SIZE)


def _flatten_feature_maps(feature_maps):
    """Reshape (samples, ...) feature maps into (samples, features) rows."""
    # Compute the row width from the array itself rather than hard-coding
    # 4 * 4 * 512, so a different input size in the config cell still works.
    # An explicit width (not -1) also works for an empty array.
    flat_width = int(np.prod(feature_maps.shape[1:]))
    return np.reshape(feature_maps, (len(feature_maps), flat_width))


train_features = _flatten_feature_maps(train_features)
val_features   = _flatten_feature_maps(val_features)
test_features  = _flatten_feature_maps(test_features)

# Dense classifier trained on the pre-computed features; its input width
# follows the flattened feature size.
model_fe = models.Sequential()
model_fe.add(layers.Dense(256, activation="relu", input_dim=train_features.shape[1]))
model_fe.add(layers.Dropout(0.5))
model_fe.add(layers.Dense(NUM_CLASSES, activation="softmax"))

model_fe.compile(
    optimizer=optimizers.RMSprop(learning_rate=2e-5),
    loss="categorical_crossentropy",
    metrics=["acc"]
)

history_fe = model_fe.fit(
    train_features, train_labels,
    epochs=EPOCHS_TRANSFER,
    batch_size=BATCH_SIZE,
    validation_data=(val_features, val_labels)
)

plot_training_curves(history_fe, title_prefix="[5] VGG16 Feature Extraction")

test_loss_fe, test_acc_fe = model_fe.evaluate(test_features, test_labels)
print("VGG16 Feature Extraction - 测试集准确率: {:.4f}".format(test_acc_fe))


## 六、微调 VGG16 顶部卷积层（作业 2-(6)）

在 (4) 的基础上进行**微调（fine-tuning）**：

1. 先使用冻结卷积基的模型训练好顶部 Dense 分类器；  
2. 再将 VGG16 的**顶部若干卷积层解冻**（例如从 `block5_conv1` 开始）；  
3. 用较小的学习率继续训练整个网络。


In [None]:
# ======================
# 9. Fine-tune the top convolutional block of VGG16
# ======================

# Re-enable training on the base, then walk its layers in order: every layer
# before "block5_conv1" is frozen, that layer and all later ones are trainable.
conv_base_vgg.trainable = True

set_trainable = False
for layer in conv_base_vgg.layers:
    if layer.name == "block5_conv1":
        set_trainable = True
    layer.trainable = set_trainable

# Recompile after changing the trainable flags, with a smaller learning rate
# than the frozen stage (1e-5 instead of 2e-5).
model_vgg_frozen.compile(
    loss="categorical_crossentropy",
    optimizer=optimizers.RMSprop(learning_rate=1e-5),
    metrics=["acc"]
)
model_vgg_frozen.summary()

# Continues training the same model object that was trained with a frozen
# base, reusing the VGG16 generators created for that stage.
history_vgg_finetune = model_vgg_frozen.fit(
    train_generator_vgg,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS_FINETUNE,
    validation_data=val_generator_vgg,
    validation_steps=val_steps
)

# The original closed this call with "]" instead of ")", which is a
# SyntaxError that prevented the whole cell from running.
plot_training_curves(history_vgg_finetune, title_prefix="[6] VGG16 Fine-tune Top Block")

test_loss_vgg_finetune, test_acc_vgg_finetune = model_vgg_frozen.evaluate(
    test_generator_vgg,
    steps=test_steps
)
print("VGG16 Fine-tuned - 测试集准确率: {:.4f}".format(test_acc_vgg_finetune))


## 七、更换预训练模型 ResNet50（作业 2-(7)）

将 VGG16 换成另外一个预训练模型 **ResNet50**，比较性能差异。


In [None]:
# ======================
# 10. ResNet50 pretrained model (frozen convolutional base)
# ======================

# ImageNet-pretrained ResNet50 without its classifier head.
conv_base_resnet = ResNet50(weights="imagenet",
                            include_top=False,
                            input_shape=INPUT_SHAPE)
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
conv_base_resnet.summary()

# Freeze the base before compiling so only the new head is trained.
conv_base_resnet.trainable = False

model_resnet = models.Sequential()
model_resnet.add(conv_base_resnet)
# Global average pooling is used here where the VGG16 models use Flatten.
model_resnet.add(layers.GlobalAveragePooling2D())
model_resnet.add(layers.Dense(256, activation="relu"))
model_resnet.add(layers.Dropout(0.5))
model_resnet.add(layers.Dense(NUM_CLASSES, activation="softmax"))

model_resnet.compile(
    loss="categorical_crossentropy",
    optimizer=optimizers.RMSprop(learning_rate=2e-5),
    metrics=["acc"]
)
model_resnet.summary()

# No 1/255 rescale here: pixel preparation is delegated to ResNet50's own
# preprocess_input via preprocessing_function.
train_datagen_resnet = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
    preprocessing_function=resnet50_preprocess
)
# Validation and test images get the same preprocessing but no augmentation.
val_test_datagen_resnet = ImageDataGenerator(
    preprocessing_function=resnet50_preprocess
)

train_generator_resnet = train_datagen_resnet.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

val_generator_resnet = val_test_datagen_resnet.flow_from_directory(
    VAL_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

# Shuffling is disabled for the test iterator.
test_generator_resnet = val_test_datagen_resnet.flow_from_directory(
    TEST_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)

history_resnet = model_resnet.fit(
    train_generator_resnet,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS_TRANSFER,
    validation_data=val_generator_resnet,
    validation_steps=val_steps
)

plot_training_curves(history_resnet, title_prefix="[7] ResNet50 Frozen Conv Base")

test_loss_resnet, test_acc_resnet = model_resnet.evaluate(
    test_generator_resnet,
    steps=test_steps
)
print("ResNet50 Frozen Conv Base - 测试集准确率: {:.4f}".format(test_acc_resnet))


## 八、结果汇总与性能分析（作业要求）

作业要求对 (2)~(7) 每一种方法进行**性能分析**，你可以：

1. 将所有模型在测试集上的准确率汇总成一张表；  
2. 简要讨论不同方法的优缺点，例如过拟合、训练速度、迁移学习提升幅度等。


In [None]:
# ======================
# 11. Collect the test accuracy of every method into a dict / table
# ======================

# (result key, name of the global variable holding that model's test accuracy)
_result_sources = (
    ("baseline_cnn", "test_acc_baseline"),
    ("aug_cnn", "test_acc_aug"),
    ("vgg16_frozen", "test_acc_vgg_frozen"),
    ("vgg16_feature_extract", "test_acc_fe"),
    ("vgg16_finetune", "test_acc_vgg_finetune"),
    ("resnet50_frozen", "test_acc_resnet"),
)

# A model whose cell has not been run has no accuracy variable yet; it is
# reported as None instead of raising NameError.
results = {
    result_key: (float(globals()[var_name]) if var_name in globals() else None)
    for result_key, var_name in _result_sources
}

print(json.dumps(results, indent=4, ensure_ascii=False))


### 你的实验结论（请自行补充）

> 下面是一个示例，请根据你实际跑出来的结果修改文字。

- 基准 CNN 在测试集上的准确率约为 XX%，存在明显过拟合现象（训练准确率远高于验证准确率）。  
- 引入数据增强后，验证准确率提升到 XX%，过拟合有所缓解。  
- 直接使用预训练 VGG16（冻结卷积基）后，测试准确率达到 XX%，相比从头训练的 CNN 有明显提升。  
- 使用 “先特征提取再训练 Dense 分类器” 的方式，性能为 XX%，但训练速度更快。  
- 微调 VGG16 顶部卷积层后，测试准确率进一步提升 / 下降到 XX%，分析可能的原因（例如学习率、数据量）。  
- 更换为 ResNet50 后，测试准确率为 XX%，与 VGG16 相比，表现略优 / 略差 / 相近，并给出自己的理解。  

最后总结：在小数据集场景下，**基于预训练模型的迁移学习 + 合理的数据增强 + 适度微调**，通常能显著优于从头训练的 CNN。
