## 安装依赖并保存

In [None]:
%pip install seaborn
%pip install tensorflow
%pip freeze > requirements.txt

## 任务名称​：基于多种经典卷积神经网络架构的MNIST手写数字识别对比研究

In [5]:
import tensorflow as tf 
from tensorflow.keras import layers, models # 无法解析导入tensorflow.keras
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from time import time
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix

# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# 加载MNIST数据集
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

# 数据预处理
def preprocess_data(images, labels, img_size=28):
    """统一数据预处理函数"""
    images = images.reshape((-1, img_size, img_size, 1)).astype('float32') / 255.0
    return images, labels

# 调整图像尺寸函数（用于适应不同网络输入）
def resize_images(images, target_size):
    """调整图像尺寸以适应不同网络输入要求"""
    if images.shape[1:3] == (target_size, target_size):
        return images
    
    resized_images = tf.image.resize(images, [target_size, target_size])
    return resized_images.numpy()

# 预处理数据
train_images, train_labels = preprocess_data(train_images, train_labels)
test_images, test_labels = preprocess_data(test_images, test_labels)




# 2. 经典CNN模型实现
class ClassicCNNModels:
    """经典CNN模型实现类"""
    
    @staticmethod
    def simple_cnn(input_shape=(28, 28, 1)):
        """简单CNN模型（基准模型）"""
        model = models.Sequential([
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(10, activation='softmax')
        ], name='SimpleCNN')
        return model
    
    @staticmethod
    def alexnet(input_shape=(28, 28, 1)):
        """适应MNIST的简化AlexNet[7](@ref)"""
        # 调整输入尺寸
        if input_shape[0] < 32:
            input_shape = (32, 32, 1)
            
        model = models.Sequential([
            # 第一个卷积块
            layers.Conv2D(96, (11, 11), strides=4, activation='relu', 
                         input_shape=input_shape, padding='same'),
            layers.MaxPooling2D((3, 3), strides=2),
            layers.BatchNormalization(),
            
            # 第二个卷积块
            layers.Conv2D(256, (5, 5), activation='relu', padding='same'),
            layers.MaxPooling2D((3, 3), strides=2),
            layers.BatchNormalization(),
            
            # 第三到第五个卷积块
            layers.Conv2D(384, (3, 3), activation='relu', padding='same'),
            layers.Conv2D(384, (3, 3), activation='relu', padding='same'),
            layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D((3, 3), strides=2),
            
            # 全连接层
            layers.Flatten(),
            layers.Dense(4096, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(4096, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(10, activation='softmax')
        ], name='AlexNet')
        return model
    
    @staticmethod
    def vgg16(input_shape=(28, 28, 1)):
        """适应MNIST的简化VGG16[4,7](@ref)"""
        # 调整输入尺寸
        if input_shape[0] < 32:
            input_shape = (32, 32, 1)
            
        model = models.Sequential([
            # 第一个卷积块（2层卷积）
            layers.Conv2D(64, (3, 3), activation='relu', padding='same', 
                         input_shape=input_shape),
            layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D((2, 2), strides=2),
            
            # 第二个卷积块（2层卷积）
            layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
            layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D((2, 2), strides=2),
            
            # 第三到第五个卷积块（适应MNIST尺寸，减少层数）
            layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
            layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D((2, 2), strides=2),
            
            # 全连接层
            layers.Flatten(),
            layers.Dense(512, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(512, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(10, activation='softmax')
        ], name='VGG16')
        return model
    
    @staticmethod
    def lenet5(input_shape=(28, 28, 1)):
        """经典LeNet-5架构[7](@ref)"""
        model = models.Sequential([
            layers.Conv2D(6, (5, 5), activation='relu', input_shape=input_shape),
            layers.AveragePooling2D((2, 2)),
            layers.Conv2D(16, (5, 5), activation='relu'),
            layers.AveragePooling2D((2, 2)),
            layers.Flatten(),
            layers.Dense(120, activation='relu'),
            layers.Dense(84, activation='relu'),
            layers.Dense(10, activation='softmax')
        ], name='LeNet5')
        return model


# 3. 模型训练与评估框架
class ModelTrainer:
    """模型训练和评估类"""
    
    def __init__(self, models_dict, train_data, test_data, epochs=10):
        self.models = models_dict
        self.train_images, self.train_labels = train_data
        self.test_images, self.test_labels = test_data
        self.epochs = epochs
        self.histories = {}
        self.metrics = {}
        
    def train_models(self):
        """训练所有模型"""
        for name, model in self.models.items():
            print(f"\n=== 训练 {name} 模型 ===")
            
            # 调整输入尺寸
            input_size = model.input_shape[1]
            train_imgs = resize_images(self.train_images, input_size)
            test_imgs = resize_images(self.test_images, input_size)
            
            # 编译模型
            model.compile(optimizer='adam',
                         loss='sparse_categorical_crossentropy',
                         metrics=['accuracy'])
            
            # 添加回调函数
            callbacks = [
                tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True),
                tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=2)
            ]
            
            # 训练模型
            start_time = time()
            history = model.fit(train_imgs, self.train_labels,
                              epochs=self.epochs,
                              batch_size=128,
                              validation_data=(test_imgs, self.test_labels),
                              callbacks=callbacks,
                              verbose=1)
            training_time = time() - start_time
            
            # 评估模型
            test_loss, test_accuracy = model.evaluate(test_imgs, self.test_labels, verbose=0)
            
            # 保存结果
            self.histories[name] = history.history
            self.metrics[name] = {
                'test_accuracy': test_accuracy,
                'test_loss': test_loss,
                'training_time': training_time,
                'params': model.count_params(),
                'predictions': model.predict(test_imgs)
            }
            
            print(f"{name} - 测试准确率: {test_accuracy:.4f}, 训练时间: {training_time:.2f}秒")
    
    def get_comparison_dataframe(self):
        """生成模型对比DataFrame"""
        comparison_data = []
        for name, metrics in self.metrics.items():
            comparison_data.append({
                'Model': name,
                'Test Accuracy': metrics['test_accuracy'],
                'Test Loss': metrics['test_loss'],
                'Training Time (s)': metrics['training_time'],
                'Parameters': metrics['params'],
                'Parameters (M)': metrics['params'] / 1e6
            })
        return pd.DataFrame(comparison_data)

# 4. 执行模型训练
# 创建模型实例
input_shape = (28, 28, 1)

models_dict = {
    'SimpleCNN': ClassicCNNModels.simple_cnn(input_shape),
    'LeNet5': ClassicCNNModels.lenet5(input_shape),
    'AlexNet': ClassicCNNModels.alexnet(input_shape),
    'VGG16': ClassicCNNModels.vgg16(input_shape)
}

# 显示模型结构
for name, model in models_dict.items():
    print(f"\n{name} 模型结构:")
    model.summary()

# 训练所有模型
trainer = ModelTrainer(models_dict, 
                      (train_images, train_labels), 
                      (test_images, test_labels), 
                      epochs=10)

trainer.train_models()

# 获取对比数据
comparison_df = trainer.get_comparison_dataframe()
print("\n模型性能对比:")
print(comparison_df.to_string(index=False))


# 5. 综合可视化分析
def plot_comprehensive_comparison(trainer, comparison_df):
    """绘制综合对比图表"""
    fig = plt.figure(figsize=(20, 15))
    
    # 1. 准确率对比柱状图
    plt.subplot(3, 3, 1)
    sns.barplot(x='Model', y='Test Accuracy', data=comparison_df, palette='viridis')
    plt.title('模型测试准确率对比[6](@ref)', fontsize=14, fontweight='bold')
    plt.xticks(rotation=45)
    plt.ylabel('准确率')
    
    # 2. 参数量与准确率关系散点图
    plt.subplot(3, 3, 2)
    scatter = plt.scatter(comparison_df['Parameters (M)'], comparison_df['Test Accuracy'], 
                        s=200, c=comparison_df['Training Time (s)'], cmap='plasma', alpha=0.7)
    for i, row in comparison_df.iterrows():
        plt.annotate(row['Model'], (row['Parameters (M)'], row['Test Accuracy']), 
                    xytext=(5, 5), textcoords='offset points')
    plt.colorbar(scatter, label='训练时间 (秒)')
    plt.xlabel('参数量 (百万)')
    plt.ylabel('测试准确率')
    plt.title('参数量 vs 准确率 vs 训练时间[6](@ref)', fontsize=14, fontweight='bold')
    
    # 3. 训练时间对比
    plt.subplot(3, 3, 3)
    sns.barplot(x='Model', y='Training Time (s)', data=comparison_df, palette='coolwarm')
    plt.title('模型训练时间对比', fontsize=14, fontweight='bold')
    plt.xticks(rotation=45)
    plt.ylabel('时间 (秒)')
    
    # 4. 训练过程准确率曲线
    plt.subplot(3, 3, 4)
    for name, history in trainer.histories.items():
        plt.plot(history['accuracy'], label=f'{name} - 训练', linewidth=2)
        plt.plot(history['val_accuracy'], '--', label=f'{name} - 验证', linewidth=2)
    plt.title('训练和验证准确率曲线[10](@ref)', fontsize=14, fontweight='bold')
    plt.xlabel('Epoch')
    plt.ylabel('准确率')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    
    # 5. 训练过程损失曲线
    plt.subplot(3, 3, 5)
    for name, history in trainer.histories.items():
        plt.plot(history['loss'], label=f'{name} - 训练损失', linewidth=2)
        plt.plot(history['val_loss'], '--', label=f'{name} - 验证损失', linewidth=2)
    plt.title('训练和验证损失曲线[10](@ref)', fontsize=14, fontweight='bold')
    plt.xlabel('Epoch')
    plt.ylabel('损失')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    
    # 6. 模型效率雷达图
    plt.subplot(3, 3, 6)
    # 归一化指标进行雷达图展示
    normalized_df = comparison_df.copy()
    for col in ['Test Accuracy', 'Training Time (s)', 'Parameters (M)']:
        if col == 'Training Time (s)':
            # 训练时间越短越好，所以取倒数
            normalized_df[col] = 1 / normalized_df[col]
        normalized_df[col] = (normalized_df[col] - normalized_df[col].min()) / \
                            (normalized_df[col].max() - normalized_df[col].min())
    
    categories = ['准确率', '训练效率', '参数效率']
    angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False).tolist()
    angles += angles[:1]  # 闭合雷达图
    
    for i, row in normalized_df.iterrows():
        values = [row['Test Accuracy'], row['Training Time (s)'], 
                1 - row['Parameters (M)']]  # 参数越少越好 
        values += values[:1]
        plt.polar(angles, values, 'o-', linewidth=2, label=row['Model'])
    
    plt.thetagrids(np.degrees(angles[:-1]), categories)
    plt.title('模型性能雷达图（归一化）', fontsize=14, fontweight='bold')
    plt.legend(bbox_to_anchor=(1.2, 1), loc='upper left')
    
    # 7. 混淆矩阵示例（显示最佳模型的混淆矩阵）
    plt.subplot(3, 3, 7)
    best_model_name = comparison_df.loc[comparison_df['Test Accuracy'].idxmax(), 'Model']
    best_model = trainer.models[best_model_name]
    best_predictions = np.argmax(trainer.metrics[best_model_name]['predictions'], axis=1)
    
    cm = confusion_matrix(test_labels, best_predictions)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'{best_model_name} 混淆矩阵\n(最佳模型)', fontsize=14, fontweight='bold')
    plt.ylabel('实际标签')
    plt.xlabel('预测标签')
    
    # 8. 各模型参数量分布
    plt.subplot(3, 3, 8)
    plt.pie(comparison_df['Parameters'], labels=comparison_df['Model'], autopct='%1.1f%%')
    plt.title('各模型参数量分布', fontsize=14, fontweight='bold')
    
    # 9. 性能综合评分
    plt.subplot(3, 3, 9)
    # 计算综合评分（准确率权重0.6，训练效率0.2，参数效率0.2）
    comparison_df['综合评分'] = (comparison_df['Test Accuracy'] * 0.6 + 
                              (1/comparison_df['Training Time (s)']) * 0.2 +
                              (1/comparison_df['Parameters (M)']) * 0.2)
    sns.barplot(x='Model', y='综合评分', data=comparison_df, palette='Set2')
    plt.title('模型综合性能评分', fontsize=14, fontweight='bold')
    plt.xticks(rotation=45)
    
    plt.tight_layout()
    plt.show()
    
    return comparison_df

# 执行可视化
comparison_result = plot_comprehensive_comparison(trainer, comparison_df)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



SimpleCNN 模型结构:



LeNet5 模型结构:



AlexNet 模型结构:



VGG16 模型结构:



=== 训练 SimpleCNN 模型 ===
Epoch 1/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - accuracy: 0.9307 - loss: 0.2374 - val_accuracy: 0.9796 - val_loss: 0.0701 - learning_rate: 0.0010
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.9811 - loss: 0.0628 - val_accuracy: 0.9844 - val_loss: 0.0486 - learning_rate: 0.0010
Epoch 3/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.9862 - loss: 0.0454 - val_accuracy: 0.9870 - val_loss: 0.0394 - learning_rate: 0.0010
Epoch 4/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.9900 - loss: 0.0341 - val_accuracy: 0.9880 - val_loss: 0.0352 - learning_rate: 0.0010
Epoch 5/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 33ms/step - accuracy: 0.9914 - loss: 0.0276 - val_accuracy: 0.9889 - val_loss: 0.0328 - learning_rate: 0.0010
Epoch 6/10
[1m469/469[0m [32m━━━━━━━

ValueError: Exception encountered when calling MaxPooling2D.call().

[1mNegative dimension size caused by subtracting 3 from 1 for '{{node AlexNet_1/max_pooling2d_4_1/MaxPool2d}} = MaxPool[T=DT_FLOAT, data_format="NHWC", explicit_paddings=[], ksize=[1, 3, 3, 1], padding="VALID", strides=[1, 2, 2, 1]](AlexNet_1/conv2d_8_1/Relu)' with input shapes: [?,1,1,256].[0m

Arguments received by MaxPooling2D.call():
  • inputs=tf.Tensor(shape=(None, 1, 1, 256), dtype=float32)