In [11]:
import gzip
import os
import shutil
import tempfile
import urllib.request

import matplotlib.pyplot as plt
import mindspore
import mindspore.dataset as ds
import mindspore.dataset.transforms as transforms
import mindspore.dataset.vision as vision
import mindspore.nn as nn
import mindspore.ops as ops
import numpy as np
import requests
from mindspore import context, Tensor
from mindspore.train import Model
from mindspore.train.callback import LossMonitor, TimeMonitor, Callback
from sklearn.metrics import classification_report

# Report that every import in this cell completed without raising.
print("环境准备完毕")

环境准备完毕


In [12]:
# Select the execution mode (static graph) and the device the networks run on (CPU).
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

In [13]:
def download_and_extract(url, filename, extract_to):
    """Download a gzip archive if it is absent, then decompress it if not yet extracted.

    Each step writes to a temporary ``.part`` file and renames it into place
    only after the step succeeded. An interrupted download or a corrupt
    archive therefore never leaves a truncated ``filename`` / ``extract_to``
    behind that a later run would mistake for a finished file and skip.

    Args:
        url: Location of the ``.gz`` archive.
        filename: Local path where the downloaded archive is stored.
        extract_to: Local path of the decompressed output file.

    Raises:
        OSError: If the download fails or the archive is not valid gzip data.
        EOFError: If the archive ends before the gzip end-of-stream marker.
    """
    if not os.path.exists(filename):
        print(f'Downloading {filename}...')
        partial_download = filename + '.part'
        try:
            urllib.request.urlretrieve(url, partial_download)
            os.replace(partial_download, filename)
        finally:
            # After a successful rename the temp file is gone; after a failure drop it.
            if os.path.exists(partial_download):
                os.remove(partial_download)
        print(f'{filename} downloaded.')

    if not os.path.exists(extract_to):
        partial_extract = extract_to + '.part'
        try:
            with gzip.open(filename, 'rb') as f_in, open(partial_extract, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.replace(partial_extract, extract_to)
        finally:
            if os.path.exists(partial_extract):
                os.remove(partial_extract)
        print(f'{filename} extracted to {extract_to}.')

def create_fashion_mnist_csv(data_path):
    """Download Fashion-MNIST and convert it into two CSV files inside ``data_path``.

    The four IDX archives are fetched and decompressed with
    ``download_and_extract`` and then written as
    ``fashion-mnist_train.csv`` and ``fashion-mnist_test.csv``. Every CSV
    row is ``label,pixel0,...,pixel783`` and the first line is a header.

    Args:
        data_path: Directory that receives the archives, the extracted IDX
            files and the CSV files. It is created if it does not exist.

    Raises:
        ValueError: If an image file does not hold a whole number of 28x28
            images, or if an image file and its label file disagree on the
            number of samples.
    """
    base_url = 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/'
    # One table of file stems; archive name, URL and extracted name all derive from it.
    file_stems = {
        'train_images': 'train-images-idx3-ubyte',
        'train_labels': 'train-labels-idx1-ubyte',
        'test_images': 't10k-images-idx3-ubyte',
        'test_labels': 't10k-labels-idx1-ubyte',
    }

    # urlretrieve cannot create missing parent directories.
    os.makedirs(data_path, exist_ok=True)

    extract_paths = {}
    for key, stem in file_stems.items():
        extract_paths[key] = os.path.join(data_path, stem)
        download_and_extract(base_url + stem + '.gz', extract_paths[key] + '.gz', extract_paths[key])

    def read_images(file):
        """Return the pixel data as a float32 array of shape (count, 784)."""
        with open(file, 'rb') as f:
            f.read(16)  # skip the 16 leading bytes that precede the pixel data
            buf = f.read()
        data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
        return data.reshape(-1, 28 * 28)

    def read_labels(file):
        """Return the labels as a one-dimensional int32 array."""
        with open(file, 'rb') as f:
            f.read(8)  # skip the 8 leading bytes that precede the labels
            buf = f.read()
        return np.frombuffer(buf, dtype=np.uint8).astype(np.int32)

    def save_to_csv(images, labels, csv_file):
        """Write one header line followed by one ``label,pixels...`` line per sample."""
        if len(images) != len(labels):
            raise ValueError(
                f"{csv_file}: {len(images)} images but {len(labels)} labels")
        with open(csv_file, 'w') as f:
            f.write('label,' + ','.join(['pixel' + str(i) for i in range(28 * 28)]) + '\n')
            for label, pixels in zip(labels, images):
                f.write(str(label) + ',' + ','.join(map(str, pixels)) + '\n')
        print(f"Saved {len(images)} entries to {csv_file}")

    train_images = read_images(extract_paths['train_images'])
    train_labels = read_labels(extract_paths['train_labels'])
    test_images = read_images(extract_paths['test_images'])
    test_labels = read_labels(extract_paths['test_labels'])

    save_to_csv(train_images, train_labels, os.path.join(data_path, 'fashion-mnist_train.csv'))
    save_to_csv(test_images, test_labels, os.path.join(data_path, 'fashion-mnist_test.csv'))

def create_csv_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers=1, is_train=True):
    """Build the batched Fashion-MNIST pipeline from one of the generated CSV files.

    Every sample is turned into a float32 image with an explicit channel
    axis, resized to 32x32, rescaled by 1/255, normalized with mean 0.5 and
    std 0.5 and converted from HWC to CHW layout before batching.

    Args:
        data_path: Directory containing ``fashion-mnist_train.csv`` and
            ``fashion-mnist_test.csv``.
        batch_size: Number of samples per batch; incomplete batches are dropped.
        repeat_size: Number of times the batched dataset is repeated.
        num_parallel_workers: Accepted for interface compatibility; it is not
            forwarded to any dataset operation.
        is_train: Selects the training CSV (and shuffling) when True, the
            test CSV without shuffling otherwise.

    Returns:
        The dataset with columns ``image`` and ``label``.
    """
    csv_file = os.path.join(data_path, "fashion-mnist_train.csv" if is_train else "fashion-mnist_test.csv")
    column_names = ["label"] + ["pixel" + str(i) for i in range(784)]

    # Every column is read as a string and converted in `preprocess`.
    # NOTE(review): column_names is passed explicitly while the CSV also starts with a
    # header line - confirm that CSVDataset does not yield that header as a data row.
    dataset = ds.CSVDataset(csv_file, column_defaults=[str() for _ in column_names],
                            column_names=column_names, shuffle=is_train)

    # Data preprocessing
    def preprocess(*data):
        label = np.array(data[0]).astype(np.int32)
        # Keep the raw intensities here: the single 1/255 scaling is done by
        # vision.Rescale below (dividing here as well would scale twice).
        # The trailing axis is the channel, so HWC2CHW later yields (1, H, W)
        # and the convolutional networks receive 4-D (N, C, H, W) batches.
        image = np.array(data[1:]).astype(np.float32).reshape(28, 28, 1)
        return image, label

    dataset = dataset.map(operations=preprocess, input_columns=column_names,
                          output_columns=["image", "label"])
    dataset = dataset.project(["image", "label"])
    dataset = dataset.map(operations=vision.Resize((32, 32)), input_columns=["image"])
    dataset = dataset.map(operations=vision.Rescale(1.0 / 255.0, 0.0), input_columns=["image"])
    dataset = dataset.map(operations=vision.Normalize(mean=[0.5], std=[0.5]), input_columns=["image"])
    dataset = dataset.map(operations=vision.HWC2CHW(), input_columns=["image"])
    dataset = dataset.batch(batch_size, drop_remainder=True)
    dataset = dataset.repeat(repeat_size)

    return dataset

In [14]:
# Callback definitions
class SaveBestModel(Callback):
    """Evaluate after every epoch and checkpoint the network whenever the metric improves.

    Args:
        model: Model wrapper whose ``eval`` method is used for evaluation.
        eval_dataset: Dataset passed to ``model.eval``.
        eval_metric: Key of the metric to read from the evaluation result.
        model_dir: Directory that receives ``best.ckpt``; created on demand.
    """

    def __init__(self, model, eval_dataset, eval_metric, model_dir):
        # Initialise the Callback base class, as EpochLossMonitor does.
        super(SaveBestModel, self).__init__()
        self.model = model
        self.eval_dataset = eval_dataset
        self.eval_metric = eval_metric
        self.model_dir = model_dir
        self.best_acc = 0

    def epoch_end(self, run_context):
        """Run evaluation and save the training network if the metric beat the best so far."""
        cb_params = run_context.original_args()
        acc = self.model.eval(self.eval_dataset, dataset_sink_mode=False)[self.eval_metric]
        if acc > self.best_acc:
            self.best_acc = acc
            # exist_ok avoids the separate exists() check and its race window.
            os.makedirs(self.model_dir, exist_ok=True)
            mindspore.save_checkpoint(cb_params.train_network, os.path.join(self.model_dir, 'best.ckpt'))
            print(f"Model saved with {self.eval_metric}: {acc}")

class EpochLossMonitor(Callback):
    """Print the network output (the loss) at the end of every ``print_step``-th epoch."""

    def __init__(self, print_step=1):
        super().__init__()
        self.print_step = print_step

    def epoch_end(self, run_context):
        """Report the current epoch number and loss when the epoch is a multiple of print_step."""
        params = run_context.original_args()
        current_epoch = params.cur_epoch_num
        latest_loss = params.net_outputs
        if current_epoch % self.print_step != 0:
            return
        print(f"Epoch {current_epoch}, Loss: {latest_loss}")

#### 定义全连接神经网络

In [15]:
class FCNet(nn.Cell):
    """Fully connected classifier: flatten -> 1024-128 -> 128-64 -> 64-10 with ReLU in between."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Dense(in_channels=32 * 32, out_channels=128)
        self.fc2 = nn.Dense(in_channels=128, out_channels=64)
        self.fc3 = nn.Dense(in_channels=64, out_channels=10)
        self.relu = nn.ReLU()

    def construct(self, x):
        """Return the 10 class logits for a batch of inputs."""
        flat = self.flatten(x)
        hidden1 = self.relu(self.fc1(flat))
        hidden2 = self.relu(self.fc2(hidden1))
        logits = self.fc3(hidden2)
        return logits

#### 定义LeNet 

In [16]:
class LeNet5(nn.Cell):
    """LeNet-style network: two 5x5 valid convolutions with 2x2 max pooling, then three dense layers.

    Args:
        num_class: Number of output logits.
        num_channel: Number of channels of the input images.
    """

    def __init__(self, num_class=10, num_channel=1):
        super().__init__()
        self.conv1 = nn.Conv2d(num_channel, 6, kernel_size=5, pad_mode='valid')
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, pad_mode='valid')
        self.fc1 = nn.Dense(16 * 5 * 5, 120)
        self.fc2 = nn.Dense(120, 84)
        self.fc3 = nn.Dense(84, num_class)
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()

    def construct(self, x):
        """Return the class logits for a batch of images."""
        # Convolutional stage: conv -> pool -> ReLU, twice.
        stage1 = self.conv1(x)
        stage1 = self.max_pool2d(stage1)
        stage1 = self.relu(stage1)

        stage2 = self.conv2(stage1)
        stage2 = self.max_pool2d(stage2)
        stage2 = self.relu(stage2)

        # Dense stage.
        features = self.flatten(stage2)
        features = self.relu(self.fc1(features))
        features = self.relu(self.fc2(features))
        return self.fc3(features)

#### 定义AlexNet 

In [17]:
class AlexNet(nn.Cell):
    """Small AlexNet-style network for single-channel 32x32 images.

    Four 3x3 convolutions (each keeping the spatial size) and three 2x2 max
    pools reduce a 32x32 input to 256 feature maps of 4x4, which feed a
    three-layer classifier.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        # MindSpore's Conv2d defaults to pad_mode='same' and only accepts a
        # non-zero `padding` together with pad_mode='pad'.
        self.features = nn.SequentialCell([
            nn.Conv2d(1, 64, kernel_size=3, stride=1, pad_mode='pad', padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, pad_mode='pad', padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, pad_mode='pad', padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, pad_mode='pad', padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        ])
        self.flatten = nn.Flatten()
        self.classifier = nn.SequentialCell([
            nn.Dropout(),
            nn.Dense(256 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Dropout(),
            nn.Dense(1024, 1024),
            nn.ReLU(),
            nn.Dense(1024, num_classes)
        ])

    def construct(self, x):
        """Return the class logits for a batch of images."""
        x = self.features(x)
        x = self.flatten(x)
        x = self.classifier(x)
        return x

#### 定义VggNet网络 

In [18]:
class VGGNet(nn.Cell):
    """Reduced VGG-style network for single-channel 32x32 images with 10 output classes.

    Six 3x3 convolutions (each keeping the spatial size) are interleaved with
    four 2x2 max pools, so a 32x32 input leaves the feature extractor as 512
    maps of 2x2; the classifier input size is derived from that.
    """

    def __init__(self):
        super(VGGNet, self).__init__()
        # nn.ReLU takes no `inplace` argument in MindSpore, and Conv2d only
        # accepts a non-zero `padding` together with pad_mode='pad'.
        self.features = nn.SequentialCell([
            nn.Conv2d(1, 64, kernel_size=3, pad_mode='pad', padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, pad_mode='pad', padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, pad_mode='pad', padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, pad_mode='pad', padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(256, 512, kernel_size=3, pad_mode='pad', padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, pad_mode='pad', padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        ])
        self.flatten = nn.Flatten()
        self.classifier = nn.SequentialCell([
            # 32 / 2**4 = 2, so the flattened feature vector has 512 * 2 * 2 entries.
            nn.Dense(512 * 2 * 2, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Dense(4096, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Dense(4096, 10)
        ])

    def construct(self, x):
        """Return the class logits for a batch of images."""
        x = self.features(x)
        x = self.flatten(x)
        x = self.classifier(x)
        return x

#### 定义ResNet网络 

In [19]:
class BasicBlock(nn.Cell):
    """Residual block of two 3x3 convolutions with batch norm and a shortcut connection.

    The shortcut is the identity unless the stride or the channel count
    changes, in which case it is a strided 1x1 convolution followed by batch
    norm so that both branches have matching shapes.

    Args:
        in_planes: Number of input channels.
        planes: Number of channels produced by the block (times ``expansion``).
        stride: Stride of the first convolution and of the projection shortcut.
    """

    # Ratio between the block's output channels and `planes`.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        # Conv2d only accepts a non-zero `padding` together with pad_mode='pad'.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride,
                               pad_mode='pad', padding=1, has_bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
                               pad_mode='pad', padding=1, has_bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.SequentialCell()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.SequentialCell([
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, has_bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            ])
        # Built once here instead of on every construct() call.
        self.relu = nn.ReLU()

    def construct(self, x):
        """Return ReLU(residual branch + shortcut branch)."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = out + self.shortcut(x)
        out = self.relu(out)
        return out

# ResNet backbone (instantiated as ResNet18 by the factory function defined after it)
class ResNet(nn.Cell):
    """ResNet for single-channel images: a 3x3 stem, four residual stages, global pooling, dense head.

    Args:
        block: Residual block class; must expose an ``expansion`` attribute
            and accept ``(in_planes, planes, stride)``.
        num_blocks: Number of blocks in each of the four stages.
        num_classes: Number of output logits.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64
        # Conv2d only accepts a non-zero `padding` together with pad_mode='pad'.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, pad_mode='pad', padding=1, has_bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Dense(512 * block.expansion, num_classes)
        # Parameter-free operators are built once here rather than per construct() call.
        self.relu = nn.ReLU()
        self.avgpool = ops.AdaptiveAvgPool2D((1, 1))
        self.flatten = nn.Flatten()

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the remaining ones stride 1."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        # Pass the list itself: that is the documented SequentialCell argument form.
        return nn.SequentialCell(layers)

    def construct(self, x):
        """Return the class logits for a batch of images."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = self.flatten(out)
        out = self.linear(out)
        return out

def ResNet18():
    """Build a ResNet made of BasicBlocks with two blocks in each of the four stages."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage)

#### 定义DenseNet网络 

In [20]:
class Bottleneck(nn.Cell):
    """DenseNet bottleneck layer: BN-ReLU-1x1 conv, BN-ReLU-3x3 conv, then concat with the input.

    The output has ``growth_rate + in_planes`` channels: the newly computed
    feature maps followed by the unchanged input.

    Args:
        in_planes: Number of input channels.
        growth_rate: Number of new feature maps the layer contributes.
    """

    def __init__(self, in_planes, growth_rate):
        super(Bottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, 4 * growth_rate, kernel_size=1, has_bias=False)
        self.bn2 = nn.BatchNorm2d(4 * growth_rate)
        # Conv2d only accepts a non-zero `padding` together with pad_mode='pad'.
        self.conv2 = nn.Conv2d(4 * growth_rate, growth_rate, kernel_size=3,
                               pad_mode='pad', padding=1, has_bias=False)
        # Parameter-free operators are built once here rather than per construct() call.
        self.relu = nn.ReLU()
        self.concat = ops.Concat(axis=1)

    def construct(self, x):
        """Return the new feature maps concatenated with ``x`` along the channel axis."""
        out = self.conv1(self.relu(self.bn1(x)))
        out = self.conv2(self.relu(self.bn2(out)))
        out = self.concat((out, x))
        return out

class Transition(nn.Cell):
    """DenseNet transition layer: BN-ReLU-1x1 conv followed by 2x2 average pooling.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels of the 1x1 convolution.
    """

    def __init__(self, in_planes, out_planes):
        super(Transition, self).__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, has_bias=False)
        self.relu = nn.ReLU()
        # MindSpore has no ops.AvgPool2D; nn.AvgPool2d defaults to stride 1, so
        # the stride is set explicitly to halve the spatial size.
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

    def construct(self, x):
        """Return the channel-reduced, spatially halved feature maps."""
        out = self.conv(self.relu(self.bn(x)))
        out = self.pool(out)
        return out

class DenseNet(nn.Cell):
    """DenseNet for single-channel images with three dense stages and two transitions.

    Args:
        block: Dense layer class accepting ``(in_planes, growth_rate)`` and
            adding ``growth_rate`` channels to its input.
        nblocks: Number of dense layers in each of the three stages.
        growth_rate: Channels added by every dense layer.
        reduction: Factor applied to the channel count in every transition.
        num_classes: Number of output logits.
    """

    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate

        num_planes = 2 * growth_rate
        # Conv2d only accepts a non-zero `padding` together with pad_mode='pad'.
        self.conv1 = nn.Conv2d(1, num_planes, kernel_size=3, pad_mode='pad', padding=1, has_bias=False)

        self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
        num_planes += nblocks[0] * growth_rate
        self.trans1 = Transition(num_planes, int(num_planes * reduction))
        num_planes = int(num_planes * reduction)

        self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
        num_planes += nblocks[1] * growth_rate
        self.trans2 = Transition(num_planes, int(num_planes * reduction))
        num_planes = int(num_planes * reduction)

        self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
        num_planes += nblocks[2] * growth_rate

        self.bn = nn.BatchNorm2d(num_planes)
        self.linear = nn.Dense(num_planes, num_classes)

        # Parameter-free operators are built once here rather than per construct() call.
        self.relu = nn.ReLU()
        self.avgpool = ops.AdaptiveAvgPool2D((1, 1))
        self.flatten = nn.Flatten()

    def _make_dense_layers(self, block, in_planes, nblock):
        """Stack ``nblock`` dense layers; each one sees ``growth_rate`` more channels than the last."""
        layers = []
        for _ in range(nblock):
            layers.append(block(in_planes, self.growth_rate))
            in_planes += self.growth_rate
        # Pass the list itself: that is the documented SequentialCell argument form.
        return nn.SequentialCell(layers)

    def construct(self, x):
        """Return the class logits for a batch of images."""
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.dense3(out)
        out = self.relu(self.bn(out))
        out = self.avgpool(out)
        out = self.flatten(out)
        out = self.linear(out)
        return out

def DenseNet121():
    """Build a DenseNet of Bottleneck layers with stage sizes 6, 12, 24 and growth rate 12."""
    layers_per_stage = [6, 12, 24]
    return DenseNet(Bottleneck, layers_per_stage, growth_rate=12)

#### 训练和评估函数 

In [21]:
def train_and_evaluate(net, train_dataset, test_dataset, model_dir, num_epochs, lr):
    """Train ``net`` with momentum SGD and evaluate it on ``test_dataset`` after every epoch.

    Args:
        net: Network to train.
        train_dataset: Dataset used for training.
        test_dataset: Dataset handed to the SaveBestModel callback for evaluation.
        model_dir: Directory handed to SaveBestModel for the best checkpoint.
        num_epochs: Number of training epochs.
        lr: Learning rate of the optimizer.
    """
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    optimizer = nn.Momentum(net.trainable_params(), learning_rate=lr, momentum=0.9)
    model = Model(net, loss_fn=criterion, optimizer=optimizer, metrics={"accuracy"})

    # Callbacks: per-epoch timing, per-epoch loss printout, best-accuracy checkpointing.
    steps_per_epoch = train_dataset.get_dataset_size()
    callbacks = [
        TimeMonitor(data_size=steps_per_epoch),
        EpochLossMonitor(print_step=1),
        SaveBestModel(model, test_dataset, 'accuracy', model_dir),
    ]

    # Start training
    model.train(num_epochs, train_dataset, callbacks=callbacks, dataset_sink_mode=False)

#### 数据集下载与定义

In [22]:
data_path = "datasets/fashionMNIST"
os.makedirs(data_path, exist_ok=True)

train_csv_path = os.path.join(data_path, 'fashion-mnist_train.csv')
test_csv_path = os.path.join(data_path, 'fashion-mnist_test.csv')

# Download the archives and build the CSV files only when they are missing, so the
# notebook runs on a fresh checkout without redoing the conversion on every run.
if not (os.path.exists(train_csv_path) and os.path.exists(test_csv_path)):
    create_fashion_mnist_csv(data_path)

# Check that the CSV files exist and contain data
print("检查数据集文件是否存在并包含数据...")
print(f"训练数据集文件大小: {os.path.getsize(train_csv_path)} bytes")
print(f"测试数据集文件大小: {os.path.getsize(test_csv_path)} bytes")

batch_size = 32

# Load the datasets
train_loader = create_csv_dataset(data_path, batch_size=batch_size, is_train=True)
test_loader = create_csv_dataset(data_path, batch_size=batch_size, is_train=False)

# Check that the datasets were loaded (sizes are reported in batches)
print(f"训练数据集大小: {train_loader.get_dataset_size()} 批次")
print(f"测试数据集大小: {test_loader.get_dataset_size()} 批次")

# Hyper-parameters shared by every training run in the cells that follow
num_epochs, lr = 100, 0.001

检查数据集文件是否存在并包含数据...
训练数据集文件大小: 133067916 bytes
测试数据集文件大小: 37883023 bytes
训练数据集大小: 1874 批次
测试数据集大小: 312 批次


#### 定义模型保存路径 

In [23]:
# Root directory under which every training run stores its checkpoint folder.
model_dir = './results/mindspore_weight'
os.makedirs(model_dir, exist_ok=True)

#### 训练全连接神经网络 

In [None]:
# Train the fully connected network.
print("全连接神经网络开始训练")
fc_net = FCNet()
# The last path argument is used as a directory by SaveBestModel, which writes best.ckpt inside it.
train_and_evaluate(fc_net, train_loader, test_loader, os.path.join(model_dir, 'FCNet.ckpt'),num_epochs,lr)

全连接神经网络开始训练




#### 训练LeNet卷积神经网络 

In [15]:
# Train the LeNet network.
print("LeNet神经网络开始训练")
Le_net = LeNet5()
# The last path argument is used as a directory by SaveBestModel, which writes best.ckpt inside it.
train_and_evaluate(Le_net, train_loader, test_loader, os.path.join(model_dir, 'LeNet.ckpt'),num_epochs,lr)

LeNet神经网络开始训练




ValueError: 

#### 训练AlexNet卷积神经网络 

In [None]:
# Train the AlexNet network (the network created here, not the LeNet from the earlier cell).
print("AlexNet神经网络开始训练")
Alex_net = AlexNet()
train_and_evaluate(Alex_net, train_loader, test_loader, os.path.join(model_dir, 'AlexNet.ckpt'),num_epochs,lr)

#### 训练VggNet卷积神经网络 

In [None]:
# Train the VGG network (the network created here, not the LeNet from the earlier cell).
print("VggNet神经网络开始训练")
Vgg_net = VGGNet()
train_and_evaluate(Vgg_net, train_loader, test_loader, os.path.join(model_dir, 'VggNet.ckpt'),num_epochs,lr)

#### 训练ResNet卷积神经网络 

In [None]:
# Train the ResNet network; it gets its own name so it neither overwrites Vgg_net
# nor silently retrains the LeNet from the earlier cell.
print("ResNet神经网络开始训练")
Res_net = ResNet18()
train_and_evaluate(Res_net, train_loader, test_loader, os.path.join(model_dir, 'ResNet.ckpt'),num_epochs,lr)

#### 训练DenseNet卷积神经网络 

In [None]:
# Train the DenseNet network (the network created here, not the LeNet from the earlier cell).
print("DenseNet神经网络开始训练")
Dense_net = DenseNet121()
train_and_evaluate(Dense_net, train_loader, test_loader, os.path.join(model_dir, 'DenseNet.ckpt'),num_epochs,lr)