# 使用 CNN卷积神经网络 完成图像分类器

## 数据加载与归一

1. 数据导入神经网络 （数据加载），对神经网络进行训练
    数据集 CIFAR10 含10类6万张图 (学习阶段的小型图像数据集)
          ImageNet 含1000个类超100万张图
2. 数据归一（归一化）
 - 将输入数据缩放到 [0, 1] 或 [-1, 1] 之间
 - 图像数据的像素值一般在 [0, 255] 之间

#### Pytorch数据加载： torchvision.datasets
#### Pytorch数据归一： torchvision.transforms

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

In [None]:
# Preprocessing applied to every image: convert it to a tensor, then normalize
# each of the three channels. Normalize takes the per-channel mean and the
# per-channel standard deviation (not the variance); 0.5 is used for both.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# TODO: data augmentation transforms (this note was left unfinished).

# Training split of CIFAR10, read from ./data.
# NOTE(review): download=False means no download is attempted, so the dataset
# is presumably already present under ./data - confirm before a fresh run.
train_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=False,
    transform=transform,
)
# Shuffled mini-batches of 4 images, loaded by 2 worker processes.
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

In [None]:
# Test split of CIFAR10 (train=False), preprocessed with the same transform
# as the training split.
test_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=False,
    transform=transform,
)
# Fixed order (shuffle=False), mini-batches of 4 images, 2 worker processes.
test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=4,
    shuffle=False,
    num_workers=2,
)

In [None]:
# Load your own dataset (unfinished sketch).
# dir_images is an empty placeholder for the image folder path; the two
# commented-out lines below were never completed.
dir_images = ''
# private_set = torchvision.datasets.ImageFolder(root=dir_images,transform=transform)
# private_data_loader = torchvision.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

def im_show(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: torch tensor laid out as [channels, height, width].
    """
    # Undo the mean=0.5 / std=0.5 normalization applied by the transform.
    restored = img / 2 + 0.5
    # matplotlib expects [height, width, channels], so move channels last.
    as_hwc = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(as_hwc)

In [None]:
# Create an iterator over train_loader so batches can be pulled one at a time.
data_iter = iter(train_loader)

In [None]:
# Pull one mini-batch and display its images side by side as a grid.
# The builtin next() is used because DataLoader iterators implement the
# standard iterator protocol (__next__); the Python 2 style .next() method is
# not available on them in recent PyTorch releases.
images, labels = next(data_iter)
im_show(torchvision.utils.make_grid(images))

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN classifier: 3x32x32 input images, 10 output class scores.

    Architecture: two 3x3 convolutions (3 -> 6 -> 16 channels) followed by
    three fully connected layers (16*28*28 -> 512 -> 64 -> 10). Each 3x3
    convolution is created with the default stride and padding, so it trims
    2 pixels per spatial dimension: 32x32 -> 30x30 -> 28x28.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(3, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # Fully connected classifier head; the last layer has one output per class.
        self.fc1 = nn.Linear(16 * 28 * 28, 512)
        self.fc2 = nn.Linear(512, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Run a batch through the network and return the raw class scores."""
        # ReLU is the non-linearity after every layer except the last one.
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))

        # Flatten each sample's 16x28x28 feature map into a single vector.
        flat = features.view(-1, 16 * 28 * 28)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [None]:
# Instantiate the network that the cells below train, evaluate and save.
net = Net()

In [None]:
import torch.optim as optim
# Loss function: cross-entropy between the network's class scores and the labels.
criterion = nn.CrossEntropyLoss()
# Optimizer: SGD over all parameters of net, learning rate 0.001, momentum 0.9.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
# First training run: 2 passes over the training set, printing the loss of the
# current mini-batch every 1000 steps.
epochs = 2
for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()
        loss = criterion(net(images), labels)
        loss.backward()
        optimizer.step()
        if i % 1000 != 0:
            continue
        print(f'epoch:{epoch}, step: {i}, loss:{loss.item():.3f} ')

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f9dbb54d670>
Traceback (most recent call last):
  File "/Users/edgar/miniconda3/envs/juliang/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/Users/edgar/miniconda3/envs/juliang/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1297, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/Users/edgar/miniconda3/envs/juliang/lib/python3.8/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/Users/edgar/miniconda3/envs/juliang/lib/python3.8/multiprocessing/popen_fork.py", line 44, in wait
    if not wait([self.sentinel], timeout):
  File "/Users/edgar/miniconda3/envs/juliang/lib/python3.8/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/Users/edgar/miniconda3/envs/juliang/lib/python3.8/selectors.py", line 415, in se

KeyboardInterrupt: 

In [None]:
# Training run 2: train for `epochs` epochs and, every `eval_interval`
# mini-batches, record two values for the loss curves:
#   - the mean training loss over the batches seen since the last record,
#   - the mean loss over the whole test set.

# No other cell in this notebook imports tqdm, so it is imported here; if the
# package is missing, fall back to iterating without a progress bar.
try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable):
        """Return the iterable unchanged (no progress bar available)."""
        return iterable

train_loss_hist = []
test_loss_hist = []
epochs = 20
eval_interval = 250  # evaluate once every 250 mini-batches
for epoch in tqdm(range(epochs)):
    net.train()
    train_loss = 0.0
    batches_since_record = 0
    for i, (images, labels) in enumerate(train_loader):
        output = net(images)
        loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        batches_since_record += 1
        if i % eval_interval == 0:
            # Average the loss over every test batch instead of keeping only
            # the loss of the last batch.
            test_loss_sum = 0.0
            test_batches = 0
            net.eval()
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_output = net(test_images)
                    test_loss_sum += criterion(test_output, test_labels).item()
                    test_batches += 1
            # Switch back to training mode before the next optimizer step.
            net.train()

            # Divide by the number of batches actually accumulated: at i == 0
            # that is a single batch, not eval_interval.
            train_loss_hist.append(train_loss / batches_since_record)
            test_loss_hist.append(
                test_loss_sum / test_batches if test_batches else float('nan')
            )
            train_loss = 0.0
            batches_since_record = 0

In [None]:
# Plot the recorded training and test loss curves; one point was recorded
# every 250 mini-batches.
plt.figure()
plt.plot(train_loss_hist)
plt.plot(test_loss_hist)
# The labels must be passed as ONE list: two positional strings would be
# interpreted as (handles, labels) and produce a broken legend.
plt.legend(['train_loss_hist', 'test_loss_hist'])
plt.title('Loss')
plt.xlabel('#mini batch *250')
plt.ylabel('Loss')

In [None]:
# Model evaluation: top-1 accuracy of `net` over the whole test set.
correct = 0
total = 0
net.eval()  # inference mode for evaluation
with torch.no_grad():
    for test_images, test_labels in test_loader:
        test_output = net(test_images)
        # Predicted class = index of the highest score for each sample.
        predicted = test_output.argmax(dim=1)
        # .item() keeps the counters as exact Python ints.
        correct += (predicted == test_labels).sum().item()
        # Count the samples of THIS test batch (not the stale training `labels`).
        total += test_labels.size(0)

print('准确率', correct / total)


In [None]:
# Save the model: only the parameters (net.state_dict()) are written to ./model.pt
torch.save(net.state_dict(), './model.pt')

In [None]:
# Load the model file: build a fresh network and restore the saved parameters.
net2 = Net()
net2.load_state_dict(torch.load('./model.pt'))
net2.eval()  # inference mode for evaluation

# Model evaluation: top-1 accuracy of the reloaded network over the test set.
correct = 0
total = 0
with torch.no_grad():
    for test_images, test_labels in test_loader:
        test_output = net2(test_images)
        # Predicted class = index of the highest score for each sample.
        predicted = test_output.argmax(dim=1)
        # .item() keeps the counters as exact Python ints.
        correct += (predicted == test_labels).sum().item()
        # Count the samples of THIS test batch (not the stale training `labels`).
        total += test_labels.size(0)

print('准确率', correct / total)
