In [1]:
from pathlib import Path
import numpy as np
import PIL.Image as Image
import cv2
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, models, transforms
from IPython.display import display
from sklearn.model_selection import train_test_split
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
%matplotlib inline
%config InlineBackend.figure_format='retina'

import pandas as pd
import glob
import time
import copy
import os

In [4]:
# setting the path for joining multiple files
crop_path = "crop/"
files = os.path.join(crop_path, "*/")
# list of merged files returned
files = glob.glob(files)

In [5]:
!mkdir -p "crop_data/train"
!mkdir -p "crop_data/val"
for i in files:
    categorie=i.split('/')[1]
    print(categorie)
    !mkdir -p "crop_data/train/{categorie}"
    !mkdir -p "crop_data/val/{categorie}"

In [6]:
import shutil
for dirname in files:
    dirname = os.path.join(dirname, "*")
    dirname = glob.glob(dirname)
    train_data, val_data = train_test_split(dirname, test_size=0.1)
    for i in train_data:
#         print("crop_data/train/"+i.split('/')[1])
        if i not in "crop_data/train/"+i.split('/')[1]:
            shutil.copy2(i[:-4] + ".jpg", "crop_data/train/"+i.split('/')[1])
    for i in val_data:             
        if i not in "crop_data/val/"+i.split('/')[1]:
            shutil.copy2(i[:-4] + ".jpg", "crop_data/val/"+i.split('/')[1])

In [20]:
# 从硬盘文件夹中加载图像数据集

# 数据存储总路径
data_dir = 'crop_data'
# 图像的大小为224*256
image_size = 256
batch_sizes = 128

In [21]:
os.path.join(data_dir, 'train')

'crop_data/train'

In [22]:
# 从data_dir/train加载文件
# 加载的过程将会对图像自动作如下的图像增强操作：
# 1. 随机从原始图像中切下来一块224*224大小的区域
# 2. 随机水平翻转图像
# 3. 将图像的色彩数值标准化
train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'),
                                    transforms.Compose([
                                        transforms.RandomResizedCrop(image_size),
                                        transforms.RandomHorizontalFlip(),
                                        transforms.ToTensor(),
                                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                    ])
                                    )

In [23]:
# 加载校验数据集，对每个加载的数据进行如下处理：
# 1. 放大到256*256像素
# 2. 从中心区域切割下224*224大小的图像区域
# 3. 将图像的色彩数值标准化
val_dataset = datasets.ImageFolder(os.path.join(data_dir, 'val'),
                                    transforms.Compose([
                                        transforms.Resize(256),
                                        transforms.CenterCrop(image_size),
                                        transforms.ToTensor(),
                                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                    ])
                                    )

In [24]:

# 创建相应的数据加载器
train_loader = torch.utils.data.DataLoader(train_dataset, batch_sizes, shuffle = True, num_workers=4)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_sizes, shuffle = True, num_workers=4)

# 读取得出数据中的分类类别数
num_classes = len(train_dataset.classes)
num_classes

40

In [7]:
len(train_dataset)

13352

In [8]:
len(val_dataset)

1503

In [25]:
# 检测本机器是否安装GPU，将检测结果记录在布尔变量use_cuda中
use_cuda = torch.cuda.is_available()

# 当可用GPU的时候，将新建立的张量自动加载到GPU中
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
itype = torch.cuda.LongTensor if use_cuda else torch.LongTensor

In [15]:
# def imshow(inp, title=None):
#     # 将一张图打印显示出来，inp为一个张量，title为显示在图像上的文字
    
#     #一般的张量格式为：channels*image_width*image_height
#     #而一般的图像为image_width*image_height*channels所以，需要将channels转换到最后一个维度
#     inp = inp.numpy().transpose((1, 2, 0)) 
    
#     #由于在读入图像的时候所有图像的色彩都标准化了，因此我们需要先调回去
#     mean = np.array([0.485, 0.456, 0.406])
#     std = np.array([0.229, 0.224, 0.225])
#     inp = std * inp + mean
#     inp = np.clip(inp, 0, 1)
    
#     #将图像绘制出来
#     plt.imshow(inp)
#     if title is not None:
#         plt.title(title)
#     plt.pause(0.001)  # 暂停一会是为了能够将图像显示出来。


# #获取第一个图像batch和标签
# images, labels = next(iter(train_loader))

# # 将这个batch中的图像制成表格绘制出来
# out = torchvision.utils.make_grid(images)

# imshow(out, title=[train_dataset.classes[x] for x in labels])

In [10]:
# 加载模型库中的residual network，并设置pretrained为true，这样便可加载相应的权重
net = models.resnet50(pretrained=True)
# 如果存在GPU，就将网络加载到GPU上
net = net.cuda() if use_cuda else net
# 将网络的架构打印出来
net

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [17]:
# class ConvNet(nn.Module):
#     def __init__(self):
#         super(ConvNet, self).__init__()
#         self.conv1 = nn.Conv2d(3, 32, 3, padding = 1) #输入通道为3，输出通道为64，窗口大小为3，padding为1
#         self.conv2 = nn.Conv2d(64, 64, 3, padding = 1) #第二层卷积，输入通道为64, 输出通道为64，窗口为3，padding为1
#         self.pool = nn.MaxPool2d(2, 2) #一个窗口为2*2的pooling运算
#         self.conv3 = nn.Conv2d(64, 64, 3, padding = 1) 
#         self.conv4 = nn.Conv2d(128, 128, 3, padding = 1) 
#         self.conv5 = nn.Conv2d(128, 256, 3, padding = 1) 
#         self.conv6 = nn.Conv2d(256, 256, 3, padding = 1)
#         self.conv7 = nn.Conv2d(256, 512, 3, padding = 1)
#         self.conv8 = nn.Conv2d(512, 512, 3, padding = 1)
#         self.conv9 = nn.Conv2d(512, 512, 3, padding = 1)
#         self.conv10 = nn.Conv2d(512, 512, 3, padding = 1)
#         self.conv11 = nn.Conv2d(512, 512, 3, padding = 1)
#         self.conv12 = nn.Conv2d(512, 512, 3, padding = 1)
#         self.fc1 = nn.Linear(image_size // 32 * image_size // 32 * 512, 2048) #一个线性连接层，输入尺寸为最后一层立方体的平铺，输出层512个节点
#         self.fc2 = nn.Linear(2048, 40) #最后一层线性分类单元，输入为

#     def forward(self, x):
#         #神经网络完成一步前馈运算的过程，从输入到输出
#         x = F.relu(self.conv1(x))
#         x = torch.cat((F.relu6(x), F.relu6(-x)), 1)
#         x = F.relu(self.conv2(x))
#         x = self.pool(x)
#         x = F.relu(self.conv3(x))
#         x = torch.cat((F.relu6(x), F.relu6(-x)), 1)
#         x = F.relu(self.conv4(x))
#         x = self.pool(x)
#         x = F.relu(self.conv5(x))
#         x = F.relu(self.conv6(x))
#         x = self.pool(x)
#         x = F.relu(self.conv7(x))
#         x = F.relu(self.conv8(x))
#         x = F.relu(self.conv9(x))
#         x = self.pool(x)
#         x = F.relu(self.conv10(x))
#         x = F.relu(self.conv11(x))
#         x = F.relu(self.conv12(x))
#         x = self.pool(x)
#         x = x.view(-1, image_size // 32 * image_size // 32 * 512)
#         x = F.dropout(x, training=self.training) #以默认为0.5的概率对这一层进行dropout操作
#         x = F.relu(self.fc1(x)) #全链接，激活函数
#         x = F.dropout(x, training=self.training) #以默认为0.5的概率对这一层进行dropout操作
#         x = self.fc2(x) #全链接，激活函数
#         x = F.log_softmax(x, dim=1) #log_softmax可以理解为概率对数值
#         return x
    
#     def retrieve_features(self, x):
#         #提取卷积神经网络的特征图的函数，返回feature_map1, feature_map2为前两层卷积层的特征图
#         feature_map1 = F.relu(self.conv1(x))
#         x = self.pool(feature_map1)
#         feature_map2 = F.relu(self.conv2(x))
#         return (feature_map1, feature_map2)

In [11]:
def rightness(predictions, labels):
    """计算预测错误率的函数，其中predictions是模型给出的一组预测结果，batch_size行10列的矩阵，labels是数据之中的正确答案"""
    pred = torch.max(predictions.data, 1)[1] # 对于任意一行（一个样本）的输出值的第1个维度，求最大，得到每一行的最大元素的下标
    rights = pred.eq(labels.data.view_as(pred)).sum() #将下标与labels中包含的类别进行比较，并累计得到比较正确的数量
    # rights装到cpu中，以便后面打印出来   --hq20200726
    rights = rights.cpu() if rights.is_cuda else rights
    return rights, len(labels) #返回正确的数量和这一次一共比较了多少元素

In [14]:
# 读取最后线性层的输入单元数，这是前面各层卷积提取到的特征数量
num_ftrs = net.fc.in_features

# 重新定义一个全新的线性层，它的输出为2，原本是1000
net.fc = nn.Linear(num_ftrs, 40)

#如果存在GPU则将网络加载到GPU中
net.fc = net.fc.cuda() if use_cuda else net.fc

criterion = nn.CrossEntropyLoss() #Loss函数的定义
# 将网络的所有参数放入优化器中
# optimizer = optim.SGD(net.parameters(), lr = 0.0001, momentum=0.9)
torch.optim.Adam(net.parameters(),lr=0.001)

record = [] #记录准确率等数值的容器

#开始训练循环
num_epochs = 80
net.train(True) # 给网络模型做标记，标志说模型在训练集上训练
best_model = net
best_r = 0.0

In [20]:
# # 加载网络
# net = ConvNet()
# # 如果有GPU就把网络加载到GPU中
# net = net.cuda() if use_cuda else net
# criterion = nn.CrossEntropyLoss() #Loss函数的定义
# # optimizer = optim.SGD(net.parameters(), lr = 0.1, momentum=0.9)
# optimizer=optim.Adam(net.parameters(),
#                 lr=0.1,
#                 betas=(0.9, 0.999),
#                 eps=1e-08,
#                 weight_decay=0.0005,
#                 amsgrad=False)

# record = [] #记录准确率等数值的容器

# #开始训练循环
# num_epochs = 20
# net.train(True) # 给网络模型做标记，标志说模型在训练集上训练
# best_model = net
# best_r = 0.0

In [15]:
from apex import amp
model, optimizer = amp.initialize(net, optimizer, opt_level="O1") # 这里是“欧一”，不是“零一”

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic




In [16]:
total = sum([param.nelement() for param in net.parameters()])
print("Number of parameter: %.2fM" % (total/1e6))

Number of parameter: 23.59M


In [26]:
for epoch in range(num_epochs):
    #optimizer = exp_lr_scheduler(optimizer, epoch)
    train_rights = [] #记录训练数据集准确率的容器
    train_losses = []
    for batch_idx, (data, target) in enumerate(train_loader):  #针对容器中的每一个批进行循环
        if(batch_idx%100==0):
            print(batch_idx,epoch)
        data, target = data.clone().detach().requires_grad_(True), target.clone().detach() #data为图像，target为标签
        #如果存在GPU则将变量加载到GPU中
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        output = net(data) #完成一次预测
        loss = criterion(output, target) #计算误差
        optimizer.zero_grad() #清空梯度
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
#         loss.backward() #反向传播
        optimizer.step() #一步随机梯度下降
        right = rightness(output, target) #计算准确率所需数值，返回正确的数值为（正确样例数，总样本数）
        train_rights.append(right) #将计算结果装到列表容器中
        loss = loss.cpu() if use_cuda else loss
        train_losses.append(loss.data.numpy())

    train_r = (sum([tup[0] for tup in train_rights]), sum([tup[1] for tup in train_rights]))

    #在测试集上分批运行，并计算总的正确率
    net.eval() #标志模型当前为运行阶段
    test_loss = 0
    correct = 0
    vals = []
    #对测试数据集进行循环
    for data, target in val_loader:
        #如果存在GPU则将变量加载到GPU中
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        data, target = data.clone().detach().requires_grad_(False), target.clone().detach()
        output = net(data) #将特征数据喂入网络，得到分类的输出
        val = rightness(output, target) #获得正确样本数以及总样本数
        vals.append(val) #记录结果

    #计算准确率
    val_r = (sum([tup[0] for tup in vals]), sum([tup[1] for tup in vals]))
    val_ratio = 1.0*val_r[0].numpy()/val_r[1]
    
    if val_ratio > best_r:
        best_r = val_ratio
        best_model = copy.deepcopy(net)
    #打印准确率等数值，其中正确率为本训练周期Epoch开始后到目前撮的正确率的平均值
    print('训练周期: {} \tLoss: {:.6f}\t训练正确率: {:.2f}%, 校验正确率: {:.2f}%'.format(
        epoch, np.mean(train_losses), 100. * train_r[0].numpy() / train_r[1], 100. * val_r[0].numpy()/val_r[1]))       
    record.append([np.mean(train_losses), train_r[0].numpy() / train_r[1], val_r[0].numpy()/val_r[1]])
    torch.save(net,'checkp/model_res50')      # 将模型net另存为文件minst_conv_checkpoint

0 0
100 0
训练周期: 0 	Loss: 1.060304	训练正确率: 69.47%, 校验正确率: 70.13%
0 1
100 1
训练周期: 1 	Loss: 1.034491	训练正确率: 70.62%, 校验正确率: 71.26%
0 2
100 2
训练周期: 2 	Loss: 1.004780	训练正确率: 71.68%, 校验正确率: 69.93%
0 3
100 3
训练周期: 3 	Loss: 1.003571	训练正确率: 71.54%, 校验正确率: 70.19%
0 4
100 4
训练周期: 4 	Loss: 0.965135	训练正确率: 72.30%, 校验正确率: 70.92%
0 5
100 5
训练周期: 5 	Loss: 0.954295	训练正确率: 72.60%, 校验正确率: 71.99%
0 6
100 6
训练周期: 6 	Loss: 0.951282	训练正确率: 72.96%, 校验正确率: 71.32%
0 7
100 7
训练周期: 7 	Loss: 0.927835	训练正确率: 73.35%, 校验正确率: 73.45%
0 8
100 8
训练周期: 8 	Loss: 0.943730	训练正确率: 73.20%, 校验正确率: 73.05%
0 9
100 9
训练周期: 9 	Loss: 0.904337	训练正确率: 74.10%, 校验正确率: 71.12%
0 10
100 10
训练周期: 10 	Loss: 0.886320	训练正确率: 74.45%, 校验正确率: 71.39%
0 11
100 11
训练周期: 11 	Loss: 0.893561	训练正确率: 74.58%, 校验正确率: 71.06%
0 12
100 12
训练周期: 12 	Loss: 0.886128	训练正确率: 74.54%, 校验正确率: 72.12%
0 13
100 13
训练周期: 13 	Loss: 0.875359	训练正确率: 74.87%, 校验正确率: 71.79%
0 14
100 14
训练周期: 14 	Loss: 0.847779	训练正确率: 75.27%, 校验正确率: 72.99%
0 15
100 15
训练周期: 15 	Loss: 0.857484	训练正

In [28]:
torch.save(best_model,'checkp/model_res50_best')

In [29]:
net_best=torch.load('checkp/model_res50_best')

In [33]:
for data, target in val_loader:
    #如果存在GPU则将变量加载到GPU中
    if use_cuda:
        data, target = data.cuda(), target.cuda()
    data, target = data.clone().detach().requires_grad_(False), target.clone().detach()
    output = net_best(data) #将特征数据喂入网络，得到分类的输出
    val = rightness(output, target) #获得正确样本数以及总样本数
    vals.append(val) #记录结果
#计算准确率
val_r = (sum([tup[0] for tup in vals]), sum([tup[1] for tup in vals]))
val_ratio = 1.0*val_r[0].numpy()/val_r[1]
print('测验正确率: {:.2f}%'.format(val_ratio*100.))

测验正确率: 77.58%
