In [2]:
import torch

In [3]:
import gzip, struct

In [4]:
import numpy as np

In [5]:
import torch

In [6]:
from torch import nn

In [7]:
from torch.autograd import Variable

In [8]:
from torch.nn import functional as F

In [9]:
import torch.optim as optim

In [10]:
from torch.utils.data import TensorDataset, DataLoader

In [11]:
from torchvision import transforms

In [12]:
import torchvision.datasets as datasets

In [13]:
import math
import os
import random

In [14]:
data_trains = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,),(0.3082,))
])

In [None]:
train_data=datasets.CIFAR10('data',train=True,download=True, transform = data_trains)
test_data=datasets.CIFAR10('data',train=False,download=True, transform = data_trains)

In [16]:
n_train = int(len(train_data)*0.9)
n_validation = len(train_data) - n_train

In [17]:
batch_size=64

In [18]:
train_data, valid_data = torch.utils.data.random_split(train_data,[n_train, n_validation])

In [19]:
#制作数据的iterator
train_iterator = torch.utils.data.DataLoader(train_data,shuffle=True,batch_size=batch_size)
valid_iterator = torch.utils.data.DataLoader(valid_data, batch_size=batch_size)
test_iterator = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

In [31]:
class Bottleneck(nn.Module):
    def __init__(self, in_planes, growth_rate):
        super(Bottleneck, self).__init__()
        self.bn1=nn.BatchNorm2d(in_planes)
        self.conv1=nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
        self.bn2=nn.BatchNorm2d(4*growth_rate)
        self.conv2=nn.Conv2d(4*growth_rate, growth_rate, kernel_size = 3, padding = 1, bias = False)
        
    def forward(self, x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = self.conv2(F.relu(self.bn2(out)))
        out = torch.cat([out,x], 1)
        return out

class Transition(nn.Module):
    def __init__(self, in_planes, out_planes):
        super(Transition, self).__init__()
        self.bn=nn.BatchNorm2d(in_planes)
        self.conv=nn.Conv2d(in_planes, out_planes, kernel_size=1,bias=False)
    
    def forward(self, x):
        out = self.conv(F.relu(self.bn(x)))
        out = F.avg_pool2d(out, 2)
        return out
    
class DenseNet(nn.Module):#Densenet-BC
    def make_dense_layers(self, block, in_planes, nblock):
        #block:bottleneck
        #nblock代表构建denseblock中有多少bottleneck层
        layers=[]
        for i in range(nblock):
            layers.append(block(in_planes, self.growth_rate))
            in_planes+=self.growth_rate
        return nn.Sequential(*layers)
    
    def __init__(self, block, nblock, growth_rate=12, reduction=0.5, num_classes=10):
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate
        num_planes=2*growth_rate
        #最初的感知层
        self.conv1=nn.Conv2d(3, num_planes, kernel_size = 3, padding = 1, bias = False)
        #第一个DenseBlock
        self.dense1 = self.make_dense_layers(block, num_planes, nblock[0])
        num_planes+=nblock[0]*growth_rate#计算DenseBlock的输出也就是Transition的输入
        out_planes=int(math.floor(num_planes*reduction))#通过压缩系数计算Transition的输出作为下一个DenseBlock的输入
        self.trans1=Transition(num_planes, out_planes)
        num_planes=out_planes
        #第二个DenseBlock
        self.dense2=self.make_dense_layers(block, num_planes, nblock[1])
        num_planes+=nblock[1]*growth_rate
        out_planes=int(math.floor(num_planes*reduction))
        self.trans2=Transition(num_planes, out_planes)
        num_planes=out_planes
        #第三个DenseBlock
        self.dense3=self.make_dense_layers(block, num_planes, nblock[2])
        num_planes+=nblock[2]*growth_rate
        out_planes=int(math.floor(num_planes*reduction))
        self.trans3=Transition(num_planes, out_planes)
        num_planes=out_planes
        #第四个DenseBlock
        self.dense4=self.make_dense_layers(block, num_planes, nblock[3])
        num_planes+=nblock[3]*growth_rate
        #分类层
        self.bn=nn.BatchNorm2d( num_planes)
        self.linear=nn.Linear(num_planes, num_classes)
        
    def forward(self, x):
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))
        out = self.dense4(out)
        out = F.avg_pool2d(F.relu(self.bn(out)), 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
    
def DenseNet121():
    return DenseNet(Bottleneck, [6, 12, 24, 16], growth_rate=32)

In [32]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [33]:
model=DenseNet121().to(device)#构建了一个计算图模型并载入到了内存

In [34]:
model_dir='models'
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)
    
model_path=os.path.join(model_dir, 'densenet_mnist.pt')
print(model_path)

models\densenet_mnist.pt


In [35]:
optimizer = optim.Adam(model.parameters())

In [36]:
lossfunc = nn.CrossEntropyLoss()

In [37]:
def accu(fx, y):
    pred = fx.max(1, keepdim=True)[1]#argmax得到预测的类型
    correct=pred.eq(y.view_as(pred)).sum()#得到正确答案的数量
    acc = correct.float()/pred.shape[0]
    return acc

In [38]:
def train(model, device, iterator, optimizer, lossfunc):#训练一个epoch
    epoch_loss = 0
    epoch_acc = 0
    model.train()              #将model的状态改成PHASE=Train
    for (x,y) in iterator:     #加载每一个minibatch
        x = x.to(device)       #将张量加入到设备中CPU/GPU
        y = y.to(device)       #将label加入到设备中CPU/GPU
        optimizer.zero_grad()  #将所有的神经元/参数的梯度“寄存器”都置零
        fx = model(x)          #对于输入的训练样本进行模型预测
        loss=lossfunc(fx, y)   #计算预测与label之间的差距
        acc = accu(fx,y)       #计算预测的准确率，用来做显示
        loss.backward()        #通过loss求得各个参数的梯度，求得的梯度寄存在梯度“寄存器”中
        optimizer.step()       #对于参数进行统一的更新
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [39]:
#做一个评测集/验证集的完整性评测并给出相应的分数
def evaluate(model, device, iterator, lossfunc):
    epoch_loss = 0
    epoch_acc = 0
    model.eval()       #将模型的状态参数改为PHASE=Eval，这样不用自动求导
    with torch.no_grad():
        for (x, y) in iterator:
            x = x.to(device)
            y = y.to(device)
            fx = model(x)
            loss = lossfunc(fx, y)
            acc = accu(fx, y)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [40]:
epochs = 1
best_valid_loss=float('inf')
model

DenseNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (dense1): Sequential(
    (0): Bottleneck(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): Bottleneck(
      (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(96, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (2): Bottleneck(
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_sta

In [41]:
for epoch in range(epochs):
    #对于全部的1个epoch的训练集数据进行梯度下降，并输出平均train_loss和train_acc
    train_loss, train_acc = train(model, device, train_iterator, optimizer, lossfunc)
    #使用验证集对已经更新好参数的模型进行一轮验证，并输出平均的val_loss和val_acc
    valid_loss, valid_acc = evaluate(model, device, valid_iterator, lossfunc)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), model_path)
    print('Epoch:{0}|Train Loss:{1}|Train Acc:{2}|Val Loss:{3}|Val Acc:{4}'.format(epoch+1,train_loss,train_acc,valid_loss,valid_acc))
    

RuntimeError: Given groups=1, weight of size 64 3 3 3, expected input[64, 1, 32, 32] to have 3 channels, but got 1 channels instead

In [None]:
model.load_state_dict(torch.load(model_path))
test_loss,test_acc=evaluate(model,device,test_iterator,lossfunc)
print('Test Loss:{0}|Test Acc:{1}'.format(test_loss,test_acc))