In [3]:
import torch

In [4]:
import gzip, struct

In [5]:
import numpy as np

In [6]:
import torch

In [7]:
from torch import nn

In [8]:
from torch.autograd import Variable

In [9]:
from torch.nn import functional as F

In [10]:
import torch.optim as optim

In [11]:
from torch.utils.data import TensorDataset, DataLoader

In [12]:
from torchvision import transforms

In [13]:
import torchvision.datasets as datasets

In [14]:
import math
import os
import random

In [15]:
# Preprocessing applied to every image: resize so the shorter side is 32 px
# (MNIST digits are square, so this yields 32x32), convert to a tensor, then
# normalize the single channel with the given mean / std.
data_trains = transforms.Compose(
    [
        transforms.Resize(size=32),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3082,)),
    ]
)

In [16]:
# MNIST train / test splits stored under ./data (downloaded on first use);
# both share the same preprocessing pipeline.
train_data = datasets.MNIST(root='data', train=True, download=True, transform=data_trains)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=data_trains)

In [17]:
# Keep 90% of the training set for training; the remainder becomes validation.
n_train = int(0.9 * len(train_data))
n_validation = len(train_data) - n_train

In [18]:
batch_size = 64  # mini-batch size shared by the train / validation / test loaders

In [19]:
# Seed the global torch RNG before splitting so the train / validation
# partition is the same on every "Restart & Run All". Because this seeds the
# global generator, later random operations (weight init, shuffling) become
# repeatable as well.
SPLIT_SEED = 0
torch.manual_seed(SPLIT_SEED)

# NOTE: this rebinds `train_data` to the training subset, so the cell is not
# idempotent -- re-run the dataset-loading cell before running it again.
train_data, valid_data = torch.utils.data.random_split(train_data, [n_train, n_validation])

In [20]:
# Build the batch iterators; only the training loader shuffles its samples.
train_iterator = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_iterator = DataLoader(valid_data, batch_size=batch_size)
test_iterator = DataLoader(test_data, batch_size=batch_size)

In [30]:
class Inception(nn.Module):
    """Four-branch Inception block; branch outputs are concatenated on dim 1.

    Every convolution is followed by BatchNorm2d and an in-place ReLU.

    Branches:
        b1: 1x1 convolution                       -> n1x1 channels
        b2: 1x1 reduction (n3x3red) -> 3x3 conv   -> n3x3 channels
        b3: 1x1 reduction (n5x5red) -> 5x5 conv   -> n5x5 channels
        b4: 3x3 max-pool (stride 1) -> 1x1 conv   -> pool_planes channels

    All branches use unit stride with size-preserving padding, so the output
    keeps the input's spatial size and has
    n1x1 + n3x3 + n5x5 + pool_planes channels.
    """

    def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
        super(Inception, self).__init__()

        def conv_bn_relu(c_in, c_out, kernel, pad=0):
            # One conv -> batch-norm -> ReLU stage, returned as a list so that
            # several stages can be spliced into a single flat nn.Sequential.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=kernel, padding=pad),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]

        # Branch 1: plain 1x1 convolution.
        self.b1 = nn.Sequential(*conv_bn_relu(in_planes, n1x1, 1))

        # Branch 2: 1x1 bottleneck followed by a 3x3 convolution.
        self.b2 = nn.Sequential(
            *(conv_bn_relu(in_planes, n3x3red, 1) + conv_bn_relu(n3x3red, n3x3, 3, pad=1))
        )

        # Branch 3: 1x1 bottleneck followed by a 5x5 convolution.
        self.b3 = nn.Sequential(
            *(conv_bn_relu(in_planes, n5x5red, 1) + conv_bn_relu(n5x5red, n5x5, 5, pad=2))
        )

        # Branch 4: size-preserving max-pool followed by a 1x1 convolution.
        self.b4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            *conv_bn_relu(in_planes, pool_planes, 1)
        )

    def forward(self, x):
        """Apply the four branches to ``x`` and concatenate them channel-wise."""
        branches = (self.b1, self.b2, self.b3, self.b4)
        return torch.cat([branch(x) for branch in branches], 1)
    

class GoogLeNet(nn.Module):
    """GoogLeNet-style classifier built from stacked Inception blocks.

    Layout: a 3x3 conv stem (192 channels) -> Inception a3, b3 -> max-pool
    (stride 2) -> Inception a4..e4 -> max-pool (stride 2) -> Inception a5, b5
    -> global average pool -> linear classifier on the 1024 pooled features.

    Args:
        num_classes: Number of output scores produced by the final linear
            layer. Defaults to 10.
        in_channels: Number of channels of the input images. Defaults to 1
            (grayscale).

    Calling ``GoogLeNet()`` builds exactly the same parameters as before.
    The fixed 8x8 average pool was replaced by adaptive global average
    pooling: for inputs whose final feature map is 8x8 (e.g. 32x32 images)
    the result is the same mean, while other input sizes no longer break the
    flatten -> linear step. The pooling layer has no parameters, so existing
    checkpoints still load.
    """

    def __init__(self, num_classes=10, in_channels=1):
        super(GoogLeNet, self).__init__()
        # Stem: size-preserving 3x3 convolution up to 192 channels.
        self.feature_block = nn.Sequential(
            nn.Conv2d(in_channels, 192, kernel_size=3, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(True),
        )
        # Each block's in_planes equals the previous block's concatenated
        # output width (n1x1 + n3x3 + n5x5 + pool_planes).
        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)      # -> 256
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)    # -> 480
        # Stateless down-sampling layer, reused at both stage boundaries.
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)     # -> 512
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)    # -> 512
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)    # -> 512
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)    # -> 528
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)  # -> 832
        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)  # -> 832
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)  # -> 1024
        # Global average pooling to 1x1 regardless of the feature-map size.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, num_classes)."""
        out = self.feature_block(x)
        out = self.a3(out)
        out = self.b3(out)
        out = self.maxpool(out)
        out = self.a4(out)
        out = self.b4(out)
        out = self.c4(out)
        out = self.d4(out)
        out = self.e4(out)
        out = self.maxpool(out)
        out = self.a5(out)
        out = self.b5(out)
        out = self.avgpool(out)
        # Flatten (batch, 1024, 1, 1) -> (batch, 1024) for the classifier.
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [31]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [32]:
# Directory that holds the best checkpoint written during training.
model_dir = 'models'
# exist_ok=True makes the cell idempotent and avoids the check-then-create
# race of testing os.path.isdir() before calling makedirs().
os.makedirs(model_dir, exist_ok=True)

# NOTE(review): the file name looks like a mangled 'googlenet_mnist.pt'. It is
# kept unchanged so it still matches the recorded cell output and any
# checkpoint already saved under this name -- confirm before renaming.
model_path = os.path.join(model_dir, 'get_ipythonooglenet_mnist.pt')
print(model_path)

models\get_ipythonooglenet_mnist.pt


In [33]:
# Instantiate the network, then move its parameters and buffers to the
# selected device.
model = GoogLeNet()
model = model.to(device)

NameError: name 'in_planes' is not defined

In [60]:
# Adam with its default hyper-parameters over all model parameters.
optimizer = optim.Adam(params=model.parameters())

In [61]:
# Cross-entropy loss between the model's raw class scores and the integer
# class labels.
lossfunc = nn.CrossEntropyLoss()

In [62]:
def accu(fx, y):
    """Return the fraction of rows in ``fx`` whose arg-max equals the label.

    Args:
        fx: Score tensor; the predicted class of each row is the index of the
            largest value along dim 1.
        y: Label tensor with one entry per row of ``fx``.

    Returns:
        A 0-dim float tensor: (number of correct predictions) / (rows in fx).
    """
    # Index of the highest score per row, kept as a column so the labels can
    # be reshaped to match.
    predicted = fx.argmax(dim=1, keepdim=True)
    # Count the positions where prediction and label agree.
    hits = (predicted == y.view_as(predicted)).sum()
    return hits.float() / predicted.size(0)

In [63]:
def train(model, device, iterator, optimizer, lossfunc):
    """Train ``model`` for one epoch and return (mean loss, mean accuracy).

    Args:
        model: Network to optimize; switched to training mode.
        device: Device each mini-batch is moved to before the forward pass.
        iterator: Iterable yielding ``(x, y)`` mini-batches.
        optimizer: Optimizer that updates the model's parameters.
        lossfunc: Callable ``lossfunc(fx, y)`` returning a scalar loss tensor.
            The epoch loss assumes it is averaged over the batch (the default
            for ``nn.CrossEntropyLoss``) -- confirm if another reduction is used.

    Returns:
        ``(loss, accuracy)`` as Python floats, averaged over all *samples*.
        Each batch is weighted by its size, so a smaller final batch no longer
        counts as much as a full one (previously the per-batch means were
        averaged with equal weight).

    Raises:
        ZeroDivisionError: If ``iterator`` yields no samples.
    """
    loss_sum = 0.0
    acc_sum = 0.0
    n_samples = 0
    model.train()                  # enable training behavior (e.g. batch-norm batch statistics)
    for (x, y) in iterator:
        x = x.to(device)           # move inputs to the target device
        y = y.to(device)           # move labels to the target device
        optimizer.zero_grad()      # clear gradients accumulated by the previous step
        fx = model(x)              # forward pass
        loss = lossfunc(fx, y)     # loss between predictions and labels
        acc = accu(fx, y)          # batch accuracy, tracked for reporting only
        loss.backward()            # back-propagate to populate parameter gradients
        optimizer.step()           # apply the parameter update
        batch_n = x.size(0)
        # Undo the per-batch mean so the epoch figure is a per-sample mean.
        loss_sum += loss.item() * batch_n
        acc_sum += acc.item() * batch_n
        n_samples += batch_n

    return loss_sum / n_samples, acc_sum / n_samples

In [64]:
def evaluate(model, device, iterator, lossfunc):
    """Score ``model`` on a validation / test set without updating it.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        device: Device each mini-batch is moved to before the forward pass.
        iterator: Iterable yielding ``(x, y)`` mini-batches.
        lossfunc: Callable ``lossfunc(fx, y)`` returning a scalar loss tensor.
            The reported loss assumes it is averaged over the batch (the
            default for ``nn.CrossEntropyLoss``) -- confirm if another
            reduction is used.

    Returns:
        ``(loss, accuracy)`` as Python floats, averaged over all *samples*.
        Each batch is weighted by its size, so a short final batch does not
        skew the reported metrics (previously the per-batch means were
        averaged with equal weight).

    Raises:
        ZeroDivisionError: If ``iterator`` yields no samples.
    """
    loss_sum = 0.0
    acc_sum = 0.0
    n_samples = 0
    # eval() switches layers such as batch-norm to inference behavior; it does
    # not disable autograd -- that is what torch.no_grad() below is for.
    model.eval()
    with torch.no_grad():
        for (x, y) in iterator:
            x = x.to(device)
            y = y.to(device)
            fx = model(x)
            loss = lossfunc(fx, y)
            acc = accu(fx, y)
            batch_n = x.size(0)
            # Undo the per-batch mean so the totals are per-sample sums.
            loss_sum += loss.item() * batch_n
            acc_sum += acc.item() * batch_n
            n_samples += batch_n
    return loss_sum / n_samples, acc_sum / n_samples

In [65]:
epochs = 1                       # number of full passes over the training set
best_valid_loss = float('inf')   # lowest validation loss so far; any real loss beats it
model                            # last expression: display the model architecture

VGGNet11(
  (feature_block): Sequential(
    (0): VGGBlock(
      (model_block): Sequential(
        (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace)
      )
    )
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): VGGBlock(
      (model_block): Sequential(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace)
      )
    )
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): VGGBlock(
      (model_block): Sequential(
        (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace)
      

In [None]:
# Train for `epochs` epochs, keeping on disk only the weights that achieved the
# lowest validation loss so far.
for epoch in range(epochs):
    # One pass of gradient descent over the whole training set; returns the
    # epoch's average training loss and accuracy.
    train_loss, train_acc = train(model, device, train_iterator, optimizer, lossfunc)
    # Score the freshly updated model on the validation set; returns the
    # average validation loss and accuracy.
    valid_loss, valid_acc = evaluate(model, device, valid_iterator, lossfunc)
    # Checkpoint only when the validation loss improves.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), model_path)
    print('Epoch:{0}|Train Loss:{1}|Train Acc:{2}|Val Loss:{3}|Val Acc:{4}'.format(epoch+1,train_loss,train_acc,valid_loss,valid_acc))
    

In [None]:
# Restore the best checkpoint saved during training and score it on the test
# set. map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine can still be loaded on a CPU-only one.
model.load_state_dict(torch.load(model_path, map_location=device))
test_loss, test_acc = evaluate(model, device, test_iterator, lossfunc)
print('Test Loss:{0}|Test Acc:{1}'.format(test_loss,test_acc))