In [1]:
import torch

In [2]:
import gzip, struct

In [3]:
import numpy as np

In [4]:
import torch

In [5]:
from torch import nn

In [6]:
from torch.autograd import Variable

In [7]:
from torch.nn import functional as F

In [8]:
import torch.optim as optim

In [9]:
from torch.utils.data import TensorDataset, DataLoader

In [10]:
from torchvision import transforms

In [11]:
import torchvision.datasets as datasets

In [12]:
import math
import os
import random

In [13]:
data_trains = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,),(0.3082,))
])

In [14]:
train_data=datasets.CIFAR10('data',train=True,download=True, transform = data_trains)
test_data=datasets.CIFAR10('data',train=False,download=True, transform = data_trains)

Files already downloaded and verified
Files already downloaded and verified


In [15]:
n_train = int(len(train_data)*0.9)
n_validation = len(train_data) - n_train

In [16]:
batch_size=64

In [17]:
train_data, valid_data = torch.utils.data.random_split(train_data,[n_train, n_validation])

In [18]:
#制作数据的iterator
train_iterator = torch.utils.data.DataLoader(train_data,shuffle=True,batch_size=batch_size)
valid_iterator = torch.utils.data.DataLoader(valid_data, batch_size=batch_size)
test_iterator = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

In [19]:
import torch.nn.init as init
class Fire(nn.Module):
    def __init__(self,inplanes,s1,e1,e3):
        super(Fire,self).__init__()
        self.inplanes=inplanes
        self.squeeze=nn.Conv2d(inplanes,s1,kernel_size=1)
        self.squeeze_activation=nn.ReLU(inplace=True)
        self.expand1x1=nn.Conv2d(s1,e1,kernel_size=1)
        self.expand1x1_activation=nn.ReLU(inplace=True)
        self.expand3x3=nn.Conv2d(s1,e3,kernel_size=True)
        self.expand3x3_activation=nn.ReLU(inplace=True)
    def forward(self,x):
        x=self.squeeze_activation(self.squeeze(x))
        return torch.cat([
            self.expand1x1_activation(self.expand1x1(x)),
            self.expand3x3_activation(self.expand3x3(x))
        ],1)
    
class SqueezeNet(nn.Module):

    def __init__(self, version=1.0, num_classes=10):
        super(SqueezeNet, self).__init__()
        if version not in [1.0, 1.1]:
            raise ValueError("Unsupported SqueezeNet version {version}:"
                             "1.0 or 1.1 expected".format(version=version))
        self.num_classes = num_classes
        if version == 1.0:
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=7, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(96, 16, 64, 64),
                Fire(128, 16, 64, 64),
                Fire(128, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 32, 128, 128),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(512, 64, 256, 256),
            )
        else:
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
            )
        # Final convolution is initialized differently form the rest
        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            final_conv,
            #nn.ReLU(inplace=True),
            #nn.AvgPool2d(4, stride=1)
        )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m is final_conv:
                    init.normal(m.weight.data, mean=0.0, std=0.01)
                else:
                    init.kaiming_uniform(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x.view(x.size(0), self.num_classes)

In [20]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [21]:
model=SqueezeNet().to(device)#构建了一个计算图模型并载入到了内存



In [22]:
model_dir='models'
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)
    
model_path=os.path.join(model_dir, 'resnet_mnist.pt')
print(model_path)

models\resnet_mnist.pt


In [23]:
optimizer = optim.Adam(model.parameters())

In [24]:
lossfunc = nn.CrossEntropyLoss()

In [25]:
def accu(fx, y):
    pred = fx.max(1, keepdim=True)[1]#argmax得到预测的类型
    correct=pred.eq(y.view_as(pred)).sum()#得到正确答案的数量
    acc = correct.float()/pred.shape[0]
    return acc

In [26]:
def train(model, device, iterator, optimizer, lossfunc):#训练一个epoch
    epoch_loss = 0
    epoch_acc = 0
    model.train()              #将model的状态改成PHASE=Train
    for (x,y) in iterator:     #加载每一个minibatch
        x = x.to(device)       #将张量加入到设备中CPU/GPU
        y = y.to(device)       #将label加入到设备中CPU/GPU
        optimizer.zero_grad()  #将所有的神经元/参数的梯度“寄存器”都置零
        fx = model(x)          #对于输入的训练样本进行模型预测
        loss=lossfunc(fx, y)   #计算预测与label之间的差距
        acc = accu(fx,y)       #计算预测的准确率，用来做显示
        loss.backward()        #通过loss求得各个参数的梯度，求得的梯度寄存在梯度“寄存器”中
        optimizer.step()       #对于参数进行统一的更新
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [27]:
#做一个评测集/验证集的完整性评测并给出相应的分数
def evaluate(model, device, iterator, lossfunc):
    epoch_loss = 0
    epoch_acc = 0
    model.eval()       #将模型的状态参数改为PHASE=Eval，这样不用自动求导
    with torch.no_grad():
        for (x, y) in iterator:
            x = x.to(device)
            y = y.to(device)
            fx = model(x)
            loss = lossfunc(fx, y)
            acc = accu(fx, y)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [28]:
epochs = 1
best_valid_loss=float('inf')
model

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(96, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace)
      (expand3x3): Conv2d(16, 64, kernel_size=(True, True), stride=(1, 1))
      (expand3x3_activation): ReLU(inplace)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace)
      (expand3x3): Conv2d(16, 64, kernel_size=(True, True), stride=(1, 1))
      (expand3x3_activation): ReLU(inplace)
    )
    (5): Fire(
      (squeeze): Conv2d(128, 32, kernel_size=(1, 1), strid

In [None]:
for epoch in range(epochs):
    #对于全部的1个epoch的训练集数据进行梯度下降，并输出平均train_loss和train_acc
    train_loss, train_acc = train(model, device, train_iterator, optimizer, lossfunc)
    #使用验证集对已经更新好参数的模型进行一轮验证，并输出平均的val_loss和val_acc
    valid_loss, valid_acc = evaluate(model, device, valid_iterator, lossfunc)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), model_path)
    print('Epoch:{0}|Train Loss:{1}|Train Acc:{2}|Val Loss:{3}|Val Acc:{4}'.format(epoch+1,train_loss,train_acc,valid_loss,valid_acc))
    

In [None]:
model.load_state_dict(torch.load(model_path))
test_loss,test_acc=evaluate(model,device,test_iterator,lossfunc)
print('Test Loss:{0}|Test Acc:{1}'.format(test_loss,test_acc))