In [1]:
import numpy as np
import torch
from datetime import datetime
from torch import nn
from torch.autograd import Variable
from torchvision.datasets import CIFAR10
from torchvision import transforms

In [2]:
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG stage: 3x3 conv + ReLU pairs followed by a 2x2 max-pool.

    Args:
        num_convs: Number of conv/ReLU pairs requested. At least one pair is
            always created, even when ``num_convs`` is smaller than 1.
        in_channels: Channel count fed to the first convolution.
        out_channels: Channel count produced by every convolution in the block.

    Returns:
        ``nn.Sequential`` holding the conv/ReLU pairs and a trailing
        ``nn.MaxPool2d(2, 2)``.
    """
    layers = []
    fan_in = in_channels
    # Only the first convolution changes the channel count; the remaining
    # ones map out_channels -> out_channels. padding=1 with a 3x3 kernel keeps
    # the spatial size unchanged.
    for _ in range(max(num_convs, 1)):
        layers += [
            nn.Conv2d(fan_in, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        fan_in = out_channels

    # The pooling layer halves height and width at the end of the block.
    layers.append(nn.MaxPool2d(2, 2))
    return nn.Sequential(*layers)

In [3]:
def vgg_stack(num_convs, channels):
    """Chain several VGG blocks into a single sequential feature extractor.

    Args:
        num_convs: Iterable giving the number of conv layers of each block.
        channels: Iterable of indexable ``(in_channels, out_channels)`` pairs,
            one per block. It is paired element-wise with ``num_convs`` via
            ``zip``, so surplus entries of the longer iterable are ignored.

    Returns:
        ``nn.Sequential`` containing one ``vgg_block`` per zipped pair, in order.
    """
    blocks = [
        vgg_block(depth, pair[0], pair[1])
        for depth, pair in zip(num_convs, channels)
    ]
    return nn.Sequential(*blocks)

In [4]:
class vggnet(nn.Module):
    """VGG-style CNN: five conv blocks followed by a two-layer, 10-way classifier."""

    def __init__(self):
        super().__init__()
        block_depths = (1, 1, 2, 2, 2)
        block_channels = ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512))
        # Every block ends with a 2x2 max-pool, so the five blocks halve the
        # spatial size five times (e.g. a 32x32 input is reduced to 1x1).
        self.feature = vgg_stack(block_depths, block_channels)
        # The first Linear layer expects 512 values per sample, so the
        # flattened feature map must contain exactly 512 elements.
        self.classify = nn.Sequential(
            nn.Linear(512, 200),
            nn.ReLU(),
            nn.Linear(200, 10),
        )

    def forward(self, x):
        """Run the conv stack, flatten per sample, and return the classifier output.

        Args:
            x: Input batch with 3 channels (the first convolution takes 3
                input channels); the first dimension is treated as the batch.

        Returns:
            Tensor with 10 values per sample (output of the last Linear layer).
        """
        features = self.feature(x)
        # Collapse everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.classify(flat)

# Module-level model instance shared by the optimizer and training cells.
net = vggnet()

In [29]:
def data_tf(x):
    """Convert an image into a normalised float32 tensor with the last axis moved first.

    Args:
        x: Three-axis image data accepted by ``np.array``. Presumably a
            (height, width, channel) image with values in [0, 255] - confirm
            against the dataset that supplies it.

    Returns:
        ``torch.Tensor`` of dtype float32 whose axes are reordered from
        (0, 1, 2) to (2, 0, 1); a value ``v`` becomes ``(v / 255 - 0.5) / 0.5``,
        which maps 0 to -1 and 255 to 1.
    """
    pixels = np.array(x, dtype='float32') / 255
    normalized = (pixels - 0.5) / 0.5
    # Move the last axis to the front before handing the data to torch.
    return torch.from_numpy(normalized.transpose((2, 0, 1)))

In [30]:
# CIFAR10 train/test splits stored under ./data; every image is passed
# through data_tf when it is accessed.
train_set = CIFAR10(root='./data', train=True, transform=data_tf, download=True)
test_set = CIFAR10(root='./data', train=False, transform=data_tf, download=True)
# Sanity check: print the first channel of the first transformed training image.
print(train_set[0][0][0])

Files already downloaded and verified
Files already downloaded and verified
tensor([[ 1.0000,  1.0000,  1.0000,  ...,  0.5312,  0.6641,  0.4297],
        [ 1.0000,  0.9922,  0.9922,  ...,  0.3359,  0.2656,  0.1484],
        [ 1.0000,  0.9922,  1.0000,  ...,  0.4844,  0.3047, -0.0469],
        ...,
        [ 0.1641,  0.1172,  0.1016,  ..., -0.7578, -0.4844, -0.3984],
        [-0.0391, -0.0547, -0.0078,  ..., -0.8203, -0.2344,  0.1094],
        [-0.3125, -0.3047, -0.2031,  ..., -0.7266, -0.1719,  0.0859]])


In [7]:
def get_acc(output, label):
    """Return the fraction of samples whose highest-scoring class equals the label.

    Args:
        output: 2-D tensor of scores, one row per sample; the prediction is
            the index of the maximum along dimension 1.
        label: Tensor of target class indices, compared element-wise with the
            predictions.

    Returns:
        Python float: number of matching predictions divided by
        ``output.shape[0]``.
    """
    predictions = output.max(dim=1)[1]
    hits = (predictions == label).sum().item()
    return hits / output.shape[0]

def set_learning_rate(optimizer, lr):
    """Overwrite the ``'lr'`` entry of every parameter group of ``optimizer``.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of dicts.
        lr: New learning-rate value stored in each group.
    """
    for group in optimizer.param_groups:
        group.update(lr=lr)

In [8]:
def train(net, train_data, valid_data, num_epochs, optimizer, criterion,
          lr_drop_epoch=15, lr_drop_value=1e-2):
    """Train ``net`` and print loss/accuracy (plus validation metrics) per epoch.

    The model is moved to the GPU when CUDA is available and otherwise stays
    on the CPU. Reported losses and accuracies are averages of the per-batch
    values. The printed time covers the whole epoch (training plus
    validation).

    Args:
        net: ``nn.Module`` to optimise; moved to the selected device.
        train_data: Sized iterable of ``(images, labels)`` tensor batches.
        valid_data: Sized iterable of ``(images, labels)`` tensor batches used
            for evaluation after every epoch, or ``None`` to skip evaluation.
        num_epochs: Number of passes over ``train_data``.
        optimizer: Optimizer holding the parameters of ``net``.
        criterion: Callable ``criterion(output, label)`` returning a scalar
            loss tensor.
        lr_drop_epoch: Epoch index at which the learning rate is overwritten
            with ``lr_drop_value``; ``None`` disables the change.
        lr_drop_value: Learning rate applied from ``lr_drop_epoch`` onwards.

    Returns:
        None. Results are reported via ``print``. If ``valid_data`` is given,
        ``net`` is left in eval mode.
    """
    # Prefer the GPU but fall back to the CPU so the loop also runs on
    # machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = net.to(device)

    for epoch in range(num_epochs):
        epoch_start = datetime.now()
        if lr_drop_epoch is not None and epoch == lr_drop_epoch:
            set_learning_rate(optimizer, lr_drop_value)

        train_loss = 0
        train_acc = 0
        net.train()
        for im, label in train_data:
            im = im.to(device)
            label = label.to(device)
            # forward
            output = net(im)
            loss = criterion(output, label)
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_acc += get_acc(output, label)

        if valid_data is not None:
            valid_loss = 0
            valid_acc = 0
            net.eval()
            # Variable(..., volatile=True) no longer disables autograd;
            # torch.no_grad() is what prevents graph construction here.
            with torch.no_grad():
                for im, label in valid_data:
                    im = im.to(device)
                    label = label.to(device)

                    output = net(im)
                    loss = criterion(output, label)

                    valid_loss += loss.item()
                    valid_acc += get_acc(output, label)
            epoch_str = (
                "Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, "
                % (epoch, train_loss / len(train_data),
                   train_acc / len(train_data), valid_loss / len(valid_data),
                   valid_acc / len(valid_data)))
        else:
            epoch_str = ("Epoch %d. Train Loss: %f, Train Acc: %f, " %
                         (epoch, train_loss / len(train_data),
                          train_acc / len(train_data)))

        # total_seconds() keeps whole days, which timedelta.seconds drops.
        elapsed = int((datetime.now() - epoch_start).total_seconds())
        h, remainder = divmod(elapsed, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        print(epoch_str + time_str)

In [9]:
from torch.utils.data import DataLoader
# Mini-batch loaders: the training set is reshuffled each epoch, while the
# test set is read in a fixed order.
train_data = DataLoader(dataset=train_set, batch_size=64, shuffle=True)
test_data = DataLoader(dataset=test_set, batch_size=128, shuffle=False)

In [10]:
# Cross-entropy loss computed on the network outputs and the class labels.
criterion = nn.CrossEntropyLoss()
# Stochastic gradient descent with a learning rate of 0.1.
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.1)

In [None]:
# Train for 20 epochs; the test loader is passed as the validation data.
train(net, train_data, test_data, num_epochs=20, optimizer=optimizer, criterion=criterion)



Epoch 0. Train Loss: 2.925825, Train Acc: 0.248481, Valid Loss: 3.085552, Valid Acc: 0.220926, Time 00:00:42
Epoch 1. Train Loss: 2.864115, Train Acc: 0.261449, Valid Loss: 3.006512, Valid Acc: 0.243473, Time 00:00:45
Epoch 2. Train Loss: 2.801509, Train Acc: 0.273438, Valid Loss: 3.322412, Valid Acc: 0.189873, Time 00:00:45
Epoch 3. Train Loss: 2.736911, Train Acc: 0.288103, Valid Loss: 2.858321, Valid Acc: 0.267306, Time 00:00:45
Epoch 4. Train Loss: 2.675574, Train Acc: 0.298414, Valid Loss: 3.238603, Valid Acc: 0.214695, Time 00:00:45
