In [1]:
import time

import torch
import torch.nn as nn
import torch.utils.data
import torchvision.datasets as datasets
import numpy as np

In [2]:
class Net(nn.Module):
    """MobileNet-style classifier for single-channel images with 7 output classes.

    The feature extractor is one standard 3x3 convolution followed by five
    depthwise-separable blocks; four of the six stages use stride 2, so the
    spatial size is divided by 16 overall (48x48 -> 3x3).  The features are
    then globally average-pooled to one 1024-vector per sample and fed to a
    linear layer producing 7 logits.
    """

    def __init__(self):
        super(Net, self).__init__()

        def conv_bn(inp, oup, stride):
            # Standard 3x3 convolution -> batch norm -> ReLU.
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def conv_dw(inp, oup, stride):
            # Depthwise-separable block: a per-channel 3x3 convolution
            # (groups=inp) followed by a 1x1 convolution that mixes channels.
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(  1,  32, 2),
            conv_dw( 32,  64, 1),
            conv_dw( 64, 128, 2),
            conv_dw(128, 256, 2),
            conv_dw(256, 512, 2),
            conv_dw(512, 1024, 1),
            # Global average pooling.  A fixed AvgPool2d(7) needs a feature map
            # of at least 7x7, but a 48x48 input is only 3x3 at this point, so
            # pool adaptively to 1x1 regardless of the input resolution.
            nn.AdaptiveAvgPool2d(1),
        )
        self.fc = nn.Linear(1024, 7)

    def forward(self, x):
        """Return class logits of shape (batch, 7) for input of shape (batch, 1, H, W)."""
        x = self.model(x)
        # Flatten per sample so the batch dimension can never absorb leftover
        # spatial positions.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


In [3]:
# Build the Net instance whose parameters are handed to the optimizer and that
# the training / validation loops further down operate on.
model = Net()

In [4]:
f_file = "data/train.csv"
datas = []
with open(f_file) as csv_file:
    # Discard the first line of the file (presumably the column header).
    next(csv_file, None)
    for row in csv_file:
        # Each row is "<label>,<space-separated pixel values>".
        label, pixel_text = row.split(',')
        pixels = np.fromstring(pixel_text, dtype=int, sep=' ').reshape((1, 48, 48))
        # Keep (image, class index) pairs; the DataLoader batches them later.
        datas.append((pixels, int(label)))

# Stack the images only to report the overall dataset shape.
features, labels = zip(*datas)
features = np.asarray(features)
print(features.shape)
# Labels are left as integer class indices (no one-hot conversion).

(28709, 1, 48, 48)


In [5]:
# Shuffle with a fixed seed so that the train/validation split taken from
# `datas` by position is the same on every Restart & Run All.
# NOTE: the shuffle is in place, so re-running this cell alone reshuffles the
# already-shuffled list; re-run the loading cell first to get the same order.
SPLIT_SEED = 0
np.random.RandomState(SPLIT_SEED).shuffle(datas)

In [6]:
batch_size = 256
workers = 4
# Number of leading samples of `datas` held out for validation; everything
# after them is used for training.
val_size = 2000
# Page-locked host memory only speeds up host-to-GPU copies, so request it
# only when a CUDA device is actually available.
pin_memory = torch.cuda.is_available()

train_loader = torch.utils.data.DataLoader(
    datas[val_size:],
    batch_size=batch_size, shuffle=True,
    num_workers=workers, pin_memory=pin_memory,
    )

val_loader = torch.utils.data.DataLoader(
    datas[:val_size],
    batch_size=batch_size, shuffle=False,
    num_workers=workers, pin_memory=pin_memory)

In [7]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch over `train_loader` and print running statistics.

    Args:
        train_loader: iterable yielding (images, target) batches.
        model: network to optimise; switched to training mode here.
        criterion: loss callable invoked as criterion(output, target).
        optimizer: optimizer whose zero_grad()/step() are called once per batch.
        epoch: epoch index, used only in the progress output.

    Returns:
        None.  Progress is printed every `print_freq` batches.
    """
    print_freq = 10
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # Cast to float so the convolution layers accept the batch even when
        # the stored pixel arrays are integer-typed.  Tensors are passed to
        # the model directly; the deprecated Variable wrapper is not needed.
        images = images.float()

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss (detached: metrics need no graph)
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(prec1, images.size(0))
        top5.update(prec5, images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1, top5=top5))
def validate(val_loader, model, criterion, print_freq=10):
    """Evaluate `model` on `val_loader` and return the average top-1 precision.

    Args:
        val_loader: iterable yielding (images, target) batches.
        model: network to evaluate; switched to evaluation mode here.
        criterion: loss callable invoked as criterion(output, target).
        print_freq: print a progress line every `print_freq` batches.  It is a
            parameter with a default because no such name is otherwise defined
            in this function's scope.

    Returns:
        The running average of the top-1 precision (in percent) over all
        validation batches, as accumulated by the meter.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # `Variable(..., volatile=True)` no longer disables autograd; torch.no_grad()
    # is what prevents a graph from being built during evaluation.
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            # Cast to float so the convolution layers accept integer-typed batches.
            images = images.float()

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(prec1, images.size(0))
            top5.update(prec5, images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return top1.avg

In [8]:
class AverageMeter:
    """Tracks the most recent value of a metric together with its weighted running mean."""

    def __init__(self):
        # All bookkeeping fields are created by reset().
        self.reset()

    def reset(self):
        """Zero the latest value, the mean, the weighted total and the total weight."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as observed with weight `n` and refresh the running mean."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count

In [9]:
def adjust_learning_rate(optimizer, epoch, base_lr=None):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs.

    Args:
        optimizer: object exposing `param_groups`, a list of dicts with an
            'lr' entry.
        epoch: zero-based epoch index; the decay exponent is `epoch // 30`.
        base_lr: optional starting learning rate.  When omitted, each param
            group's own starting rate is used: it is recorded under the
            'initial_lr' key the first time this function sees the group.

    The rate is always derived from the starting value, never from the current
    one, so calling this every epoch does not compound the decay.
    """
    decay = 0.1 ** (epoch // 30)
    for param_group in optimizer.param_groups:
        # Remember the rate the group started with on the first call.
        initial_lr = param_group.setdefault('initial_lr', param_group['lr'])
        start_lr = initial_lr if base_lr is None else base_lr
        param_group['lr'] = start_lr * decay


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: score tensor indexed as (sample, class).
        target: 1-D tensor of true class indices, one per sample.
        topk: iterable of k values; each must not exceed the number of classes.

    Returns:
        A list with one 0-dim tensor per k: the percentage of samples whose
        true class is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch)
    # so that row r holds every sample's rank-r prediction.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` comes from a transposed tensor and may be non-contiguous,
        # in which case view(-1) raises; reshape copies only when needed.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [10]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar',
                    best_filename='model_best.pth.tar'):
    """Serialise `state` to `filename`; keep a second copy when it is the best so far.

    Args:
        state: object passed to torch.save (here a dict of training state).
        is_best: when true, the saved file is also copied to `best_filename`.
        filename: path of the rolling checkpoint, overwritten on every call.
        best_filename: path that receives the copy of the best checkpoint.
    """
    # Imported locally because the notebook's import cell does not provide shutil.
    import shutil

    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)

In [None]:
criterion = nn.CrossEntropyLoss()
best_prec1 = 0
base_lr = 0.1          # learning rate before any decay
lr = base_lr
momentum = 0.9
weight_decay = 1e-4
num_epochs = 200
optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum=momentum,
                                weight_decay=weight_decay)
for epoch in range(num_epochs):
    # Step decay: divide the *base* rate by 10 every 30 epochs.  Deriving it
    # from base_lr (rather than from the previous epoch's lr) keeps the decay
    # from compounding on every epoch once epoch >= 30.
    lr = base_lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set; store a plain float so the comparison below
    # and the checkpointed value do not depend on a tensor
    prec1 = float(validate(val_loader, model, criterion))

    # remember best prec@1 and save checkpoint
    is_best = prec1 > best_prec1
    best_prec1 = max(prec1, best_prec1)
    save_checkpoint({
        'epoch': epoch + 1,
        # No `args` object exists in this notebook; record the model class name.
        'arch': type(model).__name__,
        'state_dict': model.state_dict(),
        'best_prec1': best_prec1,
        'optimizer' : optimizer.state_dict(),
    }, is_best)

Epoch: [0][0/105]	Time 1.445 (1.445)	Data 0.057 (0.057)	Loss 1.9815 (1.9815)	Prec@1 5.078 (5.078)	Prec@5 71.094 (71.094)
Epoch: [0][10/105]	Time 1.268 (1.298)	Data 0.001 (0.006)	Loss 2.0631 (1.9014)	Prec@1 15.234 (21.413)	Prec@5 87.500 (85.227)
Epoch: [0][20/105]	Time 1.218 (1.277)	Data 0.001 (0.003)	Loss 1.7994 (1.8797)	Prec@1 23.828 (22.582)	Prec@5 89.844 (86.942)
Epoch: [0][30/105]	Time 1.235 (1.262)	Data 0.001 (0.003)	Loss 1.8094 (1.8607)	Prec@1 22.266 (23.097)	Prec@5 90.625 (87.563)
Epoch: [0][40/105]	Time 1.246 (1.252)	Data 0.001 (0.002)	Loss 1.6942 (1.8379)	Prec@1 29.688 (24.019)	Prec@5 91.016 (88.176)
Epoch: [0][50/105]	Time 1.272 (1.249)	Data 0.001 (0.002)	Loss 1.7363 (1.8249)	Prec@1 28.516 (24.862)	Prec@5 91.797 (88.741)
Epoch: [0][60/105]	Time 1.232 (1.248)	Data 0.001 (0.002)	Loss 1.6458 (1.8099)	Prec@1 33.594 (25.640)	Prec@5 95.312 (89.306)
Epoch: [0][70/105]	Time 1.237 (1.248)	Data 0.001 (0.002)	Loss 1.6504 (1.7862)	Prec@1 35.156 (27.074)	Prec@5 93.359 (89.877)
Epoch: [0][

In [69]:
# NOTE(review): `o` is not assigned in any visible cell, so this line raises
# NameError on Restart & Run All. The saved output is a 2x7 tensor with an
# Addmm grad_fn -- presumably the model's logits for two samples; confirm and
# either define `o` explicitly above or delete this cell.
o

tensor([[-0.0115,  0.0792, -0.1836,  0.3623, -0.0815,  0.2355,  0.1580],
        [-0.0224,  0.0808, -0.1764,  0.2261, -0.1029, -0.0853,  0.0463]],
       grad_fn=<AddmmBackward>)