In [1]:
from __future__ import print_function
import torch
from torch.autograd import Variable, Function
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import scipy.io as sio
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

from minimax_entropy import MinimaxEntropyEstimator
from pickle import dump

In [2]:
# global params
batch_size = 64
test_batch_size = 1000
random_seed = 1
n_classes = 10

entro = MinimaxEntropyEstimator('poly_coeff_entro.mat', gpu=True)

TypeError: __init__() takes at least 3 arguments (3 given)

In [None]:
# loading data
torch.cuda.manual_seed(random_seed)

kwargs = {'num_workers': 1, 'pin_memory': True}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=test_batch_size, shuffle=True, **kwargs)

In [3]:
class MNISTModel(nn.Module):
    def __init__(self, dropout=0.):
        super(MNISTModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d(dropout)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        
        self._dropout = dropout

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.dropout(F.relu(self.fc1(x)), p=self._dropout, training=self.training)
        x = F.softmax(self.fc2(x))
        return x

In [4]:
def pred_from_output(output, var=True):
    pred = output.data.max(1, keepdim=True)[1]
    return Variable(pred) if var else pred

def metric_accuracy(model, X, Y):
    output = model(X)
    pred = pred_from_output(output, var=False)
    correct = pred.eq(Y.data.view_as(pred)).cpu().sum()
    
    return correct/1./Y.size()[0]

def metric_loss_gen(L, convert_onehot=False):
    def metric_loss(model, X, Y):
        output = model(X)
        
        if convert_onehot:
            batch_size, n_classes = output.size()
            target = Variable(torch.DoubleTensor(batch_size, n_classes)).cuda()
            for i in range(batch_size):
                target[i, Y.data[i]] = 1.
            pred = output
        else:
            target = Y
            pred = pred_from_output(output)
            
        pred = pred.double()
        losses = [L(pred[i], target[i]).data.cpu().numpy() for i in range(batch_size)]
        return np.mean(losses)
        
    return metric_loss    

def eval_model(model, metrics, data_loader, n_batches=0):
    model.eval()
    
    ind_results = {key:[] for key in metrics}
    for t, (X, Y) in enumerate(data_loader):
        X, Y = X.cuda(), Y.cuda()
        X, Y = Variable(X, volatile=True), Variable(Y)
        
        for key in ind_results:
            ind_results[key].append(metrics[key](model, X, Y))
        
        if n_batches != 0 and t == n_batches:
            break
            
    results = {key: np.mean(val) for key, val in ind_results.items()}
    return results
            
def train(model, opt, L, train_loader, test_loader, n_classes, epoch,
          n_samples=1, convert_onehot=False, log_interval=100):
    model.train()
        
    evals = {'tr':[], 't':[], 'ts': [], 'es': []}
    for e in range(epoch):
        for t, (X, Y) in enumerate(train_loader):
            X, Y = X.cuda(), Y.cuda()
            X, Y = Variable(X), Variable(Y)
            opt.zero_grad()

            batch_size = Y.size()[0]
            if convert_onehot:
                target_onehot = Variable(torch.DoubleTensor(batch_size, n_classes)).cuda()
                for i in range(batch_size):
                    target_onehot[i, Y.data[i]] = 1.
                target = target_onehot
            else:
                target = Y

            mean_output = Variable(torch.DoubleTensor(batch_size, n_classes)).cuda()
            
            outputs = [model(X).double() for _ in range(n_samples)]
            for i in range(batch_size):
                for output in outputs:
                    mean_output[i] = mean_output[i] + output[i]
            mean_output /= n_samples

            loss = Variable(torch.zeros(1)).double().cuda()
            for i in range(batch_size):
                loss += L(output[i], target[i])
            loss.backward()

            opt.step()
            if t % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]'.format(
                    e, t * len(X), len(train_loader.dataset),
                    100. * t / len(train_loader)))

                mean_loss = loss.data[0] / batch_size
                tr_evals = eval_model(model, {'accu': metric_accuracy}, train_loader, 5)
                t_evals = eval_model(model, {'loss': metric_loss_gen(L, convert_onehot), 'accu': metric_accuracy}, test_loader, 0)
                print('Train| Loss: {:.6f} | Accu: {:.2f}%'.format(mean_loss, tr_evals['accu'] * 100))
                print('Test | Loss: {:.6f} | Accu: {:.2f}%'.format(t_evals['loss'], t_evals['accu'] * 100))
                
                evals['tr'].append(tr_evals)
                evals['tr'][-1]['loss'] = mean_loss
                evals['t'].append(t_evals)
                evals['ts'].append(t)
                evals['es'].append(e)
            
    return evals

In [5]:
# Training
epochs = 1
log_interval = 50
dropout = 0.3
n_samples = 50

In [6]:
model_mm = MNISTModel(dropout=dropout)
model_mm.cuda()
opt = optim.Adam(model_mm.parameters())

mm_evals = train(model_mm, opt, entro.minimax_cross_entro_loss, train_loader, test_loader, n_classes, epoch,
         n_samples=n_samples, convert_onehot=True, log_interval=log_interval)

NameError: name 'entro' is not defined

In [169]:
with open('results/mnist_mm_evals.pkl', 'w') as f:
    dump(mm_evals, f)

In [165]:
model = MNISTModel(dropout=dropout)
model.cuda()
opt = optim.Adam(model.parameters())

mle_evals = train(model, opt, entro.cross_entro_loss, train_loader, test_loader, n_classes, epoch,
         n_samples=n_samples, convert_onehot=True, log_interval=log_interval)

Train| Loss: 3.287637 | Accu: 10.16%
{'accu': 0.10669999999999999, 'loss': 3.2391072843750024}
Test | Loss: 3.239107 | Accu: 10.67%
Train| Loss: 0.890246 | Accu: 84.64%
{'accu': 0.82469999999999977, 'loss': 0.92736552437999697}
Test | Loss: 0.927366 | Accu: 82.47%
Train| Loss: 0.350702 | Accu: 86.98%
{'accu': 0.91280000000000006, 'loss': 0.49408003280185786}
Test | Loss: 0.494080 | Accu: 91.28%
Train| Loss: 0.426072 | Accu: 90.89%
{'accu': 0.93720000000000003, 'loss': 0.3688450110859246}
Test | Loss: 0.368845 | Accu: 93.72%
Train| Loss: 0.237211 | Accu: 94.27%
{'accu': 0.9494999999999999, 'loss': 0.29456302777046467}
Test | Loss: 0.294563 | Accu: 94.95%
Train| Loss: 0.190189 | Accu: 92.71%
{'accu': 0.95419999999999994, 'loss': 0.2744839988554752}
Test | Loss: 0.274484 | Accu: 95.42%
Train| Loss: 0.181120 | Accu: 94.53%
{'accu': 0.96329999999999993, 'loss': 0.21491335787594865}
Test | Loss: 0.214913 | Accu: 96.33%
Train| Loss: 0.145831 | Accu: 97.40%
{'accu': 0.96629999999999983, 'loss'

In [167]:
with open('results/mnist_mle_evals.pkl', 'w') as f:
    dump(mle_evals, f)