In [19]:
from torch import nn
import numpy as np
import torch
from torch.autograd import Function
from torch.autograd import Variable

# Setup

### Parameters

In [20]:
batch_size_train = 200
batch_size_test = 1000
D = 784

### Load MNIST data

In [21]:
import torch.utils.data
from torchvision import datasets, transforms
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('MNIST_data', train=True, download=True,
               transform=transforms.Compose([
                   transforms.ToTensor(),
                   transforms.Normalize((0.1307,), (0.3081,))
               ])),
    batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('MNIST_data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size_test, shuffle=True)

# Train Feature Selector

In [22]:
def acc_fn(x1, x2, y1, y2):
    predictions = torch.sign(torch.sum(x1 * x2, dim=1))
    similarities = torch.sign(torch.eq(y1, y2).double() - 0.5)
    return torch.mean(torch.eq(predictions, similarities).double())

def hinge_loss_fn(x1, x2, y1, y2, gamma=0.5):    
    inner_products = torch.sum(x1 * x2, dim=1)
    same_class = torch.sign(torch.eq(y1, y2).double() - 0.5)
    scores = torch.clamp(gamma - same_class * inner_products, min=0)
    return torch.mean(scores)

def logistic_loss_fn(x1, x2, y1, y2):
    inner_products = torch.sum(x1 * x2, dim=1)
    same_class = torch.sign(torch.eq(y1, y2).double() - 0.5)
    scores = torch.log1p(torch.exp(-same_class * inner_products))
    return torch.mean(scores)

In [23]:
def features_train(epoch, shape):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data1, data2 = Variable(data).view(shape).chunk(2)
        classes1, classes2 = Variable(target).view(data.shape[0], 1).chunk(2)
        optimizer.zero_grad()
        features1, features2 = model(data1).double(), model(data2).double()
        loss = loss_fn(features1, features2, classes1, classes2)
        loss.backward()
        optimizer.step()        
        if batch_idx % 25 == 0:
            acc = acc_fn(features1, features2, classes1, classes2)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0], acc.data[0]))

def features_test(shape):
    model.eval()
    loss, acc, total = 0, 0, 0
    for data, target in test_loader:
        data1, data2 = Variable(data).view(shape).chunk(2)
        classes1, classes2 = Variable(target).view(data.shape[0], 1).chunk(2)
        optimizer.zero_grad()
        features1, features2 = model(data1).double(), model(data2).double()
        loss += loss_fn(features1, features2, classes1, classes2)
        acc += acc_fn(features1, features2, classes1, classes2)
        total += 1

    loss /= total
    acc /= total
    print('Test set: Loss: {:.4f}, Accuracy: {:.4f}%'.format(loss.data[0], 100. * acc.data[0]))
    return acc.data[0]

# Models

### 3 features, 3-layer NN

In [24]:
for lr in [0.0001]:
    for wd in [0.001]:
        loss_fn = hinge_loss_fn
        model = torch.nn.Sequential(
            torch.nn.Linear(D, 128),
            torch.nn.BatchNorm1d(128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.BatchNorm1d(64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 64),
            torch.nn.BatchNorm1d(64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 3)
        )   
        optimizer = torch.optim.Adam(model.parameters(), lr = lr, weight_decay=wd)
        print('hinge loss | lr: {}, weight_decay: {}'.format(lr, wd))
        for epoch in range(8):
            features_train(epoch, (batch_size_train, -1))
            features_test((batch_size_test, -1))
        print()
dense = model

hinge loss | lr: 0.0001, weight_decay: 0.001
Test set: Loss: 0.4980, Accuracy: 49.7886%
Test set: Loss: 0.4996, Accuracy: 49.7446%
Test set: Loss: 0.5007, Accuracy: 49.1751%
Test set: Loss: 0.5007, Accuracy: 50.2915%
Test set: Loss: 0.5001, Accuracy: 50.3640%
Test set: Loss: 0.5008, Accuracy: 51.2474%
Test set: Loss: 0.4997, Accuracy: 50.0958%
Test set: Loss: 0.5012, Accuracy: 48.5463%



In [32]:
class conv_net(nn.Module):
    def __init__(self, num_classes=1000):
        super(conv_net, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, 3),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 32, 3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=1),
            nn.Conv2d(32, 64, 3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(64 * 10 * 10, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 3),
        )
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 64 * 10 * 10)
        x = self.classifier(x)
        return x
model = conv_net()
loss_fn = hinge_loss_fn
optimizer = torch.optim.Adam(model.parameters(), lr = 0.00001, weight_decay=1)
for epoch in range(5):
    features_train(epoch, (batch_size_train, 1, 28, 28))
    features_test((batch_size_test, 1, 28, 28))
print()
conv = model

Test set: Loss: 0.5009, Accuracy: 9.7600%
Test set: Loss: 0.5006, Accuracy: 10.0800%
Test set: Loss: 0.5005, Accuracy: 10.3600%
Test set: Loss: 0.5004, Accuracy: 10.4200%
Test set: Loss: 0.5003, Accuracy: 9.4800%



# Train linear classifier

In [None]:
import tensorflow as tf
from sklearn.svm import LinearSVC
mnist = tf.contrib.learn.datasets.load_dataset("mnist")
train_data = mnist.train.images # Returns np.array
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
test_data = mnist.test.images # Returns np.array
test_labels = np.asarray(mnist.test.labels, dtype=np.int32)

In [30]:
svc = LinearSVC(random_state=0)
# features_train = model(Variable(torch.from_numpy(train_data.reshape(train_data.shape[0],1,28,28)))).data.numpy()
f = model(Variable(torch.from_numpy(test_data.reshape(test_data.shape[0],1,28,28)))).data.numpy()
svc.fit(f, test_labels)
# print("train accuracy: {:.4f}".format(svc.score(features_train, train_labels)))
print("test accuracy: {:.4f}".format(svc.score(f, test_labels)))

test accuracy: 0.1135
