In [1]:
import numpy as np
import torch
from torch.autograd import Function
from torch.autograd import Variable

# Load MNIST data

In [2]:
import torch.utils.data
from torchvision import datasets, transforms
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=True, download=True,
               transform=transforms.Compose([
                   transforms.ToTensor(),
                   transforms.Normalize((0.1307,), (0.3081,))
               ])),
    batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=1000, shuffle=True)

# Train Feature Selector

In [181]:
def acc_fn(features, classes):
    inner_products = features.mm(features.t())
    predictions = torch.sign(inner_products)
    class_sims = 2 * classes.mm(classes.t()) - 1
    num_incorrect = torch.sum(torch.abs(predictions - class_sims))/2
    num_correct = torch.sum(torch.abs(predictions + class_sims))/2
    return num_correct.double()

def hinge_loss_fn(features, classes, gamma=1.0):    
    inner_products = features.mm(features.t())
    same_class = 2 * classes.mm(classes.t()) - 1
    scores = torch.clamp(gamma - torch.mul(same_class, inner_products), min=0)
    output = torch.sum(scores)
    return output

def logistic_loss_fn(features, classes):
    inner_products = features.mm(features.t())
    same_class = 2 * classes.mm(classes.t()) - 1
    scores = torch.log1p(torch.exp(-same_class * inner_products))
    output = torch.sum(scores)
    return output

def reg_fn(x, alpha = 1):
    return alpha * torch.sum(x **2)

In [177]:
def train(epoch):
    model.train()
    classes = Variable(torch.DoubleTensor(128, 10))
    for batch_idx, (data, target) in enumerate(train_loader):
        if (data.shape[0] != 128):
            classes = Variable(torch.DoubleTensor(data.shape[0], 10))
            data = Variable(data).view(data.shape[0], 784)
            target = Variable(target).view(data.shape[0], 1)
        else:
            data = Variable(data).view(128, 784)
            target = Variable(target).view(128, 1)
        classes.zero_()
        classes.scatter_(1, target, 1)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output.double(), classes.double()) + reg_fn(output.double())
        loss.backward()
        optimizer.step()
        acc = acc_fn(output.double(), classes.double())
        if False and batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))

def test():
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    classes = Variable(torch.DoubleTensor(50, 10))
    for data, target in test_loader:
        if (data.shape[0] != 50):
            classes = Variable(torch.DoubleTensor(data.shape[0], 10))
            data = Variable(data).view(data.shape[0], 784)
            target = Variable(target).view(data.shape[0], 1)
        else:
            data = Variable(data).view(50, 784)
            target = Variable(target).view(50, 1)
        classes.zero_()
        classes.scatter_(1, target, 1)
        optimizer.zero_grad()
        output = model(data)
        test_loss += loss_fn(output.double(), classes.double()) + reg_fn(output.double())
        correct += acc_fn(output.double(), classes.double())
        total += data.shape[0] * data.shape[0]

    test_loss /= total
    correct /= total
    print('Test set: Average loss: {:.4f}, Accuracy: {:.4f}%'.format(
        test_loss.data[0], 100. * correct.data[0]))
    return correct.data[0]

### 32 features, 2-layer NN

In [54]:
for lr in [0.005, 0.01]:
    loss_fn = logistic_loss_fn
    model = torch.nn.Sequential(
        torch.nn.Linear(784, 64),
        torch.nn.BatchNorm1d(64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 32)
    )
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    print('logistic loss, lr: {}'.format(lr))
    for epoch in range(5):
        train(epoch)
        test()
    print()
    
for lr in [0.005, 0.01]:
    loss_fn = hinge_loss_fn
    model = torch.nn.Sequential(
        torch.nn.Linear(784, 64),
        torch.nn.BatchNorm1d(64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 32)
    )    
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    print('hinge loss, lr: {}'.format(lr))
    for epoch in range(5):
        train(epoch)
        test()
    print()

logistic loss, lr: 0.005
Test set: Average loss: 0.3883, Accuracy: 89.6519%
Test set: Average loss: 0.3418, Accuracy: 91.8059%
Test set: Average loss: 0.3104, Accuracy: 92.9372%
Test set: Average loss: 0.2925, Accuracy: 93.6002%
Test set: Average loss: 0.2851, Accuracy: 94.1550%

logistic loss, lr: 0.01
Test set: Average loss: 0.3580, Accuracy: 90.2348%
Test set: Average loss: 0.3161, Accuracy: 93.0629%
Test set: Average loss: 0.2859, Accuracy: 93.8923%
Test set: Average loss: 0.2950, Accuracy: 93.9531%
Test set: Average loss: 0.2829, Accuracy: 94.5218%

hinge loss, lr: 0.005
Test set: Average loss: 0.3190, Accuracy: 91.0822%
Test set: Average loss: 0.2516, Accuracy: 93.3953%
Test set: Average loss: 0.2111, Accuracy: 94.2449%
Test set: Average loss: 0.1986, Accuracy: 94.6151%
Test set: Average loss: 0.1864, Accuracy: 95.0634%

hinge loss, lr: 0.01
Test set: Average loss: 0.3010, Accuracy: 91.7323%
Test set: Average loss: 0.2310, Accuracy: 93.8121%
Test set: Average loss: 0.2068, Accura

### 5 features, 2-layer NN

In [55]:
for lr in [0.0005, 0.001, 0.005, 0.01]:
    loss_fn = logistic_loss_fn
    model = torch.nn.Sequential(
        torch.nn.Linear(784, 64),
        torch.nn.BatchNorm1d(64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 5)
    )
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    print('logistic loss, lr: {}'.format(lr))
    for epoch in range(5):
        train(epoch)
        test()
    print()
    
for lr in [0.0005, 0.001, 0.005, 0.01]:
    loss_fn = hinge_loss_fn
    model = torch.nn.Sequential(
        torch.nn.Linear(784, 64),
        torch.nn.BatchNorm1d(64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 5)
    )    
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    print('hinge loss, lr: {}'.format(lr))
    for epoch in range(5):
        train(epoch)
        test()
    print()

logistic loss, lr: 0.0005
Test set: Average loss: 0.5781, Accuracy: 73.8855%
Test set: Average loss: 0.5564, Accuracy: 75.3544%
Test set: Average loss: 0.5448, Accuracy: 75.1283%
Test set: Average loss: 0.5350, Accuracy: 77.1252%
Test set: Average loss: 0.5232, Accuracy: 76.3542%

logistic loss, lr: 0.001
Test set: Average loss: 0.5572, Accuracy: 75.7373%
Test set: Average loss: 0.5337, Accuracy: 76.4281%
Test set: Average loss: 0.5150, Accuracy: 76.6726%
Test set: Average loss: 0.5032, Accuracy: 75.3771%
Test set: Average loss: 0.4902, Accuracy: 75.6581%

logistic loss, lr: 0.005
Test set: Average loss: 0.5003, Accuracy: 75.9919%
Test set: Average loss: 0.4645, Accuracy: 77.1719%
Test set: Average loss: 0.4441, Accuracy: 77.5151%
Test set: Average loss: 0.4390, Accuracy: 78.0368%
Test set: Average loss: 0.4475, Accuracy: 77.7977%

logistic loss, lr: 0.01
Test set: Average loss: 0.4870, Accuracy: 75.7246%
Test set: Average loss: 0.4526, Accuracy: 77.0228%
Test set: Average loss: 0.4339

### 5 features, 3-layer NN

In [182]:
feature_selector = None
best_acc = 0

In [183]:
for lr in [0.005]:
    loss_fn = logistic_loss_fn
    model = torch.nn.Sequential(
        torch.nn.Linear(784, 64),
        torch.nn.BatchNorm1d(64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 64),
        torch.nn.BatchNorm1d(64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 5)
    )
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    print('logistic loss, lr: {}'.format(lr))
    for epoch in range(10):
        train(epoch)
        acc = test()
        if acc > best_acc:
            best_acc = acc
            feature_selector = model
    print()
    
for lr in [0.005, 0.01]:
    loss_fn = hinge_loss_fn
    model = torch.nn.Sequential(
        torch.nn.Linear(784, 64),
        torch.nn.BatchNorm1d(64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 5)
    )    
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    print('hinge loss, lr: {}'.format(lr))
    for epoch in range(10):
        train(epoch)
        acc = test()
        if acc > best_acc:
            best_acc = acc
            feature_selector = model
    print()

logistic loss, lr: 0.005
Test set: Average loss: 0.4488, Accuracy: 77.5119%
Test set: Average loss: 0.4170, Accuracy: 78.9075%
Test set: Average loss: 0.4195, Accuracy: 79.2945%
Test set: Average loss: 0.4072, Accuracy: 79.1910%
Test set: Average loss: 0.4053, Accuracy: 79.4282%
Test set: Average loss: 0.4122, Accuracy: 79.6226%
Test set: Average loss: 0.4000, Accuracy: 79.6144%
Test set: Average loss: 0.4053, Accuracy: 79.4527%
Test set: Average loss: 0.4069, Accuracy: 79.7583%
Test set: Average loss: 0.4001, Accuracy: 79.6932%

hinge loss, lr: 0.005
Test set: Average loss: 0.5517, Accuracy: 83.2714%
Test set: Average loss: 0.5190, Accuracy: 84.2642%
Test set: Average loss: 0.4995, Accuracy: 84.7137%
Test set: Average loss: 0.4960, Accuracy: 85.2199%
Test set: Average loss: 0.4899, Accuracy: 85.4691%
Test set: Average loss: 0.4836, Accuracy: 85.5986%
Test set: Average loss: 0.4781, Accuracy: 85.9910%
Test set: Average loss: 0.4735, Accuracy: 85.8993%
Test set: Average loss: 0.4728, Ac

# Train linear classifier

In [252]:
import torch.nn.functional as F

def acc_fn(output, target):    
    classes = torch.max(output, dim=1)[1]  
    num_equal = torch.sum(torch.clamp(-torch.abs(classes - target) + 1,min=0))
#     num_equal = torch.sum(torch.eq(classes, target))
#     print(classes)
#     print(target)
    return num_equal

def mm_loss_fn(output, target, gamma=1.0):
    scores = torch.clamp(gamma + torch.max(output, dim=1)[0] - output.gather(1, target.view(-1,1)), min=0)
    loss = torch.sum(scores)
    return loss

def log_loss_fn(output, target):
    log_softmax = F.log_softmax(output, dim=1)
    scores = log_softmax.gather(1, target.view(-1,1))
    loss = -torch.sum(scores)
    return loss

def train_classifier(epoch):
    linear_classifier.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        features = feature_selector(data.view(data.shape[0], 784))
        output = linear_classifier(features)
        loss = mm_loss_fn(output, target) + reg_fn(output)
        loss.backward()
        optimizer.step()
        if False and batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))

def test_classifier():
    linear_classifier.eval()
    test_loss = 0
    correct = 0
    total = 0
    for data, target in test_loader:
        data, target = Variable(data), Variable(target)
        features = feature_selector(data.view(data.shape[0], 784))      
        optimizer.zero_grad()
        output = linear_classifier(features)        
        test_loss += mm_loss_fn(output, target) + reg_fn(output)
        correct += acc_fn(output, target)
        total += data.shape[0]

    test_loss /= total
    correct = correct.double()
    correct /= total
    print('Test set: Average loss: {:.4f}, Accuracy: {:.4f}%'.format(
        test_loss.data[0], 100. * correct.data[0]))
    return correct.data[0]

In [254]:
loss_fn = log_loss_fn
linear_classifier = torch.nn.Sequential(
    torch.nn.Linear(5, 10)
)    
lr = 0.0001
optimizer = torch.optim.Adam(linear_classifier.parameters(), lr = lr)
print('hinge loss, lr: {}'.format(lr))
for epoch in range(10):
    train_classifier(epoch)
    test_classifier()
print()

hinge loss, lr: 0.0001
Test set: Average loss: 1690.0898, Accuracy: 11.6900%
Test set: Average loss: 1494.5939, Accuracy: 20.6200%
Test set: Average loss: 1344.7733, Accuracy: 28.6800%
Test set: Average loss: 1227.3981, Accuracy: 36.6400%
Test set: Average loss: 1133.5254, Accuracy: 25.3300%
Test set: Average loss: 1063.1982, Accuracy: 50.1100%
Test set: Average loss: 1049.9664, Accuracy: 50.6900%
Test set: Average loss: 1046.5115, Accuracy: 53.3700%
Test set: Average loss: 1035.0486, Accuracy: 52.8600%
Test set: Average loss: 1034.7437, Accuracy: 54.8100%

