# Binary Connect Implementation

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

from torchvision import datasets, transforms
import numpy as np

batch_size = 128
n_epochs = 1000
validation_steps = 10
learning_rate = 1e-3


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [2]:
train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                        transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.5,), (0.5,))
                       ])), batch_size=batch_size, shuffle=True)

valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, 
                        transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.5,), (0.5,))
                       ])), batch_size=batch_size, shuffle=True)

In [3]:
class DNNModel(nn.Module):
    def __init__(self, image_size, output_size=10, hidden_size=1024):
        super(DNNModel, self).__init__()
        
        self.image_size = image_size
        
        self.fc1 = nn.Sequential(
                   nn.Linear(image_size * image_size, hidden_size),
                   nn.LeakyReLU(0.2),
                   nn.BatchNorm1d(hidden_size))
        self.fc2 = nn.Sequential(
                   nn.Linear(hidden_size, hidden_size),
                   nn.LeakyReLU(0.2),
                   nn.BatchNorm1d(hidden_size))
        self.fc3 = nn.Sequential(
                   nn.Linear(hidden_size, hidden_size),
                   nn.LeakyReLU(0.2),
                   nn.BatchNorm1d(hidden_size))
        self.output_layer = nn.Sequential(
                    nn.Linear(hidden_size, output_size),
                    nn.LeakyReLU(0.2),
                    nn.BatchNorm1d(output_size))
        
    def forward(self, x):
        x = x.view(-1, self.image_size * self.image_size)
        
        for layer in [self.fc1, self.fc2, self.fc3, self.output_layer]:
            x = layer(x)
        return x
    
class L2SVMLoss(nn.Module):
    def __init__(self):
        super(L2SVMLoss, self).__init__()
    
    def forward(self, output, target):
        y = one_hot_encoding(target)
        ot = output * y
        loss = torch.sum(torch.sqrt(F.relu(1 - ot)))
        return loss
    
def one_hot_encoding(labels):
    y = torch.eye(10) * 2 - 1
    return y[labels].to(device)

In [4]:
model = DNNModel(image_size=28).to(device)
loss_function = L2SVMLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [5]:
print("Training...")

for epoch in range(n_epochs):
    print("========[EPOCH {}/{}]========".format(epoch, n_epochs))
    
    # Training
    train_acc = 0
    train_loss = 0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = loss_function(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.data.cpu().numpy()
        train_acc += np.mean(torch.argmax(outputs, 1).data.cpu().numpy() == labels.data.cpu().numpy())

    train_acc = train_acc / len(train_loader)
    print("[TRAIN ACCURACY]: {:.4f}".format(train_acc))
    print("[TRAIN LOSS]: {:.4f}".format(train_loss))

    if epoch % validation_steps == 0:
        # Validation
        valid_acc = 0
        for i, (images, labels) in enumerate(valid_loader):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)

            valid_acc += np.mean(torch.argmax(outputs, 1).data.cpu().numpy() == labels.data.cpu().numpy())

        valid_acc = valid_acc / len(valid_loader)
        print("[VALIDATION ACCURACY]: {:.4f}".format(valid_acc))    

Training...
[TRAIN ACCURACY]: 0.8579
[TRAIN LOSS]: 43958.0797
[VALIDATION ACCURACY]: 0.7947
[TRAIN ACCURACY]: 0.9021
[TRAIN LOSS]: 29371.6306
[TRAIN ACCURACY]: 0.9386
[TRAIN LOSS]: 19208.1137
[TRAIN ACCURACY]: 0.9542
[TRAIN LOSS]: 13316.7394
[TRAIN ACCURACY]: 0.9593
[TRAIN LOSS]: 11785.0189
[TRAIN ACCURACY]: 0.9530
[TRAIN LOSS]: 13659.6593
[TRAIN ACCURACY]: 0.9264
[TRAIN LOSS]: 18770.6326
[TRAIN ACCURACY]: 0.9504
[TRAIN LOSS]: 14458.1929
[TRAIN ACCURACY]: 0.9449
[TRAIN LOSS]: 15280.7611
[TRAIN ACCURACY]: 0.9650
[TRAIN LOSS]: 10765.3166
[TRAIN ACCURACY]: 0.9678
[TRAIN LOSS]: 9531.6880
[VALIDATION ACCURACY]: 0.9622
[TRAIN ACCURACY]: 0.9660
[TRAIN LOSS]: 10077.5595
[TRAIN ACCURACY]: 0.9649
[TRAIN LOSS]: 10282.2280
[TRAIN ACCURACY]: 0.9702
[TRAIN LOSS]: 8755.1339
[TRAIN ACCURACY]: 0.9732
[TRAIN LOSS]: 8043.5502
[TRAIN ACCURACY]: 0.9746
[TRAIN LOSS]: 7539.7309
[TRAIN ACCURACY]: 0.9775
[TRAIN LOSS]: 6623.7778
[TRAIN ACCURACY]: 0.9687
[TRAIN LOSS]: 9046.8788
[TRAIN ACCURACY]: 0.9762
[TRAIN LO

[TRAIN ACCURACY]: 0.9914
[TRAIN LOSS]: 2606.5003
[TRAIN ACCURACY]: 0.9899
[TRAIN LOSS]: 2877.0619
[TRAIN ACCURACY]: 0.9933
[TRAIN LOSS]: 1995.7148
[TRAIN ACCURACY]: 0.9924
[TRAIN LOSS]: 2322.4502
[VALIDATION ACCURACY]: 0.9803
[TRAIN ACCURACY]: 0.9935
[TRAIN LOSS]: 2105.4227
[TRAIN ACCURACY]: 0.9950
[TRAIN LOSS]: 1564.8818
[TRAIN ACCURACY]: 0.9956
[TRAIN LOSS]: 1448.1252
[TRAIN ACCURACY]: 0.9951
[TRAIN LOSS]: 1621.5534
[TRAIN ACCURACY]: 0.9945
[TRAIN LOSS]: 1635.3511
[TRAIN ACCURACY]: 0.9960
[TRAIN LOSS]: 1315.6184
[TRAIN ACCURACY]: 0.9962
[TRAIN LOSS]: 1272.8568
[TRAIN ACCURACY]: 0.9963
[TRAIN LOSS]: 1127.1763
[TRAIN ACCURACY]: 0.9963
[TRAIN LOSS]: 1192.0999
[TRAIN ACCURACY]: 0.9934
[TRAIN LOSS]: 2102.1027
[VALIDATION ACCURACY]: 0.9803
[TRAIN ACCURACY]: 0.9954
[TRAIN LOSS]: 1479.7899
[TRAIN ACCURACY]: 0.9952
[TRAIN LOSS]: 1550.3961
[TRAIN ACCURACY]: 0.9944
[TRAIN LOSS]: 1797.6927
[TRAIN ACCURACY]: 0.9944
[TRAIN LOSS]: 1677.8987
[TRAIN ACCURACY]: 0.9966
[TRAIN LOSS]: 1181.1789
[TRAIN AC

KeyboardInterrupt: 

In [None]:
        # Train the generator
        for _ in range(g_iters):
            optimizer_g.zero_grad()

            z = Variable(torch.cuda.FloatTensor(np.random.randn(images.shape[0], 64), device=device))

            generated_images = generator(z)
            gen_logits = discriminator(generated_images)
            g_loss = adversarial_loss(gen_logits, real)

            g_loss.backward()
            optimizer_g.step()