In [135]:
import argparse, json, pickle
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as mlines

# import tensorflow as tf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from tensorboardX import SummaryWriter

In [136]:
# Training settings
# Notebook-level overrides: these two values replace the argparse defaults
# below for this run and are also reused later to name output files.
batch_size = 1024
test_batch_size = 3000

parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=20, metavar='N',
                    help='number of epochs to train (default: 20)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')

# parse_args() would otherwise read sys.argv, which is not meaningful inside a
# notebook kernel, so pass an explicit argument list carrying the overrides.
args = parser.parse_args(['--batch-size', str(batch_size),
                          '--test-batch-size', str(test_batch_size)])
# Use the GPU only when it is both allowed and actually available.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed the CPU generator, and the CUDA one when the GPU is in use.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)


# Extra DataLoader options are only passed when batches will be moved to the GPU.
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

MNIST_data_path = '../HW1-1/git_ignored/MNIST/MNIST_data'

# One preprocessing pipeline shared by every loader: convert to a tensor,
# then normalise with the given (mean, std).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)) # (mean, std)
])

# Training split, batched for optimisation (downloads the data if missing).
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(MNIST_data_path, train=True, download=True,
                   transform=mnist_transform),
    batch_size=args.batch_size, shuffle=True, **kwargs)

# Held-out test split.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(MNIST_data_path, train=False, transform=mnist_transform),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)

# Training split again, but batched with the evaluation batch size.
# model_eval_and_save() iterates this loader for the "train" sensitivity, so it
# must wrap train=True data (it previously wrapped the test split by mistake).
train_eval_loader = torch.utils.data.DataLoader(
    datasets.MNIST(MNIST_data_path, train=True, transform=mnist_transform),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)

In [137]:
# Prefix for the scalar tags written to TensorBoard by the evaluation functions.
tensorboard_category = 'batch_' + str(batch_size)

class CNN_2FC(nn.Module):
    """Two conv/max-pool stages followed by two fully connected layers.

    ``forward`` expects inputs that flatten to 320 features per sample after
    the second pooling stage (a 1x28x28 image does: 20 channels of 4x4) and
    returns log-probabilities over 10 classes, one row per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.max_pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()  # drop probability left at its default (0.5)
        self.max_pool2 = nn.MaxPool2d(2)
        # Classifier head.
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        # Stage 1: convolution -> 2x2 max-pool -> ReLU.
        stage1 = F.relu(self.max_pool1(self.conv1(x)))

        # Stage 2: convolution -> channel dropout -> 2x2 max-pool -> ReLU.
        conv2_out = self.conv2_drop(self.conv2(stage1))
        stage2 = F.relu(self.max_pool2(conv2_out))

        # Flatten each sample to a 320-vector for the linear layers.
        flat = stage2.view(-1, 320)

        # Hidden layer; dropout is only active while the module is in training mode.
        hidden = F.dropout(F.relu(self.fc1(flat)), training=self.training)

        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

# Instantiate the network and report how many trainable scalars it contains.
model = CNN_2FC()
trainable_shapes = [p.size() for p in model.parameters() if p.requires_grad]
num_params = sum([np.prod(shape) for shape in trainable_shapes])
print('model params:', num_params)
if args.cuda:
    model.cuda()

# SGD with momentum over every model parameter; hyper-parameters come from args.
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer``, ``train_loader`` and
    ``args``. Prints the batch loss every ``args.log_interval`` batches.

    Args:
        epoch: epoch number; only used in the printed progress line.
    """
    model.train() # set to training mode
    num_batches = len(train_loader)
    num_samples = len(train_loader.dataset)

    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # Only the model parameters are optimised, so the input does not need
        # a gradient here (the input gradient was previously computed and discarded).
        data, target = Variable(data), Variable(target)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % args.log_interval == 0:
            # `.item()` is the scalar accessor on PyTorch >= 0.4; older
            # releases only offer `.data[0]`, which newer ones reject.
            loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), num_samples,
                100. * batch_idx / num_batches, loss_value))

def trained_epoch(epoch):
    """Evaluate the current model on every batch of ``train_loader`` and log it.

    Writes ``<tensorboard_category>/train_loss`` and ``.../train_accuracy`` to
    the notebook-level ``writer`` and prints a one-line summary.

    Args:
        epoch: step value attached to the logged scalars.
    """
    model.eval()# set to evaluation mode
    num_samples = len(train_loader.dataset)
    trained_loss = 0.0
    correct = 0
    for data, target in train_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # volatile=True is the PyTorch <= 0.3 way to skip recording the graph;
        # newer releases accept the flag but ignore it.
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        # Sum (not mean) per batch so the total can be averaged over the whole set.
        batch_loss = F.nll_loss(output, target, size_average=False)
        # `.item()` on PyTorch >= 0.4, `.data[0]` on older releases.
        trained_loss += batch_loss.item() if hasattr(batch_loss, 'item') else batch_loss.data[0]
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        # int() keeps `correct` a Python int even when .sum() returns a tensor,
        # so the accuracy below is always a true division.
        correct += int(pred.eq(target.data.view_as(pred)).long().cpu().sum())

    trained_loss /= num_samples
    trained_accuracy = correct / num_samples
    writer.add_scalar('{}/train_loss'.format(tensorboard_category), trained_loss, epoch) # try add_scalars() later
    writer.add_scalar('{}/train_accuracy'.format(tensorboard_category), trained_accuracy, epoch)
    print('\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        trained_loss, correct, num_samples,
        100. * trained_accuracy))
            
def test(epoch):
    """Evaluate the current model on every batch of ``test_loader`` and log it.

    Writes ``<tensorboard_category>/test_loss`` and ``.../test_accuracy`` to
    the notebook-level ``writer``, prints a one-line summary, and re-exports
    all scalars logged so far to ``./batch_<batch_size>.json``.

    Args:
        epoch: step value attached to the logged scalars.
    """
    model.eval()# set to evaluation mode
    num_samples = len(test_loader.dataset)
    test_loss = 0.0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # volatile=True is the PyTorch <= 0.3 way to skip recording the graph;
        # newer releases accept the flag but ignore it.
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        # Sum (not mean) per batch so the total can be averaged over the whole set.
        batch_loss = F.nll_loss(output, target, size_average=False)
        # `.item()` on PyTorch >= 0.4, `.data[0]` on older releases.
        test_loss += batch_loss.item() if hasattr(batch_loss, 'item') else batch_loss.data[0]
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        # int() keeps `correct` a Python int even when .sum() returns a tensor,
        # so the accuracy below is always a true division.
        correct += int(pred.eq(target.data.view_as(pred)).long().cpu().sum())

    test_loss /= num_samples
    test_accuracy = correct / num_samples
    writer.add_scalar('{}/test_loss'.format(tensorboard_category), test_loss, epoch) # try add_scalars() later
    writer.add_scalar('{}/test_accuracy'.format(tensorboard_category), test_accuracy, epoch)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * test_accuracy))

    # Overwritten after every epoch, so the file always holds the latest scalars.
    writer.export_scalars_to_json("./batch_{}.json".format(batch_size))
    
def _mean_input_sensitivity(loader):
    """Return the mean per-sample L2 norm of d(loss)/d(input) over ``loader.dataset``."""
    total = 0.0
    for data, target in loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # The input must require grad so that backward() populates data.grad.
        data, target = Variable(data, requires_grad=True), Variable(target)

        # Clear parameter gradients left over from the previous batch.
        model.zero_grad()
        output = model(data)
        # Loss value itself is irrelevant; it is only needed to obtain gradients.
        # NOTE(review): nll_loss averages over the batch by default, so each
        # per-sample gradient carries a 1/len(batch) factor and the result
        # depends on the evaluation batch size. Left unchanged to keep results
        # comparable with earlier runs; confirm whether a summed loss is intended.
        loss = F.nll_loss(output, target)
        loss.backward()

        # .cpu() makes the conversion valid for CUDA tensors as well.
        grads_on_input = data.grad.data.cpu().numpy()
        # One row per sample, so the norm is taken over all of that sample's pixels.
        per_sample = grads_on_input.reshape(len(grads_on_input), -1)
        batch_of_sensitivities = np.sqrt(np.sum(per_sample ** 2, axis=1))
        # Accumulate as a Python float so the result stays JSON-serialisable.
        total += float(np.sum(batch_of_sensitivities))
    # Average over exactly the samples that were iterated.
    return total / len(loader.dataset)


def model_eval_and_save(save_path):
    """Measure input sensitivity on both evaluation loaders, then save the weights.

    Sensitivity is the per-sample L2 norm of the gradient of the loss with
    respect to the input, averaged over the dataset each loader wraps.

    Args:
        save_path: destination passed to ``torch.save`` for ``model.state_dict()``.

    Returns:
        ``(sensitivity_train, sensitivity_test)`` as Python floats, computed
        from ``train_eval_loader`` and ``test_loader`` respectively.
    """
    model.eval()# eval only affects stuff like dropout and batch norm, doesn't remove the ability to backprop

    sensitivity_test = _mean_input_sensitivity(test_loader)
    sensitivity_train = _mean_input_sensitivity(train_eval_loader)

    torch.save(model.state_dict(), save_path)

    return sensitivity_train, sensitivity_test

model params: 21840


In [138]:
# Train for args.epochs epochs, evaluating on the train and test loaders after
# each one. The context manager closes the writer even if an epoch raises;
# `writer` stays a notebook-level name because the evaluation functions use it.
with SummaryWriter() as writer:
    for epoch in range(1, args.epochs + 1):
        train(epoch)
        trained_epoch(epoch)
        test(epoch)

# Persist the trained weights and the measured input sensitivities.
save_path = './model_batch_{}.pth'.format(batch_size)
sensitivity_train, sensitivity_test = model_eval_and_save(save_path)
# float() guards against NumPy scalar types that json cannot serialise;
# the `with` block makes sure the file is flushed and closed.
with open("sensitivity_batch_{}.json".format(batch_size), 'w') as sensitivity_file:
    json.dump({'train': float(sensitivity_train), 'test': float(sensitivity_test)},
              sensitivity_file)


Train set: Average loss: 2.1836, Accuracy: 21552/60000 (36%)

Test set: Average loss: 2.1807, Accuracy: 3676/10000 (37%)


Train set: Average loss: 1.5838, Accuracy: 36854/60000 (61%)

Test set: Average loss: 1.5672, Accuracy: 6274/10000 (63%)


Train set: Average loss: 0.9060, Accuracy: 47626/60000 (79%)

Test set: Average loss: 0.8836, Accuracy: 8042/10000 (80%)


Train set: Average loss: 0.5928, Accuracy: 51103/60000 (85%)

Test set: Average loss: 0.5733, Accuracy: 8609/10000 (86%)


Train set: Average loss: 0.4719, Accuracy: 52385/60000 (87%)

Test set: Average loss: 0.4558, Accuracy: 8775/10000 (88%)


Train set: Average loss: 0.4020, Accuracy: 53338/60000 (89%)

Test set: Average loss: 0.3879, Accuracy: 8918/10000 (89%)


Train set: Average loss: 0.3564, Accuracy: 53943/60000 (90%)

Test set: Average loss: 0.3424, Accuracy: 9052/10000 (91%)


Train set: Average loss: 0.3265, Accuracy: 54416/60000 (91%)

Test set: Average loss: 0.3153, Accuracy: 9112/10000 (91%)


Train set: Aver

#### load params into model

In [None]:
# Rebuild the architecture, then restore the weights that model_eval_and_save()
# wrote to save_path. (CNN_2FC takes no constructor arguments.)
the_model = CNN_2FC()
the_model.load_state_dict(torch.load(save_path))

#### use tensorboardX to draw model structure

In [None]:
# need to comment out model's cuda() before writing graph
# A random batch is enough: it is only fed through the model to trace its structure.
dummy_input = Variable(torch.rand(1000, 1, 28, 28))
with SummaryWriter(comment='MNIST_2FC') as graph_writer:
    graph_writer.add_graph(model, (dummy_input, ))

## Plot graphs

In [None]:
# Load the exported training curves for every model id used by the plotting
# cell (2, 3 and 5).
# NOTE(review): assumes one exported JSON per model, named 'CNN_<id>FC.json' --
# the original cell read 'CNN_2FC.json' with an undefined model_id; TODO confirm
# the file names for the 3FC and 5FC runs.
model_logs = {}
for model_id in (2, 3, 5):
    with open('CNN_{}FC.json'.format(model_id)) as log_file:
        data = json.load(log_file)
    # Column 2 of each exported row is kept as the curve
    # (presumably rows are [wall_time, step, value] -- verify against the export).
    model_logs[model_id] = {'loss': np.array(data['train_loss'])[:,2],
                            'accuracy': np.array(data['train_accuracy'])[:,2]}

In [None]:
# Which logged curve to draw; must be a key of each model_logs entry.
mode = 'loss'

# 1-based epoch axis, sized from the 2FC curve.
x = list(range(1, len(model_logs[2][mode]) + 1))

# One line per model, keeping the handles so the legend lists them in order.
curve_handles = []
for model_id, curve_label in ((2, '2FC'), (3, '3FC'), (5, '5FC')):
    handle, = plt.plot(x, model_logs[model_id][mode], label=curve_label)
    curve_handles.append(handle)

plt.legend(handles=curve_handles)
plt.xlabel('epoch')
plt.ylabel(mode)
plt.title('MNIST')
# Save before show(): showing first can leave an empty figure to be written.
plt.savefig('{}.png'.format(mode))
plt.show()