In [1]:
import os
from pathlib import Path
from itertools import islice
import gzip
import pickle

import numpy as np
import matplotlib.pyplot as plt

from bokeh.io import push_notebook, show, output_notebook, output_file
from bokeh.plotting import figure, ColumnDataSource, save
from bokeh.transform import linear_cmap
from bokeh.palettes import Plasma11, Viridis11
from bokeh.util.hex import hexbin
from bokeh.layouts import gridplot
output_notebook()

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms as transforms

from parse import *

In [2]:
# Locate the pickled MNIST archive in ../data relative to the current working directory,
# downloading it with curl first if it is not there yet.
dir_path = Path().absolute()
dataset_path = dir_path.parent / "data/mnist.pkl.gz"
if not dataset_path.exists():
    print('Downloading dataset with curl ...')
    # parents=True creates every missing ancestor; exist_ok=True tolerates an existing directory.
    dataset_path.parent.mkdir(parents=True, exist_ok=True)
    url = 'http://ericjmichaud.com/downloads/mnist.pkl.gz'
    os.system('curl -L {} -o {}'.format(url, dataset_path))
if dataset_path.exists():
    print('Dataset acquired')
else:
    print('Download failed')
# NOTE(review): pickle.load can execute arbitrary code and the archive is fetched over
# plain HTTP -- only run this against a source you trust.
# The context manager closes the gzip handle even when unpickling raises.
with gzip.open(dataset_path, 'rb') as f:
    mnist = pickle.load(f)
print('Loaded data to variable `mnist`')

Dataset acquired
Loaded data to variable `mnist`


In [3]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Single precision is used both explicitly (via `dtype`) and as PyTorch's default.
dtype = torch.float32
torch.set_default_dtype(dtype)

In [4]:
# Display the selected device so the saved notebook records where the run executed.
device

device(type='cuda')

In [5]:
class MNISTDataset(Dataset):
    """Dataset wrapper around an indexable, sized collection of MNIST samples.

    Args:
        data: collection supporting ``len()`` and integer indexing.
        transform: optional callable applied to each sample when it is fetched.
    """

    def __init__(self, data, transform=None):
        self.transform = transform
        self.mnist = data

    def __len__(self):
        """Return the number of wrapped samples."""
        return len(self.mnist)

    def __getitem__(self, idx):
        """Return sample ``idx``, passed through ``transform`` when one is set."""
        item = self.mnist[idx]
        return self.transform(item) if self.transform else item

class ToTensor(object):
    """Turn a (np.ndarray image, np.ndarray one-hot label) sample into a pair of tensors."""

    def __call__(self, sample):
        pixels, one_hot = sample
        # The image is cast to the notebook-wide `dtype`.
        image = torch.from_numpy(pixels).to(dtype)
        # The class index is the position of the largest one-hot entry, stored as int64.
        class_index = np.argmax(one_hot)
        label = torch.tensor(class_index).to(torch.long)
        return (image, label)


In [6]:
class SoftmaxRegression(nn.Module):
    """One bias-free linear layer followed by a log-softmax over dimension 1.

    Args:
        n_in: number of input features.
        n_out: number of output classes.
    """

    def __init__(self, n_in, n_out):
        super().__init__()
        self.linear = nn.Linear(in_features=n_in, out_features=n_out, bias=False)

    def forward(self, x):
        """Return log-probabilities for a batch ``x``."""
        logits = self.linear(x)
        return F.log_softmax(logits, dim=1)
    
    
class FullyConnected(nn.Module):
    """Bias-free dense network with tanh hidden activations and a log-softmax output.

    Args:
        *layers: layer widths, input width first and output width last. Each
            consecutive pair becomes one ``nn.Linear`` without bias, so
            ``FullyConnected(784, 100, 10)`` has one hidden layer and
            ``FullyConnected(784, 10)`` has none.

    Raises:
        ValueError: if fewer than two widths are given (no layer could be built).
    """
    def __init__(self, *layers):
        super(FullyConnected, self).__init__()
        if len(layers) < 2:
            raise ValueError(
                'FullyConnected needs at least an input and an output width, got {}'.format(len(layers)))
        self.layers = nn.ModuleList(
            [nn.Linear(n_in, n_out, bias=False) for n_in, n_out in zip(layers[:-1], layers[1:])])

    def forward(self, x):
        """Return log-probabilities (softmax over dimension 1) for a batch ``x``."""
        # Split explicitly into hidden layers and the output layer rather than reusing a
        # loop index after the loop, which is unbound when there is only one layer.
        *hidden, output = self.layers
        for layer in hidden:
            x = torch.tanh(layer(x))
        return F.log_softmax(output(x), dim=1)

## No Hidden Layer Softmax Network

In [16]:
# Split `mnist` at index 60000: the head is used for training, the tail for testing.
training = MNISTDataset(data=mnist[:60000], transform=ToTensor())
test = MNISTDataset(data=mnist[60000:], transform=ToTensor())

# Both loaders serve shuffled mini-batches of 20 samples.
training_loader = torch.utils.data.DataLoader(dataset=training, batch_size=20, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test, batch_size=20, shuffle=True)

In [17]:
# Softmax regression with 28*28 = 784 inputs and 10 output classes.
model = SoftmaxRegression(n_in=28 * 28, n_out=10)
model = model.to(device)
# Alternative kept for reference: model = FullyConnected(28*28, 500, 500, 10).to(device)

# Negative log-likelihood matches the log-softmax the model emits.
loss_fn = nn.NLLLoss()
# Adam with its default hyperparameters.
optimizer = torch.optim.Adam(params=model.parameters())

In [18]:
# --- DEFINE OUTPUT ---

# Column store that update_metrics() streams one row into per call.
source = ColumnDataSource(data=dict(
    num_batches=[],
    training_loss=[],
    test_loss=[],
    EI=[],
))

loss_plot = figure(title='Loss', x_axis_label='batches',
                   plot_width=800, plot_height=150)
EI_plot = figure(title='Effective Information', x_axis_label='batches', y_axis_label='bits',
                 plot_width=800, plot_height=150)

# Test loss in red and training loss in green, both against the batch counter.
loss_plot.line(x='num_batches', y='test_loss', source=source, color='red', line_width=2)
loss_plot.line(x='num_batches', y='training_loss', source=source, color='green', line_width=2)
EI_plot.line(x='num_batches', y='EI', source=source, line_width=2)

grid = gridplot([[loss_plot], [EI_plot]])
# Keep the handle returned by show() so push_notebook() can refresh the figure in place.
display = show(grid, notebook_handle=True)

def update_metrics():
    """Measure EI and mean train/test loss for the current model and stream them to the plots.

    Reads the notebook globals ``model``, ``loss_fn``, ``device``, ``test_loader``,
    ``training_loader``, ``num_batches``, ``source`` and ``display``.
    NOTE(review): ``EI`` is not defined in this notebook; presumably it is provided by
    ``from parse import *`` -- verify.
    """
    def mean_loss(loader, max_batches=500):
        """Per-sample mean loss over at most ``max_batches`` batches drawn from ``loader``."""
        total = 0.0
        seen = 0
        with torch.no_grad():
            for x, labels in islice(loader, 0, max_batches):  # 500 batches of 20 samples
                output = model(x.to(device))
                # loss_fn (nn.NLLLoss with default reduction) returns the batch MEAN, so it
                # is weighted by the batch size before summing. Dividing a plain sum of
                # batch means by the sample count would shrink the loss by the batch size.
                total += loss_fn(output, labels.to(device)).item() * len(labels)
                seen += len(labels)
        return total / seen if seen else float('nan')

    # Effective information is evaluated on a single batch of test images.
    with torch.no_grad():
        batch = next(iter(test_loader))[0].to(device)
        ei = EI(model, batch, device=device)

    test_loss = mean_loss(test_loader)
    training_loss = mean_loss(training_loader)

    source.stream({
        'num_batches': [num_batches],
        'training_loss': [training_loss],
        'test_loss': [test_loss],
        'EI': [ei.item()]
    })
    # Re-render the figure that was displayed with notebook_handle=True.
    push_notebook(handle=display)


# --- TRAIN ---

# Count of optimizer steps taken so far; update_metrics() reads it as the x coordinate.
num_batches = 0
ac = 0
ei = None
for epoch in range(0, 75):
    for sample, target in training_loader:
        # Standard step: clear old gradients, forward pass, backward pass, parameter update.
        optimizer.zero_grad()
        loss = loss_fn(model(sample.to(device)), target.to(device))
        loss.backward()
        optimizer.step()
        num_batches = num_batches + 1
        # Refresh the live plots once every 100 batches.
        if not num_batches % 100:
            update_metrics()

In [19]:
# Create the target folder first: on a fresh checkout 'plots/plot0' does not exist and
# writing the HTML file would fail.
Path('plots/plot0').mkdir(parents=True, exist_ok=True)
output_file('plots/plot0/save.html')
save(grid)

'/home/eric/Code/ai-experiments/effective-information/plots/plot0/save.html'

## Single Hidden Layer tanh with softmax output network

In [28]:
# Select inline notebook display again; the previous section ended by calling output_file().
output_notebook()

In [29]:
# Split `mnist` at index 60000: the head is used for training, the tail for testing.
training = MNISTDataset(data=mnist[:60000], transform=ToTensor())
test = MNISTDataset(data=mnist[60000:], transform=ToTensor())

# Both loaders serve shuffled mini-batches of 20 samples.
training_loader = torch.utils.data.DataLoader(dataset=training, batch_size=20, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test, batch_size=20, shuffle=True)

In [30]:
# Dense network: 28*28 = 784 inputs, one hidden layer of 100 units, 10 output classes.
model = FullyConnected(28 * 28, 100, 10)
model = model.to(device)

# Negative log-likelihood matches the log-softmax the model emits.
loss_fn = nn.NLLLoss()
# Adam with its default hyperparameters.
optimizer = torch.optim.Adam(params=model.parameters())

In [31]:
# --- DEFINE OUTPUT ---

# Column store that update_metrics() streams one row into per call.
source = ColumnDataSource(data=dict(
    num_batches=[],
    training_loss=[],
    test_loss=[],
    EI=[],
))

loss_plot = figure(title='Loss', x_axis_label='batches',
                   plot_width=800, plot_height=150)
EI_plot = figure(title='Effective Information', x_axis_label='batches', y_axis_label='bits',
                 plot_width=800, plot_height=150)

# Test loss in red and training loss in green, both against the batch counter.
loss_plot.line(x='num_batches', y='test_loss', source=source, color='red', line_width=2)
loss_plot.line(x='num_batches', y='training_loss', source=source, color='green', line_width=2)
EI_plot.line(x='num_batches', y='EI', source=source, line_width=2)

grid = gridplot([[loss_plot], [EI_plot]])
# Keep the handle returned by show() so push_notebook() can refresh the figure in place.
display = show(grid, notebook_handle=True)

def update_metrics():
    """Measure EI and mean train/test loss for the current model and stream them to the plots.

    Reads the notebook globals ``model``, ``loss_fn``, ``device``, ``test_loader``,
    ``training_loader``, ``num_batches``, ``source`` and ``display``.
    NOTE(review): ``EI`` is not defined in this notebook; presumably it is provided by
    ``from parse import *`` -- verify.
    """
    def mean_loss(loader, max_batches=500):
        """Per-sample mean loss over at most ``max_batches`` batches drawn from ``loader``."""
        total = 0.0
        seen = 0
        with torch.no_grad():
            for x, labels in islice(loader, 0, max_batches):  # 500 batches of 20 samples
                output = model(x.to(device))
                # loss_fn (nn.NLLLoss with default reduction) returns the batch MEAN, so it
                # is weighted by the batch size before summing. Dividing a plain sum of
                # batch means by the sample count would shrink the loss by the batch size.
                total += loss_fn(output, labels.to(device)).item() * len(labels)
                seen += len(labels)
        return total / seen if seen else float('nan')

    # Effective information is evaluated on a single batch of test images.
    with torch.no_grad():
        batch = next(iter(test_loader))[0].to(device)
        ei = EI(model, batch, device=device)

    test_loss = mean_loss(test_loader)
    training_loss = mean_loss(training_loader)

    source.stream({
        'num_batches': [num_batches],
        'training_loss': [training_loss],
        'test_loss': [test_loss],
        'EI': [ei.item()]
    })
    # Re-render the figure that was displayed with notebook_handle=True.
    push_notebook(handle=display)


# --- TRAIN ---

# Count of optimizer steps taken so far; update_metrics() reads it as the x coordinate.
num_batches = 0
ac = 0
ei = None
for epoch in range(0, 100):
    for sample, target in training_loader:
        # Standard step: clear old gradients, forward pass, backward pass, parameter update.
        optimizer.zero_grad()
        loss = loss_fn(model(sample.to(device)), target.to(device))
        loss.backward()
        optimizer.step()
        num_batches = num_batches + 1
        # Refresh the live plots once every 100 batches.
        if not num_batches % 100:
            update_metrics()

KeyboardInterrupt: 

In [None]:
# Create the target folder first: on a fresh checkout 'plots/plot1' does not exist and
# writing the HTML file would fail.
Path('plots/plot1').mkdir(parents=True, exist_ok=True)
output_file('plots/plot1/save.html')
save(grid)

## Convolutional Net

In [None]:
# Select inline notebook display again; the previous section ended by calling output_file().
output_notebook()

In [None]:
class MNISTConv(nn.Module):
    """One 5x5 convolution + ReLU, then two dense layers ending in a log-softmax.

    Args:
        features: number of output channels of the convolution.
    """

    def __init__(self, features):
        super().__init__()
        self.features = features
        conv = nn.Conv2d(in_channels=1, out_channels=features,
                         kernel_size=5, stride=1, padding=0)
        self.conv_pool = nn.Sequential(conv, nn.ReLU())
        self.fc1 = nn.Linear(in_features=self.features * 24 * 24, out_features=200)
        self.fc2 = nn.Linear(in_features=200, out_features=10)

    def forward(self, x):
        """Return log-probabilities (softmax over dimension 1) for a batch of images."""
        feature_maps = self.conv_pool(x)
        # A 5x5 kernel without padding turns a 28x28 image into a 24x24 map per channel.
        flat = feature_maps.reshape((-1, self.features * 24 * 24))
        hidden = torch.tanh(self.fc1(flat))
        return F.log_softmax(self.fc2(hidden), dim=1)

In [None]:
# NOTE(review): this repeats the MNISTDataset class defined near the top of the notebook
# and shadows it once this cell runs.
class MNISTDataset(Dataset):
    """Dataset wrapper around an indexable, sized collection of MNIST samples.

    Args:
        data: collection supporting ``len()`` and integer indexing.
        transform: optional callable applied to each sample when it is fetched.
    """

    def __init__(self, data, transform=None):
        self.transform = transform
        self.mnist = data

    def __len__(self):
        """Return the number of wrapped samples."""
        return len(self.mnist)

    def __getitem__(self, idx):
        """Return sample ``idx``, passed through ``transform`` when one is set."""
        item = self.mnist[idx]
        return self.transform(item) if self.transform else item

# NOTE(review): this repeats the ToTensor class defined near the top of the notebook
# and shadows it once this cell runs.
class ToTensor(object):
    """Turn a (np.ndarray image, np.ndarray one-hot label) sample into a pair of tensors."""

    def __call__(self, sample):
        pixels, one_hot = sample
        # The image is cast to the notebook-wide `dtype`.
        image = torch.from_numpy(pixels).to(dtype)
        # The class index is the position of the largest one-hot entry, stored as int64.
        class_index = np.argmax(one_hot)
        label = torch.tensor(class_index).to(torch.long)
        return (image, label)
    
class Reshape(object):
    """Reshape the image of an (image, label) sample to (1, 28, 28); the label is passed through."""

    def __call__(self, sample):
        flat_image, label = sample
        return (flat_image.reshape((1, 28, 28)), label)

In [None]:
# Each sample is first reshaped to (1, 28, 28) and then converted to tensors.
# Split `mnist` at index 60000: the head is used for training, the tail for testing.
training = MNISTDataset(
    data=mnist[:60000],
    transform=transforms.Compose([Reshape(), ToTensor()]),
)
test = MNISTDataset(
    data=mnist[60000:],
    transform=transforms.Compose([Reshape(), ToTensor()]),
)

# Both loaders serve shuffled mini-batches of 20 samples.
training_loader = torch.utils.data.DataLoader(dataset=training, batch_size=20, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test, batch_size=20, shuffle=True)

In [None]:
# Number of convolution output channels.
NUM_FEATURES = 10
model = MNISTConv(features=NUM_FEATURES)
model = model.to(device)

# Negative log-likelihood matches the log-softmax the model emits.
loss_fn = nn.NLLLoss()
# Adam with its default hyperparameters.
optimizer = torch.optim.Adam(params=model.parameters())

In [None]:
# --- DEFINE OUTPUT ---

# Column store that update_metrics() streams one row into per call.
source = ColumnDataSource(data=dict(
    num_batches=[],
    accuracy=[],
    test_loss=[],
    EI=[],
))

accuracy_plot = figure(title='Accuracy', x_axis_label='batches',
                       plot_width=800, plot_height=150)
test_loss_plot = figure(title='Loss on Test Data', x_axis_label='batches',
                        plot_width=800, plot_height=150)
EI_plot = figure(title='Effective Information', x_axis_label='batches', y_axis_label='bits',
                 plot_width=800, plot_height=150)

# One line per metric, each drawn against the batch counter.
accuracy_plot.line(x='num_batches', y='accuracy', source=source, color='green', line_width=2)
test_loss_plot.line(x='num_batches', y='test_loss', source=source, color='red', line_width=2)
EI_plot.line(x='num_batches', y='EI', source=source, line_width=2)

grid = gridplot([[accuracy_plot], [test_loss_plot], [EI_plot]])
# Keep the handle returned by show() so push_notebook() can refresh the figure in place.
display = show(grid, notebook_handle=True)

def update_metrics():
    """Measure EI, test accuracy and mean test loss, then stream them to the plots.

    Reads the notebook globals ``model``, ``loss_fn``, ``device``, ``test_loader``,
    ``num_batches``, ``source`` and ``display``.
    NOTE(review): ``EI`` is not defined in this notebook; presumably it is provided by
    ``from parse import *`` -- verify.
    """
    correct = 0
    seen = 0
    total_loss = 0.0
    with torch.no_grad():
        # Move the EI batch to the model's device, as the other update_metrics variants do.
        batch = next(iter(test_loader))[0].to(device)
        ei = EI(model, batch, device=device)
        for x, labels in islice(test_loader, 0, 100):  # 100 batches of 20 samples
            labels = labels.to(device)
            output = model(x.to(device))
            # loss_fn (nn.NLLLoss with default reduction) returns the batch MEAN, so it is
            # weighted by the batch size before summing. Dividing a plain sum of batch
            # means by the sample count would shrink the loss by the batch size.
            total_loss += loss_fn(output, labels).item() * len(labels)
            correct += (output.argmax(dim=1) == labels).sum().item()
            seen += len(labels)
    mean_loss = total_loss / seen if seen else float('nan')
    accuracy = correct / seen if seen else float('nan')
    source.stream({
        'num_batches': [num_batches],
        'accuracy': [accuracy],
        'test_loss': [mean_loss],
        'EI': [ei.item()]
    })
    # Re-render the figure that was displayed with notebook_handle=True.
    push_notebook(handle=display)


# --- TRAIN ---

# Count of optimizer steps taken so far; update_metrics() reads it as the x coordinate.
num_batches = 0
ac = 0
ei = None
for epoch in range(0, 10):
    for sample, target in training_loader:
        # Standard step: clear old gradients, forward pass, backward pass, parameter update.
        optimizer.zero_grad()
        loss = loss_fn(model(sample.to(device)), target.to(device))
        loss.backward()
        optimizer.step()
        num_batches = num_batches + 1
        # Refresh the live plots once every 100 batches.
        if not num_batches % 100:
            update_metrics()