In [None]:
from CNN.cifar_model import MobileNet
import torch
import torch.nn as nn
import time
from torchvision import datasets, transforms
import quartz
from quartz import layers
import copy
import numpy as np
np.set_printoptions(suppress=True)

This notebook tries to find the most resource-efficient layout for our CIFAR10 MobileNet architecture on a Nahuku32 board in order to reduce power consumption. The number of cores needed for each layer is determined with a binary search. This notebook does not have to be run on the shared VM, as no models are executed. By installing nxsdk on your own machine, you can benefit from a faster CPU when compiling the model.

In [None]:
# Instantiate the network (argument 10 is presumably the number of CIFAR10
# classes -- confirm against CNN.cifar_model) and restore the weights stored
# in cifar-convnet.pth, mapping all tensors onto the CPU.
model = MobileNet(10)
checkpoint = torch.load("cifar-convnet.pth", map_location=torch.device('cpu'))
model.load_state_dict(checkpoint)
# Switch to evaluation mode; binding the return value keeps the notebook
# from echoing the model as cell output.
capture = model.eval()

In [None]:
# Flatten the model into a plain sequence of layers and fold every
# BatchNorm2d that comes right after a Conv2d (among the kept layer types)
# into that convolution.
KEPT_LAYER_TYPES = (nn.Conv2d, nn.MaxPool2d, nn.BatchNorm2d, nn.Linear, nn.ReLU6, nn.ReLU, nn.Flatten)

previous_module = None
new_layers = []
for module in model.modules():
    if not isinstance(module, KEPT_LAYER_TYPES):
        # Containers and all other module types are skipped and do not
        # update previous_module.
        continue
    follows_conv = isinstance(previous_module, nn.Conv2d)
    if follows_conv and isinstance(module, nn.BatchNorm2d):
        # The convolution was appended in the previous kept step; swap it
        # for the version with the batch norm fused in.
        fused_conv = torch.nn.utils.fuse_conv_bn_eval(previous_module, module)
        new_layers[-1] = fused_conv
    else:
        new_layers.append(module)
    previous_module = module

folded_model = nn.Sequential(*new_layers)
# modules() yields the Sequential container itself first; drop it so that
# only the contained layers remain.
layer_list = list(folded_model.modules())[1:]

In [None]:
t_max = 2**6
input_dims = (3,32,32)
pool_kernel_size = [2,2]


def _to_loihi_layer(torch_layer, rectifying):
    """Return the quartz layer built from `torch_layer`, or None if its type is not converted.

    Only Conv2d, Linear and MaxPool2d are translated; `rectifying` is forwarded
    to the Conv2D and Dense constructors.
    """
    if isinstance(torch_layer, nn.Conv2d):
        return layers.Conv2D(
            weights=torch_layer.weight.detach().numpy(),
            biases=torch_layer.bias.detach().numpy(),
            stride=torch_layer.stride,
            padding=torch_layer.padding,
            groups=torch_layer.groups,
            rectifying=rectifying,
        )
    if isinstance(torch_layer, nn.Linear):
        return layers.Dense(
            weights=torch_layer.weight.detach().numpy(),
            biases=torch_layer.bias.detach().numpy(),
            rectifying=rectifying,
        )
    if isinstance(torch_layer, nn.MaxPool2d):
        return layers.MaxPool2D(kernel_size=torch_layer.kernel_size, stride=torch_layer.stride)
    return None


# Walk the folded layers; a layer is marked rectifying when the layer right
# after it in layer_list is a ReLU/ReLU6. All other layer types are not
# converted themselves.
ACTIVATION_TYPES = (nn.ReLU6, nn.ReLU)
loihi_layers = []
for position, torch_layer in enumerate(layer_list):
    has_successor = position + 1 < len(layer_list)
    successor = layer_list[position + 1] if has_successor else None
    converted = _to_loihi_layer(torch_layer, isinstance(successor, ACTIVATION_TYPES))
    if converted is not None:
        loihi_layers.append(converted)

# The full network gets its own copies so loihi_layers stays untouched for later cells.
network_input = layers.InputLayer(dims=input_dims)
all_layers = [network_input] + copy.deepcopy(loihi_layers)

In [None]:
# Build the quartz network from the input layer plus all converted layers,
# passing t_max as the first argument. Later cells read per-layer
# output_dims from loihi_model.layers.
loihi_model = quartz.Network(t_max, all_layers, verbose=True)

In [None]:
# Show the converted layer objects (the input layer is not part of this list).
loihi_layers

## Check whether a layer compiles without errors; otherwise reduce the number of neurons per core for that layer (i.e. use more cores)

In [None]:
def compile_board(cap, bottom, candidate):
    """Run one step of the binary search for the smallest core count that compiles.

    Tries to compile the notebook-global ``layer_model`` with ``candidate`` cores
    assigned to the layer under test and narrows the search interval accordingly.
    Relies on the globals ``layer_model``, ``loihi_model`` and ``selected_layer``
    being set by the calling cell.

    Args:
        cap: Smallest core count currently treated as compiling (upper bound).
        bottom: Largest core count currently treated as failing (lower bound).
        candidate: Core count to try in this step.

    Returns:
        A tuple ``(cap, bottom, next_candidate)``. ``next_candidate`` is ``False``
        once ``bottom + 1 >= cap``, i.e. the interval cannot shrink further and
        ``cap`` is the result; no compilation is attempted in that case.
    """
    if bottom + 1 >= cap:
        return cap, bottom, False

    # Random input shaped like the output of the layer preceding the one under test.
    sample_dims = loihi_model.layers[selected_layer-1].output_dims
    try:
        layer_model(np.random.rand(*sample_dims), n_cores_per_layer=[0,candidate], partition='nahuku32', profiling=True, return_board=True)
    except Exception:
        # Any compilation error counts as "does not fit". Catching Exception
        # rather than using a bare except lets KeyboardInterrupt/SystemExit
        # propagate, so a long search can still be aborted by the user.
        bottom = candidate
    else:
        cap = candidate
    # Next candidate is the midpoint of the narrowed interval (rounded down).
    return cap, bottom, (cap + bottom) // 2

In [None]:
# For every layer, binary-search the smallest number of cores for which a
# single-layer network compiles on the board.
for selected_layer in range(1,10):
    # Stand-alone network: an input layer shaped like the preceding layer's
    # output, followed by a private copy of the layer under test.
    selected_layers = [layers.InputLayer(dims=loihi_model.layers[selected_layer-1].output_dims)] + [copy.deepcopy(loihi_layers[selected_layer-1])]
    layer_model = quartz.Network(t_max, selected_layers, verbose=False)
    cap = 300 # assumed upper bound: every layer is expected to fit across 300 cores (never tested itself)
    # Exclusive lower bound. It must be 0, not 1: the search only ever tries
    # values strictly between bottom and cap, so starting at 1 would make a
    # 1-core layout unreachable and report 2 cores at best.
    bottom = 0
    candidate = 150 # our first test candidate
    while True:
        cap, bottom, candidate = compile_board(cap, bottom, candidate)
        # Identity check: a legitimate candidate of 0 must not be mistaken
        # for the False "search finished" sentinel.
        if candidate is False:
            print("Found {} cores for selected layer {}.".format(cap, selected_layer))
            break
print("done")