# Training the Spconv
This tutorial has two goals. First, we show how to construct and train a network with spconv, and how to glue SNN neurons and spconv layers together. Second, we show how to replace the surrogate-gradient SNN neurons with LocalZO neurons.
## 1 Introduction for Components
### 1.1 Spconv
Spconv is a PyTorch library for sparse convolution; we use it for the convolution, batch-norm and pooling layers.
GitHub homepage: https://github.com/traveller59/spconv
### 1.2 LocalZO.conv_models
Implementations of spiking neurons (so far, only LIF) that can be placed between Spconv layers.

## 2 Training the Spconv
### 2.1 Data loading
First, let's load the data using tonic. Details can be found in train_with_torch_nuro.ipynb, so we skip them here.

In [1]:
# Restrict this process to a single GPU through CUDA_VISIBLE_DEVICES.
gpu_idx = '3'
import os
from tonic.datasets import NMNIST
import tonic
from tonic import transforms
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_idx

# Preprocessing applied to every raw event stream:
#   1. Denoise removes isolated events,
#   2. ToFrame sums the events of each time window into one frame.
sensor_size = NMNIST.sensor_size
denoise = transforms.Denoise(filter_time=10000)
to_frame = transforms.ToFrame(sensor_size=sensor_size, time_window=1000)
frame_transform = transforms.Compose([denoise, to_frame])

# Every dataset item is a (frames, label) pair, where frames is an np.ndarray.
trainset = NMNIST(save_to='/home/zxh/data', train=True, transform=frame_transform)
testset = NMNIST(save_to='/home/zxh/data', train=False, transform=frame_transform)

# Report the dataset sizes together with the type and shape of the first training sample.
first_frames = trainset[0][0]
print('trainset size:', len(trainset), 'testset size:', len(testset), 'type of dataset', type(first_frames), 'shape', first_frames.shape)

trainset size: 60000 testset size: 10000 type of dataset <class 'numpy.ndarray'> shape (298, 2, 34, 34)


### 2.2 Define the network
Constructing a Spconv network is basically the same as constructing any torch.nn network, except for a few things to notice:
* The computation is time-first. At the end of each LIF layer the tensor is reshaped to [time*batch, *inputs] before being fed into the next spconv layer, which improves performance; we therefore need to reshape it back before returning the output.
* The input of a Spconv network is a sparse tensor, constructed by spconv.SparseConvTensor.from_dense()
* LeakyPlain (implemented in LocalZO.conv_models.neurons) is a spiking neuron that can be placed between Spconv layers. It requires batch_size, u_th and beta: batch_size is used to reshape the input and output, while u_th and beta are the parameters of the LIF neuron.
* The output should be a tensor with shape (time, batch, *inputs).

**Important:** The spconv library treats an empty input (i.e. an input containing only zeros) as an error.
This usually does not happen, but if it does you will get *CUDA kernel launch blocks must be positive, but got N= 0*. In that case, please set appropriate u_th and beta so that at least one spike fires during the time steps. Details can be found at: https://github.com/traveller59/spconv/blob/master/docs/COMMON_PROBLEMS.md

In [2]:
import torch
from torch import nn
from spconv import pytorch as spconv  # pay attention to the spconv.pytorch
from conv_models.neurons import LeakyPlain


class ExampleNet(nn.Module):
    """Small spiking CNN built from spconv layers and LeakyPlain neurons.

    Two sparse blocks (5x5 conv -> batch norm -> 2x2 max pool -> LIF) are
    followed by a dense linear layer with a final LIF neuron that produces
    10 outputs.

    Args:
        batch_size: batch size handed to every LeakyPlain layer and used to
            reshape the network output back to (time, batch, 10).
        u_th: ``u_th`` argument forwarded to every LeakyPlain layer.
        beta: ``beta`` argument forwarded to every LeakyPlain layer.
        conv_algorithm: spconv algorithm used by the sparse convolutions.
    """

    def __init__(self, batch_size, u_th, beta, conv_algorithm=spconv.ConvAlgo.Native):
        super().__init__()
        self.batch_size = batch_size

        def make_lif():
            # All spiking layers share the same hyper-parameters.
            return LeakyPlain(u_th=u_th, beta=beta, batch_size=batch_size)

        def make_conv_block(in_channels, out_channels):
            layers = [
                spconv.SparseConv2d(in_channels, out_channels, 5, bias=True, algo=conv_algorithm),
                spconv.SparseBatchNorm(out_channels, eps=1e-5, momentum=0.1),
                spconv.SparseMaxPool2d(2, stride=2),
                make_lif(),
            ]
            return nn.Sequential(*layers)

        self.conv_block1 = make_conv_block(2, 16)
        self.conv_block2 = make_conv_block(16, 32)
        # Leave the sparse domain before the fully connected classifier.
        self.to_dense = spconv.ToDense()
        self.flatten = nn.Flatten(start_dim=1)
        self.fc = nn.Sequential(nn.Linear(32 * 5 * 5, 10), make_lif())

    def forward(self, x):
        """Run a sparse input through the network.

        Args:
            x: a ``spconv.SparseConvTensor``.

        Returns:
            The network output viewed as (-1, batch_size, 10), i.e. reshaped
            back into (time, batch, *inputs).
        """
        assert isinstance(x, spconv.SparseConvTensor)
        stages = (self.conv_block1, self.conv_block2, self.to_dense, self.flatten, self.fc)
        for stage in stages:
            x = stage(x)
        return x.view(-1, self.batch_size, 10)

# Build the network, move it to the GPU, then list its layers and parameter shapes.
batch_size = 128
net = ExampleNet(batch_size=batch_size, u_th=1.0, beta=0.5)
net = net.cuda()
print(net)
for param_name, weights in net.named_parameters():
    print(param_name, weights.shape)

ExampleNet(
  (conv_block1): Sequential(
    (0): SparseConv2d(2, 16, kernel_size=[5, 5], stride=[1, 1], padding=[0, 0], dilation=[1, 1], output_padding=[0, 0], algo=ConvAlgo.Native)
    (1): SparseBatchNorm(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): SparseMaxPool2d(kernel_size=[2, 2], stride=[2, 2], padding=[0, 0], dilation=[1, 1], algo=ConvAlgo.MaskImplicitGemm)
    (3): LeakyPlain()
  )
  (conv_block2): Sequential(
    (0): SparseConv2d(16, 32, kernel_size=[5, 5], stride=[1, 1], padding=[0, 0], dilation=[1, 1], output_padding=[0, 0], algo=ConvAlgo.Native)
    (1): SparseBatchNorm(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): SparseMaxPool2d(kernel_size=[2, 2], stride=[2, 2], padding=[0, 0], dilation=[1, 1], algo=ConvAlgo.MaskImplicitGemm)
    (3): LeakyPlain()
  )
  (to_dense): ToDense()
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Sequential(
    (0): Linear(in_features=800, out_features=10, bias=True)
    (1

### 2.3 Define the loss function and optimizer
We use the loss function from snntorch and the optimizer from torch.optim.

In [3]:
from snntorch import functional as SF
import torch

# Loss and accuracy both come from snntorch.functional.
loss_fn = SF.mse_count_loss(incorrect_rate=0.2, correct_rate=0.8)
acc_fn = SF.accuracy_rate
# Adam over every parameter of the network.
optimizer = torch.optim.Adam(params=net.parameters(), lr=2e-3)

### 2.4 Cache dataset to accelerate training

In [4]:
import torchvision
from torch.utils.data import DataLoader

# Transform passed to the cached training set: convert the frames to a tensor,
# then rotate them by a random angle in [-10, 10] degrees.
cache_transform = tonic.transforms.Compose([
    torch.from_numpy,
    torchvision.transforms.RandomRotation([-10, 10]),
])

epoch_num = 5
# The train and test caches must live in different directories: the on-disk
# cache is keyed by sample index, so a shared directory would let test lookups
# be served from cached training samples.
cached_trainset = tonic.DiskCachedDataset(trainset, cache_path='./data/cache/train', transform=cache_transform)
cached_testset = tonic.DiskCachedDataset(testset, cache_path='./data/cache/test')

# drop_last=True keeps every batch at exactly `batch_size` samples, which the
# network relies on when it reshapes its output back to (time, batch, 10).
# PadTensors(batch_first=False) pads the samples of a batch and returns them time-first.
train_loader = DataLoader(cached_trainset, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True,
                          collate_fn=tonic.collation.PadTensors(batch_first=False))
test_loader = DataLoader(cached_testset, batch_size=batch_size, shuffle=False, num_workers=4, drop_last=True,
                         collate_fn=tonic.collation.PadTensors(batch_first=False))

### 2.5 Training

In [5]:
from torch.utils.tensorboard import SummaryWriter
import time

def _to_sparse_batch(frames):
    """Convert a padded, time-first frame batch into a spconv sparse tensor.

    The time and batch axes are merged into one, axes 1 and 3 are swapped so
    that the channel axis comes last, the result is moved to the GPU and
    finally wrapped with ``SparseConvTensor.from_dense``. ``reshape`` is used
    rather than ``view`` so that non-contiguous batches are accepted as well.
    """
    dense = frames.reshape(-1, *frames.shape[2:]).transpose(1, 3).cuda()
    return spconv.SparseConvTensor.from_dense(dense)


with SummaryWriter(comment='spconv', log_dir='./output/spconv') as writer:
    for epoch in range(epoch_num):
        # ---- training ----
        net.train()  # the network contains batch-norm layers
        for i, (inputs, labels) in enumerate(train_loader):
            start = time.time()
            inputs = _to_sparse_batch(inputs)
            labels = labels.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            torch.cuda.synchronize()  # wait for the GPU so the measured time is meaningful
            end = time.time()

            if i % 10 == 0:
                step = epoch * len(train_loader) + i
                batch_loss = loss.item()
                batch_acc = acc_fn(outputs, labels).item()  # computed once, logged and printed
                writer.add_scalar('loss', batch_loss, step)
                writer.add_scalar('acc', batch_acc, step)
                print('epoch', epoch, 'batch', i, 'loss', batch_loss, 'acc', batch_acc,
                      'time', end - start)

        # ---- evaluation ----
        net.eval()
        acc = 0
        loss_val = 0
        with torch.no_grad():  # no gradients are needed for evaluation
            for inputs, labels in test_loader:
                # The network only accepts SparseConvTensor inputs, so test batches
                # need exactly the same conversion as training batches.
                inputs = _to_sparse_batch(inputs)
                labels = labels.cuda()
                outputs = net(inputs)
                loss_val += loss_fn(outputs, labels).item()
                acc += acc_fn(outputs, labels).item()

        writer.add_scalar('test loss', loss_val/len(test_loader), epoch)
        writer.add_scalar('test acc', acc/len(test_loader), epoch)
        print('epoch', epoch, 'test loss', loss_val/len(test_loader), 'test acc', acc/len(test_loader))

2023-05-03 19:32:18.463947: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-03 19:32:20.531580: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:
2023-05-03 19:32:20.531835: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:


epoch 0 batch 0 loss 28.434736251831055 acc 0.1015625 time 14.419233083724976
epoch 0 batch 10 loss 10.476030349731445 acc 0.328125 time 1.4713521003723145
epoch 0 batch 20 loss 8.578996658325195 acc 0.4921875 time 1.731255054473877
epoch 0 batch 30 loss 7.9581074714660645 acc 0.5390625 time 1.2566273212432861
epoch 0 batch 40 loss 7.049453258514404 acc 0.6171875 time 1.0027294158935547
epoch 0 batch 50 loss 6.671371936798096 acc 0.6484375 time 2.2599339485168457
epoch 0 batch 60 loss 6.267968654632568 acc 0.71875 time 0.9950437545776367
epoch 0 batch 70 loss 6.827608108520508 acc 0.6875 time 1.298081636428833
epoch 0 batch 80 loss 5.799502372741699 acc 0.765625 time 1.0090351104736328
epoch 0 batch 90 loss 5.619458198547363 acc 0.796875 time 1.1825788021087646
epoch 0 batch 100 loss 5.831307888031006 acc 0.7578125 time 1.4779713153839111
epoch 0 batch 110 loss 5.420891284942627 acc 0.8515625 time 1.3453187942504883
epoch 0 batch 120 loss 5.045302391052246 acc 0.8359375 time 1.34677839

KeyError: Caught KeyError in DataLoader worker process 2.
Original Traceback (most recent call last):
  File "/home/zxh/.conda/envs/xiaohan/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/zxh/.conda/envs/xiaohan/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/zxh/.conda/envs/xiaohan/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/zxh/.conda/envs/xiaohan/lib/python3.8/site-packages/tonic/cached_dataset.py", line 137, in __getitem__
    data, targets = load_from_disk_cache(file_path)
  File "/home/zxh/.conda/envs/xiaohan/lib/python3.8/site-packages/tonic/cached_dataset.py", line 214, in load_from_disk_cache
    for index in f[name].keys():
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "/home/zxh/.conda/envs/xiaohan/lib/python3.8/site-packages/h5py/_hl/group.py", line 328, in __getitem__
    oid = h5o.open(self.id, self._e(name), lapl=self._lapl)
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "h5py/h5o.pyx", line 190, in h5py.h5o.open
KeyError: "Unable to open object (object 'target' doesn't exist)"


## 3 Training with LocalZO neurons
### 3.1 Implementation Details
The LocalZO neurons use a random sampler to generate random tangents that approximate the gradient.
Since the sampling technique varies, we need to provide a sampler to instantiate a LocalZO neuron. For a customized sampler, we recommend inheriting from the base class `BaseSampler` and implementing the `generate_random_tangents` method.

In [None]:
from typing import Type

from conv_models.neurons import LeakeyZOPlain
from conv_models.samplers import NormalSampler, BaseSampler


class ExampleNetLocalZO(nn.Module):
    """Variant of the example network whose conv blocks use LocalZO neurons.

    The layout matches ``ExampleNet``: two sparse blocks (5x5 conv -> batch
    norm -> 2x2 max pool -> spiking neuron) followed by a dense linear layer
    with 10 outputs. The spiking neurons of both conv blocks are
    ``LeakeyZOPlain`` layers, while the final layer keeps a ``LeakyPlain``
    neuron.

    Args:
        batch_size: batch size handed to every neuron layer and used to
            reshape the network output back to (time, batch, 10).
        u_th: ``u_th`` argument forwarded to every neuron layer.
        beta: ``beta`` argument forwarded to every neuron layer.
        conv_algorithm: spconv algorithm used by the sparse convolutions.
        random_sampler: sampler *class* (not an instance). It is called with
            no arguments once per LocalZO layer, so every layer receives its
            own sampler object.
        sample_num: ``sample_num`` argument forwarded to every LocalZO layer.
    """

    def __init__(self, batch_size, u_th, beta, conv_algorithm=spconv.ConvAlgo.Native,
                 random_sampler: Type[BaseSampler] = NormalSampler, sample_num=1):
        super().__init__()
        self.batch_size = batch_size
        self.conv_block1 = nn.Sequential(
            spconv.SparseConv2d(2, 16, 5, bias=True, algo=conv_algorithm),
            spconv.SparseBatchNorm(16, eps=1e-5, momentum=0.1),
            spconv.SparseMaxPool2d(2, stride=2),
            LeakeyZOPlain(u_th=u_th, beta=beta, batch_size=batch_size,
                          random_sampler=random_sampler(), sample_num=sample_num),
        )
        self.conv_block2 = nn.Sequential(
            spconv.SparseConv2d(16, 32, 5, bias=True, algo=conv_algorithm),
            spconv.SparseBatchNorm(32, eps=1e-5, momentum=0.1),
            spconv.SparseMaxPool2d(2, stride=2),
            LeakeyZOPlain(u_th=u_th, beta=beta, batch_size=batch_size,
                          random_sampler=random_sampler(), sample_num=sample_num),
        )
        self.to_dense = spconv.ToDense()  # convert the sparse tensor to a dense tensor
        self.flatten = nn.Flatten(start_dim=1)
        self.fc = nn.Sequential(
            nn.Linear(32*5*5, 10),
            # NOTE(review): the output layer uses LeakyPlain rather than
            # LeakeyZOPlain - presumably on purpose; confirm.
            LeakyPlain(u_th=u_th, beta=beta, batch_size=batch_size),
        )

    def forward(self, x):
        """Run a sparse input through the network.

        Args:
            x: a ``spconv.SparseConvTensor``.

        Returns:
            The network output viewed as (-1, batch_size, 10), i.e. reshaped
            back into (time, batch, *inputs).
        """
        assert isinstance(x, spconv.SparseConvTensor)
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.to_dense(x)
        x = self.flatten(x)
        x = self.fc(x)
        x = x.view(-1, self.batch_size, 10)  # reshape back into time, batch, *inputs
        return x

# Build the LocalZO network on the GPU, then list its layers and parameter shapes.
net = ExampleNetLocalZO(batch_size=batch_size, u_th=1.0, beta=0.5)
net = net.cuda()
print(net)
for param_name, weights in net.named_parameters():
    print(param_name, weights.shape)

In [None]:
from torch.utils.tensorboard import SummaryWriter
import time
from snntorch import functional as SF
import torch

# Loss and accuracy from snntorch.functional; a fresh optimizer is created
# because `net` now refers to the LocalZO network.
loss_fn = SF.mse_count_loss(correct_rate=0.8, incorrect_rate=0.2)
acc_fn = SF.accuracy_rate
optimizer = torch.optim.Adam(net.parameters(), lr=2e-3)


def _to_sparse_batch(frames):
    """Convert a padded, time-first frame batch into a spconv sparse tensor.

    The time and batch axes are merged into one, axes 1 and 3 are swapped so
    that the channel axis comes last, the result is moved to the GPU and
    finally wrapped with ``SparseConvTensor.from_dense``. ``reshape`` is used
    rather than ``view`` so that non-contiguous batches are accepted as well.
    """
    dense = frames.reshape(-1, *frames.shape[2:]).transpose(1, 3).cuda()
    return spconv.SparseConvTensor.from_dense(dense)


# Log to a directory of its own so the curves of this run are not mixed with
# the ones written to './output/spconv' by the surrogate-gradient run.
with SummaryWriter(comment='spconv', log_dir='./output/spconv_localzo') as writer:
    for epoch in range(epoch_num):
        # ---- training ----
        net.train()  # the network contains batch-norm layers
        for i, (inputs, labels) in enumerate(train_loader):
            start = time.time()
            inputs = _to_sparse_batch(inputs)
            labels = labels.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            torch.cuda.synchronize()  # wait for the GPU so the measured time is meaningful
            end = time.time()

            if i % 10 == 0:
                step = epoch * len(train_loader) + i
                batch_loss = loss.item()
                batch_acc = acc_fn(outputs, labels).item()  # computed once, logged and printed
                writer.add_scalar('loss', batch_loss, step)
                writer.add_scalar('acc', batch_acc, step)
                print('epoch', epoch, 'batch', i, 'loss', batch_loss, 'acc', batch_acc, 'time', end-start)

        # ---- evaluation ----
        net.eval()
        acc = 0
        loss_val = 0
        with torch.no_grad():  # no gradients are needed for evaluation
            for inputs, labels in test_loader:
                # The network only accepts SparseConvTensor inputs, so test batches
                # need exactly the same conversion as training batches.
                inputs = _to_sparse_batch(inputs)
                labels = labels.cuda()
                outputs = net(inputs)
                loss_val += loss_fn(outputs, labels).item()
                acc += acc_fn(outputs, labels).item()

        writer.add_scalar('test loss', loss_val/len(test_loader), epoch)
        writer.add_scalar('test acc', acc/len(test_loader), epoch)
        print('epoch', epoch, 'test loss', loss_val/len(test_loader), 'test acc', acc/len(test_loader))