## Simple Dendritic Gated Networks in numpy

This colab implements a Dendritic Gated Network (DGN) that solves either a regression problem (using square loss) or a binary classification problem (using Bernoulli log loss).

See our paper titled "A rapid and efficient learning rule for biological neural circuits" for details of the DGN model.


Some implementation details:
- We utilize `sklearn.datasets.load_breast_cancer` for binary classification and `sklearn.datasets.load_diabetes` for regression.
- This code is meant for educational purposes only. It is not optimized for high performance, in terms of either computational efficiency or quality of fit.
- The network is trained on 80% of the dataset and tested on the remaining 20%. Test MSE or log loss is reported at the end of each epoch.



In [None]:
# Copyright 2021 DeepMind Technologies Limited. All rights reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [None]:
import numpy as np
from sklearn import datasets
from sklearn import preprocessing
from sklearn import model_selection
from typing import List, Optional

## Choose classification or regression

In [None]:
# Task switch read by the dataset-loading and training cells:
# True trains on the breast-cancer data, False on the diabetes data.
do_classification = True  # if False, does regression

### Load dataset

In [None]:
# Fetch the dataset that matches the selected task.
if not do_classification:
  features, targets = datasets.load_diabetes(return_X_y=True)
else:
  features, targets = datasets.load_breast_cancer(return_X_y=True)

# Hold out 20% of the examples for testing; the fixed seed keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    features, targets, test_size=0.2, random_state=0)
input_dim = x_train.shape[-1]

# Standardise the features with statistics fitted on the training split only.
feature_encoder = preprocessing.StandardScaler()
x_train = feature_encoder.fit(x_train).transform(x_train)
x_test = feature_encoder.transform(x_test)

# Regression targets are standardised the same way; the scaler expects a
# 2-D column, so the targets are reshaped going in and squeezed coming out.
if not do_classification:
  target_encoder = preprocessing.StandardScaler()
  y_train = np.squeeze(target_encoder.fit_transform(y_train.reshape(-1, 1)))
  y_test = np.squeeze(target_encoder.transform(y_test.reshape(-1, 1)))


## DGN inference/update

In [None]:
def step_square_loss(inputs: np.ndarray,
                     weights: List[np.ndarray],
                     hyperplanes: List[np.ndarray],
                     hyperplane_bias_magnitude: Optional[float] = 1.,
                     learning_rate: Optional[float] = 1e-5,
                     target: Optional[float] = None,
                     update: bool = False,
                     ):
  """Implements a DGN inference/update using square loss.

  Args:
    inputs: 1-D feature vector of a single example.
    weights: One array per layer, indexed as (neuron, branch, input), where
      the input axis has one extra leading entry for the bias. Modified in
      place when `update` is True.
    hyperplanes: One gating array per layer, indexed as
      (neuron, branch, side_info), where side_info is `[bias, inputs]`.
    hyperplane_bias_magnitude: Constant prepended to `inputs` to form the
      side information fed to the gates.
    learning_rate: Step size of the weight update.
    target: Regression target. Required when `update` is True; when omitted,
      only inference is performed.
    update: Whether to apply a gradient step to every layer.

  Returns:
    A tuple `(prediction, loss)`. `prediction` is the output of the first
    neuron of the last layer; `loss` is half the squared error, or None when
    no `target` was given.

  Raises:
    ValueError: If `update` is True but `target` is None.
  """
  if update and target is None:
    raise ValueError('`target` must be provided when `update` is True.')

  r_in = inputs
  side_info = np.hstack([hyperplane_bias_magnitude, inputs])

  for w, h in zip(weights, hyperplanes):  # loop over layers
    r_in = np.hstack([1., r_in])  # add biases
    # A branch is active when the side information lies strictly on the
    # positive side of its hyperplane.
    gate_values = np.heaviside(h.dot(side_info), 0).astype(bool)
    # Sum each neuron's OWN active branches. `gate_values.dot(w)` would follow
    # N-D np.dot semantics and mix the weights of all neurons in the layer,
    # which disagrees with the per-neuron update below.
    effective_weights = (gate_values[:, :, None] * w).sum(axis=1)
    r_out = effective_weights.dot(r_in)

    if update:
      # Every neuron in every layer regresses directly onto the target; only
      # the weights of its active branches are changed.
      grad = (r_out[:, None] - target) * r_in[None]
      w -= learning_rate * gate_values[:, :, None] * grad[:, None]

    r_in = r_out

  r_out = r_out[0]
  # Without a target there is nothing to score: return the prediction only.
  loss = None if target is None else (target - r_out)**2 / 2
  return r_out, loss

def sigmoid(x):  # numerically stable sigmoid
  """Returns 1 / (1 + exp(-x)), computed via logaddexp to avoid overflow."""
  return np.exp(-np.logaddexp(0, -x))

def inverse_sigmoid(x):
  """Returns the logit log(x / (1 - x)); x must lie strictly inside (0, 1)."""
  return np.log(x/(1-x))

def step_bernoulli(inputs: np.ndarray,
                   weights: List[np.ndarray],
                   hyperplanes: List[np.ndarray],
                   hyperplane_bias_magnitude: Optional[float] = 1.,
                   learning_rate: Optional[float] = 1e-5,
                   epsilon: float = 0.01,
                   target: Optional[float] = None,
                   update: bool = False,
                   ):
  """Implements a DGN inference/update using Bernoulli log loss.

  Args:
    inputs: 1-D feature vector of a single example.
    weights: One array per layer, indexed as (neuron, branch, input), where
      the input axis has one extra leading entry for the bias. Modified in
      place when `update` is True.
    hyperplanes: One gating array per layer, indexed as
      (neuron, branch, side_info), where side_info is `[bias, inputs]`.
    hyperplane_bias_magnitude: Constant prepended to `inputs` to form the
      side information fed to the gates.
    learning_rate: Step size of the weight update.
    epsilon: Probabilities are clipped to `[epsilon, 1 - epsilon]`.
    target: Binary label (0 or 1). Required when `update` is True; when
      omitted, only inference is performed.
    update: Whether to apply a gradient step to every layer.

  Returns:
    A tuple `(prediction, loss)`. `prediction` is the clipped probability
    produced by the first neuron of the last layer; `loss` is the Bernoulli
    log loss of that probability, or None when no `target` was given.

  Raises:
    ValueError: If `update` is True but `target` is None.
  """
  if update and target is None:
    raise ValueError('`target` must be provided when `update` is True.')

  # Squash the raw features into (0, 1) so they can be treated as
  # probabilities by the first layer.
  r_in = np.clip(sigmoid(inputs), epsilon, 1-epsilon)
  side_info = np.hstack([hyperplane_bias_magnitude, inputs])

  for w, h in zip(weights, hyperplanes):  # loop over layers
    r_in = np.hstack([sigmoid(1.), r_in])  # add biases
    # Neurons mix their inputs in logit space.
    h_in = inverse_sigmoid(r_in)
    gate_values = np.heaviside(h.dot(side_info), 0).astype(bool)
    # Sum each neuron's OWN active branches. `gate_values.dot(w)` would follow
    # N-D np.dot semantics and mix the weights of all neurons in the layer,
    # which disagrees with the per-neuron update below.
    effective_weights = (gate_values[:, :, None] * w).sum(axis=1)
    h_out = effective_weights.dot(h_in)
    r_out = np.clip(sigmoid(h_out), epsilon, 1 - epsilon)

    if update:
      # Neurons whose output hit the clipping boundary are not updated.
      update_indicator = np.logical_and(r_out < 1 - epsilon, r_out > epsilon)
      grad = (r_out[:, None] - target) * h_in[None]  * update_indicator[:, None]
      w -= learning_rate * gate_values[:, :, None] * grad[:, None]

    r_in = r_out

  r_out = r_out[0]
  if target is None:
    # Without a label there is nothing to score: return the prediction only.
    return r_out, None
  # Bernoulli negative log-likelihood; r_out is clipped away from 0 and 1, so
  # both logarithms are finite.
  loss = -(target * np.log(r_out) + (1 - target) * np.log(1 - r_out))
  return r_out, loss

In [None]:
def forward_pass(step_fn, x, y, weights, hyperplanes, learning_rate, update):
  """Runs `step_fn` once per (example, target) pair, in order.

  Args:
    step_fn: Callable invoked as
      `step_fn(example, weights, hyperplanes, target=..., learning_rate=...,
      update=...)` and returning a `(prediction, loss)` pair.
    x: Iterable of examples.
    y: Iterable of targets, paired with `x` by position.
    weights: Passed through to `step_fn` unchanged.
    hyperplanes: Passed through to `step_fn` unchanged.
    learning_rate: Passed through to `step_fn`.
    update: Passed through to `step_fn`.

  Returns:
    A tuple `(mean_loss, predictions)`: the mean of the per-example losses and
    an array of the per-example predictions.
  """
  # Examples are processed sequentially because `step_fn` may mutate
  # `weights` between calls.
  per_example = [
      step_fn(example, weights, hyperplanes, target=label,
              learning_rate=learning_rate, update=update)
      for example, label in zip(x, y)
  ]
  predictions = [prediction for prediction, _ in per_example]
  example_losses = [example_loss for _, example_loss in per_example]
  return np.mean(example_losses), np.array(predictions)

## Define architecture


In [None]:
# number of neurons per layer, the last element must be 1
# Number of neurons per layer, in order from first to last layer. The last
# element must be 1: the step functions return the first output of the last
# layer as the network's prediction.
num_neurons = np.array([100, 10, 1])
num_branches = 20  # number of dendritic branches per neuron

## Initialise weights and gating parameters

In [None]:
# The first layer sees the raw features and every later layer sees the
# previous layer's outputs; each gets one extra input for the bias.
num_inputs = np.hstack([input_dim + 1, num_neurons[:-1] + 1])  # 1 for the bias
# Weights start at zero, one vector per (neuron, branch).
weights_ = [np.zeros((num_neuron, num_branches, num_input))
            for num_neuron, num_input in zip(num_neurons, num_inputs)]
# Every layer is gated by the same side information [bias, inputs], hence
# input_dim + 1 coefficients per hyperplane regardless of layer.
hyperplanes_ = [np.random.normal(0, 1, size=(num_neuron, num_branches, input_dim + 1))
                for num_neuron in num_neurons]
# Gating hyperplanes are drawn from a normalised Gaussian: each hyperplane is
# rescaled so that its normal vector has unit length. Column 0 multiplies the
# bias entry that the step functions place first in the side information, so
# it is excluded from the norm. Gates depend only on the sign of the
# projection, so this rescaling does not change which branches are active.
hyperplanes_ = [h_ / np.linalg.norm(h_[:, :, 1:], axis=-1, keepdims=True)
                for h_ in hyperplanes_]

## Train

In [None]:
# Task-specific schedule, step size and step function.
if not do_classification:
  step = step_square_loss
  learning_rate_const = 1e-5
  n_epochs = 10
else:
  step = step_bernoulli
  learning_rate_const = 1e-4
  n_epochs = 3

for epoch in range(n_epochs):
  # One online pass over the training split with weight updates enabled.
  train_loss, train_pred = forward_pass(
      step, x_train, y_train, weights_, hyperplanes_, learning_rate_const,
      update=True)
  # Evaluation pass over the held-out split; update=False means the step
  # function does not modify the weights.
  test_loss, test_pred = forward_pass(
      step, x_test, y_test, weights_, hyperplanes_, learning_rate_const,
      update=False)
  print('epoch: {:d}, test loss: {:.3f} (train_loss: {:.3f})'.format(
      epoch, np.mean(test_loss), np.mean(train_loss)))

  if do_classification:
    # A prediction is an error when its rounded probability differs from the
    # 0/1 label.
    accuracy = 1 - np.mean(np.logical_xor(np.round(test_pred), y_test))
    print('test accuracy: {:.3f}'.format(accuracy))
