In [1]:
import logging
import sys
from typing import Any, Dict, List, Iterable, Tuple, Union
import warnings
warnings.filterwarnings('ignore')

import leabra7 as lb
import numpy as np
import pandas as pd
import sklearn.datasets, sklearn.metrics, sklearn.model_selection, sklearn.preprocessing

In [2]:
LoggerType = Union[None, logging.Logger, logging.LoggerAdapter]

In [3]:
def load_data(num_feature_units: int = 10,
              logger: LoggerType = None) -> Tuple[np.ndarray, np.ndarray]:
    """Loads and preprocesses the data.
    
    Returns:
        An (X, Y) tuple containing the features and labels, respectively.
    """
    if logger is None:
        logger = logging.getLogger()
    logger.info("Loading data")
    data = sklearn.datasets.load_iris()
    
    # One-hot encode the labels
    label_binarizer = sklearn.preprocessing.LabelBinarizer()
    Y = label_binarizer.fit_transform(data.target)
    
    # Quantile transform, bin, and one-hot encode the features
    quant = sklearn.preprocessing.QuantileTransformer()
    X = quant.fit_transform(data.data)
    X = np.digitize(X, bins=np.linspace(0.0, 1.0, num=num_feature_units))
    one_hot = sklearn.preprocessing.OneHotEncoder(sparse=False)
    X = one_hot.fit_transform(X)
    
    # Randomly shuffle the data
    return sklearn.utils.shuffle(X, Y)


def build_network(input_size: int,
                  output_size: int,
                  hidden_size: int = 23,
                  logger: LoggerType = None) -> lb.Net:
    """Builds the classifier network.
    
    Args:
        input_size: The size of the input layer.
        output_size: The size of the output layer.
        hidden_size: The size of the hidden layer.
        logger: The logger to use.
    
    Returns:
        A Leabra7 network for classification.
    """
    if logger is None:
        logger = logging.getLogger()
    logger.info("Building network")
    net = lb.Net()
    
    # Layers
    layer_spec = lb.LayerSpec(gi=1.5, ff=1, fb=1,
    unit_spec=lb.UnitSpec(spike_gain=0, vm_gain=0, adapt_dt=0))
    net.new_layer("input", size=input_size, spec=layer_spec)
    net.new_layer("hidden", size=hidden_size, spec=layer_spec)
    net.new_layer("output", size=output_size, spec=layer_spec)
    logger.debug("Input layer size: %d", input_size)
    logger.debug("Hidden layer size: %d", hidden_size)
    logger.debug("Output layer size: %d", output_size)
    
    # Projections
    lrate = 0.02
    up_spec = lb.ProjnSpec(
        lrate=lrate,
        dist=lb.Uniform(0.25, 0.75),
        cos_diff_thr_l_mix=False,
        cos_diff_lrate=False)
    down_spec = lb.ProjnSpec(
        lrate=lrate,
        dist=lb.Uniform(0.25, 0.5),
        wt_scale_rel=0.3,
        cos_diff_thr_l_mix=False,
        cos_diff_lrate=False)
    net.new_projn(
        "input_to_hidden", pre="input", post="hidden", spec=up_spec)
    net.new_projn(
        "hidden_to_output", pre="hidden", post="output", spec=up_spec)
    net.new_projn(
        "output_to_hidden", pre="output", post="hidden", spec=down_spec)
    
    return net

def trial(network: lb.Net, input_pattern: Iterable[float],
          output_pattern: Iterable[float]) -> None:
    """Runs a trial.
    
    Args:
        input_pattern: The pattern to clamp to the network's input layer.
        output_pattern: The pattern to clamp to the network's output layer.
    """
    network.clamp_layer("input", input_pattern)
    network.minus_phase_cycle(num_cycles=50)
    network.clamp_layer("output", output_pattern)
    network.plus_phase_cycle(num_cycles=25)
    network.unclamp_layer("input")
    network.unclamp_layer("output")
    network.learn()
    
def epoch(network: lb.Net, input_patterns: np.ndarray,
          output_patterns: np.ndarray) -> None:
    """Runs an epoch (one pass through the whole dataset).
    
    Args:
        input_patterns: A numpy array with shape (n_samples, n_features).
        output_patterns: A numpy array with shape (n_samples, n_features).
    """
    for x, y in zip(input_patterns, output_patterns):
        trial(network, x, y)
    network.end_epoch()
    
def train(network: lb.Net,
          input_patterns: np.ndarray,
          output_patterns: np.ndarray,
          num_epochs: int = 500,
          logger: LoggerType = None) -> pd.DataFrame:
    """Trains the network.
    
    Args:
        input_patterns: A numpy array with shape (n_samples, n_features).
        output_patterns: A numpy array with shape (n_samples, n_features).
        num_patterns: The number of epochs to run. Defaults to 500.
        logger: The logger to use. If None, will use the module's default logger.
    
    Returns:
        29pd.DataFrame:
        A dataframe of metrics from the training run.
    """
    if logger is None:
        logger = logging.getLogger()
    logger.info("Begin training")
    
    X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
        input_patterns, output_patterns, test_size=0.2)
    
    logger.debug("Training set size: %d", X_train.shape[0])
    logger.debug("Test set size: %d", X_test.shape[0])
    
    data: Dict[str, List[float]] = {
        "epoch": [],
        "train_loss": [],
        "train_accuracy": [],
        "test_loss": [],
        "test_accuracy": []
    }
        
    for i in range(num_epochs):
        epoch(network, X_train, Y_train)
        # Predicting is slow, so we only calculate metrics every 5 epochs
        if (i + 1) % 5 == 0:
            pred_train = predict(network, X_train)
            data["epoch"].append(i)
            data["train_loss"].append(
                sklearn.metrics.mean_squared_error(Y_train, pred_train))
            data["train_accuracy"].append(
                sklearn.metrics.accuracy_score(
                    Y_train, pred_train, normalize=True))
            pred_test = predict(network, X_test)
            data["test_loss"].append(
                sklearn.metrics.mean_squared_error(Y_test, pred_test))
            data["test_accuracy"].append(
                sklearn.metrics.accuracy_score(
                    Y_test, pred_test, normalize=True))
            logger.info(
                "Epoch %d/%d. Train accuracy: %.2f%%. Test accuracy: %.2f%%",
                i, num_epochs, data["train_accuracy"][-1] * 100,
                data["test_accuracy"][-1] * 100)
    logger.info("End training")
    return pd.DataFrame(data)

def output(network: lb.Net, pattern: Iterable[float]) -> List[float]:
    """Calculates a prediction for a single input pattern.
    
    Args:
        network: The trained network.
        pattern: The input pattern.
    
    Returns:
        np.ndarray: The output of the network after clamping the input
        pattern to the input layer and settling. The max value is set to one,
        everything else is set to zero.
    """
    network.clamp_layer("input", pattern)
    for _ in range(50):
        network.cycle()
    network.unclamp_layer("input")
    out = network.observe("output", "unit_act")["act"].values
    max_idx = np.argmax(out)
    out[:] = 0
    out[max_idx] = 1
    return list(out)

def predict(network: lb.Net, input_patterns: np.ndarray) -> np.ndarray:
    """Calculates predictions for an array of input patterns.
    
    Args:
        network: The trained network.
        input_patterns: An array of shape (n_samples, n_features)
        containing the input patterns for which to calculate predictions.

    Returns:
        np.ndarray: An array of shape (n_samples, n_features) containing the
        predictions for the input patterns.
    """
    outputs = [output(network, item)
               for item in input_patterns]
    return np.array(outputs)

In [4]:
PROJ_NAME = "iris"
np.seterr("warn")
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(levelname)s %(message)s",
    handlers=(
        logging.FileHandler(
            "{0}_log.txt".format(PROJ_NAME), mode="w"),
        logging.StreamHandler(sys.stdout)))

logging.info("Begin training %s", PROJ_NAME)

2021-04-07 12:50:26,405 INFO Begin training iris


In [5]:
X, Y = load_data()
net = build_network(
    input_size=X.shape[1],
    output_size=Y.shape[1])

metrics = train(net, X, Y)

# Save metrics and network for future analysis
metrics.to_csv("{0}_metrics.csv".format(PROJ_NAME), index=False)
net.save("{0}_network.pkl".format(PROJ_NAME))

2021-04-07 12:50:28,156 INFO Loading data
2021-04-07 12:50:28,165 INFO Building network
2021-04-07 12:50:28,168 DEBUG Input layer size: 40
2021-04-07 12:50:28,169 DEBUG Hidden layer size: 23
2021-04-07 12:50:28,169 DEBUG Output layer size: 3
2021-04-07 12:50:28,172 INFO Begin training
2021-04-07 12:50:28,174 DEBUG Training set size: 120
2021-04-07 12:50:28,174 DEBUG Test set size: 30
2021-04-07 12:51:57,871 INFO Epoch 4/500. Train accuracy: 35.83%. Test accuracy: 16.67%
2021-04-07 12:53:23,466 INFO Epoch 9/500. Train accuracy: 35.83%. Test accuracy: 16.67%
2021-04-07 12:54:53,598 INFO Epoch 14/500. Train accuracy: 35.83%. Test accuracy: 16.67%
2021-04-07 12:56:23,700 INFO Epoch 19/500. Train accuracy: 35.83%. Test accuracy: 16.67%
2021-04-07 12:57:53,768 INFO Epoch 24/500. Train accuracy: 35.83%. Test accuracy: 16.67%
2021-04-07 12:59:31,506 INFO Epoch 29/500. Train accuracy: 35.83%. Test accuracy: 16.67%
2021-04-07 13:01:01,881 INFO Epoch 34/500. Train accuracy: 35.83%. Test accuracy:

KeyboardInterrupt: 