# RBF Networks with Iris Data: Soup to Nuts (and Beyond!)


Review from Monday:

Training an RBF network consists of:
* Finding prototypes
* Selecting the activation function for the hidden nodes
* Selecting the activation function for the output nodes
* Setting the weights for the edges and biases

We use:
* kmeans to find prototypes
* linear regression to fit the edge weights from the hidden layer to the output layer

We can use RBF networks for:
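* classification - by taking the argmax of the outputs from the output layer (one output node per class); the argmax is already a non-negative integer class index, so the rounding and absolute value applied to it in the code leave it unchanged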
* regression - by taking the output from the one node in the output layer

Today we will talk about:
* Visualizing neural networks and the activations of a neural network layer
* Hebbian learning - using RBF networks for clustering

As a sanity check when building a model, I suggest you *always print the shape at each step*.

## Load the Data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

def type_converter(x):
    "Map an iris species name to its class code as a float (0.0, 1.0 or 2.0)."
    species = ['setosa', 'versicolor', 'virginica']
    # list.index raises ValueError for a name that is not one of the three species.
    code = species.index(x)
    return float(code)

def inverse_type_converter(x):
    "Map an integer class code back to the iris species name it stands for."
    species = ['setosa', 'versicolor', 'virginica']
    # x is used as a list index, so it has to be an int (callers convert floats first).
    name = species[x]
    return name


# Column names in file order; index 4 ("class") is the species label.
columns = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
# Column 4 goes through type_converter, so the species name is stored as a float class code
# and the whole array can be a plain float array.
iris = np.array(np.genfromtxt('data/iris.csv', delimiter=',', converters={4: type_converter}, skip_header=2, dtype=float, encoding='utf-8'))
# Sanity check: print the shape and dtype of what was loaded.
print(iris.shape, iris.dtype)

# Look at the Data

In [None]:
def get_summary_statistics(data):
    "Return a DataFrame with four rows - max, min, mean, var (in that order) - computed per column of data."
    maxima = data.max(axis=0)
    minima = data.min(axis=0)
    means = data.mean(axis=0)
    variances = data.var(axis=0)
    # Row order matters to readers of the printed table: max, min, mean, var.
    stacked = np.array([maxima, minima, means, variances])
    return pd.DataFrame(stacked)

# Per-column max / min / mean / var of the full data set (class code column included).
print(get_summary_statistics(iris))

# Wrap the array in a DataFrame so seaborn can address columns by name,
# then plot the class code (y axis) against the columns of the frame.
df = pd.DataFrame(iris, columns=columns)
sns.pairplot(df, y_vars = ["class"], kind = "scatter")

## Clean the Data

Nothing to see here for the iris data

## Split the Data 

Into train, dev, test

In [None]:
# Why are we doing this?
# (np.random.shuffle reorders the rows of iris in place, so the contiguous slices taken
# below are not tied to the order of the rows in the file.)
np.random.shuffle(iris)

# Cut at 80% and 90% of the rows: first 80% -> train, next 10% -> dev, last 10% -> test.
train_data, dev_data, test_data = np.split(iris, [int(.8 * len(iris)), int(.9 * len(iris))])
print("training data", "\n", train_data.shape, "\n", get_summary_statistics(train_data))
print("dev data", "\n", dev_data.shape, "\n", get_summary_statistics(dev_data))
print("test data", "\n", test_data.shape, "\n", get_summary_statistics(test_data))

## Split off the Dependent Variable

In [None]:
# Index of the column to predict; every other column index is an independent variable.
dep_var = 4
ind_vars = [col for col in range(train_data.shape[1]) if col != dep_var]
# Remove the dependent variable's name from columns in place, keeping it in dep_name.
dep_name = columns[dep_var]
del columns[dep_var]
print(dep_var, ind_vars)

# For each split: pull out the dependent column first, then keep only the independent columns.
train_y = train_data[:, dep_var]
train_data = train_data[np.ix_(np.arange(train_data.shape[0]), ind_vars)]
dev_y = dev_data[:, dep_var]
dev_data = dev_data[np.ix_(np.arange(dev_data.shape[0]), ind_vars)]
test_y = test_data[:, dep_var]
test_data = test_data[np.ix_(np.arange(test_data.shape[0]), ind_vars)]
print("training data", "\n", train_data.shape, "\n", get_summary_statistics(train_data))
print("dev data", "\n", dev_data.shape, "\n", get_summary_statistics(dev_data))
print("test data", "\n", test_data.shape, "\n", get_summary_statistics(test_data))

## Consider Transforming/Normalizing the Data

In [None]:
def homogenizeData(data):
    "Return data (2-D, one row per sample) with a trailing column of float ones appended."
    ones_column = np.ones((data.shape[0], 1), dtype=float)
    return np.concatenate((data, ones_column), axis=1)
   
def zScore(data, translateTransform=None, scaleTransform=None):
    """Z-score the columns of data using homogeneous-coordinate transforms.

    data is a 2-D array with one row per sample. A transform that is None is
    fitted from data (negated column means for the translation, reciprocal
    column standard deviations for the scale). A transform that is supplied
    is applied unchanged, which is how transforms fitted on the training split
    are reused on the dev and test splits.

    Returns (translateTransform, scaleTransform, transformed), where
    transformed has the same shape as data.
    """
    n_features = data.shape[1]
    homogenizedData = np.append(data, np.ones((data.shape[0], 1), dtype=float), axis=1)
    features = homogenizedData[:, :n_features]
    if translateTransform is None:
        translateTransform = np.eye(n_features + 1)
        # Shift the feature rows only. The bottom-right entry has to stay 1 so the
        # matrix is a valid homogeneous translation; writing the mean of the ones
        # column there would turn it into -1.
        translateTransform[:n_features, n_features] = -features.mean(axis=0)
    if scaleTransform is None:
        # Constant columns (std == 0) are left unscaled rather than divided by zero;
        # the trailing 1 keeps the homogeneous coordinate untouched.
        diagonal = [1 / std if std != 0 else 1 for std in features.std(axis=0)] + [1]
        scaleTransform = np.eye(n_features + 1, dtype=float) * diagonal
    transformed = (scaleTransform @ translateTransform @ homogenizedData.T).T
    # Drop the homogeneous coordinate before returning.
    return translateTransform, scaleTransform, transformed[:, :n_features]

# Fit the translate/scale transforms on the training split only...
translateTransform, scaleTransform, train_data_transformed = zScore(train_data)
print("training data", "\n", train_data_transformed.shape, "\n", get_summary_statistics(train_data_transformed))

# ...and reuse those same transforms for the dev split (nothing is re-fitted here).
_, _, dev_data_transformed = zScore(dev_data, translateTransform=translateTransform, scaleTransform=scaleTransform)
print("dev data", "\n", dev_data_transformed.shape, "\n", get_summary_statistics(dev_data_transformed))

# Same for the test split.
_, _, test_data_transformed = zScore(test_data, translateTransform=translateTransform, scaleTransform=scaleTransform)
print("test data", "\n", test_data_transformed.shape, "\n", get_summary_statistics(test_data_transformed))

## Consider Dimensionality Reduction

Nothing to see here for the iris data

# Find Prototypes

In [None]:
from sklearn.cluster import KMeans

inertia_by_k = []

# Cluster the z-scored training data: that is the representation the network is fed,
# so the elbow should be judged in that space rather than on the raw measurements.
for k in range(2, 17):
    km = KMeans(n_clusters=k, random_state=0).fit(train_data_transformed)
    inertia_by_k.append([k, km.inertia_])

# Column 0 is k, column 1 is the inertia of the fit for that k.
inertia_by_k = np.array(inertia_by_k)
fig = plt.figure(figsize=(6,4))
ax1 = fig.add_subplot(111)
ax1.plot(inertia_by_k[:, 0], inertia_by_k[:, 1])
ax1.set_xlabel('k')
ax1.set_ylabel('Inertia')
ax1.set_title('Elbow Plot')
plt.show()

In [None]:
# Number of prototypes (hidden nodes).
k = 11

# The prototypes must live in the same space as the samples they are compared with:
# calculate_activations is called with the z-scored data, so fit KMeans on the
# z-scored training data rather than on the raw measurements.
km = KMeans(n_clusters=k, random_state=0).fit(train_data_transformed)

print(km.cluster_centers_.shape)

# Define the Activation Function for the Hidden Nodes

In [None]:
# I am going to implement a _stupid activation function_ so that you can implement the right one yourselves for project 7
def calculate_activations(data, centroids):
    "Teaching placeholder - do not use this activation function directly. Returns exp(-distance / 3) for every (sample, centroid) pair; yours is exp(-distance^2 / (2*radius + epsilon))."
    # Broadcasting gives one difference vector per (centroid, sample) pair.
    offsets = data - centroids[:, np.newaxis]
    # Euclidean distance of each pair; change this line if you want squared distances.
    distances = np.linalg.norm(offsets, axis=2)
    numerator = -(distances.T)
    # One scale factor per centroid on the diagonal. Here they are all 1/3; a real
    # implementation would put a centroid/prototype-specific value in each slot.
    denominator = np.eye(centroids.shape[0], dtype=float) * 1/3
    print(numerator.shape, denominator.shape)
    scaled = denominator @ numerator.T
    # Transpose back so that rows are samples and columns are centroids.
    return np.exp(scaled.T)

# Hidden-layer activations for the training split: one row per sample, one column per prototype.
train_calcs = calculate_activations(train_data_transformed, km.cluster_centers_)
print(train_calcs.shape)

# Define the Activation Function for the Output Nodes

In an RBF network, this will be a straight linear function.

# Calculate the Values for the Weights on the Edges to the Output Nodes

In [None]:
from sklearn.linear_model import LinearRegression

def fit_classification(A, y):
    """Fit the hidden-to-output weights for classification.

    A holds the hidden-layer activations (one row per sample) and y the class
    label of each sample. The labels are one-hot encoded, one column per
    distinct value in y (in sorted order), and a LinearRegression is fitted
    from A to that matrix, giving one output node per class.

    Returns the fitted LinearRegression.
    """
    # Encode the labels that were passed in, not a module-level variable, so the
    # function works for whichever split it is called with.
    labels = np.asarray(y)
    one_hot = (labels[:, np.newaxis] == np.unique(labels)).astype(int)
    # Show the one-hot columns next to the original label as a sanity check.
    print("y", "\n", one_hot.shape, "\n", np.vstack((one_hot.T, labels)).T)
    reg = LinearRegression().fit(A, one_hot)
    print("coefficients", "\n", reg.coef_.shape)
    print("intercepts", "\n", reg.intercept_.shape)
    return reg

def fit_regression(A, y):
    """Fit the hidden-to-output weights for regression.

    A holds the hidden-layer activations (one row per sample) and y the target
    value of each sample. Returns the fitted LinearRegression.
    """
    # Print only what was passed in; the diagnostic must not depend on a
    # module-level train_y that may be missing or of a different length.
    print("y", "\n", y.shape, "\n", y)
    reg = LinearRegression().fit(A, y)
    print("coefficients", "\n", reg.coef_.shape)
    print("intercepts", "\n", reg.intercept_.shape)
    return reg

# Column 4 is the species label, so predicting it is classification; any other column is regression.
reg = fit_classification(train_calcs, train_y) if dep_var == 4 else fit_regression(train_calcs, train_y)

# Test and Score

In [None]:
# Hidden-layer activations for the dev split, using the prototypes found on the training data.
dev_calcs = calculate_activations(dev_data_transformed, km.cluster_centers_)
print(dev_calcs.shape)

# Raw output-layer values from the fitted linear model.
dev_yhat = reg.predict(dev_calcs)
print(dev_yhat.shape)
print(dev_yhat)

In [None]:
def final_prediction(input_predictions):
    "Return the index of the largest output value, i.e. the predicted class code for one sample."
    winner = np.argmax(input_predictions)
    # argmax is already a non-negative integer, so round and abs leave it unchanged.
    rounded = np.round(winner)
    return np.abs(rounded)

# For classification, collapse each row of per-class outputs to a single predicted class code.
if dep_var == 4:
    dev_yhat = [final_prediction(row) for row in dev_yhat]
print(dev_yhat)
print(dev_y)

In [None]:
def accuracy(y, yhat):
    "Fraction of positions in y whose value equals the value at the same position in yhat."
    n = len(y)
    # 1 for a match, 0 for a miss, at every position of y.
    hits = [int(bool(y[pos] == yhat[pos])) for pos in range(n)]
    return np.sum(hits) / n

def rsquared(y, yhat):
    "Coefficient of determination: 1 - (residual sum of squares / total sum of squares about the mean of y)."
    residual_ss = ((y - yhat)**2).sum()
    total_ss = ((y - y.mean())**2).sum()
    return 1 - (residual_ss / total_ss)
 
# Classification is scored with accuracy, regression with R^2.
print(accuracy(dev_y, dev_yhat) if dep_var == 4 else rsquared(dev_y, dev_yhat))

# Visualizing Neural Networks

## Visualize the network itself

For this, there are many options. We will use *graphviz*. To install graphviz:
* First, download and install the appropriate executable package from https://www.graphviz.org/download/ (for Windows or Mac)
* Then, run `pip install graphviz` (or `conda install graphviz`) to install the Python bindings

You may have to restart your computer after this for the computer to find the paths.

In [None]:
from graphviz import Digraph

# Let's make a graph
network = Digraph(format='png', comment='RBF network')
# Let's make it draw from left to right
network.attr(rankdir='LR')
# Let's make it draw straight lines for edges rather than curves
#network.attr(splines='line')

# LinearRegression stores a 1-D coef_ and a scalar intercept_ when it was fitted on a
# single target (the regression case). Promote both so they can be indexed as
# [output, hidden] and [output] no matter how many output nodes there are.
output_weights = np.atleast_2d(reg.coef_)
output_biases = np.atleast_1d(reg.intercept_)
o_len = output_weights.shape[0]

# Add input nodes
# (the handle is deliberately not called `input`: at cell level that would replace
# the builtin input() for every later cell)
with network.subgraph(name='input', node_attr={'shape': 'circle', 'color': 'blue'}) as input_layer:
    for i, column_name in enumerate(columns):
        input_layer.node('i'+str(i), column_name)

# Add hidden layer nodes
with network.subgraph(name='hidden', node_attr={'shape': 'circle', 'color': 'black'}) as hidden_layer:
    for i in range(k):
        hidden_layer.node('h'+str(i))

# Add bias nodes (one per output node)
with network.subgraph(name='bias', node_attr={'shape': 'square', 'color': 'black'}) as bias_layer:
    for i in range(o_len):
        bias_layer.node('b'+str(i))

# Add output layer nodes
# Classification has one node per species; regression has a single node, which is
# labelled with the name of the predicted column instead of a species name.
with network.subgraph(name='output', node_attr={'shape': 'circle', 'color': 'red'}) as output_layer:
    for i in range(o_len):
        output_layer.node('o'+ str(i), inverse_type_converter(i) if dep_var == 4 else dep_name)


# Add edges from input to hidden layer
for i in range(len(columns)):
    for j in range(k):
        network.edge('i'+ str(i), 'h'+str(j))

# Add edges from hidden to output layer, labelled with the fitted weight
for i in range(k):
    for j in range(o_len):
        network.edge('h'+str(i), 'o'+str(j), label=str(output_weights[j, i]))

# Add edges from bias to output layer, labelled with the fitted intercept
for i in range(o_len):
    network.edge('b'+str(i), 'o'+str(i), label=str(output_biases[i]))

# Last expression of the cell, so the notebook renders the graph.
network

# Visualizing the *activation* of one or more layers of the network for an input

One way to do this is with a heatmap.

In [None]:
# One row per dev sample (labelled with its true species), one column per hidden node.
sns.heatmap(dev_calcs , yticklabels=[inverse_type_converter(int(x)) for x in dev_y])