# Multilayer networks with Keras 


We will use a few functions from [`scikit-learn`](http://scikit-learn.org) to generate data sets:

In [None]:
from sklearn.datasets import make_blobs, make_circles, make_moons
from conv_net_utils import generate_dataset

In [None]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

We'll also set up some configuration variables up-front:

### The first example is a linearly separable data-set:

In [None]:
# Build the train/test split from sklearn's `make_blobs` generator via the
# project helper `generate_dataset`.
# NOTE(review): n_train / n_test presumably set the split sizes, and the
# remaining keywords are presumably forwarded to make_blobs -- confirm in
# conv_net_utils.
train_data, test_data, train_labels, test_labels = generate_dataset(
    make_blobs,
    n_train=1000,
    n_test=200,
    num_labels=2,
    centers=2,
    center_box=[-4., 4.],
)

In [None]:
# Display the array shapes of the training inputs and the training labels.
train_data.shape, train_labels.shape

In [None]:
# Peek at the first ten entries of the training inputs.
train_data[:10]

In [None]:
# Peek at the first ten entries of the training labels.
train_labels[:10]

In [None]:
# Scatter the two input features of the training set, coloured by class.
# The colour is the column index of each non-zero label entry.
# NOTE(review): assumes train_labels is one-hot encoded (one non-zero entry
# per row) -- confirm against generate_dataset.
fig, ax = plt.subplots(nrows=1)
ax.scatter(
    train_data[:, 0],
    train_data[:, 1],
    c=np.nonzero(train_labels)[1],
)
ax.set_aspect("equal")

In [None]:
from keras.models import Sequential
from keras.layers import Dense

In [None]:
# One hidden layer of 32 ReLU units followed by a 2-unit softmax output.
# The input width is taken from the last axis of the training data.
model = Sequential([
    Dense(32, input_shape=(train_data.shape[-1],), activation='relu'),
    Dense(2, activation='softmax'),
])

# Train with RMSprop on categorical cross-entropy and report accuracy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

## An aside about the softmax function 

The softmax function, defined mathematically as:

$f(y_i) = \frac{e^{y_i}}{\sum_j{e^{y_j}}}$

assigns each of the possible outputs a probability (a number between 0 and 1, with all of them summing to 1). 

It also has the effect of 'mutual inhibition' between alternative options. 

Consider our case, with two options:


In [None]:
def softmax(x):
    """Return the softmax of `x`, normalised along axis 0.

    Parameters
    ----------
    x : array_like
        Input scores. For a 1-D input the whole vector is normalised; for
        higher-dimensional input each slice along axis 0 is normalised
        independently.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as `x` whose entries are non-negative and
        sum to 1 along axis 0.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; keep the empty-in / empty-out behaviour.
        return np.exp(x)
    # Subtracting the per-column maximum does not change the result
    # mathematically, but keeps np.exp from overflowing to inf (and the
    # ratio from becoming NaN) when the inputs are large.
    exps = np.exp(x - np.max(x, axis=0, keepdims=True))
    return exps / np.sum(exps, axis=0, keepdims=True)

In [None]:
# Sweep two competing inputs in opposite directions (10 - ii versus ii) and
# record the softmax of each pair.
xs = np.arange(1, 11)
output = []
for ii in xs:
    x = [10 - ii, ii]
    output.append(softmax(x))

In [None]:
# Plot the recorded softmax outputs against their position in the sweep
# (the x axis is the list index, not the values in `xs`).
plt.plot(output)

In [None]:
# Train for 5 epochs in mini-batches of 100, holding out 20% of the training
# data for validation.
model.fit(
    train_data,
    train_labels,
    epochs=5,
    batch_size=100,
    validation_split=0.2,
)

In [None]:
# Score the trained model on the held-out test set.
loss_and_metrics = model.evaluate(
    test_data,
    test_labels,
    batch_size=100,
)

In [None]:
# Print each evaluation result next to the name the model reports for it.
for name, metric in zip(model.metrics_names, loss_and_metrics):
    print(name, ":", metric)

In [None]:
# Get the model outputs for the test set, then round them to integer 0/1
# values and drop any singleton axes.
prediction = model.predict(test_data)
prediction = np.squeeze(np.round(prediction).astype(int))

In [None]:
# Display the rounded predictions.
prediction

In [None]:
# Scatter the test points, coloured by predicted class.
# argmax yields exactly one class index per row. np.where(prediction)[1]
# would drop any row that rounded to all zeros (e.g. a 0.5/0.5 output),
# leaving `c` shorter than the number of points and making scatter raise.
fig, ax = plt.subplots(1)
ax.scatter(test_data[:, 0], test_data[:, 1], c=np.argmax(prediction, axis=1))
ax.set_aspect('equal')



### The next set of data is not linearly separable:

In [None]:
# Replace the data with a split built from sklearn's `make_moons` generator.
# NOTE(review): `noise` is presumably forwarded to make_moons by
# generate_dataset -- confirm in conv_net_utils.
train_data, test_data, train_labels, test_labels = generate_dataset(
    make_moons,
    n_train=1000,
    n_test=200,
    num_labels=2,
    noise=0.2,
)

In [None]:
# Scatter the two input features of the training set, coloured by class.
# The colour is the column index of each non-zero label entry.
# NOTE(review): assumes train_labels is one-hot encoded (one non-zero entry
# per row) -- confirm against generate_dataset.
fig, ax = plt.subplots(nrows=1)
ax.scatter(
    train_data[:, 0],
    train_data[:, 1],
    c=np.nonzero(train_labels)[1],
)
ax.set_aspect("equal")

In [None]:
# Train for 5 epochs in mini-batches of 100, holding out 20% for validation.
# `epochs` is the current Keras keyword (`nb_epoch` is the removed Keras 1
# spelling) and matches the other fit calls in this notebook.
# NOTE(review): `model` is not rebuilt before this call, so training resumes
# from the weights it already has -- confirm this is intended.
model.fit(train_data, train_labels, epochs=5, batch_size=100, validation_split=0.2)

In [None]:
# Score the model on the held-out test set.
loss_and_metrics = model.evaluate(
    test_data,
    test_labels,
    batch_size=100,
)
# Blank line to separate the evaluation output from the summary below.
print()
# Print each evaluation result next to the name the model reports for it.
for name, metric in zip(model.metrics_names, loss_and_metrics):
    print(name, ":", metric)

In [None]:
# Get the model outputs for the test set, then round them to integer 0/1
# values and drop any singleton axes.
prediction = model.predict(test_data)
prediction = np.squeeze(np.round(prediction).astype(int))

In [None]:
# Scatter the test points, coloured by predicted class.
# argmax yields exactly one class index per row. np.where(prediction)[1]
# would drop any row that rounded to all zeros (e.g. a 0.5/0.5 output),
# leaving `c` shorter than the number of points and making scatter raise.
fig, ax = plt.subplots(1)
ax.scatter(test_data[:, 0], test_data[:, 1], c=np.argmax(prediction, axis=1))
ax.set_aspect('equal')

In [None]:
# Replace the data with a split built from sklearn's `make_circles` generator.
# NOTE(review): `noise` and `factor` are presumably forwarded to make_circles
# by generate_dataset -- confirm in conv_net_utils.
train_data, test_data, train_labels, test_labels = generate_dataset(
    make_circles,
    n_train=1000,
    n_test=200,
    noise=0.2,
    num_labels=2,
    factor=0.1,
)

In [None]:
# Scatter the two input features of the training set, coloured by class.
# The colour is the column index of each non-zero label entry.
# NOTE(review): assumes train_labels is one-hot encoded (one non-zero entry
# per row) -- confirm against generate_dataset.
fig, ax = plt.subplots(nrows=1)
ax.scatter(
    train_data[:, 0],
    train_data[:, 1],
    c=np.nonzero(train_labels)[1],
)
ax.set_aspect("equal")

In [None]:
# Train for 5 epochs in mini-batches of 100, holding out 20% for validation.
# `epochs` is the current Keras keyword (`nb_epoch` is the removed Keras 1
# spelling) and matches the other fit calls in this notebook.
# NOTE(review): `model` is not rebuilt before this call, so training resumes
# from the weights it already has -- confirm this is intended.
model.fit(train_data, train_labels, epochs=5, batch_size=100, validation_split=0.2)

In [None]:
# Score the model on the held-out test set.
loss_and_metrics = model.evaluate(
    test_data,
    test_labels,
    batch_size=100,
)
# Blank line to separate the evaluation output from the summary below.
print()
# Print each evaluation result next to the name the model reports for it.
for name, metric in zip(model.metrics_names, loss_and_metrics):
    print(name, ":", metric)

In [None]:
# Get the model outputs for the test set, then round them to integer 0/1
# values and drop any singleton axes.
prediction = model.predict(test_data)
prediction = np.squeeze(np.round(prediction).astype(int))

In [None]:
# Scatter the test points, coloured by predicted class.
# argmax yields exactly one class index per row. np.where(prediction)[1]
# would drop any row that rounded to all zeros (e.g. a 0.5/0.5 output),
# leaving `c` shorter than the number of points and making scatter raise.
fig, ax = plt.subplots(1)
ax.scatter(test_data[:, 0], test_data[:, 1], c=np.argmax(prediction, axis=1))
ax.set_aspect('equal')

In [None]:
# A fresh, deeper network: two hidden layers of 32 ReLU units each, followed
# by a 2-unit softmax output. The input width is taken from the last axis of
# the training data.
model = Sequential([
    Dense(32, input_shape=(train_data.shape[-1],), activation='relu'),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax'),
])

# Train with RMSprop on categorical cross-entropy and report accuracy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# Train for 5 epochs in mini-batches of 100, holding out 20% of the training
# data for validation.
model.fit(
    train_data,
    train_labels,
    epochs=5,
    batch_size=100,
    validation_split=0.2,
)
print()
# Score the trained model on the held-out test set.
loss_and_metrics = model.evaluate(
    test_data,
    test_labels,
    batch_size=100,
)
print()
# Print each evaluation result next to the name the model reports for it.
for name, metric in zip(model.metrics_names, loss_and_metrics):
    print(name, ":", metric)

In [None]:
# Get the model outputs for the test set, then round them to integer 0/1
# values and drop any singleton axes.
prediction = model.predict(test_data)
prediction = np.squeeze(np.round(prediction).astype(int))

In [None]:
# Scatter the test points, coloured by predicted class.
# argmax yields exactly one class index per row. np.where(prediction)[1]
# would drop any row that rounded to all zeros (e.g. a 0.5/0.5 output),
# leaving `c` shorter than the number of points and making scatter raise.
fig, ax = plt.subplots(1)
ax.scatter(test_data[:, 0], test_data[:, 1], c=np.argmax(prediction, axis=1))
ax.set_aspect('equal')