In [None]:
!wget -nc https://raw.githubusercontent.com/arokem/conv-nets/master/conv_net_utils.py

In [None]:
import numpy as np
import matplotlib.pyplot as plt 
%matplotlib inline

A single-layered neural net is simply a set of weights 

<img src='https://raw.githubusercontent.com/arokem/conv-nets/master/img/nn-1.png' width=500px>

Where: 

$X_{21} = X_{11} w^2_{11} + X_{12} w^2_{21}$

Let's implement this in code:

In [None]:
# Inputs to the single-layer network, followed by the weights on the
# connections from each input to unit X21.
x11, x12 = 1, 2
w_2_11, w_2_21 = -2, 3

In [None]:
# Weighted sum of the two inputs, term by term as in the formula for X21.
x21 = x11 * w_2_11 + x12 * w_2_21

In [None]:
# Show the value of x21 obtained from the explicit weighted sum.
print(x21)

More generally: 

$X_{21} = w^2 \cdot X_{1}$


In [None]:
# The same weighted sum, written as a dot product of the weight vector
# [w_2_11, w_2_21] with the input vector [x11, x12].
x21 = np.dot([w_2_11, w_2_21], [x11, x12])

In [None]:
# Show x21 as computed by the dot product.
print(x21)

A multi-layered network will add to that another set of weights: 

<img src='https://raw.githubusercontent.com/arokem/conv-nets/master/img/nn-2.png' width=500px>

In [None]:
# Inputs to the two-layer network.
x11, x12 = 1, 2

# Layer-2 weights: w_2_ij connects input i to hidden unit j.
w_2_11, w_2_21 = -2, 3
w_2_12, w_2_22 = 2, -3

# Layer-3 weights connect the two hidden units to the single output unit.
w_3_11, w_3_21 = 3, 2

In [None]:
# Hidden layer: stack the weights feeding each hidden unit as the rows of a
# matrix, so a single matrix-vector product yields both hidden values.
x21, x22 = np.dot([[w_2_11, w_2_21],
                   [w_2_12, w_2_22]],
                  [x11, x12])

# Output layer: weighted sum of the two hidden values.
x31 = np.dot([w_3_11, w_3_21], [x21, x22])

In [None]:
# Show the output of the two-layer network (no activation functions yet).
print(x31)

Things get more interesting when an activation function is added to each unit:

<img src='https://raw.githubusercontent.com/arokem/conv-nets/master/img/nn-3.png' width=500px>

Different functions that are used include the hyperbolic tangent:

In [None]:
# Sample points from -pi up to (but not including) pi in steps of 0.001;
# they serve as the x-axis for the activation-function plots.
x = np.arange(-np.pi, np.pi, 0.001)

In [None]:
# Plot the hyperbolic tangent over the sampled range.
plt.plot(x, np.tanh(x))

Another function that has been used a lot, both for convenience and because it induces sparsity, is the rectified linear unit (ReLU):

In [None]:
# ReLU: the element-wise maximum of the input and zero.
plt.plot(x, np.maximum(x, 0))

In [None]:
# Forward pass with a ReLU on every unit: each weighted sum is clipped at zero.
x21 = np.maximum(np.dot([w_2_11, w_2_21], [x11, x12]), 0)
x22 = np.maximum(np.dot([w_2_12, w_2_22], [x11, x12]), 0)

x31 = np.maximum(np.dot([w_3_11, w_3_21], [x21, x22]), 0)

In [None]:
# Show the network output once ReLU activations are applied.
print(x31)

Networks are trained through gradient descent: gradual changes to the values of the weights

The gradients are calculated through **backpropagation**

Error is propagated back through the network to calculate a gradient (derivative) for each weight by multiplying:

- The gradient of the loss function with respect to the node a weight feeds into
- The value of the node feeding into the weight
- The slope of the activation function of the node it feeds into

For example, for the network we had above, let's assume the desired output was 10, instead of 12

In [None]:
# The error signal is the signed difference between the network output and
# the desired output (10) -- note that it is not an absolute value, so its
# sign says in which direction the output missed the target.
e31 = x31 - 10

# Helper: derivative (slope) of the ReLU activation.
def d_relu(x):
    """Return 1 when `x` is strictly positive and 0 otherwise."""
    return 1 if x > 0 else 0

# Error signal ("delta") at the output node: e31 (output minus target) times
# the slope of the output activation.
d31 = e31 * d_relu(x31)

# Gradients for the output-layer weights: the delta of the node the weight
# feeds into, times the value of the node feeding into the weight.
e_3_11 = d31 * x21
e_3_21 = d31 * x22

# Propagate the delta back to each hidden node through the *weight* that
# connects it to the output (chain rule), then through the hidden ReLU.
# Using the output-weight gradients (e_3_11 / e_3_21) here instead would
# wrongly scale by the hidden value rather than by the connecting weight.
d21 = d31 * w_3_11 * d_relu(x21)
d22 = d31 * w_3_21 * d_relu(x22)

# Gradients for the hidden-layer weights.
e_2_11 = d21 * x11
e_2_21 = d21 * x12

e_2_12 = d22 * x11
e_2_22 = d22 * x12


In [None]:
# Learning rate: the factor that scales each gradient in the weight update.
lr = 0.01

In [None]:
# Gradient-descent step: move every weight against its gradient, scaled by
# the learning rate. Each weight is updated from its *own* previous value
# (w_3_21 must not be derived from w_3_11).
w_3_11 = w_3_11 - e_3_11 * lr
w_3_21 = w_3_21 - e_3_21 * lr

w_2_11 = w_2_11 - e_2_11 * lr
w_2_12 = w_2_12 - e_2_12 * lr

w_2_21 = w_2_21 - e_2_21 * lr
w_2_22 = w_2_22 - e_2_22 * lr

In [None]:
# Repeat the ReLU forward pass, now with the updated weights.
x21 = np.maximum(np.dot([w_2_11, w_2_21], [x11, x12]), 0)
x22 = np.maximum(np.dot([w_2_12, w_2_22], [x11, x12]), 0)

x31 = np.maximum(np.dot([w_3_11, w_3_21], [x21, x22]), 0)

In [None]:
# Show the network output after one gradient-descent step.
print(x31)

As we'll see next, depending on the error function that is used, neural nets can be used for other tasks as well

# Multilayer networks with Keras 


We will use a few functions from [`scikit-learn`](http://scikit-learn.org) to generate data sets:

In [None]:
from sklearn.datasets import make_blobs, make_circles, make_moons
from conv_net_utils import generate_dataset

In [None]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

### The first example is a linearly separable data-set:

In [None]:
# Build training/test data and labels from `make_blobs` with two centers.
train_data, test_data, train_labels, test_labels = generate_dataset(
    make_blobs,
    n_train=1000,
    n_test=200,
    num_labels=2,
    centers=2,
    center_box=[-4., 4.],
)

In [None]:
# Inspect the array shapes of the training data and the training labels.
train_data.shape, train_labels.shape

In [None]:
# Peek at the first ten training samples.
train_data[:10]

In [None]:
# Peek at the first ten training labels.
train_labels[:10]

In [None]:
# np.where(...)[1] is the column index of every non-zero label entry; it is
# used as the colour of the corresponding point.
label_colors = np.where(train_labels)[1]

fig, ax = plt.subplots()
ax.scatter(train_data[:, 0], train_data[:, 1], c=label_colors)
ax.set_aspect('equal')

In [None]:
from keras.models import Sequential
from keras.layers import Dense

In [None]:
# One hidden layer of 32 ReLU units followed by a 2-unit softmax output.
model = Sequential([
    Dense(32, input_shape=(train_data.shape[-1],), activation='relu'),
    Dense(2, activation='softmax'),
])

# Categorical cross-entropy loss, RMSprop optimizer, accuracy reported as a metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# Train for 5 epochs in batches of 100 samples; validation_split=0.2 sets
# aside 20% of the training data for validation.
model.fit(train_data, train_labels, epochs=5, batch_size=100, validation_split=0.2)

In [None]:
# Score the trained model on the test data and labels.
loss_and_metrics = model.evaluate(test_data, test_labels, batch_size=100)

In [None]:
# Print each metric name next to the value returned by `evaluate`.
for name, metric in zip(model.metrics_names, loss_and_metrics):
    print(name, ":", metric)

In [None]:
# Round the model's outputs to whole numbers so every row becomes a hard
# 0/1 prediction. NOTE(review): a row of exactly [0.5, 0.5] would round to
# [0, 0] (round-half-to-even) and then has no predicted class.
class_scores = model.predict(test_data)
prediction = np.round(class_scores).astype(int).squeeze()

In [None]:
# Display the rounded predictions.
prediction

In [None]:
# Colour every test point by the column index of its non-zero prediction entry.
predicted_colors = np.where(prediction)[1]

fig, ax = plt.subplots()
ax.scatter(test_data[:, 0], test_data[:, 1], c=predicted_colors)
ax.set_aspect('equal')



### The next set of data is not linearly separable:

In [None]:
# Build training/test data and labels from `make_circles` (noise=0.2, factor=0.1).
train_data, test_data, train_labels, test_labels = generate_dataset(
    make_circles,
    n_train=1000,
    n_test=200,
    noise=0.2,
    num_labels=2,
    factor=0.1,
)

In [None]:
# Colour every training point by the column index of its non-zero label entry.
label_colors = np.where(train_labels)[1]

fig, ax = plt.subplots()
ax.scatter(train_data[:, 0], train_data[:, 1], c=label_colors)
ax.set_aspect('equal')

In [None]:
# Keep training the existing single-hidden-layer `model` (already fitted on
# the blobs data) on the circles data. `epochs` is the current Keras keyword;
# the Keras 1 spelling `nb_epoch` is no longer accepted.
model.fit(train_data, train_labels, epochs=5, batch_size=100, validation_split=0.2)

In [None]:
# Score the model on the test set, then list every metric by name.
loss_and_metrics = model.evaluate(
    test_data,
    test_labels,
    batch_size=100,
)
print()
for metric_name, metric_value in zip(model.metrics_names, loss_and_metrics):
    print(metric_name, ":", metric_value)

In [None]:
# Round the model's outputs to whole numbers to obtain hard 0/1 predictions.
class_scores = model.predict(test_data)
prediction = np.round(class_scores).astype(int).squeeze()

In [None]:
# Colour every test point by the column index of its non-zero prediction entry.
predicted_colors = np.where(prediction)[1]

fig, ax = plt.subplots()
ax.scatter(test_data[:, 0], test_data[:, 1], c=predicted_colors)
ax.set_aspect('equal')

In [None]:
# A fresh, deeper network: two hidden layers of 32 ReLU units each, followed
# by a 2-unit softmax output.
model = Sequential([
    Dense(32, input_shape=(train_data.shape[-1],), activation='relu'),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# Train the deeper model, score it on the test set and list every metric.
model.fit(
    train_data,
    train_labels,
    epochs=5,
    batch_size=100,
    validation_split=0.2,
)
print()
loss_and_metrics = model.evaluate(test_data, test_labels, batch_size=100)
print()
for metric_name, metric_value in zip(model.metrics_names, loss_and_metrics):
    print(metric_name, ":", metric_value)

In [None]:
# Round the deeper model's outputs to whole numbers for hard 0/1 predictions.
class_scores = model.predict(test_data)
prediction = np.round(class_scores).astype(int).squeeze()

In [None]:
# Colour every test point by the column index of its non-zero prediction entry.
predicted_colors = np.where(prediction)[1]

fig, ax = plt.subplots()
ax.scatter(test_data[:, 0], test_data[:, 1], c=predicted_colors)
ax.set_aspect('equal')

In [None]:
from conv_net_utils import load_fashion

In [None]:
import numpy as np

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
!wget -nc https://storage.googleapis.com/ohbm-dl-lindsay-data/Rokem_IXI_brain_data.npz

In [None]:
# Open the downloaded .npz archive and pull out the training/test arrays
# (keys 'X_*') and their labels (keys 'y_*').
brain_data = np.load("Rokem_IXI_brain_data.npz")
train_data, test_data = brain_data['X_train'], brain_data['X_test']
train_labels, test_labels = brain_data['y_train'], brain_data['y_test']

In [None]:
# Random orderings of the sample indices (an integer argument makes
# `permutation` shuffle np.arange(n)). No seed is set, so the order differs
# from run to run.
shuffle_train = np.random.permutation(len(train_data))
shuffle_test = np.random.permutation(len(test_data))

In [None]:
# Reorder data and labels with the same index permutation so every sample
# stays paired with its label.
train_data, train_labels = train_data[shuffle_train], train_labels[shuffle_train]
test_data, test_labels = test_data[shuffle_test], test_labels[shuffle_test]

In [None]:
# Inspect the shapes of the shuffled training/test data and labels.
train_data.shape, train_labels.shape, test_data.shape, test_labels.shape

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

In [None]:
# Convert the labels with Keras' `to_categorical`. The names are reassigned
# in place, so re-running this cell would apply the conversion a second time.
train_labels, test_labels = to_categorical(train_labels), to_categorical(test_labels)

In [None]:
# Collapse everything after the sample axis into one feature vector per
# sample, as needed by the fully connected layers.
train_data_flat = train_data.reshape(len(train_data), -1)
test_data_flat = test_data.reshape(len(test_data), -1)

In [None]:
# `reshape` returned new arrays, so the original training array keeps its shape.
train_data.shape

In [None]:
# Fully connected network on the flattened images: two hidden layers of 32
# ReLU units and a 2-unit softmax output.
model = Sequential([
    Dense(32, input_shape=(train_data_flat.shape[-1],), activation='relu'),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# Train on the flattened images, score on the flattened test set and list
# every metric by name.
model.fit(
    train_data_flat,
    train_labels,
    epochs=10,
    batch_size=100,
    validation_split=0.2,
)
print()
loss_and_metrics = model.evaluate(test_data_flat, test_labels, batch_size=100)
print()
for metric_name, metric_value in zip(model.metrics_names, loss_and_metrics):
    print(metric_name, ":", metric_value)

# What is a convolution? 

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from conv_net_utils import plot_with_annot, load_fashion
%matplotlib inline

In [None]:
# A 4x5 test image: the integers 0..9 counting up, then 10..1 counting down.
rising = np.arange(10)
falling = np.arange(10, 0, -1)
small_image = np.concatenate([rising, falling]).reshape((4, 5))
fig1 = plot_with_annot(small_image)

In [None]:
# Surround the image with a one-pixel border of zeros. `small_image` is
# overwritten, so re-running this cell pads it again.
small_image = np.pad(small_image, pad_width=1, mode='constant')
fig1 = plot_with_annot(small_image)

In [None]:
# 3x3 plus-shaped kernel: ones on the centre row and column, zeros in the corners.
kernel = np.array([
    [0, 1, 0],
    [1, 1, 1],
    [0, 1, 0],
])

fig2 = plot_with_annot(kernel)
fig2.set_size_inches([1, 1])

In [None]:
result = np.zeros(small_image.shape)

# First kernel position: multiply the top-left 3x3 patch by the kernel, sum
# the products and store the total at the patch centre (1, 1).
top_left_patch = small_image[0:3, 0:3]
result[1, 1] += (top_left_patch * kernel).sum()
fig3 = plot_with_annot(result)

In [None]:
# Second kernel position: shift the 3x3 window one column to the right and
# store its response at the window centre (1, 2). Plain assignment (instead
# of `+=`) keeps the cell idempotent: re-running it does not accumulate.
result[1, 2] = np.sum(small_image[:3, 1:4] * kernel)
fig3 = plot_with_annot(result)

In [None]:
# Third kernel position: shift the 3x3 window one more column to the right
# and store its response at the window centre (1, 3). Plain assignment
# (instead of `+=`) keeps the cell idempotent: re-running it does not accumulate.
result[1, 3] = np.sum(small_image[:3, 2:5] * kernel)
fig3 = plot_with_annot(result)

In [None]:
result = np.zeros(small_image.shape)

# Visit every window position in turn; each response is written one row and
# one column in from the window's top-left corner.
n_row_steps = small_image.shape[0] - 2
n_col_steps = small_image.shape[1] - 2
for row in range(n_row_steps):
    for col in range(n_col_steps):
        patch = small_image[row:row + kernel.shape[0], col:col + kernel.shape[1]]
        result[row + 1, col + 1] = np.sum(patch * kernel)

_ = plot_with_annot(result)

In [None]:
# Take a single 2-D slice from the 4-axis training array: index 0 on the
# first axis and index 0 on the last axis.
# NOTE(review): presumably (sample, row, column, channel) -- confirm. The
# name `bricks` does not match the brain data loaded earlier.
bricks = train_data[0, :, :, 0]

In [None]:
# Display the selected image as a matrix plot, without axis ticks or frame.
fig, ax = plt.subplots(1)
ax.matshow(bricks)
ax.set_axis_off()

In [None]:
conv = np.zeros(bricks.shape)

# Slide the kernel over every position where it fits entirely inside the
# image and store each response at the pixel under the kernel's centre --
# the same convention as the `small_image` loop. Border pixels that the
# kernel centre cannot reach stay at zero.
k_rows, k_cols = kernel.shape
for ii in range(bricks.shape[0] - k_rows + 1):
    for jj in range(bricks.shape[1] - k_cols + 1):
        window = bricks[ii:ii + k_rows, jj:jj + k_cols]
        conv[ii + k_rows // 2, jj + k_cols // 2] = np.sum(window * kernel)


In [None]:
# Display the result of the convolution as a matrix plot.
plt.matshow(conv)

In [None]:
# Kernel with a positive middle row and negative rows above and below; each
# column sums to zero.
kernel = np.array([
    [-0.5, -0.5, -0.5],
    [1, 1, 1],
    [-0.5, -0.5, -0.5],
])
fig2 = plot_with_annot(kernel, vmax=1)
fig2.set_size_inches([1, 1])

In [None]:
conv = np.zeros(bricks.shape)

# Slide the kernel over every position where it fits entirely inside the
# image and store each response at the pixel under the kernel's centre --
# the same convention as the `small_image` loop. Border pixels that the
# kernel centre cannot reach stay at zero.
k_rows, k_cols = kernel.shape
for ii in range(bricks.shape[0] - k_rows + 1):
    for jj in range(bricks.shape[1] - k_cols + 1):
        window = bricks[ii:ii + k_rows, jj:jj + k_cols]
        conv[ii + k_rows // 2, jj + k_cols // 2] = np.sum(window * kernel)

# Show the filtered image without axis ticks or frame.
fig, ax = plt.subplots(1)
ax.matshow(conv)
ax.set_axis_off()

In [None]:
# Kernel with a positive middle column and negative columns on either side;
# each row sums to zero.
kernel = np.array([
    [-0.5, 1, -0.5],
    [-0.5, 1, -0.5],
    [-0.5, 1, -0.5],
])
fig2 = plot_with_annot(kernel, vmax=1)
fig2.set_size_inches([1, 1])

In [None]:
conv = np.zeros(bricks.shape)

# Slide the kernel over every position where it fits entirely inside the
# image and store each response at the pixel under the kernel's centre --
# the same convention as the `small_image` loop. Border pixels that the
# kernel centre cannot reach stay at zero.
k_rows, k_cols = kernel.shape
for ii in range(bricks.shape[0] - k_rows + 1):
    for jj in range(bricks.shape[1] - k_cols + 1):
        window = bricks[ii:ii + k_rows, jj:jj + k_cols]
        conv[ii + k_rows // 2, jj + k_cols // 2] = np.sum(window * kernel)

plt.matshow(conv)

# Why are convolutions useful for neural networks?

- Natural images contain correlations
- Reduce the number of parameters in training

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPool2D, MaxPooling2D, Dropout

In [None]:
# Smallest convolutional network: one layer of 32 3x3 ReLU filters applied
# to the unflattened images, flattened into a 2-unit softmax output.
model = Sequential([
    Conv2D(32, kernel_size=3, input_shape=train_data.shape[1:], activation='relu'),
    Flatten(),
    Dense(2, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the single-convolution model, score it on the test set and list
# every metric by name.
model.fit(
    train_data,
    train_labels,
    epochs=10,
    batch_size=100,
    validation_split=0.2,
)
print()
loss_and_metrics = model.evaluate(test_data, test_labels, batch_size=100)
print()
for metric_name, metric_value in zip(model.metrics_names, loss_and_metrics):
    print(metric_name, ":", metric_value)

In [None]:
# Two convolution + max-pooling stages, then a 2-unit softmax output.
# NOTE(review): the second Conv2D has no `activation`, so it stays linear,
# unlike the first layer -- confirm whether 'relu' was intended.
model = Sequential([
    Conv2D(32, kernel_size=3, input_shape=train_data.shape[1:], activation='relu'),
    MaxPool2D((2, 2), padding='same'),
    Conv2D(64, kernel_size=3),
    MaxPool2D((2, 2), padding='same'),
    Flatten(),
    Dense(2, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the two-stage convolutional model, score it on the test set and
# list every metric by name.
model.fit(
    train_data,
    train_labels,
    epochs=10,
    batch_size=100,
    validation_split=0.2,
)
print()
loss_and_metrics = model.evaluate(test_data, test_labels, batch_size=100)
print()
for metric_name, metric_value in zip(model.metrics_names, loss_and_metrics):
    print(metric_name, ":", metric_value)

In [None]:
# Deeper convolutional network: three conv + max-pool stages (32, 64 and 128
# filters, all 3x3 with ReLU and 'same' padding), dropout, then a dense head.
model = Sequential()
# Take the input shape from the data itself, as the other convolutional
# models do, rather than hard-coding an image size.
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                 input_shape=train_data.shape[1:], padding='same'))
model.add(MaxPool2D((2, 2), padding='same'))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2, 2), padding='same'))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2, 2), padding='same'))
model.add(Dropout(0.25))

# Classification head: flatten, one hidden dense layer, dropout, and a
# 2-unit softmax output.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(2, activation='softmax'))

model.compile(loss="categorical_crossentropy",
              optimizer="adadelta",
              metrics=['accuracy'])

In [None]:
# Train the deep convolutional model, score it on the test set and list
# every metric by name.
model.fit(
    train_data,
    train_labels,
    epochs=10,
    batch_size=100,
    validation_split=0.2,
)
print()
loss_and_metrics = model.evaluate(test_data, test_labels, batch_size=100)
print()
for metric_name, metric_value in zip(model.metrics_names, loss_and_metrics):
    print(metric_name, ":", metric_value)