# Neural Networks with Keras

In [None]:
from __future__ import print_function
import random
import numpy as np
import pandas as pd
from math import sin

from keras.models import Sequential
from keras.layers import Dense, Activation
#from keras.optimizers import SGD   # Stochastic Gradient Descent

from sklearn.metrics import accuracy_score, confusion_matrix, mean_squared_error
from sklearn.model_selection import cross_val_score as cv

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams["figure.figsize"] = (8, 8)

## Neural Network Regression

### Polynomial Regression

Let's train a neural network on a few different shapes. First we start with a polynomial (a cubic).

In [None]:
# Create some data

# Fixed seed so the noisy targets are identical on every Restart & Run All
random.seed(42)


def f(x):
    """Noisy cubic target: x**3 - 5*x + 12 plus one uniform draw from [0, 1)."""
    return x ** 3 - 5 * x + 12 + random.random()


X = np.linspace(-1, 1, 1000).reshape(-1, 1)
# Iterating X yields one length-1 row per sample, so every point gets its own noise draw
y = np.array([f(row) for row in X])

print(X.shape, y.shape)

In [None]:
# Quick look at the noisy cubic before fitting anything
plt.scatter(x=X, y=y)

In [None]:
# Define a Feed Forward NN: one tanh hidden layer (5 units) feeding a single linear output
model = Sequential([
    Dense(5, activation='tanh'),
    Dense(1, activation='linear'),
])

# Mean squared error loss; the optimizer is picked by name ('SGD')
model.compile(optimizer='SGD', loss='mse')

In [None]:
# Fit on the cubic data, holding out 10% of the samples for validation.
# `loss` keeps whatever fit() returns; its .history['loss'] list is read below.
print('Training...')
loss = model.fit(
    X, y,
    batch_size=128,
    epochs=150,
    validation_split=0.1,
    verbose=False,
)
# Training loss after the final epoch
print(loss.history['loss'][-1])
print("Done")

In [None]:
# Plot the predictions
predictions = model.predict(X)

# Noisy data as translucent points, fitted curve drawn on top in red
plt.scatter(X, y, alpha=0.5)
plt.plot(X, predictions, color='r', linewidth=2)
plt.show()
# sklearn's signature is mean_squared_error(y_true, y_pred): ground truth goes first
print("MSE", mean_squared_error(y, predictions))

### Sine Regression

In [None]:
# Sine data: 500 evenly spaced inputs on [0, 2*pi], stored as a column vector
X = np.linspace(0, 2 * np.pi, num=500)[:, np.newaxis]
y = np.sin(X)

print(X.shape, y.shape)

In [None]:
# Visual check of the sine samples
plt.scatter(x=X, y=y)

In [None]:
# Create the model: 1 input feature -> 5 tanh hidden units -> 1 linear output
model = Sequential()
model.add(Dense(5, activation='tanh', input_shape=(1,)))
# Only the first layer needs input_shape; later layers take their input size from the layer before
model.add(Dense(1, activation='linear'))

# Mean squared error loss; the optimizer is picked by name ('SGD')
model.compile(loss='mse', optimizer='SGD')

### Train the Model

In [None]:
# Fit the sine model, holding out 10% of the samples for validation
print('Training..')
loss = model.fit(
    X, y,
    batch_size=128,
    epochs=150,
    validation_split=0.1,
    verbose=False,
)
# Training loss after the final epoch
print(loss.history['loss'][-1])
print('Complete')

In [None]:
# Plot the predictions
predictions = model.predict(X)

# True sine samples as points, model output as the red line
plt.scatter(X, y)
plt.plot(X, predictions, color='r')
plt.show()
# sklearn's signature is mean_squared_error(y_true, y_pred): ground truth goes first
print("MSE", mean_squared_error(y, predictions))

In [None]:
# Plot the error over time: one point per epoch of recorded training loss
epoch_mse = loss.history['loss']
plt.scatter(range(len(epoch_mse)), epoch_mse)

# Optional: overlay the validation curve as well
# plt.scatter(range(len(loss.history['val_loss'])), loss.history['val_loss'], color='red')

plt.title('MSE by Epoch')
plt.xlabel('Epoch')
plt.ylabel('MSE');

### Train longer

If we train for more epochs, we can get a better regression.

In [None]:
# Same sine task again, this time with 1000 points, 250 epochs and a batch size of 256
X = np.linspace(0, 2 * np.pi, 1000).reshape(-1,1)
y = np.sin(X)

print(X.shape, y.shape)

model = Sequential()
model.add(Dense(5, activation='tanh', input_shape=(1,)))
# Only the first layer needs input_shape; later layers take their input size from the layer before
model.add(Dense(1, activation='linear'))

# Mean squared error loss; the optimizer is picked by name ('SGD')
model.compile(loss='mse', optimizer='SGD')

print('Training..')
loss = model.fit(X, y, epochs=250, validation_split=0.1,
                 batch_size=256, verbose=False)
# Training loss after the final epoch
print(loss.history['loss'][-1])
print('Complete')

# Plot
predictions = model.predict(X)

plt.scatter(X, y)
plt.plot(X, predictions, color='r')
plt.show()
# sklearn's signature is mean_squared_error(y_true, y_pred): ground truth goes first
print("MSE", mean_squared_error(y, predictions))

We can take a closer look at the error per training epoch.

In [None]:
# Plot the error over time: recorded training loss for each epoch of the longer run
history_mse = loss.history['loss']
plt.scatter(range(len(history_mse)), history_mse)
plt.ylabel('MSE')
plt.xlabel('Epoch')
plt.title('MSE by Epoch')

### Exercise: Perform regression on the following data
Hints:
* Try adding a hidden layer
* Try lowering the learning rate and using more epochs

In [None]:
# NOTE: this redefines f, replacing the noisy cubic used in the polynomial section
def f(x):
    """Exercise target x**2 * sin(x**2); works elementwise on NumPy arrays."""
    return x ** 2 * np.sin(x**2)

# Inputs: 1000 evenly spaced points on [2, pi], stored as a column vector
X = np.linspace(2, np.pi, 1000).reshape(-1,1)
# f only uses array arithmetic and np.sin, so it can be applied to the whole array at once
y = f(X)

print(X.shape, y.shape)

In [None]:
# Look at the exercise data before modelling it
plt.scatter(x=X, y=y)

In [None]:
# Solution


In [None]:
# Setup your model




# Compile the model using MSE as your loss function and an SGD learning rate of your choice



#Fit your model



# Make your predictions


# Plot predictions


## Classification

We'll start with the Iris dataset (of course).

In [None]:
import sklearn.datasets as datasets
iris = datasets.load_iris()

# Feature matrix and integer class labels
X = iris.data
y = iris.target

# Break each output into indicator cols (one column per class).
# Ask for floats explicitly: get_dummies defaults to bool columns on pandas >= 2.0,
# and the network is trained against these values with an MSE loss.
y_cat = pd.get_dummies(y, dtype=float).values

print(X.shape, y_cat.shape)

In [None]:
# Define a model
model = Sequential()

# First layer - input_shape is the number of features per sample (4 here).
model.add(Dense(4, activation='tanh', input_shape=(4,)))

# Output layer - one unit per indicator column in y_cat (3 here).
# Use 'softmax' for class probability. 'linear' for regression.
# Only the first layer needs input_shape; this layer takes its input size from the layer before.
model.add(Dense(3, activation='softmax'))

# Uses Mean Squared Error and Stochastic Gradient Descent
model.compile(loss='mse', optimizer='SGD')

In [None]:
# Train the classifier on the one-hot targets, holding out 10% of the rows for validation
print('Training...')
loss = model.fit(
    X, y_cat,
    batch_size=16,
    epochs=100,
    validation_split=0.1,
    verbose=False,
)

print(loss.history['loss'][-1])   # MSE at the last epoch
print("Training complete")

In [None]:
# Show the raw integer class labels (the values y_cat was one-hot encoded from)
y

In [None]:
# Model evaluation (measured on the same X the model was fitted on)
# Predict once, then pick the highest-scoring class for each row
pred_y = model.predict(X, verbose=False)
preds  = np.argmax(pred_y, axis=-1)

print('ACCURACY: ', accuracy_score(y, preds))
print('CONFUSION MATRIX:\n', confusion_matrix(y, preds))

In [None]:
# Plot the error over time: recorded training loss for each epoch
epoch_mse = loss.history['loss']
plt.scatter(range(len(epoch_mse)), epoch_mse)
plt.title('MSE by Epoch')
plt.xlabel('Epoch')
plt.ylabel('MSE');

## Abalone data set

In [None]:
# Column names are passed to read_csv explicitly via names=
columns = [
    "Sex",
    "Length", "Diameter", "Height",
    "Whole Weight", "Shucked weight", "Viscera weight", "Shell weight",
    "Rings",
]
df = pd.read_csv("./data/abalone.data", names=columns)

# Preview the first rows
df.head()

In [None]:
# Summary statistics of the abalone frame as loaded
df.describe()

In [None]:
import seaborn as sns

# Pairwise plots of Length, Diameter and Height, coloured by Sex
size_columns = columns[1:4]
sns.pairplot(df, vars=size_columns, hue="Sex")
plt.show()

In [None]:
# Encode the Sex labels as integer class ids
d = {'M': 0, 'F': 1, 'I': 2}
# Values that are already class ids pass through unchanged, so re-running this cell is a
# no-op instead of a KeyError; any other unexpected label still raises KeyError.
df["Sex"] = df["Sex"].map(lambda sex: sex if sex in d.values() else d[sex])

In [None]:
# Summary statistics again, now that Sex holds integer codes
df.describe()

In [None]:
# Features: every column except the first ("Sex"); target: the integer-encoded Sex column
X = np.array(df[columns[1:]])
y = np.array(df["Sex"])
# One indicator column per class. Ask for floats explicitly: get_dummies defaults to bool
# columns on pandas >= 2.0, and the network is trained against these values with an MSE loss.
y_cat = pd.get_dummies(y, dtype=float).values

print(X.shape, y_cat.shape)

In [None]:
# Define a model
model = Sequential()

# The first argument to Dense is the number of neurons in that layer.
# input_shape (first layer only) is the number of features per sample.

# First layer - input_shape=(8,): the eight feature columns in X.
model.add(Dense(6, activation='tanh', input_shape=(8,)))

# Second, hidden layer - takes its input size (6) from the previous layer.
model.add(Dense(8, activation='tanh'))

# Output layer - one unit per indicator column in y_cat (3).
# Use 'softmax' for class probability. 'linear' for regression
model.add(Dense(3, activation='softmax'))

# Uses Mean Squared Error and Stochastic Gradient Descent.
# The optimizer is selected by name, so no separate optimizer object is needed.
model.compile(loss='mse', optimizer='SGD')

In [None]:
# Train the abalone classifier, holding out 10% of the rows for validation
print('Training...')
loss = model.fit(
    X, y_cat,
    batch_size=16,
    epochs=50,
    validation_split=0.1,
    verbose=False,
)

print(loss.history['loss'][-1])   # MSE at the last epoch
print("Training complete")

In [None]:
# Model evaluation (measured on the same X the model was fitted on)
# Predict once, then pick the highest-scoring class for each row
pred_y = model.predict(X, verbose=False)
preds  = np.argmax(pred_y, axis=-1)

print('ACCURACY: ', accuracy_score(y, preds))
print('CONFUSION MATRIX:\n', confusion_matrix(y, preds))

# Plot the error over time

plt.scatter(range(len(loss.history['loss'])), loss.history['loss'])
plt.xlabel('Epoch')
plt.ylabel('MSE')
plt.title('MSE by Epoch')

## Exercise

Classify the following data ([source](https://archive.ics.uci.edu/ml/datasets/MAGIC+Gamma+Telescope)). You'll need to translate the classes into integers. Design a neural network to classify the data and evaluate the results.

In [None]:
# Column names for the MAGIC file, passed to read_csv explicitly via names=
names = "fLength fWidth fSize fConc fConc1 fAsym fM3Long fM3Trans fAlpha fDist class".split()
df = pd.read_csv(
    "./data/magic04.data",
    names=names,
)
# Preview the first rows
df.head()

In [None]:
# How many rows fall into each value of the 'class' column
df['class'].value_counts()

In [None]:
# Create a dictionary to change the class to an int


In [None]:
# Create your X, y and y_cat datasets


In [None]:
# What's their shape?


In [None]:
# Define a model? Why not!
# Start from an empty Sequential container; the layers for this exercise are added to it below.
model = Sequential()

# How many input dimensions does X have?
# What are our output dimensions?
# Build our first layer

# Choose a value for the hidden layer

# Create the Output layer - how many output dimensions should you have?



In [None]:
# Uses Mean Squared Error and Stochastic Gradient Descent


In [None]:
# Train the model


In [None]:
# Model evaluation - maybe accuracy and confusion matrix?

# Plot the error over time



## Extra Practice 

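What are better loss functions for classification problems? Can you improve any of the classifiers in this notebook by switching to a cross-entropy loss (`categorical_crossentropy` for the multi-class models, `binary_crossentropy` for a two-class problem)? What about optimizing with Adam or another optimizer?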