Notebook for learning deep learning methodolgies and algorithms that help in fulfilling them

In [1]:
# import libraries (commonly used libraries)
import numpy as np
import pandas as pd
from sklearn import datasets
import matplotlib.pyplot as plt
import seaborn as sns

Forward propagation in the neural networks

Dot products of the weights of each node transition to the node values will give the next node value

Carried out one data point at a time, each data point is carried out till the output layer

Prediction of the data point is given at the output layer

In [2]:
# forward propagation algorithm
input_data = np.array([2,3])
weights = {"node_0": np.array([1,1]),
            "node_1": np.array([-1,1]),
            "output": np.array([2,-1])}
node_0_value = (input_data * weights["node_0"]).sum()
node_1_value = (input_data * weights["node_1"]).sum()

In [3]:
hidden_layers_values = np.array([node_0_value, node_1_value])
print(hidden_layers_values)

[5 1]


In [4]:
output = (hidden_layers_values * weights["output"]).sum()
print(output)

9


Activation Function

Functions used to consider the non-linearity of the data points 

Applied to the hidden layer node values and is used on the node inputs caculated from the input layer and the weights applied.

In [5]:
input_data = np.array([-1,2])

weights = { "node_0": np.array([3,3]),
            "node_1": np.array([1,5]),
            "output": np.array([2,-1])}

node_0_input = (input_data * weights["node_0"]).sum()
node_0_output = np.tanh(node_0_input)

node_1_input = (input_data * weights["node_1"]).sum()
node_1_output = np.tanh(node_1_input)

hidden_layers_output = np.array([node_0_output, node_1_output])
output = (hidden_layers_output * weights["output"]).sum()

print(output)

0.9901095378334199


Rectified Linear Activation Function (ReLU)

- Shown to lead to very high-performance netowrks
- Takes a single input and gives an output of 0 for negative values and 1 for positive value or zero 

In [6]:
#defining the ReLU fucntion

def relu(input):
    output = max(0,input)
    return output

In [7]:
# model application using ReLU
node_0_input = (input_data * weights["node_0"]).sum()
node_0_output = relu(node_0_input)

node_1_input = (input_data * weights["node_1"]).sum()
node_1_output = relu(node_1_input)

hidden_layer_outputs = np.array([node_0_output, node_1_output])

model_output = (hidden_layer_outputs * weights["output"]).sum()

print(model_output)

-3


In [8]:
# applying the network to multiple rows of data
def predict_with_network(input_data_row, weights):

    node_0_input = (input_data_row * weights["node_0"]).sum()
    node_0_output = relu(node_0_input)

    node_1_input = (input_data_row * weights["node_1"]).sum()
    node_1_output = relu(node_1_input)

    hidden_layer_outputs = np.array([node_0_output, node_1_output])

    input_to_final_layer = (hidden_layer_outputs * weights["output"]).sum()
    model_output = relu(input_to_final_layer)

    return (model_output)

In [9]:
results = []
for input_data_row in input_data:
    results.append(predict_with_network(input_data_row, weights))

print(results)

[0, 12]


Multi-layer neural networks

- forward propagation on multi-layer networks
- each layer has two nodes  

In [10]:
weights = { "node_0_0": np.array([3,3]),
            "node_0_1": np.array([1,5]),
            "node_1_0": np.array([-1,4]),
            "node_1_1": np.array([6,-2]),
            "output": np.array([2,-1])}

def predict_with_network(input_data, weights):

    # first hidden layer
    node_0_0_input = (input_data * weights["node_0_0"]).sum()
    node_0_0_output = relu(node_0_0_input)

    node_0_1_input = (input_data * weights["node_0_1"]).sum()
    node_0_1_ouput = relu(node_0_1_input)

    hidden_0_outputs = np.array([node_0_0_output, node_0_1_ouput])

    # second hidden layer
    node_1_0_input = (hidden_0_outputs * weights["node_1_0"]).sum()
    node_1_0_output = relu(node_1_0_input)

    node_1_1_input = (hidden_0_outputs * weights["node_1_1"]).sum()
    node_1_1_output = relu(node_1_1_input)

    hidden_1_outputs = np.array([node_1_0_output, node_1_1_output])

    model_output = (hidden_1_outputs * weights["output"]).sum()

    return (model_output)

In [11]:
print(predict_with_network(input_data, weights))

66


Optimizing weight changes

- changing the weights of the nodes will results in chnges to the loss fucntion as well as the prediction capability of the model
- accuracy can be alter with varying the weights

In [12]:
def predict_with_network_single(input_data_row, weights):

    node_0_input = (input_data_row * weights["node_0"]).sum()
    node_0_output = relu(node_0_input)

    node_1_input = (input_data_row * weights["node_1"]).sum()
    node_1_output = relu(node_1_input)

    hidden_layer_outputs = np.array([node_0_output, node_1_output])

    input_to_final_layer = (hidden_layer_outputs * weights["output"]).sum()
    model_output = relu(input_to_final_layer)

    return (model_output)

In [13]:
input_data = np.array([0,3])

weights_0 = {"node_0": [2,1],
                "node_1": [1,2],
                "output": [1,1]}

target_value = 3

model_output_0 = predict_with_network_single(input_data, weights_0)

error_0 = model_output_0 - target_value

# changing the weights to predict better solutions
weights_1 = {"node_0": [2,1],
                "node_1": [1,2],
                "output": [1,0]}

model_output_1 = predict_with_network_single(input_data, weights_1)

error_1 = model_output_1 - target_value

print(error_0)
print(error_1)

6
0


Optimizing weight changes for multi-layer networks (multiple data point)

In [14]:
from sklearn.metrics import mean_squared_error

input_data = np.array([np.array([0,3]), np.array([1,3]), np.array([3,2])])

target_actuals = np.array([7,8,5])

# Create model_output_0 
model_output_0 = []
# Create model_output_1
model_output_1 = []

# Loop over input_data
for row in input_data:
    # Append prediction to model_output_0
    model_output_0.append(predict_with_network_single(row, weights_0))
    
    # Append prediction to model_output_1
    model_output_1.append(predict_with_network_single(row, weights_1))

# Calculate the mean squared error for model_output_0: mse_0
mse_0 = mean_squared_error(target_actuals, model_output_0)

# Calculate the mean squared error for model_output_1: mse_1
mse_1 = mean_squared_error(target_actuals, model_output_1)

# Print mse_0 and mse_1
print("Mean squared error with weights_0: %f" %mse_0)
print("Mean squared error with weights_1: %f" %mse_1)

Mean squared error with weights_0: 40.000000
Mean squared error with weights_1: 11.333333


During optimization the weughts are the values that are being changed and used for getting better predictions

- Slopes of the loss func wrt to value  node we feed into
- value of the node that feeds into weight
- slope of the activation fucntion wrt value we feed into  

In [15]:
# calculate slopes and update the weights
weights = np.array([1,2])
input_data = np.array([3,4])
target = 6
learning_rate = 0.01
preds =(weights * input_data).sum()
error = preds - target
print(error)

5


In [16]:
gradient = 2 * input_data * error
weights_updated = weights - (learning_rate * gradient)
preds_updated = (weights_updated * input_data).sum()

error_updated = preds_updated - target
print(error_updated)

2.5


Back Propagation
- start at a random set of weights
- use forward propagation to make prediction
- use backward propagation to calculate the slope of the loss function wrt each weight
- multiply that slope by the learning rate and subtract from the current weightds
- keep going with that cycel until we get to flat part

Stochastic Gradient Descent : Slopes are calculated on one batch at a time
- common to calculate the slopes on a subset of data (batch)
- use a different batch of data to calculate the next update
- start over from beginning once all the data is used
- each ruun through the training data is an epoch

Creating a Keras Model
* Model building Step
    * Specify Architecture
    * Compile
    * Fit
    * Predict    

In [None]:
#import neural network models
from keras.layers import Dense
from keras.models import Sequential

iris = datasets.load_iris()

X = iris.data
y = iris.target

df = pd.DataFrame(X, columns= iris.feature_names)

n_cols = df.shape[1]

model = Sequential()
model.add(Dense(100, activation= "relu", input_shape = (n_cols,))) # number of layers from the observations in data
model.add(Dense(100, activation= "relu"))
model.add(Dense(1)) # last layer which is output layer

model.compile(optimizer= "adam", loss= "mean_squared_error")
model.fit(X, y)

Categorical data - Classification is the preferred method of analyzing data

In [None]:
print(iris.target_names)

In [None]:
iris_df = sns.load_dataset("iris")

iris_df.head()

In [None]:
from keras.utils.np_utils import to_categorical
from keras.optimizer_v1 import Adam
from keras.callbacks import EarlyStopping

early_stopping_monitor = EarlyStopping(patience = 2)

predictors = iris.data
target = to_categorical(iris.target)

n_cols = len(iris.feature_names)

model_1 = Sequential()
model_1.add(Dense(100, activation = "relu", input_shape = (n_cols,)))
model_1.add(Dense(100, activation = "relu"))
model_1.add(Dense(100, activation = "relu"))
model_1.add(Dense(3, activation = "softmax"))

model_1.compile(optimizer= "sgd", loss= "categorical_crossentropy", metrics=["accuracy"])
model_1_training = model_1.fit(predictors, target, validation_split = 0.3, epochs = 10, callbacks = [early_stopping_monitor])
model_1_training

In [None]:
model_2 = Sequential()
model_2.add(Dense(50, activation = "relu", input_shape = (n_cols,)))
model_2.add(Dense(50, activation = "relu"))
model_2.add(Dense(50, activation = "relu"))
model_2.add(Dense(3, activation = "softmax"))

model_2.compile(optimizer= "adam", loss= "categorical_crossentropy", metrics=["accuracy"])
model_2_training = model_2.fit(predictors, target, validation_split = 0.3, epochs = 10, callbacks = [early_stopping_monitor])
model_2_training

In [None]:
# Create the plot
plt.plot(model_1_training.history['val_loss'], 'r', model_2_training.history['val_loss'], 'b')
plt.xlabel('Epochs')
plt.ylabel('Validation score')
plt.show()

Testing classsification model on Titanic dataset

In [None]:
# write code and figure by self

Once a model is created
- save model
- reload model
- predict using the model

In [None]:
# from keras.models import load_model

# model.save("model_file.h5")
# my_model = load_model("model_file.h5")

# predctions = my_model.predict(data_to_predict_with)
# probability_true = predictions[:,1]
#my_model.summary()

Stochastic Gradient Descent (in practice)

In [None]:
# def get_new_model():
#     model = Sequential()
#     model.add(Dense(100, activation = "relu", input_shape = (n_cols,)))
#     model.add(Dense(100, activation = "relu"))
#     model.add(Dense(2, activation = "softmax"))

#     return(model)

# lr_to_test = [.000001, 0.01, 1]

# # loop over the learning rates specified
# for lr in lr_to_test:
#     model = get_new_model()
#     my_optimizer = SGD(lr = lr)
#     model.compile(optimizer= my_optimizer, loss = "categorical_crossentropy")
#     model.fit(predictors, target)
