In [34]:
import keras 
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split 
from keras.preprocessing import sequence
from keras.models import load_model
import numpy as np



## Forward propagation code

In [35]:
input_data = np.array([2,3]) #input 1 is 2, input 2 is 3
weights ={'node_0': np.array([1,-1]),
          'node_1': np.array([-1,1]),
          'output': np.array([2,-1])
          }
node_0_value = (input_data * weights['node_0']).sum()
node_1_value = (input_data * weights['node_1']).sum()

hiddd_layer_value = np.array([node_0_value,node_1_value])


In [36]:
hiddd_layer_value

In [37]:
output = (hiddd_layer_value*weights['output']).sum()
output


-3

## Coding the forward propagation algorithm

Each data point is a customer. The first input is how many accounts they have, and the second input is how many children they have. The model will predict how many transactions the user makes in the next year. You will use this data throughout the first 2 chapters of this course.

The input data has been pre-loaded as input_data, and the weights are available in a dictionary called weights. The array of weights for the first node in the hidden layer are in weights['node_0'], and the array of weights for the second node in the hidden layer are in weights['node_1'].

The weights feeding into the output node are available in weights['output'].

NumPy will be pre-imported for you as np in all exercises.


In [38]:
input_data = np.array([3,5])
# Calculate node 0 value: node_0_value
node_0_value = (input_data * weights['node_0']).sum()

# Calculate node 1 value: node_1_value
node_1_value = (input_data*weights['node_1']).sum()

# Put node values into array: hidden_layer_outputs
hidden_layer_outputs = np.array([node_0_value, node_1_value])

# Calculate output: output
output = (hidden_layer_outputs*weights['output']).sum()

# Print output
print(output)


-6


## The Rectified Linear Activation Function
As Dan explained to you in the video, an "activation function" is a function applied at each node. It converts the node's input into some output.

The rectified linear activation function (called ReLU) has been shown to lead to very high-performance networks. This function takes a single number as an input, returning 0 if the input is negative, and the input if the input is positive.

Here are some examples:
relu(3) = 3 
relu(-3) = 0 

In [39]:

def relu(input):
    '''Define your relu activation function here'''
    # Calculate the value for the output of the relu function: output
    output = max(0,input)
    
    # Return the value just calculated
    return(output)

# Calculate node 0 value: node_0_output
node_0_input = (input_data * weights['node_0']).sum()
node_0_output = relu(node_0_input)

# Calculate node 1 value: node_1_output
node_1_input = (input_data * weights['node_1']).sum()
node_1_output = relu(node_1_input)

# Put node values into array: hidden_layer_outputs
hidden_layer_outputs = np.array([node_0_output, node_1_output])

# Calculate model output (do not apply relu)
model_output = (hidden_layer_outputs * weights['output']).sum()

# Print model output
print(model_output)

-2


## Applying the network to many observations/rows of data
You'll now define a function called predict_with_network() which will generate predictions for multiple data observations, which are pre-loaded as input_data. As before, weights are also pre-loaded. In addition, the relu() function you defined in the previous exercise has been pre-loaded.

In [40]:
input_data = np.array([[3,5],[1,-1],[0,0],[8,4]])
input_data


array([[ 3,  5],
       [ 1, -1],
       [ 0,  0],
       [ 8,  4]])

In [41]:

# Define predict_with_network()
def predict_with_network(input_data_row, weights):

    # Calculate node 0 value
    node_0_input = (input_data_row*weights['node_0']).sum()
    node_0_output = relu(node_0_input)

    # Calculate node 1 value
    node_1_input = (input_data_row*weights['node_1']).sum()
    node_1_output = relu(node_1_input)

    # Put node values into array: hidden_layer_outputs
    hidden_layer_outputs = np.array([node_0_output, node_1_output])
    
    # Calculate model output
    input_to_final_layer = (hidden_layer_outputs*weights['output']).sum()
    model_output =relu(input_to_final_layer)
    
    # Return model output
    return(model_output)


# Create empty list to store prediction results
results = []
for input_data_row in input_data:
    # Append prediction to results
    results.append(predict_with_network(input_data_row,weights))

# Print results
print(results)
        


[0, 4, 0, 8]


## Multi-layer neural networks

Multi-layer neural networks
In this exercise, you'll write code to do forward propagation for a neural network with 2 hidden layers. Each hidden layer has two nodes. The input data has been preloaded as input_data. The nodes in the first hidden layer are called node_0_0 and node_0_1. Their weights are pre-loaded as weights['node_0_0'] and weights['node_0_1'] respectively.

The nodes in the second hidden layer are called node_1_0 and node_1_1. Their weights are pre-loaded as weights['node_1_0'] and weights['node_1_1'] respectively.

We then create a model output from the hidden nodes using weights pre-loaded as weights['output'].

In [42]:
weights ={'node_0_0': np.array([2,4]),
          'node_0_1': np.array([4,-5]),
          'node_1_0': np.array([-1,2]),
          'node_1_1': np.array([1,2]),
          'output': np.array([2,7])
          }


input_data = np.array([3,5])


In [43]:
def predict_with_network(input_data):
    # Calculate node 0 in the first hidden layer
    node_0_0_input = (input_data * weights['node_0_0']).sum()
    node_0_0_output = relu(node_0_0_input)

    # Calculate node 1 in the first hidden layer
    node_0_1_input = (input_data * weights['node_0_1']).sum()
    node_0_1_output = relu(node_0_1_input)

    # Put node values into array: hidden_0_outputs
    hidden_0_outputs = np.array([node_0_0_output, node_0_1_output])
    
    # Calculate node 0 in the second hidden layer
    node_1_0_input = (hidden_0_outputs*weights['node_1_0']).sum()
    node_1_0_output = relu(node_1_0_input)

    # Calculate node 1 in the second hidden layer
    node_1_1_input = (hidden_0_outputs*weights['node_1_1']).sum()
    node_1_1_output = relu(node_1_1_input)

    # Put node values into array: hidden_1_outputs
    hidden_1_outputs = np.array([node_1_0_output, node_1_1_output])

    # Calculate model output: model_output
    model_output = (hidden_1_outputs*weights['output']).sum()
    
    # Return model_output
    return(model_output)

output = predict_with_network(input_data)
print(output)


182


## Coding how weight changes affect accuracy
Now you'll get to change weights in a real network and see how they affect model accuracy!

Have a look at the following neural network: Ch2Ex4

Its weights have been pre-loaded as weights_0. Your task in this exercise is to update a single weight in weights_0 to create weights_1, which gives a perfect prediction (in which the predicted value is equal to target_actual: 3).

Use a pen and paper if necessary to experiment with different combinations. You'll use the predict_with_network() function, which takes an array of data as the first argument, and weights as the second argument.

In [44]:
def predict_with_network(input_data_row, weights):

    # Calculate node 0 value
    node_0_input = (input_data_row*weights['node_0']).sum()
    node_0_output = relu(node_0_input)

    # Calculate node 1 value
    node_1_input = (input_data_row*weights['node_1']).sum()
    node_1_output = relu(node_1_input)

    # Put node values into array: hidden_layer_outputs
    hidden_layer_outputs = np.array([node_0_output, node_1_output])
    
    # Calculate model output
    input_to_final_layer = (hidden_layer_outputs*weights['output']).sum()
    model_output =relu(input_to_final_layer)
    
    # Return model output
    return(model_output)

In [45]:
# The data point you will make a prediction for
input_data = np.array([0, 3])

# Sample weights
weights_0 = {'node_0': [2, 1],
             'node_1': [1, 2],
             'output': [1, 1]
            }

# The actual target value, used to calculate the error
target_actual = 3

# Make prediction using original weights
model_output_0 = predict_with_network(input_data, weights_0)

# Calculate error: error_0
error_0 = model_output_0 - target_actual

# Create weights that cause the network to make perfect prediction (3): weights_1
weights_1 = {'node_0': [2, 1],
             'node_1': [1, 2],
             'output': [1, 0]
            }

# Make prediction using new weights: model_output_1
#
model_output_1 = predict_with_network(input_data, weights_1)

# Calculate error: error_1
error_1 = model_output_1 - target_actual

# Print error_0 and error_1
print(error_0)
print(error_1)


## Scaling up to multiple data points

You've seen how different weights will have different accuracies on a single prediction. But usually, you'll want to measure model accuracy on many points. You'll now write code to compare model accuracies for two different sets of weights, which have been stored as weights_0 and weights_1.

input_data is a list of arrays. Each item in that list contains the data to make a single prediction. target_actuals is a list of numbers. Each item in that list is the actual value we are trying to predict.

In this exercise, you'll use the mean_squared_error() function from sklearn.metrics. It takes the true values and the predicted values as arguments.

You'll also use the preloaded predict_with_network() function, which takes an array of data as the first argument, and weights as the second argument.

In [46]:
# The data point you will make a prediction for
input_data = np.array(([0, 3],[1,2],[-1,-2],[4,0]))

In [47]:
weights_0 = {'node_0': [2, 1],
             'node_1': [1, 2],
             'output': [1, 1]
            }

weights_1 = {'node_0': [2, 1],
             'node_1': [1, 1.5],
             'output': [1, 1.5]
            }
#target_actuals = np.array([1,3,5,7])
target_actuals = ([1,3,5,7])



In [48]:
from sklearn.metrics import mean_squared_error

# Create model_output_0 
model_output_0 = []
# Create model_output_0
model_output_1 = []

# Loop over input_data
for row in input_data:
    # Append prediction to model_output_0
    model_output_0.append(predict_with_network(row,weights_0))
    
    # Append prediction to model_output_1
    model_output_1.append(predict_with_network(row,weights_1))

# Calculate the mean squared error for model_output_0: mse_0
mse_0 = mean_squared_error(target_actuals,model_output_0)

# Calculate the mean squared error for model_output_1: mse_1
mse_1 = mean_squared_error(target_actuals,model_output_1)

# Print mse_0 and mse_1
print("Mean squared error with weights_0: %f" %mse_0)
print("Mean squared error with weights_1: %f" %mse_1)


Mean squared error with weights_0: 37.500000
Mean squared error with weights_1: 49.890625


## Calculating slopes
You're now going to practice calculating slopes. When plotting the mean-squared error loss function against predictions, the slope is 2 * x * (y-xb), or 2 * input_data * error. Note that x and b may have multiple numbers (x is a vector for each data point, and b is a vector). In this case, the output will also be a vector, which is exactly what you want.

You're ready to write the code to calculate this slope while using a single data point. You'll use pre-defined weights called weights as well as data for a single point called input_data. The actual value of the target you want to predict is stored in target.

In [49]:
# The data point you will make a prediction for
input_data = np.array([1,2,3])

weights = np.array([0,2,1])
target = 0

In [50]:
# Calculate the predictions: preds
preds = (weights * input_data).sum()

# Calculate the error: error
error = preds- target

# Calculate the slope: slope
slope = 2 * input_data * error

# Print the slope
print(slope)

[14 28 42]


## Improving model weights

Hurray! You've just calculated the slopes you need. Now it's time to use those slopes to improve your model. If you add the slopes to your weights, you will move in the right direction. However, it's possible to move too far in that direction. So you will want to take a small step in that direction first, using a lower learning rate, and verify that the model is improving.

The weights have been pre-loaded as weights, the actual value of the target as target, and the input data as input_data. The predictions from the initial weights are stored as preds.

In [51]:
# Set the learning rate: learning_rate
learning_rate = 0.01

# Calculate the predictions: preds
preds = (weights * input_data).sum()

# Calculate the error: error
error = preds - target

# Calculate the slope: slope
slope = 2 * input_data * error

# Update the weights: weights_updated
weights_updated = weights - learning_rate * slope

# Get updated predictions: preds_updated
preds_updated = (weights_updated * input_data).sum()

# Calculate updated error: error_updated
error_updated = preds_updated - target

# Print the original error
print(error)

# Print the updated error
print(error_updated)


7
5.04


## Making multiple updates to weights
You're now going to make multiple updates so you can dramatically improve your model weights, and see how the predictions improve with each update.

To keep your code clean, there is a pre-loaded get_slope() function that takes input_data, target, and weights as arguments. There is also a get_mse() function that takes the same arguments. The input_data, target, and weights have been pre-loaded.

This network does not have any hidden layers, and it goes directly from the input (with 3 nodes) to an output node. Note that weights is a single array.

We have also pre-loaded matplotlib.pyplot, and the error history will be plotted after you have done your gradient descent steps.

In [52]:
n_updates = 20
mse_hist = []

# Iterate over the number of updates
for i in range(n_updates):
    # Calculate the slope: slope
    slope = get_slope(input_data,target,weights)
    
    # Update the weights: weights
    weights = weights - 0.01 * slope
    
    # Calculate mse with new weights: mse
    mse = get_mse(input_data,target,weights)
    
    # Append the mse to mse_hist
    mse_hist.append(mse)

# Plot the mse history
plt.plot(mse_hist)
plt.xlabel('Iterations')
plt.ylabel('Mean Squared Error')
plt.show()

NameError: name 'get_slope' is not defined

# Keras steps
1. Specific Architechure
2. Compiler 
  - Specific optimizer , control learning rate (Adam, usually a good one, 
  - loss function , mean_squre error for regression)
3. Fit
4. Predict


# Specifying a model

Specifying a model
Now you'll get to work with your first model in Keras, and will immediately be able to run more complex neural network models on larger datasets compared to the first two chapters.

To start, you'll take the skeleton of a neural network and add a hidden layer and an output layer. You'll then fit that model and see Keras do the optimization so your model continually gets better.

As a start, you'll predict workers wages based on characteristics like their industry, education and level of experience. You can find the dataset in a pandas dataframe called df. For convenience, everything in df except for the target has been converted to a NumPy matrix called predictors. The target, wage_per_hour, is available as a NumPy matrix called target.

For all exercises in this chapter, we've imported the Sequential model constructor, the Dense layer constructor, and pandas.

In [53]:
# Import necessary modules
import keras
from keras.layers import Dense
from keras.models import Sequential
import pandas as pd

predictors = pd.read_csv('K:\TensorflowPY36CPU\TensorflowPY36CPU\_2_Deeplearning\hourly_wages.csv',delimiter=',')
target = np.array(predictors.wage_per_hour)


In [54]:
target



array([  5.1 ,   4.95,   6.67,   4.  ,   7.5 ,  13.07,   4.45,  19.47,
        13.28,   8.75,  11.35,  11.5 ,   6.5 ,   6.25,  19.98,   7.3 ,
         8.  ,  22.2 ,   3.65,  20.55,   5.71,   7.  ,   3.75,   4.5 ,
         9.56,   5.75,   9.36,   6.5 ,   3.35,   4.75,   8.9 ,   4.  ,
         4.7 ,   5.  ,   9.25,  10.67,   7.61,  10.  ,   7.5 ,  12.2 ,
         3.35,  11.  ,  12.  ,   4.85,   4.3 ,   6.  ,  15.  ,   4.85,
         9.  ,   6.36,   9.15,  11.  ,   4.5 ,   4.8 ,   4.  ,   5.5 ,
         8.4 ,   6.75,  10.  ,   5.  ,   6.5 ,  10.75,   7.  ,  11.43,
         4.  ,   9.  ,  13.  ,  12.22,   6.28,   6.75,   3.35,  16.  ,
         5.25,   3.5 ,   4.22,   3.  ,   4.  ,  10.  ,   5.  ,  16.  ,
        13.98,  13.26,   6.1 ,   3.75,   9.  ,   9.45,   5.5 ,   8.93,
         6.25,   9.75,   6.73,   7.78,   2.85,   3.35,  19.98,   8.5 ,
         9.75,  15.  ,   8.  ,  11.25,  14.  ,  10.  ,   6.5 ,   9.83,
        18.5 ,  12.5 ,  26.  ,  14.  ,  10.5 ,  11.  ,  12.47,  12.5 ,
      

In [55]:
predictors.head(10)



Unnamed: 0,wage_per_hour,union,education_yrs,experience_yrs,age,female,marr,south,manufacturing,construction
0,5.1,0,8,21,35,1,1,0,1,0
1,4.95,0,9,42,57,1,1,0,1,0
2,6.67,0,12,1,19,0,0,0,1,0
3,4.0,0,12,4,22,0,0,0,0,0
4,7.5,0,12,17,35,0,1,0,0,0
5,13.07,1,13,9,28,0,0,0,0,0
6,4.45,0,10,27,43,0,0,1,0,0
7,19.47,0,12,9,27,0,0,0,0,0
8,13.28,0,16,11,33,0,1,0,1,0
9,8.75,0,12,9,27,0,0,0,0,0


In [56]:
n_cols = predictors.shape[1]
n_cols


10

In [57]:

# Save the number of columns in predictors: n_cols
n_cols = predictors.shape[1]

# Set up the model: model
model = Sequential()

# Add the first layer
model.add(Dense(50, activation='relu', input_shape =(n_cols,))) # (n_cols,) tuple

# Add the second layer
model.add(Dense(32, activation='relu'))

# Add the output layer
model.add(Dense(1))


In [58]:
# Compile the model
model.compile(optimizer='adam',loss='mean_squared_error')

# Verify that model contains information from compiling
print("Loss function: " + model.loss)

Loss function: mean_squared_error


In [59]:
type(target)
target[:10]

array([  5.1 ,   4.95,   6.67,   4.  ,   7.5 ,  13.07,   4.45,  19.47,
        13.28,   8.75])

In [60]:
# Fit the model
model.fit(predictors,target)


Epoch 1/1


 32/534 [>.............................] - ETA: 16s - loss: 102.3850



<keras.callbacks.History at 0x1d015d5cf98>

## Last steps in classification models
You'll now create a classification model using the titanic dataset, which has been pre-loaded into a DataFrame called df. You'll take information about the passengers and predict which ones survived.

The predictive variables are stored in a NumPy array predictors. The target to predict is in df.survived, though you'll have to manipulate it for keras. The number of predictive features is stored in n_cols.

Here, you'll use the 'sgd' optimizer, which stands for Stochastic Gradient Descent. You'll learn more about this in the next chapter!

In [61]:
# Import necessary modules
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical

In [62]:
df = pd.read_csv('K:\\TensorflowPY36CPU\\TensorflowPY36CPU\\_2_Deeplearning\\titanic.csv',delimiter=',')

n_cols = df.shape[1]


In [63]:
print(df.head())
print(df.describe())


   survived  pclass   age  sibsp  parch     fare  male  age_was_missing  \
0         0       3  22.0      1      0   7.2500     1            False   
1         1       1  38.0      1      0  71.2833     0            False   
2         1       3  26.0      0      0   7.9250     0            False   
3         1       1  35.0      1      0  53.1000     0            False   
4         0       3  35.0      0      0   8.0500     1            False   

   embarked_from_cherbourg  embarked_from_queenstown  \
0                        0                         0   
1                        1                         0   
2                        0                         0   
3                        0                         0   
4                        0                         0   

   embarked_from_southampton  
0                          1  
1                          0  
2                          1  
3                          1  
4                          1  


         survived      pclass         age       sibsp       parch        fare  \
count  891.000000  891.000000  891.000000  891.000000  891.000000  891.000000   
mean     0.383838    2.308642   29.699118    0.523008    0.381594   32.204208   
std      0.486592    0.836071   13.002015    1.102743    0.806057   49.693429   
min      0.000000    1.000000    0.420000    0.000000    0.000000    0.000000   
25%      0.000000    2.000000   22.000000    0.000000    0.000000    7.910400   
50%      0.000000    3.000000   29.699118    0.000000    0.000000   14.454200   
75%      1.000000    3.000000   35.000000    1.000000    0.000000   31.000000   
max      1.000000    3.000000   80.000000    8.000000    6.000000  512.329200   

             male  embarked_from_cherbourg  embarked_from_queenstown  \
count  891.000000               891.000000                891.000000   
mean     0.647587                 0.188552                  0.086420   
std      0.477990                 0.391372            

## Last steps in classification models
You'll now create a classification model using the titanic dataset, which has been pre-loaded into a DataFrame called df. You'll take information about the passengers and predict which ones survived.

The predictive variables are stored in a NumPy array predictors. The target to predict is in df.survived, though you'll have to manipulate it for keras. The number of predictive features is stored in n_cols.

Here, you'll use the 'sgd' optimizer, which stands for Stochastic Gradient Descent. You'll learn more about this in the next chapter!

In [79]:
#init
# Import necessary modules
import keras
from keras.layers import Dense
from keras.models import Sequential , load_model
from keras.utils import to_categorical
import pandas as pd
df = pd.read_csv('K:\\TensorflowPY36CPU\\TensorflowPY36CPU\\_2_Deeplearning\\titanic.csv',delimiter=',')
predictors = df.drop(['survived'],axis=1).as_matrix()




In [80]:
df.shape


(891, 11)

In [81]:
predictors.shape



(891, 10)

In [82]:


# Convert the target to categorical: target
target = to_categorical(df.survived)
target.shape


(891, 2)

In [83]:
print(df.head())



   survived  pclass   age  sibsp  parch     fare  male  age_was_missing  \
0         0       3  22.0      1      0   7.2500     1            False   
1         1       1  38.0      1      0  71.2833     0            False   
2         1       3  26.0      0      0   7.9250     0            False   
3         1       1  35.0      1      0  53.1000     0            False   
4         0       3  35.0      0      0   8.0500     1            False   

   embarked_from_cherbourg  embarked_from_queenstown  \
0                        0                         0   
1                        1                         0   
2                        0                         0   
3                        0                         0   
4                        0                         0   

   embarked_from_southampton  
0                          1  
1                          0  
2                          1  
3                          1  
4                          1  


In [84]:
n_cols = 10  # The number of predictive features is stored in n_cols.
n_cols


10

In [85]:




# Set up the model
model = Sequential()

# Add the first layer
#Add a Dense layer with 32 nodes. Use 'relu' as the activation and (n_cols,) as the input_shape.
model.add(Dense(32,activation='relu',input_shape=(n_cols,))) 


# Add the output layer
#Add the Dense output layer. Because there are two outcomes, it should have 2 units, and because it is a classification model, the activation should be 'softmax'.
model.add(Dense(2,activation='softmax'))

# Compile the model
# Compile the model, using 'sgd' as the optimizer, 'categorical_crossentropy' as the loss function, and metrics=['accuracy'] to see the accuracy (what fraction of predictions were correct) at the end of each epoch.
model.compile(optimizer='sgd',loss='categorical_crossentropy',metrics=['accuracy'])

# Fit the model
model.fit(predictors,target)


Epoch 1/1


 32/891 [>.............................] - ETA: 14s - loss: 10.1179 - acc: 0.3125



<keras.callbacks.History at 0x1d017783470>

## Making predictions

The trained network from your previous coding exercise is now stored as model. New data to make predictions is stored in a NumPy array as pred_data. Use model to make predictions on your new data.

In this exercise, your predictions will be probabilities, which is the most common way for data scientists to communicate their predictions to colleagues.


In [95]:
model.save('K:\TensorflowPY36CPU\TensorflowPY36CPU\_2_Deeplearning\Titanic.h5')
titan_model=load_model('K:\TensorflowPY36CPU\TensorflowPY36CPU\_2_Deeplearning\Titanic.h5')

pred_data= np.array(
    [[2, 34.0, 0, 0, 13.0, 1, False, 0, 0, 1],
       [2, 31.0, 1, 1, 26.25, 0, False, 0, 0, 1],
       [1, 11.0, 1, 2, 120.0, 1, False, 0, 0, 1],
       [3, 0.42, 0, 1, 8.5167, 1, False, 1, 0, 0],
       [3, 27.0, 0, 0, 6.975, 1, False, 0, 0, 1],
       [3, 31.0, 0, 0, 7.775, 1, False, 0, 0, 1],
       [1, 39.0, 0, 0, 0.0, 1, False, 0, 0, 1],
       [3, 18.0, 0, 0, 7.775, 0, False, 0, 0, 1],
       [2, 39.0, 0, 0, 13.0, 1, False, 0, 0, 1],
       [1, 33.0, 1, 0, 53.1, 0, False, 0, 0, 1],
       [3, 26.0, 0, 0, 7.8875, 1, False, 0, 0, 1],
       [3, 39.0, 0, 0, 24.15, 1, False, 0, 0, 1],
       [2, 35.0, 0, 0, 10.5, 1, False, 0, 0, 1],
       [3, 6.0, 4, 2, 31.275, 0, False, 0, 0, 1],
       [3, 30.5, 0, 0, 8.05, 1, False, 0, 0, 1],
       [1, 29.69911764705882, 0, 0, 0.0, 1, True, 0, 0, 1],
       [3, 23.0, 0, 0, 7.925, 0, False, 0, 0, 1],
       [2, 31.0, 1, 1, 37.0042, 1, False, 1, 0, 0],
       [3, 43.0, 0, 0, 6.45, 1, False, 0, 0, 1],
       [3, 10.0, 3, 2, 27.9, 1, False, 0, 0, 1],
       [1, 52.0, 1, 1, 93.5, 0, False, 0, 0, 1],
       [3, 27.0, 0, 0, 8.6625, 1, False, 0, 0, 1],
       [1, 38.0, 0, 0, 0.0, 1, False, 0, 0, 1],
       [3, 27.0, 0, 1, 12.475, 0, False, 0, 0, 1],
       [3, 2.0, 4, 1, 39.6875, 1, False, 0, 0, 1],
       [3, 29.69911764705882, 0, 0, 6.95, 1, True, 0, 1, 0],
       [3, 29.69911764705882, 0, 0, 56.4958, 1, True, 0, 0, 1],
       [2, 1.0, 0, 2, 37.0042, 1, False, 1, 0, 0],
       [3, 29.69911764705882, 0, 0, 7.75, 1, True, 0, 1, 0],
       [1, 62.0, 0, 0, 80.0, 0, False, 0, 0, 0],
       [3, 15.0, 1, 0, 14.4542, 0, False, 1, 0, 0],
       [2, 0.83, 1, 1, 18.75, 1, False, 0, 0, 1],
       [3, 29.69911764705882, 0, 0, 7.2292, 1, True, 1, 0, 0],
       [3, 23.0, 0, 0, 7.8542, 1, False, 0, 0, 1],
       [3, 18.0, 0, 0, 8.3, 1, False, 0, 0, 1],
       [1, 39.0, 1, 1, 83.1583, 0, False, 1, 0, 0],
       [3, 21.0, 0, 0, 8.6625, 1, False, 0, 0, 1],
       [3, 29.69911764705882, 0, 0, 8.05, 1, True, 0, 0, 1],
       [3, 32.0, 0, 0, 56.4958, 1, False, 0, 0, 1],
       [1, 29.69911764705882, 0, 0, 29.7, 1, True, 1, 0, 0],
       [3, 20.0, 0, 0, 7.925, 1, False, 0, 0, 1],
       [2, 16.0, 0, 0, 10.5, 1, False, 0, 0, 1],
       [1, 30.0, 0, 0, 31.0, 0, False, 1, 0, 0],
       [3, 34.5, 0, 0, 6.4375, 1, False, 1, 0, 0],
       [3, 17.0, 0, 0, 8.6625, 1, False, 0, 0, 1],
       [3, 42.0, 0, 0, 7.55, 1, False, 0, 0, 1],
       [3, 29.69911764705882, 8, 2, 69.55, 1, True, 0, 0, 1],
       [3, 35.0, 0, 0, 7.8958, 1, False, 1, 0, 0],
       [2, 28.0, 0, 1, 33.0, 1, False, 0, 0, 1],
       [1, 29.69911764705882, 1, 0, 89.1042, 0, True, 1, 0, 0],
       [3, 4.0, 4, 2, 31.275, 1, False, 0, 0, 1],
       [3, 74.0, 0, 0, 7.775, 1, False, 0, 0, 1],
       [3, 9.0, 1, 1, 15.2458, 0, False, 1, 0, 0],
       [1, 16.0, 0, 1, 39.4, 0, False, 0, 0, 1],
       [2, 44.0, 1, 0, 26.0, 0, False, 0, 0, 1],
       [3, 18.0, 0, 1, 9.35, 0, False, 0, 0, 1],
       [1, 45.0, 1, 1, 164.8667, 0, False, 0, 0, 1],
       [1, 51.0, 0, 0, 26.55, 1, False, 0, 0, 1],
       [3, 24.0, 0, 3, 19.2583, 0, False, 1, 0, 0],
       [3, 29.69911764705882, 0, 0, 7.2292, 1, True, 1, 0, 0],
       [3, 41.0, 2, 0, 14.1083, 1, False, 0, 0, 1],
       [2, 21.0, 1, 0, 11.5, 1, False, 0, 0, 1],
       [1, 48.0, 0, 0, 25.9292, 0, False, 0, 0, 1],
       [3, 29.69911764705882, 8, 2, 69.55, 0, True, 0, 0, 1],
       [2, 24.0, 0, 0, 13.0, 1, False, 0, 0, 1],
       [2, 42.0, 0, 0, 13.0, 0, False, 0, 0, 1],
       [2, 27.0, 1, 0, 13.8583, 0, False, 1, 0, 0],
       [1, 31.0, 0, 0, 50.4958, 1, False, 0, 0, 1],
       [3, 29.69911764705882, 0, 0, 9.5, 1, True, 0, 0, 1],
       [3, 4.0, 1, 1, 11.1333, 1, False, 0, 0, 1],
       [3, 26.0, 0, 0, 7.8958, 1, False, 0, 0, 1],
       [1, 47.0, 1, 1, 52.5542, 0, False, 0, 0, 1],
       [1, 33.0, 0, 0, 5.0, 1, False, 0, 0, 1],
       [3, 47.0, 0, 0, 9.0, 1, False, 0, 0, 1],
       [2, 28.0, 1, 0, 24.0, 0, False, 1, 0, 0],
       [3, 15.0, 0, 0, 7.225, 0, False, 1, 0, 0],
       [3, 20.0, 0, 0, 9.8458, 1, False, 0, 0, 1],
       [3, 19.0, 0, 0, 7.8958, 1, False, 0, 0, 1],
       [3, 29.69911764705882, 0, 0, 7.8958, 1, True, 0, 0, 1],
       [1, 56.0, 0, 1, 83.1583, 0, False, 1, 0, 0],
       [2, 25.0, 0, 1, 26.0, 0, False, 0, 0, 1],
       [3, 33.0, 0, 0, 7.8958, 1, False, 0, 0, 1],
       [3, 22.0, 0, 0, 10.5167, 0, False, 0, 0, 1],
       [2, 28.0, 0, 0, 10.5, 1, False, 0, 0, 1],
       [3, 25.0, 0, 0, 7.05, 1, False, 0, 0, 1],
       [3, 39.0, 0, 5, 29.125, 0, False, 0, 1, 0],
       [2, 27.0, 0, 0, 13.0, 1, False, 0, 0, 1],
       [1, 19.0, 0, 0, 30.0, 0, False, 0, 0, 1],
       [3, 29.69911764705882, 1, 2, 23.45, 0, True, 0, 0, 1],
       [1, 26.0, 0, 0, 30.0, 1, False, 1, 0, 0],
       [3, 32.0, 0, 0, 7.75, 1, False, 0, 1, 0]]
)


[ 0.09198101  0.32260731  0.99931157  0.58266932  0.06006553  0.04318747
  0.0833628   0.22405207  0.05056877  0.82905     0.08078083  0.16054912
  0.05026579  0.83664685  0.04849864  0.11155797  0.13542552  0.50397301
  0.01363104  0.78146994  0.96424729  0.08325163  0.08717314  0.17678529
  0.92668939  0.06395701  0.86047322  0.90594512  0.07468493  0.68021309
  0.38834885  0.76520926  0.0591516   0.11374842  0.21135701  0.9666732
  0.16416609  0.05986293  0.84390837  0.39609674  0.16076311  0.32493207
  0.39476705  0.03040359  0.24784547  0.01471492  0.95824993  0.0331192
  0.45317701  0.98325771  0.87209624  0.00177931  0.54455024  0.84237975
  0.13404256  0.25902024  0.99975294  0.09114476  0.344767    0.0591516
  0.04234095  0.23913395  0.11267009  0.9552933   0.22706304  0.04173505
  0.18199866  0.82431698  0.07929122  0.57983536  0.0809093   0.41543192
  0.04962541  0.01075136  0.33294332  0.27046025  0.22239631  0.17798303
  0.05808108  0.89761537  0.38244522  0.03455493  0.22

In [92]:
print(pred_data.shape)
type(pred_data)


[ 0.09198101  0.32260731  0.99931157  0.58266932  0.06006553  0.04318747
  0.0833628   0.22405207  0.05056877  0.82905     0.08078083  0.16054912
  0.05026579  0.83664685  0.04849864  0.11155797  0.13542552  0.50397301
  0.01363104  0.78146994  0.96424729  0.08325163  0.08717314  0.17678529
  0.92668939  0.06395701  0.86047322  0.90594512  0.07468493  0.68021309
  0.38834885  0.76520926  0.0591516   0.11374842  0.21135701  0.9666732
  0.16416609  0.05986293  0.84390837  0.39609674  0.16076311  0.32493207
  0.39476705  0.03040359  0.24784547  0.01471492  0.95824993  0.0331192
  0.45317701  0.98325771  0.87209624  0.00177931  0.54455024  0.84237975
  0.13404256  0.25902024  0.99975294  0.09114476  0.344767    0.0591516
  0.04234095  0.23913395  0.11267009  0.9552933   0.22706304  0.04173505
  0.18199866  0.82431698  0.07929122  0.57983536  0.0809093   0.41543192
  0.04962541  0.01075136  0.33294332  0.27046025  0.22239631  0.17798303
  0.05808108  0.89761537  0.38244522  0.03455493  0.22

In [97]:

# Calculate predictions: predictions
predictions = model.predict(pred_data)

# Calculate predicted probability of survival: predicted_prob_true
predicted_prob_true = predictions[:,1]

# print predicted_prob_true
print(predicted_prob_true)


[ 0.09198101  0.32260731  0.99931157  0.58266932  0.06006553  0.04318747
  0.0833628   0.22405207  0.05056877  0.82905     0.08078083  0.16054912
  0.05026579  0.83664685  0.04849864  0.11155797  0.13542552  0.50397301
  0.01363104  0.78146994  0.96424729  0.08325163  0.08717314  0.17678529
  0.92668939  0.06395701  0.86047322  0.90594512  0.07468493  0.68021309
  0.38834885  0.76520926  0.0591516   0.11374842  0.21135701  0.9666732
  0.16416609  0.05986293  0.84390837  0.39609674  0.16076311  0.32493207
  0.39476705  0.03040359  0.24784547  0.01471492  0.95824993  0.0331192
  0.45317701  0.98325771  0.87209624  0.00177931  0.54455024  0.84237975
  0.13404256  0.25902024  0.99975294  0.09114476  0.344767    0.0591516
  0.04234095  0.23913395  0.11267009  0.9552933   0.22706304  0.04173505
  0.18199866  0.82431698  0.07929122  0.57983536  0.0809093   0.41543192
  0.04962541  0.01075136  0.33294332  0.27046025  0.22239631  0.17798303
  0.05808108  0.89761537  0.38244522  0.03455493  0.22

In [98]:
predictions.shape


(91, 2)

In [100]:
predictions


array([[  9.08018947e-01,   9.19810086e-02],
       [  6.77392662e-01,   3.22607309e-01],
       [  6.88473228e-04,   9.99311566e-01],
       [  4.17330682e-01,   5.82669318e-01],
       [  9.39934492e-01,   6.00655265e-02],
       [  9.56812561e-01,   4.31874730e-02],
       [  9.16637182e-01,   8.33628029e-02],
       [  7.75947928e-01,   2.24052072e-01],
       [  9.49431181e-01,   5.05687743e-02],
       [  1.70949996e-01,   8.29050004e-01],
       [  9.19219136e-01,   8.07808265e-02],
       [  8.39450955e-01,   1.60549119e-01],
       [  9.49734211e-01,   5.02657853e-02],
       [  1.63353115e-01,   8.36646855e-01],
       [  9.51501310e-01,   4.84986417e-02],
       [  8.88442099e-01,   1.11557968e-01],
       [  8.64574432e-01,   1.35425523e-01],
       [  4.96026993e-01,   5.03973007e-01],
       [  9.86368895e-01,   1.36310374e-02],
       [  2.18530029e-01,   7.81469941e-01],
       [  3.57527062e-02,   9.64247286e-01],
       [  9.16748405e-01,   8.32516328e-02],
       [  

## Changing optimization parameters

It's time to get your hands dirty with optimization. You'll now try optimizing a model at a very low learning rate, a very high learning rate, and a "just right" learning rate. You'll want to look at the results after running this exercise, remembering that a low value for the loss function is good.

For these exercises, we've pre-loaded the predictors and target values from your previous classification models (predicting who would survive on the Titanic). You'll want the optimization to start from scratch every time you change the learning rate, to give a fair comparison of how each learning rate did in your results. So we have created a function get_new_model() that creates an unoptimized model to optimize.

In [103]:
# Import the SGD optimizer
from keras.optimizers import SGD
# Create list of learning rates: lr_to_test
lr_to_test = [.000001, 0.01, 1]

# Loop over learning rates
for lr in lr_to_test:
    print('\n\nTesting model with learning rate: %f\n'%lr )
    
    # Build new model to test, unaffected by previous models
    model = get_new_model()
    
    # Create SGD optimizer with specified learning rate: my_optimizer
    my_optimizer = SGD(lr=lr)
    
    # Compile the model
    model.compile(optimizer=my_optimizer,loss='categorical_crossentropy')
    
    # Fit the model
    model.fit(predictors,target)
    



Testing model with learning rate: 0.000001



NameError: name 'get_new_model' is not defined

## Evaluating model accuracy on validation dataset
Now it's your turn to monitor model accuracy with a validation data set. A model definition has been provided as model. Your job is to add the code to compile it and then fit it. You'll check the validation score in each epoch.



In [106]:
# Save the number of columns in predictors: n_cols
n_cols = predictors.shape[1]
input_shape = (n_cols,)

# Specify the model
model = Sequential()
model.add(Dense(100, activation='relu', input_shape = input_shape))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit the model
hist = model.fit(predictors,target,validation_split=0.3)


Train on 623 samples, validate on 268 samples
Epoch 1/1


 32/623 [>.............................] - ETA: 13s - loss: 3.4766 - acc: 0.4688



# Early stopping: Optimizing the optimization

Now that you know how to monitor your model performance throughout optimization, you can use early stopping to stop optimization when it isn't helping any more. Since the optimization stops automatically when it isn't helping, you can also set a high value for epochs in your call to .fit(), as Dan showed in the video.

The model you'll optimize has been specified as model. As before, the data is pre-loaded as predictors and target.



In [110]:
# Import EarlyStopping
from keras.callbacks import EarlyStopping

# Save the number of columns in predictors: n_cols
n_cols = predictors.shape[1]
input_shape = (n_cols,)

# Specify the model
model = Sequential()
model.add(Dense(100, activation='relu', input_shape = input_shape))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))

# Compile the model
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

# Define early_stopping_monitor
early_stopping_monitor = EarlyStopping(patience=2)


# Fit the model
#Fit the model using the predictors and target. Specify the number of epochs to be 30 and use a validation split of 0.3. In addition, pass [early_stopping_monitor] to the callbacks parameter.
model.fit(predictors,target,epochs=30,validation_split=0.3,callbacks=[early_stopping_monitor])



Train on 623 samples, validate on 268 samples
Epoch 1/30


 32/623 [>.............................] - ETA: 22s - loss: 2.2248 - acc: 0.6562



Epoch 2/30
 32/623 [>.............................] - ETA: 0s - loss: 0.9497 - acc: 0.5312



Epoch 3/30
 32/623 [>.............................] - ETA: 0s - loss: 0.6323 - acc: 0.6562



Epoch 4/30
 32/623 [>.............................] - ETA: 0s - loss: 0.7332 - acc: 0.6562



Epoch 5/30
 32/623 [>.............................] - ETA: 0s - loss: 0.7275 - acc: 0.5312



Epoch 6/30
 32/623 [>.............................] - ETA: 0s - loss: 0.5011 - acc: 0.7500



Epoch 7/30
 32/623 [>.............................] - ETA: 0s - loss: 0.5447 - acc: 0.6875



<keras.callbacks.History at 0x1d01d71a080>

## Experimenting with wider networks

Now you know everything you need to begin experimenting with different models!

A model called model_1 has been pre-loaded. You can see a summary of this model printed in the IPython Shell. This is a relatively small network, with only 10 units in each hidden layer.

In this exercise you'll create a new model called model_2 which is similar to model_1, except it has 100 units in each hidden layer.

After you create model_2, both models will be fitted, and a graph showing both models loss score at each epoch will be shown. We added the argument verbose=False in the fitting commands to print out fewer updates, since you will look at these graphically instead of as text.

Because you are fitting two models, it will take a moment to see the outputs after you hit run, so be patient.

In [113]:
import matplotlib.pyplot as plt
# Define early_stopping_monitor
early_stopping_monitor = EarlyStopping(patience=2)

# Create the new model: model_2
#Create model_2 to replicate model_1, but use 100 nodes instead of 10 for the first two Dense layers you add with the 'relu' activation. Use 2 nodes for the Dense output layer with 'softmax' as the activation.

model_1 = Sequential()

# Add the first and second layers
model_1.add(Dense(10,activation='relu', input_shape=input_shape))
model_1.add(Dense(10,activation='relu'))

# Add the output layer
model_1.add(Dense(2,activation='softmax'))

# Compile model_2
model_1.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

model_2 = Sequential()

# Add the first and second layers
model_2.add(Dense(100,activation='relu', input_shape=input_shape))
model_2.add(Dense(100,activation='relu'))

# Add the output layer
model_2.add(Dense(2,activation='softmax'))

# Compile model_2
model_2.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

# Fit model_1
model_1_training = model_1.fit(predictors, target, epochs=15, validation_split=0.2, callbacks=[early_stopping_monitor], verbose=False)

# Fit model_2
model_2_training = model_2.fit(predictors, target, epochs=15, validation_split=0.2, callbacks=[early_stopping_monitor], verbose=False)


# Create the plot
plt.plot(model_1_training.history['val_loss'], 'r', model_2_training.history['val_loss'], 'b')
plt.xlabel('Epochs')
plt.ylabel('Validation score')
plt.show()


## Adding layers to a network

You've seen how to experiment with wider networks. In this exercise, you'll try a deeper network (more hidden layers).

Once again, you have a baseline model called model_1 as a starting point. It has 1 hidden layer, with 50 units. You can see a summary of that model's structure printed out. You will create a similar network with 3 hidden layers (still keeping 50 units in each layer).

This will again take a moment to fit both models, so you'll need to wait a few seconds to see the results after you run your code.

In [115]:
# The input shape to use in the first hidden layer
input_shape = (n_cols,)

# Create the new model: model_2
model_2 = Sequential()

# Add the first, second, and third hidden layers
# #Specify a model called model_2 that is like model_1, but which has 3 hidden layers of 50 units instead of only 1 hidden layer.
# Use input_shape to specify the input shape in the first hidden layer.
# Use 'relu' activation for the 3 hidden layers and 'softmax' for the output layer, which should have 2 units.
model_2.add(Dense(50,activation='relu',input_shape=input_shape))
model_2.add(Dense(50,activation='relu'))
model_2.add(Dense(50,activation='relu'))


# Add the output layer
model_2.add(Dense(2,activation='softmax'))

# Compile model_2
#Compile model_2 as you have done with previous models: Using 'adam' as the optimizer, 'categorical_crossentropy' for the loss, and metrics=['accuracy'].
model_2.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

# Fit model 1
model_1_training = model_1.fit(predictors, target, epochs=20, validation_split=0.4, callbacks=[early_stopping_monitor], verbose=False)

# Fit model 2
model_2_training = model_2.fit(predictors, target, epochs=20, validation_split=0.4, callbacks=[early_stopping_monitor], verbose=False)

# Create the plot
plt.plot(model_1_training.history['val_loss'], 'r', model_2_training.history['val_loss'], 'b')
plt.xlabel('Epochs')
plt.ylabel('Validation score')
plt.show()


## Building your own digit recognition model
You've reached the final exercise of the course - you now know everything you need to build an accurate model to recognize handwritten digits!

We've already done the basic manipulation of the MNIST dataset shown in the video, so you have X and y loaded and ready to model with. Sequential and Dense from keras are also pre-imported.

To add an extra challenge, we've loaded only 2500 images, rather than 60000 which you will see in some published results. Deep learning models perform better with more data, however, they also take longer to train, especially when they start becoming more complex.

If you have a computer with a CUDA compatible GPU, you can take advantage of it to improve computation time. If you don't have a GPU, no problem! You can set up a deep learning environment in the cloud that can run your models on a GPU. Here is a blog post by Dan that explains how to do this - check it out after completing this exercise! It is a great next step as you continue your deep learning journey.


In [117]:
# Create the model: model
model = Sequential()

# Add the first hidden layer
model.add(Dense(50, activation='relu',input_shape= (784,)))

# Add the second hidden layer
model.add(Dense(50,activation='relu'))

# Add the output layer
model.add(Dense(10,activation='softmax' ))

# Compile the model
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

# Fit the model
model.fit(X,y,validation_split=0.3 )


NameError: name '____' is not defined