## Introduction to TensorFlow


In [None]:
# Defining data as constants

# Import constant from TensorFlow
from tensorflow import constant
# Import the remaining TensorFlow names that later cells call without importing
from tensorflow import (
    GradientTape,
    Variable,
    float32,
    matmul,
    multiply,
    ones,
    ones_like,
    random,
    reduce_sum,
    reshape,
)

# Convert the credit_numpy array into a tensorflow constant
# (credit_numpy is defined outside this cell)
credit_constant = constant(credit_numpy)

# Print constant datatype
print('\n The datatype is:', credit_constant.dtype)

# Print constant shape
print('\n The shape is:', credit_constant.shape)

"""
The datatype is: <dtype: 'float64'>
The shape is: (30000, 4)
"""

In [None]:
# Defining variables

# Define the 1-dimensional variable A1 from a Python list of four integers
A1 = Variable([1, 2, 3, 4]) # tf.Variable

# Print the variable A1 (shows the variable's full repr, not only its values)
print('\n A1: ', A1)

# Convert A1 to a numpy array and assign it to B1
B1 = A1.numpy()

# Print B1 (a plain numpy array)
print('\n B1: ', B1)

"""
A1:  <tf.Variable 'Variable:0' shape=(4,) dtype=int32, numpy=array([1, 2, 3, 4], dtype=int32)>
    
B1:  [1 2 3 4]
"""

In [None]:
# Performing element-wise multiplication

# Define tensors A1 (4 elements) and A23 (2 rows x 3 columns) as constants
A1 = constant([1, 2, 3, 4])
A23 = constant([[1, 2, 3], [1, 6, 4]])

# Define B1 and B23 as all-ones tensors with the same shapes as A1 and A23
B1 = ones_like(A1)
B23 = ones_like(A23)

# Perform element-wise multiplication; multiplying by ones leaves the values of A1 and A23 unchanged
C1 = multiply(A1, B1)
C23 = multiply(A23, B23)

# Print the tensors C1 and C23 as numpy arrays
print('\n C1: {}'.format(C1.numpy()))
print('\n C23: {}'.format(C23.numpy()))

"""
C1: [1 2 3 4]
    
     C23: [[1 2 3]
     [1 6 4]]
"""

In [None]:
# Making predictions with matrix multiplication

# Define features (4x2), params (2x1), and bill (4x1) as constants
features = constant([[2, 24], [2, 26], [2, 57], [1, 37]])
params = constant([[1000], [150]])
bill = constant([[3913], [2682], [8617], [64400]])

# Compute billpred using features and params: (4x2) times (2x1) gives one prediction per row
billpred = matmul(features, params)

# Compute and print the error (actual bill minus predicted bill)
error = bill - billpred
print(error.numpy())

"""
[[-1687]
     [-3218]
     [-1933]
     [57850]]
"""

In [None]:
# Reshaping tensors

# Reshape the grayscale image tensor into a 784x1 column vector
# (gray_tensor and color_tensor are defined outside this cell)
gray_vector = reshape(gray_tensor, (784, 1))

# Reshape the color image tensor into a 2352x1 column vector (2352 = 784 * 3)
color_vector = reshape(color_tensor, (2352, 1)) # color tensor has 3 color channels

In [None]:
# Optimizing with gradients

def compute_gradient(x0):
    """Return the gradient of y = x * x with respect to x, evaluated at x0.

    The result is converted to numpy before it is returned.
    """
    # Hold the evaluation point in a variable
    x = Variable(x0)
    with GradientTape() as tape:
        tape.watch(x)
        # y is built with the multiply operation while the tape is recording
        y = multiply(x, x)
    # Differentiate y with respect to x, then convert to numpy
    slope_at_x0 = tape.gradient(y, x)
    return slope_at_x0.numpy()

# Evaluate and print the gradient at x = -1, 1, and 0
print(compute_gradient(-1.0))
print(compute_gradient(1.0))
print(compute_gradient(0.0))

"""-2.0
    2.0
    0.0

Notice that the slope is positive at x = 1, which means that we can lower the loss by reducing x. 
The slope is negative at x = -1, which means that we can lower the loss by increasing x. 
The slope at x = 0 is 0, which means that we cannot lower the loss by either increasing or decreasing x. 
This is because the loss is minimized at x = 0.
"""

In [None]:
# Working with image data

# Reshape model from a 1x3 to a 3x1 tensor
# (model and letter are defined outside this cell; the name model is rebound here)
model = reshape(model, (3, 1))

# Multiply letter by model
output = matmul(letter, model)

# Sum over output and print prediction using the numpy method
prediction = reduce_sum(output)
print(prediction.numpy())

"""
Your model found that prediction=1.0 and correctly classified the letter as a K. 
In the coming chapters, you will use data to train a model (stored in the variable model) and then combine it with matrix multiplication,
matmul(letter, model), as we have done here, to make predictions about the classes of objects.
"""

## Linear models


In [None]:
# Load data using pandas

# Import pandas under the alias pd
import pandas as pd

# Assign the path to a string variable named data_path
# (a relative path, so the CSV must sit in the notebook's working directory)
data_path = 'kc_house_data.csv'

# Load the dataset as a dataframe named housing
housing = pd.read_csv(data_path)

# Print the price column of housing
print(housing['price'])

In [None]:
# Setting the data type

# numpy and tensorflow under their conventional aliases
import numpy as np
import tensorflow as tf

# price: the housing price column as a 32-bit float numpy array
price = np.array(housing['price'], dtype=np.float32)

# waterfront: the housing waterfront column cast to a Boolean tensor
waterfront = tf.cast(housing['waterfront'], dtype=tf.bool)

# Show both results
print(price)
print(waterfront)

"""
[221900. 538000. 180000. ... 402101. 400000. 325000.]
tf.Tensor([False False False ... False False False], shape=(21613,), dtype=bool)

Notice that printing price yielded a numpy array; whereas printing waterfront yielded a tf.Tensor().
"""

In [None]:
# Loss functions in TensorFlow

# Import the keras module from tensorflow
from tensorflow import keras

# Compute the mean squared error (mse) between price and predictions
# (predictions is defined outside this cell)
loss = keras.losses.mse(price, predictions)

# Print the mean squared error (mse)
print(loss.numpy())

"""141171604777.12717"""

############################################################################

# Import the keras module from tensorflow
from tensorflow import keras

# Compute the mean absolute error (mae) between price and predictions
# (predictions is defined outside this cell; loss is rebound here)
loss = keras.losses.mae(price, predictions)

# Print the mean absolute error (mae)
print(loss.numpy())

"""268827.99302088"""

"""
You may have noticed that the MAE was much smaller than the MSE, even though price and predictions were the same. 
This is because the different loss functions penalize deviations of predictions from price differently.
MSE does not like large deviations and punishes them harshly.
"""

In [None]:
# Modifying the loss function

# Initialize a variable named scalar.
# dtype is passed by keyword because the second positional parameter of
# Variable is `trainable`, not `dtype`.
scalar = Variable(1.0, dtype=float32)

# Define the model
def model(scalar, features = features):
    """Return the model's predictions: features multiplied by scalar."""
    scaled = scalar * features
    return scaled

# Define a loss function
def loss_function(scalar, features = features, targets = targets):
    """Return the mean absolute error between targets and model(scalar, features)."""
    return keras.losses.mae(targets, model(scalar, features))

# Evaluate the loss at the current value of scalar and print it
print(loss_function(scalar).numpy())

"""
3.0
As you will see in the following lessons, this exercise was the equivalent of evaluating the loss function for 
a linear regression where the intercept is 0.
"""

In [None]:
# Set up a linear regression

# Univariate linear model
def linear_regression(intercept, slope, features = size_log):
    """Return the predictions intercept + features * slope."""
    scaled = features*slope
    return intercept + scaled

# Loss as a function of the two model parameters
def loss_function(intercept, slope, features = size_log, targets = price_log):
    """Return the mean squared error between targets and the model's predictions."""
    return keras.losses.mse(
        targets,
        linear_regression(intercept, slope, features),
    )

# Evaluate the loss at two different (intercept, slope) pairs
print(loss_function(0.1, 0.1).numpy())
print(loss_function(0.1, 0.5).numpy())

In [None]:
# Train a linear model

# Adam optimizer with a learning rate of 0.5
opt = keras.optimizers.Adam(learning_rate=0.5)

# NOTE(review): intercept and slope must already exist as trainable variables;
# they are not defined before this cell in the visible notebook -- confirm.
for j in range(100):
    # One optimization step on the loss with respect to intercept and slope
    opt.minimize(
        lambda: loss_function(intercept, slope),
        var_list=[intercept, slope],
    )

    # Report the loss on steps 0, 10, 20, ..., 90
    if not j % 10:
        print(loss_function(intercept, slope).numpy())

# Plot data and regression line
plot_results(intercept, slope)

"""
9.669482
11.726698
1.1193314
1.6605737
0.7982884
0.8017316
0.6106565
0.59997976
0.5811015
0.5576158

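Notice that we printed loss_function(intercept, slope) every 10th execution for 100 executions. 
The loss did not fall at every printed step (it rose between the first and second values, for example), but over the 
100 executions it moved toward the minimum as the optimizer brought the slope and intercept parameters closer to their optimal values.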
"""

In [None]:
# Multiple linear regression

# Two-feature linear model; params is indexed as [intercept, slope_1, slope_2]
def linear_regression(params, feature1 = size_log, feature2 = bedrooms):
    """Return params[0] + feature1 * params[1] + feature2 * params[2]."""
    intercept_term = params[0]
    slope_1 = params[1]
    slope_2 = params[2]
    return intercept_term + feature1*slope_1 + feature2*slope_2

# Loss as a function of the parameter vector
def loss_function(params, targets = price_log, feature1 = size_log, feature2 = bedrooms):
    """Return the mean absolute error between targets and the model's predictions."""
    return keras.losses.mae(
        targets,
        linear_regression(params, feature1, feature2),
    )

# Adam optimizer with its default settings
opt = keras.optimizers.Adam()

# Take 10 optimization steps, reporting the trainable variables after each one
# NOTE(review): params and print_results are defined outside this cell.
for j in range(10):
    opt.minimize(lambda: loss_function(params), var_list=[params])
    print_results(params)
    
"""
loss: 12.418, intercept: 0.101, slope_1: 0.051, slope_2: 0.021
loss: 12.404, intercept: 0.102, slope_1: 0.052, slope_2: 0.022
loss: 12.391, intercept: 0.103, slope_1: 0.053, slope_2: 0.023
loss: 12.377, intercept: 0.104, slope_1: 0.054, slope_2: 0.024
loss: 12.364, intercept: 0.105, slope_1: 0.055, slope_2: 0.025
loss: 12.351, intercept: 0.106, slope_1: 0.056, slope_2: 0.026
loss: 12.337, intercept: 0.107, slope_1: 0.057, slope_2: 0.027
loss: 12.324, intercept: 0.108, slope_1: 0.058, slope_2: 0.028
loss: 12.311, intercept: 0.109, slope_1: 0.059, slope_2: 0.029
loss: 12.297, intercept: 0.110, slope_1: 0.060, slope_2: 0.030
"""

In [None]:
# Preparing to batch train

# Define the intercept and slope as float32 variables.
# dtype is passed by keyword because the second positional parameter of
# Variable is `trainable`, not `dtype`. The intercept is written as 10.0 so it
# is not created as an integer variable, which could not be combined with the
# float32 predictions or updated by the optimizer.
intercept = Variable(10.0, dtype=float32)
slope = Variable(0.5, dtype=float32)

# Define the model
def linear_regression(intercept, slope, features):
    """Return the predicted values intercept + slope * features."""
    scaled = slope*features
    return intercept + scaled

# Define the loss function
def loss_function(intercept, slope, targets, features):
    """Return the MSE loss between targets and linear_regression(intercept, slope, features)."""
    return keras.losses.mse(
        targets,
        linear_regression(intercept, slope, features),
    )

In [None]:
# Training a linear model in batches

# Adam optimizer with its default settings
opt = keras.optimizers.Adam()

# Read the CSV in chunks of 100 rows and take one optimizer step per chunk
for batch in pd.read_csv('kc_house_data.csv', chunksize=100):
    # Feature (sqft_lot) and target (price) for this chunk, as float32 arrays
    size_batch = np.array(batch['sqft_lot'], dtype=np.float32)
    price_batch = np.array(batch['price'], dtype=np.float32)

    # Minimize the loss on this chunk with respect to intercept and slope
    opt.minimize(
        lambda: loss_function(intercept, slope, price_batch, size_batch),
        var_list=[intercept, slope],
    )

# Print trained parameters
print(intercept.numpy(), slope.numpy())

"""    10.217888 0.7016
"""

## Neural Networks
In this chapter, you will apply the tools from the previous chapters to build, train, and make predictions with neural networks. You will learn how to define dense layers, apply activation functions, select an optimizer, and apply regularization to reduce overfitting. You will take advantage of TensorFlow's flexibility by using both low-level linear algebra and high-level Keras API operations to define and train models.

In [None]:
# The linear algebra of dense layers

"""There are two ways to define a dense layer in tensorflow. The first involves the use of low-level, 
linear algebraic operations. The second makes use of high-level keras operations. 
In this exercise, we will use the first method to construct the network shown in the image below.
https://assets.datacamp.com/production/repositories/3953/datasets/23d6f91f73eb1363c4fd67c83720ca3c84ce20a1/3_2_1_network2.png

The input layer contains 3 features -- education, marital status, and age -- which are available as borrower_features. 
The hidden layer contains 2 nodes and the output layer contains a single node.
"""
# Initialize bias1 as a single float value
bias1 = Variable(1.0)

# Initialize weights1 as 3x2 variable of ones (3 input features -> 2 hidden nodes)
weights1 = Variable(ones((3, 2)))

# Perform matrix multiplication of borrower_features and weights1
# (borrower_features is defined outside this cell)
product1 = matmul(borrower_features, weights1)

# Apply sigmoid activation function to product1 + bias1
dense1 = keras.activations.sigmoid(product1 + bias1)

# Print shape of dense1
print("\n dense1's output shape: {}".format(dense1.shape))

"""dense1's output shape: (1, 2)"""

############################################################################

# From previous step: hidden layer with 3 inputs and 2 nodes
bias1 = Variable(1.0)
weights1 = Variable(ones((3, 2)))
product1 = matmul(borrower_features, weights1)
dense1 = keras.activations.sigmoid(product1 + bias1)

# Initialize bias2 and weights2 (2 hidden nodes -> 1 output node)
bias2 = Variable(1.0)
weights2 = Variable(ones((2, 1)))

# Perform matrix multiplication of dense1 and weights2
product2 = matmul(dense1, weights2)

# Apply activation to product2 + bias2 and print the prediction
# ([0,0] selects the single value of the first example)
prediction = keras.activations.sigmoid(product2 + bias2)
print('\n prediction: {}'.format(prediction.numpy()[0,0]))
print('\n actual: 1')

"""prediction: 0.9525741338729858
    
     actual: 1"""

"""Our model produces predicted values in the interval between 0 and 1. For the example we considered, 
the actual value was 1 and the predicted value was a probability between 0 and 1. This, of course, is not meaningful,
since we have not yet trained our model's parameters."""

In [None]:
# The low-level approach with multiple examples

# Compute the product of borrower_features and weights1
# (borrower_features, weights1 and bias1 are defined outside this cell)
products1 = matmul(borrower_features, weights1)

# Apply a sigmoid activation function to products1 + bias1
dense1 = keras.activations.sigmoid(products1 + bias1)

# Print the shapes of borrower_features, weights1, bias1, and dense1
print('\n shape of borrower_features: ', borrower_features.shape)
print('\n shape of weights1: ', weights1.shape)
print('\n shape of bias1: ', bias1.shape)
print('\n shape of dense1: ', dense1.shape)

"""
shape of borrower_features:  (5, 3)
    
shape of weights1:  (3, 2)

shape of bias1:  (1,)

shape of dense1:  (5, 2)
     
Note that our input data, borrower_features, is 5x3 because it consists of 5 examples for 3 features. 
The shape of weights1 is 3x2, as it was in the previous exercise, since it does not depend on the number of examples. 
Additionally, bias1 is a scalar. Finally, dense1 is 5x2, which means that we can multiply it by the following set of weights,
weights2, which we defined to be 2x1 in the previous exercise.   
"""

In [None]:
# Using the dense layer operation

"""
We've now seen how to define dense layers in tensorflow using linear algebra. In this exercise, 
we'll skip the linear algebra and let keras work out the details. This will allow us to construct the network below,
which has 2 hidden layers and 10 features, using less code than we needed for the network with 1 hidden layer and 3 features.
https://assets.datacamp.com/production/repositories/3953/datasets/eb2fda20a023befc69b53ff5bd278c2eee73dac8/10_7_3_1_network.png
"""
# Define the first dense layer (7 output nodes), applied to borrower_features
# (borrower_features is defined outside this cell)
dense1 = keras.layers.Dense(7, activation='sigmoid')(borrower_features)

# Define a dense layer with 3 output nodes, applied to dense1
dense2 = keras.layers.Dense(3, activation='sigmoid')(dense1)

# Define a dense layer with 1 output node, applied to dense2
predictions = keras.layers.Dense(1, activation='sigmoid')(dense2)

# Print the shapes of dense1, dense2, and predictions
print('\n shape of dense1: ', dense1.shape)
print('\n shape of dense2: ', dense2.shape)
print('\n shape of predictions: ', predictions.shape)

"""
shape of dense1:  (100, 7)
    
shape of dense2:  (100, 3)

shape of predictions:  (100, 1)

Note that each layer has 100 rows because the input data contains 100 examples.
"""

In [None]:
# Binary classification problems

# Input layer: bill_amounts as a float32 constant
inputs = constant(bill_amounts, dtype=float32)

# Two relu hidden layers with 3 and 2 nodes
dense1 = keras.layers.Dense(units=3, activation='relu')(inputs)
dense2 = keras.layers.Dense(units=2, activation='relu')(dense1)

# Sigmoid output layer with a single node
outputs = keras.layers.Dense(units=1, activation='sigmoid')(dense2)

# Error (actual minus predicted) for the first five examples
error = default[:5] - outputs.numpy()[:5]
print(error)

"""
[[-1.]
[-1.]
[-1.]
[-1.]
[-1.]]

If you run the code several times, you'll notice that the errors change each time. 
This is because you're using an untrained model with randomly initialized parameters. 
Furthermore, the errors fall on the interval between -1 and 1 because default is a binary variable 
that takes on values of 0 and 1 and outputs is a probability between 0 and 1.
"""

In [1]:
# Multiclass classification problems

# Input layer: borrower_features as a float32 constant
inputs = constant(borrower_features, dtype=float32)

# Hidden layers: 10 sigmoid nodes followed by 8 relu nodes
dense1 = keras.layers.Dense(units=10, activation='sigmoid')(inputs)
dense2 = keras.layers.Dense(units=8, activation='relu')(dense1)

# Output layer: 6 nodes with softmax activation
outputs = keras.layers.Dense(units=6, activation='softmax')(dense2)

# Show the predictions for the first five examples
print(outputs.numpy()[:5])

"""
[[0.119128   0.21407694 0.21403559 0.16607246 0.14679684 0.13989018]
     [0.09451684 0.1801574  0.22960506 0.20694265 0.11374877 0.17502937]
     [0.119128   0.21407694 0.21403559 0.16607246 0.14679684 0.13989018]
     [0.09906525 0.16128646 0.1742211  0.22180668 0.12069351 0.22292699]
     [0.119128   0.21407694 0.21403559 0.16607246 0.14679684 0.13989018]]
     
Notice that each row of outputs sums to one. This is because a row contains the predicted class 
probabilities for one example. As with the previous exercise, our predictions are not yet informative, 
since we are using an untrained model with randomly initialized parameters. 
This is why the model tends to assign similar probabilities to each class.
"""

In [None]:
# The dangers of local minima

# Initialize x_1 and x_2.
# dtype is passed by keyword because the second positional parameter of
# Variable is `trainable`, not `dtype`.
x_1 = Variable(6.0, dtype=float32)
x_2 = Variable(0.3, dtype=float32)

# Define the optimization operation
opt = keras.optimizers.SGD(learning_rate=0.01)

# NOTE(review): loss_function is called here with a single variable, unlike the
# loss_function definitions visible earlier in the notebook -- confirm the
# single-argument version is defined before this cell runs.
for j in range(100):
    # Perform minimization using the loss function and x_1
    opt.minimize(lambda: loss_function(x_1), var_list=[x_1])
    # Perform minimization using the loss function and x_2
    opt.minimize(lambda: loss_function(x_2), var_list=[x_2])

# Print x_1 and x_2 as numpy arrays
print(x_1.numpy(), x_2.numpy())

"""
4.3801394 0.42052683

Notice that we used the same optimizer and loss function, but two different initial values. 
When we started at 6.0 with x_1, we found the global minimum at 4.38, marked by the dot on the right. 
When we started at 0.3, we stopped around 0.42 with x_2, the local minimum marked by a dot on the far left.
"""

In [None]:
# Avoiding local minima

"""
The previous problem showed how easy it is to get stuck in local minima. We had a simple optimization problem in 
one variable and gradient descent still failed to deliver the global minimum when we had to travel through local minima 
first. One way to avoid this problem is to use momentum, which allows the optimizer to break through local minima. 
We will again use the loss function from the previous problem, which has been defined and is available 
for you as loss_function().
https://assets.datacamp.com/production/repositories/3953/datasets/42876c85cba5c14941a3fac191eff75b41597112/local_minima_dots_4_10.png
"""

# Initialize x_1 and x_2 at the same starting point.
# dtype is passed by keyword because the second positional parameter of
# Variable is `trainable`, not `dtype`.
x_1 = Variable(0.05, dtype=float32)
x_2 = Variable(0.05, dtype=float32)

# Define the optimization operation for opt_1 and opt_2; the two optimizers
# differ only in their momentum parameter
opt_1 = keras.optimizers.RMSprop(learning_rate=0.01, momentum=0.99)
opt_2 = keras.optimizers.RMSprop(learning_rate=0.01, momentum=0.00)

for j in range(100):
    # Define the minimization operation for opt_1
    opt_1.minimize(lambda: loss_function(x_1), var_list=[x_1])
    # Define the minimization operation for opt_2
    opt_2.minimize(lambda: loss_function(x_2), var_list=[x_2])

# Print x_1 and x_2 as numpy arrays
print(x_1.numpy(), x_2.numpy())

"""
    4.3150263 0.4205261
Recall that the global minimum is approximately 4.38. Notice that opt_1 built momentum, bringing x_1 closer to
the global minimum. To the contrary, opt_2, which had a momentum parameter of 0.0, got stuck in the local minimum on the left.
"""

In [None]:
# Initialization in TensorFlow

# Define the layer 1 weights as a 23x7 variable drawn from a normal distribution
w1 = Variable(random.normal([23, 7]))

# Initialize the layer 1 bias as a vector of 7 ones
b1 = Variable(ones([7]))

# Define the layer 2 weights as a 7x1 variable drawn from a normal distribution
w2 = Variable(random.normal([7, 1]))

# Define the layer 2 bias as a float: an integer 0 would create an integer
# variable, which cannot be added to the float output of matmul
b2 = Variable(0.0)

In [None]:
# Defining the model and loss function

"""
predictions layer is defined as sigmoid(layer1*w2+b2)
"""

# Define the model
def model(w1, b1, w2, b2, features = borrower_features):
    """Return the sigmoid predictions of a network with one hidden layer.

    Args:
        w1, b1: weights and bias of the hidden layer.
        w2, b2: weights and bias of the output layer.
        features: input examples; defaults to borrower_features.
    """
    # Apply relu activation functions to layer 1
    layer1 = keras.activations.relu(matmul(features, w1) + b1)
    # Apply dropout
    # NOTE(review): the Dropout layer is called without a training flag --
    # confirm whether dropout is actually active during the optimizer steps.
    dropout = keras.layers.Dropout(0.25)(layer1)
    return keras.activations.sigmoid(matmul(dropout, w2) + b2)

# Define the loss function
def loss_function(w1, b1, w2, b2, features = borrower_features, targets = default):
    """Return the binary cross entropy between targets and the model's predictions.

    Args:
        w1, b1, w2, b2: model parameters, passed through to model().
        features: input examples; defaults to borrower_features.
        targets: true labels; defaults to default.
    """
    # Forward the caller's features so the loss is computed on the data that
    # was passed in, not silently on model()'s own default
    predictions = model(w1, b1, w2, b2, features)
    # Pass targets and predictions to the cross entropy loss
    return keras.losses.binary_crossentropy(targets, predictions)

In [None]:
# Training neural networks with TensorFlow

# Run 100 optimizer steps on the four network parameters
# NOTE(review): opt is not defined in this cell -- confirm which optimizer is intended.
for j in range(100):
    opt.minimize(
        lambda: loss_function(w1, b1, w2, b2),
        var_list=[w1, b1, w2, b2],
    )

# Predict on test_features with the trained parameters
model_predictions = model(w1, b1, w2, b2, test_features)

# Build the confusion matrix from the test targets and the predictions
confusion_matrix(test_targets, model_predictions)

"""Output --> Confusion Matrix"""

## High Level APIs
In the final chapter, you'll use high-level APIs in TensorFlow 2 to train a sign language letter classifier. You will use both the sequential and functional Keras APIs to train, validate, make predictions with, and evaluate models. You will also learn how to use the Estimators API to streamline the model definition and training process, and to avoid errors.

In [None]:
# The sequential model in Keras

# Define a Keras sequential model
from tensorflow import keras

model = keras.Sequential()

# Define the first dense layer; only this layer declares the input shape (784 features)
model.add(keras.layers.Dense(16, activation='relu', input_shape=(784,)))

# Define the second dense layer; its input is the 16-unit output of the first
# layer, so no input_shape is given here
model.add(keras.layers.Dense(8, activation='relu'))

# Define the output layer
model.add(keras.layers.Dense(4, activation='softmax'))

# Print the model architecture
print(model.summary())

"""
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 16)                12560     
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 136       
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 36        
=================================================================
Total params: 12,732
Trainable params: 12,732
Non-trainable params: 0
_________________________________________________________________
None
"""

"""
Notice that we've defined a model, but we haven't compiled it. The compilation step in keras allows us to set the optimizer, 
loss function, and other useful training parameters in a single line of code. 
Furthermore, the .summary() method allows us to view the model's architecture.
"""

In [None]:
# Compiling a sequential model

# Start from a fresh sequential model so the layers below are not appended to
# the model built in an earlier cell; the summary recorded below this cell
# shows only these three layers
model = keras.Sequential()

# Define the first dense layer
model.add(keras.layers.Dense(16, activation='sigmoid', input_shape=(784,)))

# Apply dropout to the first layer's output
model.add(keras.layers.Dropout(0.25))

# Define the output layer
model.add(keras.layers.Dense(4, activation='softmax'))

# Compile the model using an adam optimizer and categorical_crossentropy loss function.
model.compile('adam', loss='categorical_crossentropy')

# Print a model summary
print(model.summary())

"""
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 16)                12560     
_________________________________________________________________
dropout (Dropout)            (None, 16)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 68        
=================================================================
Total params: 12,628
Trainable params: 12,628
Non-trainable params: 0
_________________________________________________________________
None
"""

In [None]:
# Defining a multiple input model

# For model 1, pass the input layer to layer 1 and layer 1 to layer 2
# (m1_inputs and m2_inputs are defined outside this cell)
m1_layer1 = keras.layers.Dense(12, activation='sigmoid')(m1_inputs)
m1_layer2 = keras.layers.Dense(4, activation='softmax')(m1_layer1)

# For model 2, pass the input layer to layer 1 and layer 1 to layer 2
m2_layer1 = keras.layers.Dense(12, activation='relu')(m2_inputs)
m2_layer2 = keras.layers.Dense(4, activation='softmax')(m2_layer1)

# Merge model outputs with an add layer and define a functional model
# that takes both inputs
merged = keras.layers.add([m1_layer2, m2_layer2])
model = keras.Model(inputs=[m1_inputs, m2_inputs], outputs=merged)

# Print a model summary
print(model.summary())

"""
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_1 (InputLayer)            [(None, 784)]        0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 784)]        0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 12)           9420        input_1[0][0]                    
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 12)           9420        input_2[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 4)            52          dense[0][0]                      
__________________________________________________________________________________________________
dense_3 (Dense)                 (None, 4)            52          dense_2[0][0]                    
__________________________________________________________________________________________________
add (Add)                       (None, 4)            0           dense_1[0][0]                    
                                                                 dense_3[0][0]                    
==================================================================================================
Total params: 18,944
Trainable params: 18,944
Non-trainable params: 0
__________________________________________________________________________________________________
None
"""

"""Notice that the .summary() method yields a new column: connected to. 
This column tells you how layers connect to each other within the network. 
We can see that dense_2, for instance, is connected to the input_2 layer. 
We can also see that the add layer, which merged the two models, connected to both dense_1 and dense_3."""

In [None]:
# Training with Keras

# Sequential model: one 16-node relu hidden layer on 784 input features,
# followed by a 4-node softmax output layer
model = keras.Sequential([
    keras.layers.Dense(16, activation='relu', input_shape=(784,)),
    keras.layers.Dense(4, activation='softmax'),
])

# Compile with the SGD optimizer and the categorical_crossentropy loss
model.compile(optimizer='SGD', loss='categorical_crossentropy')

# Fit on the sign language data for 5 epochs
model.fit(sign_language_features, sign_language_labels, epochs=5)

"""
Epoch 1/5
    
 1/32 [..............................] - ETA: 23s - loss: 2.2646
26/32 [=======================>......] - ETA: 0s - loss: 1.5090 
32/32 [==============================] - 1s 2ms/step - loss: 1.4747
    Epoch 2/5
    
 1/32 [..............................] - ETA: 0s - loss: 1.4094
27/32 [========================>.....] - ETA: 0s - loss: 1.2044
32/32 [==============================] - 0s 2ms/step - loss: 1.1934
    Epoch 3/5
    
 1/32 [..............................] - ETA: 0s - loss: 1.0047
25/32 [======================>.......] - ETA: 0s - loss: 1.0226
32/32 [==============================] - 0s 2ms/step - loss: 1.0210
    Epoch 4/5
    
 1/32 [..............................] - ETA: 0s - loss: 0.9127
24/32 [=====================>........] - ETA: 0s - loss: 0.9353
32/32 [==============================] - 0s 2ms/step - loss: 0.9297
    Epoch 5/5
    
 1/32 [..............................] - ETA: 0s - loss: 0.8553
23/32 [====================>.........] - ETA: 0s - loss: 0.8519
32/32 [==============================] - 0s 2ms/step - loss: 0.8452
"""

In [None]:
# Metrics and validation with Keras

# Sequential model: one 32-node sigmoid hidden layer on 784 input features,
# followed by a 4-node softmax classifier
model = keras.Sequential([
    keras.layers.Dense(32, activation='sigmoid', input_shape=(784,)),
    keras.layers.Dense(4, activation='softmax'),
])

# Optimizer, loss function, and the accuracy metric
model.compile(
    optimizer='RMSprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs, holding out 10% of the data for validation
model.fit(
    sign_language_features,
    sign_language_labels,
    epochs=10,
    validation_split=0.1,
)

"""
Epoch 1/10
    
 1/29 [>.............................] - ETA: 28s - loss: 1.4231 - accuracy: 0.3125
21/29 [====================>.........] - ETA: 0s - loss: 1.3871 - accuracy: 0.3030 
29/29 [==============================] - 3s 59ms/step - loss: 1.3545 - accuracy: 0.3324 - val_loss: 1.2242 - val_accuracy: 0.2900
    Epoch 2/10
    
 1/29 [>.............................] - ETA: 0s - loss: 1.3104 - accuracy: 0.2500
25/29 [========================>.....] - ETA: 0s - loss: 1.0650 - accuracy: 0.6364
29/29 [==============================] - 0s 9ms/step - loss: 1.0533 - accuracy: 0.6447 - val_loss: 1.0445 - val_accuracy: 0.5900
    Epoch 3/10
    
 1/29 [>.............................] - ETA: 0s - loss: 0.9369 - accuracy: 0.7812
29/29 [==============================] - 0s 7ms/step - loss: 0.8804 - accuracy: 0.7636 - val_loss: 0.8102 - val_accuracy: 0.7000
    Epoch 4/10
    
 1/29 [>.............................] - ETA: 0s - loss: 0.8435 - accuracy: 0.7188
29/29 [==============================] - 0s 7ms/step - loss: 0.7142 - accuracy: 0.8028 - val_loss: 0.7993 - val_accuracy: 0.6900
    Epoch 5/10
    
 1/29 [>.............................] - ETA: 0s - loss: 0.7524 - accuracy: 0.7812
29/29 [==============================] - 0s 7ms/step - loss: 0.6524 - accuracy: 0.8449 - val_loss: 0.6350 - val_accuracy: 0.7300
    Epoch 6/10
    
 1/29 [>.............................] - ETA: 0s - loss: 0.7238 - accuracy: 0.7188
29/29 [==============================] - 0s 7ms/step - loss: 0.5613 - accuracy: 0.8700 - val_loss: 0.7719 - val_accuracy: 0.5900
    Epoch 7/10
    
 1/29 [>.............................] - ETA: 0s - loss: 0.8442 - accuracy: 0.5000
27/29 [==========================>...] - ETA: 0s - loss: 0.5204 - accuracy: 0.8706
29/29 [==============================] - 0s 8ms/step - loss: 0.5160 - accuracy: 0.8743 - val_loss: 0.4521 - val_accuracy: 0.9800
    Epoch 8/10
    
 1/29 [>.............................] - ETA: 0s - loss: 0.4912 - accuracy: 0.9375
29/29 [==============================] - 0s 7ms/step - loss: 0.4448 - accuracy: 0.9150 - val_loss: 0.5926 - val_accuracy: 0.7100
    Epoch 9/10
    
 1/29 [>.............................] - ETA: 0s - loss: 0.4262 - accuracy: 0.8125
29/29 [==============================] - 0s 8ms/step - loss: 0.3948 - accuracy: 0.9326 - val_loss: 0.3892 - val_accuracy: 0.9400
    Epoch 10/10
    
 1/29 [>.............................] - ETA: 0s - loss: 0.3319 - accuracy: 1.0000
29/29 [==============================] - 0s 7ms/step - loss: 0.3480 - accuracy: 0.9567 - val_loss: 0.3641 - val_accuracy: 0.8900
"""

In [None]:
# Overfitting detection

# Single-hidden-layer sequential model used to demonstrate overfitting
model = keras.Sequential()

# Hidden layer: 1024 ReLU units over the 784-element input vectors
model.add(keras.layers.Dense(1024, activation='relu', input_shape=(784,)))

# Output layer: 4 units with a softmax activation
model.add(keras.layers.Dense(4, activation='softmax'))

# Compile with Adam. `learning_rate` is the supported keyword; the old `lr`
# alias is deprecated and rejected by newer Keras releases.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
              loss='categorical_crossentropy', metrics=['accuracy'])

# Fit for 50 epochs, holding out half of the samples for validation so that
# val_loss can be compared against the training loss epoch by epoch
model.fit(sign_language_features, sign_language_labels, epochs=50, validation_split=0.5)

"""
You may have noticed that the validation loss, val_loss, was substantially higher than the training loss, loss. 
Furthermore, if val_loss started to increase before the training process was terminated, then we may have overfitted. 
When this happens, you will want to try decreasing the number of epochs.
"""

In [None]:
# Evaluating models

# Small model: score on the train split first, then on the test split
small_train, small_test = (
    small_model.evaluate(train_features, train_labels),
    small_model.evaluate(test_features, test_labels),
)

# Large model: same two splits, in the same order
large_train, large_test = (
    large_model.evaluate(train_features, train_labels),
    large_model.evaluate(test_features, test_labels),
)

# Report train vs. test results for both models side by side
print('\n Small - Train: {}, Test: {}'.format(small_train, small_test))
print('Large - Train: {}, Test: {}'.format(large_train, large_test))

"""

1/4 [======>.......................] - ETA: 0s - loss: 0.1738
4/4 [==============================] - 0s 2ms/step - loss: 0.1698
    
1/4 [======>.......................] - ETA: 0s - loss: 0.3251
4/4 [==============================] - 0s 1ms/step - loss: 0.2849
    
1/4 [======>.......................] - ETA: 0s - loss: 0.0425
4/4 [==============================] - 0s 2ms/step - loss: 0.0396
    
1/4 [======>.......................] - ETA: 0s - loss: 0.1414
4/4 [==============================] - 0s 2ms/step - loss: 0.1454
    
     Small - Train: 0.16981548070907593, Test: 0.2848725914955139
    Large - Train: 0.03957207128405571, Test: 0.14543527364730835
"""

In [None]:
# Preparing to train with Estimators

# Build one numeric feature column per predictor and bind each to its own name
bedrooms, bathrooms = (
    feature_column.numeric_column(column_name)
    for column_name in ("bedrooms", "bathrooms")
)

# Feature columns handed to the estimators, in a fixed order
feature_list = [bedrooms, bathrooms]

def input_fn():
    """Return the ``(features, labels)`` pair read from the global ``housing`` data.

    Labels are the ``'price'`` column converted to a NumPy array. Features are
    a dict mapping ``'bedrooms'`` and ``'bathrooms'`` to NumPy arrays of the
    corresponding ``housing`` columns.
    """
    # Read the target column first, then one array per predictor column
    targets = np.array(housing['price'])
    predictors = {
        column: np.array(housing[column])
        for column in ('bedrooms', 'bathrooms')
    }
    return predictors, targets

In [None]:
# Defining Estimators

# Deep neural network regressor with two hidden layers of 2 nodes each,
# trained for a single step
model = estimator.DNNRegressor(
    hidden_units=[2, 2],
    feature_columns=feature_list,
)
model.train(input_fn, steps=1)

# ---------------------------------------------------------------------------

# Linear regressor over the same feature columns, trained for two steps
# (this rebinds `model`, replacing the DNN regressor created above)
model = estimator.LinearRegressor(
    feature_columns=feature_list,
)
model.train(input_fn, steps=2)

"""
Note that you have other premade estimator options, such as BoostedTreesRegressor(), 
and can also create your own custom estimators.
"""