In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Simplest possible network: a single linear unit (one Dense neuron, no
# activation) that takes 3 input features.
model = keras.Sequential(
    [
        layers.Dense(1, input_shape=[3]),
    ]
)

In [None]:
import tensorflow as tf  # `tf` is not imported by any other cell in this notebook

# tf.linspace generates evenly-spaced values in an interval along a given axis.
# Scalar start/stop: 3 points from 10.0 to 12.0 inclusive.
tf.linspace(10.0, 12.0, 3, name="linspace")  # => [10.0, 11.0, 12.0]

# Vector start/stop: each (start, stop) pair gets its own row of 5 points,
# laid out along the last axis (axis=-1):
#   [[0.0,  2.5,   5.0,  7.5,  10.0],
#    [5.0, 13.75, 22.5, 31.25, 40.0]]
tf.linspace([0., 5.], [10., 40.], 5, axis=-1)

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Architecture: 2 input features -> Dense(4) + ReLU -> Dense(3) + ReLU -> Dense(1) -> 1 output.
# Every Dense layer adds its own bias term before the activation is applied.
model = keras.Sequential(
    [
        layers.Dense(4, activation="relu", input_shape=[2]),  # hidden layer 1
        layers.Dense(3, activation="relu"),  # hidden layer 2
        layers.Dense(1),  # linear output layer (no activation)
    ]
)

# Optimizer and loss function: Adam, minimising mean absolute error.
model.compile(loss="mae", optimizer="adam")

# The optimizer is fed 256 rows of the training data at a time (batch_size),
# and the whole training set is passed through 10 times (epochs).
# The validation pair is supplied so validation loss is recorded in the history.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=256,
    validation_data=(X_valid, y_valid),
)

import pandas as pd  # pandas is not imported by any other cell in this notebook

# Convert the training history (a dict of per-epoch metric lists) to a
# DataFrame and plot the training loss with pandas.
history_df = pd.DataFrame(history.history)
history_df['loss'].plot();  # trailing ';' suppresses the Axes repr in the cell output

In [None]:
# A Dense layer with the activation passed as an argument ...
layers.Dense(units=8, activation='relu')

# ... is equivalent to a linear Dense layer followed by a standalone
# Activation layer (written here as they would appear in a Sequential layer list).
[
    layers.Dense(units=8),
    layers.Activation('relu'),
]

### Batch Normalization and Dropout

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Dropout can be thought of as building a kind of ensemble of networks:
# predictions are no longer made by one big network, but by a committee of
# smaller networks.
#
# Layout: three identical blocks (Dense 1024 + ReLU -> Dropout 30% -> BatchNorm)
# followed by a single sigmoid output unit. The network takes 11 input features.
model = keras.Sequential(
    [
        # Block 1
        layers.Dense(units=1024, activation="relu", input_shape=[11]),
        layers.Dropout(rate=0.3),
        layers.BatchNormalization(),
        # Block 2
        layers.Dense(units=1024, activation="relu"),
        layers.Dropout(rate=0.3),
        layers.BatchNormalization(),
        # Block 3
        layers.Dense(units=1024, activation="relu"),
        layers.Dropout(rate=0.3),
        layers.BatchNormalization(),
        # Output
        layers.Dense(units=1, activation="sigmoid"),
    ]
)

### Capacity

In [None]:
# A model's capacity refers to the size and complexity of the patterns it is able to learn
# You can increase the capacity of a network either by making it wider (more units to existing layers) or 
# by making it deeper (adding more layers). Wider networks have an easier time learning more linear relationships, 
# while deeper networks prefer more nonlinear ones. 

In [None]:
# Early stopping
from tensorflow.keras.callbacks import EarlyStopping

# Callback that halts training when the monitored metric stops improving.
early_stopping = EarlyStopping(
    restore_best_weights=True,  # roll back to the best weights seen when stopping
    patience=20,  # how many epochs to wait for an improvement before stopping
    min_delta=0.001,  # minimum amount of change that counts as an improvement
)

# The epoch count is set high on purpose; the callback is expected to end
# training well before 500 epochs are reached.
history = model.fit(
    X_train,
    y_train,
    epochs=500,
    batch_size=256,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],  # callbacks are always passed as a list
    verbose=0,  # turn off the per-epoch training log
)

### Binary Classification

In [None]:
# Binary classifier: 33 input features -> two 4-unit ReLU hidden layers ->
# one sigmoid unit that outputs a value between 0 and 1.
model = keras.Sequential(
    [
        layers.Dense(units=4, activation="relu", input_shape=[33]),
        layers.Dense(units=4, activation="relu"),
        layers.Dense(units=1, activation="sigmoid"),
    ]
)

# Cross-entropy is the loss being optimised; binary accuracy is only reported.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["binary_accuracy"],
)