In [1]:
import tensorflow
from tensorflow import keras

# Use He initialization 
keras.layers.Dense(10, activation="relu", kernel_initializer="he_normal")
keras.layers.Dense(10, activation="relu", kernel_initializer="he_uniform")


2023-06-05 11:10:09.118908: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-05 11:10:09.146587: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-05 11:10:09.146910: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


<keras.layers.core.dense.Dense at 0x7f5ba99d3fd0>

In [2]:
# Use average fan instead of the input fan
he_avg_init = keras.initializers.VarianceScaling(scale=2., mode="fan_avg", distribution="uniform")
keras.layers.Dense(10, activation="sigmoid", kernel_initializer=he_avg_init)

<keras.layers.core.dense.Dense at 0x7f5ba958f070>

In [3]:
# Leaky ReLU activation function
model = keras.models.Sequential([
    keras.layers.Dense(30, kernel_initializer="he_normal"),
    keras.layers.LeakyReLU(alpha=0.2),
])

# PReLU
model = keras.models.Sequential([
    keras.layers.Dense(30, kernel_initializer="he_normal"),
    keras.layers.PReLU()
])

# SELU
layer = keras.layers.Dense(10, activation="selu", kernel_initializer="lecun_normal")

2023-06-05 11:10:10.449368: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-06-05 11:10:10.460426: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [4]:

# Batch normalization after activation function
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation="softmax")
])
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 batch_normalization (BatchN  (None, 784)              3136      
 ormalization)                                                   
                                                                 
 dense_6 (Dense)             (None, 300)               235500    
                                                                 
 batch_normalization_1 (Batc  (None, 300)              1200      
 hNormalization)                                                 
                                                                 
 dense_7 (Dense)             (None, 100)               30100     
                                                                 
 batch_normalization_2 (Batc  (None, 100)             

In [6]:
[(var.name, var.trainable) for var in model.layers[1].variables]

[('batch_normalization/gamma:0', True),
 ('batch_normalization/beta:0', True),
 ('batch_normalization/moving_mean:0', False),
 ('batch_normalization/moving_variance:0', False)]

In [7]:
model.layers[1].updates

  model.layers[1].updates


[]

In [8]:
# Batch normalization before activation function
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, kernel_initializer="he_normal", use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("elu"),
    keras.layers.Dense(100, kernel_initializer="he_normal", use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("elu"),
    keras.layers.Dense(10, activation="softmax")
])


In [10]:
# Gradient clipping (clipvalue or clipnorm)
# Clip value will clip if the value is above 1 or below -1
# Clip norm will clip if its computed l2 norm is above 1 or below -1
# Use clipnorm if you want to preserve the orientation of the gradient vector
optimizer = keras.optimizers.SGD(clipvalue=1.0)

In [None]:
# Transfer learning with Keras
model_A = keras.models.load_model("my_model_A.h5")
model_B_on_A = keras.models.Sequential(model_A.layers[:-1])
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))

# Use cloning if you do not want to effect model_A
model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())

for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False

model_B_on_A.compile(loss="binary_crossentropy", optimizer="sgd", metrics=["accuracy"])

# Faster Optimizers

In [12]:
# Momentum optimization 
optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9)
# Nesterov Accelerated Gradient
optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9, nesterov=True)
# RMSProp
optimizer = keras.optimizers.RMSprop(lr=0.001, rho=0.9)
# Adam
optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)



# Learning Schedules

In [15]:
import tensorflow as tf
optimizer = keras.optimizers.SGD(learning_rate=0.01, weight_decay=1e-4)

lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=10000,
    decay_rate=0.9)
optimizer = keras.optimizers.SGD(learning_rate=lr_schedule)


In [16]:
# Exponential scheduling

def exponential_decay_fn(epoch):
    return 0.01 * 0.1**(epoch/20)

def exponential_decay(lr0, s):
    def exponential_decay_fn(epoch):
        return lr0 * 0.1**(epoch/s)
    return exponential_decay_fn

exponential_decay_fn = exponential_decay(lr0=0.01, s=20)

In [17]:
lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)
history = model.fit(X_train, y_train, [...], callbacks=[lr_scheduler])

NameError: name 'X_train' is not defined

In [18]:
# Piecewise scheduling
def piecewise_constant_fn(epoch):
    if epoch < 5:
        return 0.01
    elif epoch < 15:
        return 0.005
    else:
        return 0.001


In [19]:
# Performance scheduling

# Reduces learning rate by a factor of 0.5 for every 5 epochs where the val loss does not improve
lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)

In [20]:
s = 20 * len(X_train) // 32
learning_rate = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.01, decay_steps=s, decay_rate=0.1)


NameError: name 'X_train' is not defined

# Regularization methods

In [23]:
# L2 and L1 regularization
layer = keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal", kernel_regularizer=keras.regularizers.l2(0.01))
layer = keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal", kernel_regularizer=keras.regularizers.l1(0.01))
layer = keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal", kernel_regularizer=keras.regularizers.l1_l2(0.01))

In [24]:
from functools import partial

# Avoid redundant code with partial
RegularizedDense = partial(keras.layers.Dense, activation="elu", 
                           kernel_initializer="he_normal", 
                           kernel_regularizer=keras.regularizers.l2(0.01))

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    RegularizedDense(300),
    RegularizedDense(100),
    RegularizedDense(10, activation="softmax", kernel_initializer="glorot_uniform"),
])

In [25]:
# Dropout
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(10, activation="softmax")
])

In [26]:
# Max-norm regularization
keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal", kernel_constraint=keras.constraints.max_norm(1.))

<keras.layers.core.dense.Dense at 0x7f5ba81fab60>

In [50]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[32,32,3]),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dense(10, activation="softmax", kernel_initializer="glorot_normal")
])

In [29]:
data = keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [93]:
X_train, y_train = data[0]
X_test, y_test = data[1]

X_val = X_train[40000:]
X_train = X_train[:40000]

y_val = y_train[40000:]
y_train = y_train[:40000]

In [46]:
class ExponentialLearningRate(keras.callbacks.Callback):
    def __init__(self, factor):
        self.factor = factor
        self.rates = []
        self.losses = []
    def on_batch_end(self, batch, logs):
        self.rates.append(keras.backend.get_value(self.model.optimizer.learning_rate))
        self.losses.append(logs["loss"])
        keras.backend.set_value(self.model.optimizer.learning_rate, self.model.optimizer.learning_rate * self.factor)

In [51]:
early_stopping = keras.callbacks.EarlyStopping(patience=10)
exponential_lr = ExponentialLearningRate(factor=1.005)

model.compile(optimizer=keras.optimizers.Nadam(learning_rate=0.0006, beta_1=0.9, beta_2=0.999), 
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"],)

model.fit(X_train, y_train, epochs=300, validation_data=(X_val, y_val), callbacks=[early_stopping])

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300


<keras.callbacks.History at 0x7f5af674ac20>

In [120]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train, y_train = data[0]
X_test, y_test = data[1]
X_val = X_train[40000:]
X_train = X_train[:40000]
y_val = y_train[40000:]
y_train = y_train[:40000]

print(X_train.shape)
print(X_test.shape)
print(X_val.shape)

X_train = scaler.fit_transform(X_train.reshape(X_train.shape[0],-1))
X_test = scaler.fit_transform(X_test.reshape(X_test.shape[0],-1))
X_val = scaler.fit_transform(X_val.reshape(X_val.shape[0],-1))
print(X_train.shape)
print(X_test.shape)
print(X_val.shape)
# y_train = y_train[:] / 255.0
# y_test = y_test[:] / 255.0
# y_val = y_val[:] / 255.0

(40000, 32, 32, 3)
(10000, 32, 32, 3)
(10000, 32, 32, 3)
(40000, 3072)
(10000, 3072)
(10000, 3072)


In [115]:
X_val.shape

(10000, 12288)

In [121]:
model = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=[3072])
])

for i in range(20):
    model.add(keras.layers.Dense(100, kernel_initializer="lecun_normal", activation="selu"))

model.add(keras.layers.Dense(10, activation="softmax", kernel_initializer="glorot_normal"))

In [122]:
X_train.shape

(40000, 3072)

In [123]:
model.compile(loss="sparse_categorical_crossentropy", 
              metrics=["accuracy"], 
              optimizer=keras.optimizers.Nadam(learning_rate=0.001))

model.fit(X_train, y_train, epochs=300, validation_data=(X_val, y_val), callbacks=[early_stopping])

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300


<keras.callbacks.History at 0x7f5940159060>

In [130]:
import numpy as np
from sklearn.metrics import accuracy_score
np.argmax(model.predict(X_test), axis=1)
model.predict(X_test[0])
# accuracy_score(y_test, np.argmax(model.predict(X_test)))



TypeError: in user code:

    File "/home/eddie/.local/lib/python3.10/site-packages/keras/engine/training.py", line 2169, in predict_function  *
        return step_function(self, iterator)
    File "/home/eddie/.local/lib/python3.10/site-packages/keras/engine/training.py", line 2155, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/eddie/.local/lib/python3.10/site-packages/keras/engine/training.py", line 2143, in run_step  **
        outputs = model.predict_step(data)
    File "/home/eddie/.local/lib/python3.10/site-packages/keras/engine/training.py", line 2111, in predict_step
        return self(x, training=False)
    File "/home/eddie/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/home/eddie/.local/lib/python3.10/site-packages/keras/activations.py", line 83, in softmax
        if x.shape.rank > 1:

    TypeError: Exception encountered when calling layer 'dense_466' (type Dense).
    
    '>' not supported between instances of 'NoneType' and 'int'
    
    Call arguments received by layer 'dense_466' (type Dense):
      • inputs=tf.Tensor(shape=<unknown>, dtype=float32)


In [5]:
import tensorflow as tf
from tensorflow import keras
tf.config.list_physical_devices()

ImportError: /home/eddie/.local/lib/python3.10/site-packages/tensorflow/python/platform/../_pywrap_tensorflow_internal.so: undefined symbol: cudaGetErrorString

In [13]:
tf.sysconfig.get_build_info()

OrderedDict([('cpu_compiler', '/dt9/usr/bin/gcc'),
             ('cuda_compute_capabilities',
              ['sm_35', 'sm_50', 'sm_60', 'sm_70', 'sm_75', 'compute_80']),
             ('cuda_version', '11.8'),
             ('cudnn_version', '8'),
             ('is_cuda_build', True),
             ('is_rocm_build', False),
             ('is_tensorrt_build', True)])

In [7]:
!pip install nvidia-cudnn-cu11==8.6.0.163

Defaulting to user installation because normal site-packages is not writeable
Collecting nvidia-cudnn-cu11==8.6.0.163
  Downloading nvidia_cudnn_cu11-8.6.0.163-py3-none-manylinux1_x86_64.whl (715.7 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m715.7/715.7 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:02[0m
[?25hCollecting nvidia-cublas-cu11
  Downloading nvidia_cublas_cu11-11.11.3.6-py3-none-manylinux1_x86_64.whl (417.9 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m417.9/417.9 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hInstalling collected packages: nvidia-cublas-cu11, nvidia-cudnn-cu11
Successfully installed nvidia-cublas-cu11-11.11.3.6 nvidia-cudnn-cu11-8.6.0.163
