In [None]:
import sys
import tensorflow.keras
import pandas as pd
import sklearn as sk
import scipy as sp
import tensorflow as tf
import keras
import platform

In [None]:
print(f'Python Platform: {platform.platform()}')
print(f'Tensor Flow Version: {tf.__version__}')
print(f'Keras Version: {keras.__version__}')
print()
print(f'Python {sys.version}')
print(f'Pandas {pd.__version__}')
print(f'Scikit-Learn {sk.__version__}')
print(f'Scipy {sp.__version__}')
gpu = len(tf.config.list_physical_devices('GPU'))>0
print('GPU is', 'available' if gpu else 'NOT AVAILABLE')

In [None]:
# Tensorflow will not allocate all memory on GPU if run these instructions (it can raise some weird errors without it)
phypical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(phypical_devices[0], True)

# 1. Tensor Basics

## Initialization of Tensors

In [None]:
a = tf.constant(4, shape=(1,1), dtype=tf.float32)
b = tf.constant([[1,2,3],[4,5,6]])
c = tf.ones((3,3))
d = tf.zeros((2,3))
e = tf.eye(3) # Identity matrix
f = tf.random.normal((3,3), mean=0, stddev=1)
g = tf.random.uniform((1,3), minval=0, maxval=1)
h = tf.range(9)
i = tf.range(start=1, limit=10, delta=2)
j = tf.cast(i, dtype=tf.float64)

In [None]:
j

## Mathematical operations

In [None]:
x = tf.constant([1,2,3])
y = tf.constant([9,8,7])

In [None]:
a = tf.add(x, y)
# z = x + y

In [None]:
b = tf.subtract(x, y)
# b = x - y

In [None]:
c = tf.multiply(x, y)
# c = x * y

In [None]:
d = tf.divide(x, y)
# d = x / y

In [None]:
e = tf.tensordot(x, y, axes=1) # dot product
e = tf.reduce_sum(x*y, axis=0) # dot product

In [None]:
f = x ** 5

In [None]:
x = tf.random.normal((2,3))
y = tf.random.normal((3,4))

In [None]:
g = tf.matmul(x, y)
# g = x @ y

In [None]:
g

## Indexing

In [None]:
a = tf.constant([0, 1, 1, 2, 3, 1, 2, 3])

In [None]:
a[::-1]

In [None]:
indices = tf.constant([0, 3])
a_ind = tf.gather(a, indices)

In [None]:
a_ind

## Reshaping

In [None]:
x = tf.range(9)

In [None]:
x = tf.reshape(x, (3,3))

In [None]:
x

In [None]:
x = tf.transpose(x, perm=[1,0]) # swap axis 0 and 1

In [None]:
x

# 2. Neural Network with Sequential and Functional API

## Imports

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
x_train.shape

In [None]:
x_train = x_train.reshape(-1, 28*28).astype('float32') #/ 255.0 # -1 means keep the value in the dimension (60000 in this case), astype from float64 to float32 just to minimize the computation, also normalize values for faster training (instead of values from 0 to 255 we get values from 0 to 1)
x_test = x_test.reshape(-1, 28*28).astype('float32') #/ 255.0 # -1 means keep the value in the dimension (60000 in this case), astype from float64 to float32 just to minimize the computation, also normalize values for faster training (instead of values from 0 to 255 we get values from 0 to 1)

In [None]:
# this conversion is done automatically, so don't need to do it explicitly
# x_train = tf.convert_to_tensor(x_train)
# x_test = tf.convert_to_tensor(x_test)

## Sequential API (convenient but not flexible, we can only map one input to one output)

In [None]:
model = keras.Sequential(
    [
        keras.Input(shape=(28*28)), # allows to print model summary not after .fit() but before
        layers.Dense(512, activation='relu'), # for a fully connected layer with 512 nodes
        layers.Dense(256, activation='relu'),
        layers.Dense(10), # the output layer with 10 nodes, because every node will represent each digit (output layer without activation function). We will use softmax but later inside the loss function
    ]
)

In [None]:
# # Can be done in different way (it's for more complex NNs if you need to see the the intermediate summary and how the inputs is changed for specific layers, basically it's for debugging)
model = keras.Sequential()
model.add(keras.Input(shape=(28*28)))
# display(model.summary())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(256, activation='relu', name='my_layer'))
model.add(layers.Dense(10))
# display(model.summary())

In [None]:
# Extract specific layer output for debugging
model = keras.Model(inputs=model.inputs,
                    # outputs=[model.layers[-1].output] # -1 represents the output layer (the last one)
                    # outputs=[model.get_layer('my_layer').output]
                    outputs=[layer.output for layer in model.layers] # for all layers
                   ) 

features = model.predict(x_train) # for all layers
# feature = model.predict(x_train)
for feature in features: # for all layers
    display(feature.shape)

In [None]:
model.summary()

In [None]:
# Here we tell keras how to configure the training part of the network
model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True), # ????? because we don't have a softmax activation, so when we set it as True it's going to send it in softmax first and it's going to map it to SparseCategoricalCrossentropy
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001), # ????? 
    metrics=['accuracy']
)

In [None]:
model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)

In [None]:
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

## Functional API (a bit more flexible, can handle multiple inputs and outputs)

In [None]:
inputs = keras.Input(shape=(28*28))
x = layers.Dense(512, activation='relu', name='first_layer')(inputs)
x = layers.Dense(256, activation='relu', name='second_layer')(x)
outputs = layers.Dense(10, activation='softmax')(x)
model = keras.Model(inputs=inputs, outputs=outputs)

model.summary()

In [None]:
# Here we tell keras how to configure the training part of the network
model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=False), # ????? because we don't have a softmax activation, so when we set it as False for activation='softmax' in the previous step
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001), # ????? 
    metrics=['accuracy']
)

In [None]:
model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)

In [None]:
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

## Tests

In [None]:
model = keras.Sequential()
model.add(keras.Input(shape=(28*28)))
# model.add(layers.Dense(1024, activation='relu'))
# model.add(layers.Dense(512, activation='relu'))
# model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
# model.add(layers.Dense(25, activation='relu'))
model.add(layers.Dense(10))

In [None]:
model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001),
    # optimizer=keras.optimizers.legacy.SGD(momentum=False, learning_rate=0.01),
    # optimizer=keras.optimizers.legacy.Adagrad(learning_rate=0.01),
    # optimizer=keras.optimizers.legacy.RMSprop(learning_rate=0.01),
    metrics=['accuracy']
)

In [None]:
model.fit(x_train, y_train, batch_size=32, epochs=10, verbose=2)

In [None]:
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

# 3. Convolutional Neural Network with Sequential and Functional API and Regularization

## Imports

In [None]:
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras import layers, regularizers
from tensorflow.keras.datasets import cifar10

In [None]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [None]:
x_train.shape

In [None]:
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

In [None]:
model = keras.Sequential(
    [
        keras.Input(shape=(32, 32, 3)), # for CNN we don't need to flatten it at the beginning, 3 because of 3 colored channels (RGB)
        layers.Conv2D(32, 3, padding='valid', activation='relu'), # ????? how many channels we want this convolutional layer to output, (3, 3) can be instead od 3
        layers.MaxPooling2D(pool_size=(2, 2)), # the half of the input
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(128, 3, activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10)
    ]
)

In [None]:
model.summary()

In [None]:
def my_model():
    inputs = keras.Input(shape=(32, 32, 3))
    x = layers.Conv2D(
        32, 3, padding='same', kernel_regularizer=regularizers.l2(0.01)
    )(inputs) # we don't use activation function here just because if we use batch norm we want to send it through the convolutional level first and then throug the activation function
    x = layers.BatchNormalization()(x)
    x = keras.activations.relu(x)
    x = layers.MaxPooling2D()(x) # pool_size=(2, 2) is the default value
    x = layers.Conv2D(
        64, 3, padding='same', kernel_regularizer=regularizers.l2(0.01)
    )(x)
    x = layers.BatchNormalization()(x)
    x = keras.activations.relu(x)
    x = layers.Conv2D(
        128, 3, padding='same', kernel_regularizer=regularizers.l2(0.01)
    )(x)
    x = layers.BatchNormalization()(x)
    x = keras.activations.relu(x)
    x = layers.Flatten()(x)
    x = layers.Dense(
        64, activation='relu', kernel_regularizer=regularizers.l2(0.01)
    )(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(10)(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

In [None]:
model = my_model()

In [None]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=3e-4),
    metrics=['accuracy']
)

In [None]:
model.fit(x_train, y_train, batch_size=64, epochs=150, verbose=2)

In [None]:
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

# 4. RNN, GRUs, LSTMs and Bidirectionality

## Imports

In [None]:
import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

## RNN

In [None]:
tf.keras.backend.clear_session()
model = keras.Sequential()
model.add(keras.Input(shape=(None, 28))) # None because we don't have a specific number of time steps
model.add(
    layers.SimpleRNN(128, return_sequences=True, activation='tanh')
)
model.add(layers.SimpleRNN(128, activation='tanh'))
model.add(layers.Dense(10))

In [None]:
model.summary()

In [None]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001),
    metrics=['accuracy']
)

In [None]:
model.fit(x_train, y_train, batch_size=64, epochs=2, verbose=1)

In [None]:
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

## GRU

In [None]:
# it should perform better than simple RNN

In [None]:
tf.keras.backend.clear_session()
model = keras.Sequential()
model.add(keras.Input(shape=(None, 28))) # None because we don't have a specific number of time steps
model.add(
    layers.GRU(256, return_sequences=True, activation='tanh')
)
model.add(layers.GRU(256, activation='tanh'))
model.add(layers.Dense(10))

In [None]:
model.summary()

In [None]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001),
    metrics=['accuracy']
)

In [None]:
model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=1)

In [None]:
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

## LSTM

In [None]:
# it should perform better than GRU

In [None]:
tf.keras.backend.clear_session()
model = keras.Sequential()
model.add(keras.Input(shape=(None, 28))) # None because we don't have a specific number of time steps
model.add(
    layers.LSTM(256, return_sequences=True, activation='tanh')
)
model.add(layers.LSTM(256, activation='tanh'))
model.add(layers.Dense(10))

In [None]:
model.summary()

In [None]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001),
    metrics=['accuracy']
)

In [None]:
model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=1)

In [None]:
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

## Bidirectional LSTM

In [None]:
# it should perform better than GRU

In [None]:
tf.keras.backend.clear_session()
model = keras.Sequential()
model.add(keras.Input(shape=(None, 28))) # None because we don't have a specific number of time steps
model.add(
    layers.Bidirectional(
        layers.LSTM(256, return_sequences=True, activation='tanh')
    )
)
model.add(
    layers.Bidirectional(
        layers.LSTM(256, activation='tanh')
    )
)
model.add(layers.Dense(10))

In [None]:
model.summary()

In [None]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001),
    metrics=['accuracy']
)

In [None]:
model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=1)

In [None]:
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

# 5. More in Depth Example on Functional API

## Imports

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.datasets import mnist

import pandas as pd
import os

In [None]:
# HYPERPARAMETERS
BATCH_SIZE = 64
WEIGHT_DECAY = 0.001
LEARNING_RATE = 0.001

# Make sure we don't get any GPU errors
physical_devices = tf.config.list_physical_devices("GPU")
tf.config.experimental.set_memory_growth(physical_devices[0], True)

train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
train_images = os.getcwd() + "/train_images/" + train_df.iloc[:, 0].values
test_images = os.getcwd() + "/test_images/" + test_df.iloc[:, 0].values

train_labels = train_df.iloc[:, 1:].values
test_labels = test_df.iloc[:, 1:].values


def read_image(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_image(image, channels=1, dtype=tf.float32)

    # In older versions you need to set shape in order to avoid error
    # on newer (2.3.0+) the following 3 lines can safely be removed
    image.set_shape((64, 64, 1))
    label[0].set_shape([])
    label[1].set_shape([])

    labels = {"first_num": label[0], "second_num": label[1]}
    return image, labels


AUTOTUNE = tf.data.experimental.AUTOTUNE
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_dataset = (
    train_dataset.shuffle(buffer_size=len(train_labels))
    .map(read_image)
    .batch(batch_size=BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_dataset = (
    test_dataset.map(read_image)
    .batch(batch_size=BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

In [None]:
inputs = keras.Input(shape=(64, 64, 1)) # 1 because it's only 1 greyscale (3 for RGB for example)
x = layers.Conv2D(
    filters=32,
    kernel_size=3,
    padding='same',
    kernel_regularizer=regularizers.l2(WEIGHT_DECAY)
)(inputs)
x = layers.BatchNormalization()(x)
x = keras.activations.relu(x)
x = layers.Conv2D(
    filters=64, 
    kernel_size=3, 
    kernel_regularizer=regularizers.l2(WEIGHT_DECAY)
)(x)
x = layers.BatchNormalization()(x)
x = keras.activations.relu(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(
    filters=64, 
    kernel_size=3, 
    kernel_regularizer=regularizers.l2(WEIGHT_DECAY)
)(x)
x = layers.Conv2D(
    filters=128, 
    kernel_size=3, 
    kernel_regularizer=regularizers.l2(WEIGHT_DECAY)
)(x)
x = layers.MaxPooling2D()(x)
x = layers.Flatten()(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(64, activation='relu')(x)
# this is where functional API becomes useful (the previous code can be sequential and the last 2 outputs can be functional, we can combine it)
output1 = layers.Dense(10, activation='softmax', name='first_num')(x)
output2 = layers.Dense(10, activation='softmax', name='second_num')(x)
model = keras.Model(inputs=inputs, outputs=[output1, output2])

In [None]:
model.summary()

In [None]:
model.compile(
    loss=
    # [
        keras.losses.SparseCategoricalCrossentropy(), # if comments it will automatically extend this function to 2 outputs
        # keras.losses.SparseCategoricalCrossentropy()
    # ],
    optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE),
    metrics=['accuracy']
)

In [None]:
model.fit(train_dataset, epochs=5, verbose=2)

In [None]:
model.evaluate(test_dataset, verbose=2)

# 6. Model Subclassing

## Imports

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [None]:
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

In [None]:
class CNNBlock(layers.Layer):
    def __init__(self, out_channels, kernel_size=3):
        super(CNNBlock, self).__init__() # super to run parent class layers.Layer
        self.conv = layers.Conv2D(out_channels, kernel_size, padding='same')
        self.bn = layers.BatchNormalization()
        
    def call(self, input_tensor, training=False):
        x = self.conv(input_tensor)
        # print(x,shape)
        x = self.bn(x, training=training)
        x = tf.nn.relu(x)
        return x

In [None]:
model = keras.Sequential(
    [
        CNNBlock(32),
        CNNBlock(64),
        CNNBlock(128),
        layers.Flatten(),
        layers.Dense(10),
    ]
)

In [None]:
# Kind of resnet
class ResBlock(layers.Layer):
    def __init__(self, channels):
        super(ResBlock, self).__init__()
        self.cnn1 = CNNBlock(channels[0])
        self.cnn2 = CNNBlock(channels[1])
        self.cnn3 = CNNBlock(channels[2])
        self.pooling = layers.MaxPooling2D()
        self.identity_mapping = layers.Conv2D(channels[1], 1, padding='same')
        
    def call(self, input_tensor, training=False):
        x = self.cnn1(input_tensor, training=training)
        x = self.cnn2(x, training=training)
        x = self.cnn3(x + self.identity_mapping(input_tensor), training=training)
        return self.pooling(x)

In [None]:
class ResNet_Like(keras.Model):
    def __init__(self, num_classes=10):
        super(ResNet_Like, self).__init__()
        self.block1 = ResBlock([32, 32, 64])
        self.block2 = ResBlock([128, 128, 256])
        self.block3 = ResBlock([128, 256, 512])
        self.pool = layers.GlobalAveragePooling2D()
        self.classifier = layers.Dense(num_classes)
        
    def call(self, input_tensor, training=False):
        x = self.block1(input_tensor, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)
        x = self.pool(x)
        return self.classifier(x)
    
    def model(self):
        x = keras.Input(shape=(28, 28, 1))
        return keras.Model(inputs=[x], outputs=self.call(x))

In [None]:
model = ResNet_Like(num_classes=10)

In [None]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001),
    metrics=['accuracy']
)

In [None]:
model.fit(x_train, y_train, batch_size=64, epochs=1, verbose=2)

In [None]:
model.model().summary()

In [None]:
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

# 7. Custom Layers

## Imports

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [None]:
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28 * 28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28 * 28).astype('float32') / 255.0

In [None]:
class Dense(layers.Layer):
    def __init__(self, units, input_dim):
        super(Dense, self).__init__()
        self.w = self.add_weight(
            name='w',
            shape=(input_dim, units),
            initializer='random_normal',
            trainable=True
        )
        
        self.b = self.add_weight(
            name='b',
            shape=(units, ),
            initializer='zeros',
            trainable=True
        )
    
    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

class MyModel(keras.Model):
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__()
        self.dense1 = Dense(64, 784)
        self.dense2 = Dense(num_classes, 64)
        # self.dense1 = layers.Dense(64)
        # self.dense2 = layers.Dense(num_classes)
        
    def call(self, input_tensor):
        x = tf.nn.relu(self.dense1(input_tensor))
        return self.dense2(x)

In [None]:
class Dense(layers.Layer):
    def __init__(self, units): #, input_dim
        super(Dense, self).__init__()
        self.units = units
        
    def build(self, input_shape):
        self.w = self.add_weight(
            name='w',
            shape=(input_shape[-1], self.units),
            initializer='random_normal',
            trainable=True
        )
        
        self.b = self.add_weight(
            name='b',
            shape=(self.units, ),
            initializer='zeros',
            trainable=True
        )
    
    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

class MyModel(keras.Model):
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__()
        self.dense1 = Dense(64) #, 784
        self.dense2 = Dense(num_classes) #, 64
        # self.dense1 = layers.Dense(64)
        # self.dense2 = layers.Dense(num_classes)
        
    def call(self, input_tensor):
        x = tf.nn.relu(self.dense1(input_tensor))
        return self.dense2(x)

In [None]:
class MyReLU(layers.Layer):
    def __init(self):
        super(MyReLU, self).__init__()
        
    def call(self, x):
        return tf.math.maximum(x, 0)

In [None]:
class Dense(layers.Layer):
    def __init__(self, units): #, input_dim
        super(Dense, self).__init__()
        self.units = units
        
    def build(self, input_shape):
        self.w = self.add_weight(
            name='w',
            shape=(input_shape[-1], self.units),
            initializer='random_normal',
            trainable=True
        )
        
        self.b = self.add_weight(
            name='b',
            shape=(self.units, ),
            initializer='zeros',
            trainable=True
        )
    
    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

class MyModel(keras.Model):
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__()
        self.dense1 = Dense(64) #, 784
        self.dense2 = Dense(num_classes) #, 64
        self.relu = MyReLU()
        # self.dense1 = layers.Dense(64)
        # self.dense2 = layers.Dense(num_classes)
        
    def call(self, input_tensor):
        # x = tf.nn.relu(self.dense1(input_tensor))
        x = self.relu(self.dense1(input_tensor))
        return self.dense2(x)

In [None]:
model = MyModel()
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001),
    metrics=['accuracy']
)

In [None]:
model.fit(x_train, y_train, batch_size=32, epochs=10, verbose=2)

In [None]:
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

# 8. Saving and Loading Models

## Imports

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [None]:
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28 * 28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28 * 28).astype('float32') / 255.0

## Sequential

In [None]:
model1 = keras.Sequential(
    [
        layers.Dense(64, activation='relu'),
        layers.Dense(10)
    ]
)

## Functional

In [None]:
inputs = layers.Input(shape=(28, 28))
x = layers.Dense(64, activation='relu')(inputs)
outputs = layers.Dense(10)(x)
model2 = keras.Model(inputs=inputs, outputs=outputs)

## Subclasses

In [None]:
class MyModel(keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.dense1 = layers.Dense(64, activation='relu')
        self.dense2 = layers.Dense(10)
        
    def call(self, input_tensor):
        x = tf.nn.relu(self.dense1(input_tensor))
        return self.dense2(x)

## Realization

In [None]:
model3 = MyModel()

In [None]:
model = model1

In [None]:
# model.load_weights('saved_model/model.weights.h5')

In [None]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy']
)

In [None]:
# model = keras.models.load_model('complete_saved_model/model.h5')

In [None]:
model.fit(x_train, y_train, batch_size=32, epochs=2, verbose=2)

In [None]:
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

In [None]:
model.save_weights('saved_model/model.weights.h5')

In [None]:
model.save('complete_saved_model/model.h5')

# 9. Transfer Learning, Fine Tuning and TensorFlow Hub

## Imports

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [None]:
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

## Pretrained Model

In [None]:
model = keras.models.load_model('complete_saved_model/model.h5')
#model.trainable = False # it freezes all of the layers

In [None]:
for layer in model.layers:
    assert layer.trainable == False
    layer.trainable = False

In [None]:
model.summary()

In [None]:
base_inputs = model.layers[0].input
base_outputs = model.layers[-2].output

In [None]:
final_outputs = layers.Dense(10)(base_outputs)

In [None]:
new_model = keras.Model(inputs=base_inputs, outputs=final_outputs)

In [None]:
new_model.summary()

In [None]:
new_model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy']
)

In [None]:
new_model.fit(x_train, y_train, batch_size=32, epochs=2, verbose=2)

In [None]:
new_model.evaluate(x_test, y_test, batch_size=32, verbose=2)

## Pretained Keras Model

In [None]:
x = tf.random.normal(shape=(5, 299, 299, 3))
y = tf.constant([0, 1, 2, 3, 4])

model = keras.applications.InceptionV3(include_top=True)
base_inputs = model.layers[0].input
base_output = model.layers[-2].output
final_outputs = layers.Dense(5)(base_outputs)
new_model = keras.Model(inputs=base_inputs, outputs=final_outputs)

In [None]:
new_model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy']
)

In [None]:
new_model.fit(x_train, y_train, batch_size=32, epochs=15, verbose=2)

In [None]:
new_model.evaluate(x_test, y_test, batch_size=32, verbose=2)

## Pretrained Hub Model

In [None]:
# import tensorflow_hub as hub

In [None]:
# x = tf.random.normal(shape=(5, 299, 299, 3))
# y = tf.constant([0, 1, 2, 3, 4])

In [None]:
# url = 'https:/// ... '

In [None]:
# base_model = hub.KerasLayer(url, input_shape=(299, 299, 3))
# best_model.trainable = False

In [None]:
# model = keras.Sequential(
#     [
#         base_model,
#         layers.Dense(128, activation='relu'),
#         layers.Dense(64, activation='relu'),
#         layers.Dense(5)
#     ]
# )

In [None]:
# new_model.compile(
#     loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#     optimizer=keras.optimizers.Adam(),
#     metrics=['accuracy']
# )

In [None]:
# new_model.fit(x_train, y_train, batch_size=32, epochs=15, verbose=2)

# 10. TensorFlow Datasets

# 11. Data Augmentation

## Imports

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds

In [None]:
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [None]:
(ds_train, ds_test), ds_info = tfds.load(
    'cifar10',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True, # will return tuple (img, label) otherwise dict
    with_info=True # able to get info about dataset
)

In [None]:
def normalize_img(image, label):
    """Normalize imgages"""
    return tf.cast(image, tf.float32) / 255.0, label

In [None]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 32

In [None]:
# its done in paralell while model is training
def augment(image, label):
    new_height = new_width = 32
    image = tf.image.resize(image, (new_height, new_width))

    if tf.random.uniform((), minval=0, maxval=1) < 0.1:
        image = tf.tile(tf.image.rgb_to_grayscale(image), [1, 1, 3])

    image = tf.image.random_brightness(image, max_delta=0.1)
    image = tf.image.random_contrast(image, lower=0.1, upper=0.2)

    image = tf.image.random_flip_left_right(image) # 50% randomly flip it
    # image = tf.image.random_flip_up_down(image) # 50% randomly flip it

    return image, label

In [None]:
# Setup for train dataset
ds_train = ds_train.map(normalize_img, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
# ds_train = ds_train.map(augment, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(AUTOTUNE)

In [None]:
# Setup for test dataset
ds_test = ds_train.map(normalize_img, num_parallel_calls=AUTOTUNE)
ds_test = ds_train.batch(BATCH_SIZE)
ds_test = ds_train.prefetch(AUTOTUNE)

In [None]:
# TF >= 2.3.0
# data_augmentation = keras.Sequential(
#     [
#         layers.experimental.preprocessing.Resizing(height=32, width=32),
#         layers.experimental.preprocessing.RandomFlip(mode='horizontal'),
#         layers.experimental.preprocessing.RandomContrast(factor=0.1)
#     ]
# )

model = keras.Sequential(
    [
        keras.Input((32, 32, 3)),
        layers.Conv2D(4, 3, padding='same', activation='relu'),
        layers.Conv2D(8, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(16, 3, padding='same', activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10)
    ]
)

# 12. Callback with Keras and Custom Callbacks

In [None]:
# Callback it is the way to customize the behaviour of the model during either training or evaluation

## Imports

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds

In [None]:
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

## Data

In [None]:
(ds_train, ds_test), ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True
)

In [None]:
def normalize_img(image, label):
    return tf.cast(image, tf.float32) / 255.0, label

In [None]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 128

In [None]:
ds_train = ds_train.map(normalize_img, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(AUTOTUNE)

## Model

In [None]:
model = keras.Sequential(
    [
        keras.Input((28, 28, 1)),
        layers.Conv2D(32, 3, activation='relu'),
        layers.Flatten(),
        layers.Dense(10)
    ]
)

In [None]:
# You can save model after training, but for that you need to save it after all of all epochs
# What if you want to save it during training
# You want to save the best model so far

In [None]:
save_callback = keras.callbacks.ModelCheckpoint(
    'checkpoint/model.weights.h5', 
    save_weights_only=True, 
    monitor='accuracy',
    save_best_only=False
)

In [None]:
# Imagine that we want to change learning rate during epoch progresses

In [None]:
def scheduler(epoch, lr):
    if epoch < 2:
        return lr
    else:
        return lr * 0.99 # decreasing by 1% every epoch

In [None]:
lr_scheduler = keras.callbacks.LearningRateScheduler(scheduler, verbose=1)

In [None]:
class CustomCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if logs.get('accuracy') > 0.90:
            print('Accuracy over 90%, quitting training')
            self.model.stop_training = True

In [None]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [None]:
model.fit(ds_train, epochs=10, verbose=2, callbacks=[save_callback, lr_scheduler, CustomCallback()])

# 13. Customizing Model.fit()

## Imports

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [2]:
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

## Data

In [3]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

## Model

In [4]:
model = keras.Sequential(
    [
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(64, (3, 3), padding="same"),
        layers.ReLU(),
        layers.Conv2D(128, (3, 3), padding="same"),
        layers.ReLU(),
        layers.Flatten(),
        layers.Dense(10),
    ],
    name="model",
)


2024-04-13 13:02:14.928964: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-04-13 13:02:14.928985: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-04-13 13:02:14.928991: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-04-13 13:02:14.929007: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-04-13 13:02:14.929016: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [5]:
class CustomFit(keras.Model):
    def __init__(self, model):
        super(CustomFit, self).__init__()
        self.model = model

    def compile(self, optimizer, loss):
        super(CustomFit, self).compile()
        self.optimizer = optimizer
        self.loss = loss

    def train_step(self, data):
        x, y = data

        with tf.GradientTape() as tape:
            y_pred = self.model(x, training=True)
            loss = self.loss(y, y_pred)
            # loss = self.compiled_loss(y, y_pred)

        training_vars = self.trainable_variables
        gradients = tape.gradient(loss, training_vars)

        self.optimizer.apply_gradients(zip(gradients, training_vars))
        acc_metric.update_state(y, y_pred)
        # self.compiled_metrics.update_state(y, y_pred)

        return {'loss': loss, 'accuracy': acc_metric.result()}
        # return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        x, y = data

        y_pred = self.model(x, training=False)
        loss = self.loss(y, y_pred)
        acc_metric.update_state(y, y_pred)

        return {'loss': loss, 'accuracy': acc_metric.result()}

In [6]:
acc_metric = keras.metrics.SparseCategoricalAccuracy(name='accuracy')

In [7]:
training = CustomFit(model)

In [8]:
training.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer = keras.optimizers.Adam(),
    # metrics=['accuracy']
)

In [9]:
training.fit(x_train, y_train, batch_size=32, epochs=2)

Epoch 1/2


2024-04-13 13:02:19.238541: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 19ms/step - accuracy: 0.9376 - loss: 0.1071
Epoch 2/2
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 19ms/step - accuracy: 0.9742 - loss: 0.0409


<keras.src.callbacks.history.History at 0x168f76d50>

In [11]:
training.evaluate(x_test, y_test, batch_size=32)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9780 - loss: 0.0438


0.0

# 14. Custom training loops

## Imports

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds

## Data

In [2]:
(ds_train, ds_test), ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True
)

2024-04-13 16:48:15.507298: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-04-13 16:48:15.507320: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-04-13 16:48:15.507325: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-04-13 16:48:15.507339: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-04-13 16:48:15.507348: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
def normalize_img(image, label):
    return tf.cast(image, tf.float32) / 255.0, label

In [4]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 128

In [5]:
ds_train = ds_train.map(normalize_img, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(AUTOTUNE)

In [14]:
ds_test = ds_train.map(normalize_img, num_parallel_calls=AUTOTUNE)
ds_test = ds_train.batch(BATCH_SIZE)
ds_test = ds_train.prefetch(AUTOTUNE)

## Model

In [6]:
model = keras.Sequential(
    [
        keras.Input((28, 28, 1)),
        layers.Conv2D(32, 3, activation='relu'),
        layers.Flatten(),
        layers.Dense(10, activation='softmax')
    ]
)

In [8]:
num_epochs = 5
optimizer = keras.optimizers.Adam()
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
acc_metric = keras.metrics.SparseCategoricalAccuracy()

In [12]:
# Training loop
for epoch in range(num_epochs):
    print(f'\nStart of Training Epoch {epoch}')
    for batch_idx, (x_batch, y_batch) in enumerate(ds_train):
        with tf.GradientTape() as tape:
            y_pred = model(x_batch, training=True)
            loss = loss_fn(y_batch, y_pred)

        gradients = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(gradients, model.trainable_weights))
        acc_metric.update_state(y_batch, y_pred)

    train_acc = acc_metric.result()
    print(f'Accuracy over epoch {train_acc}')
    acc_metric.reset_state()


Start of Training Epoch 0


  output, from_logits = _get_logits(
2024-04-13 16:52:27.714671: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Accuracy over epoch 0.9503417015075684

Start of Training Epoch 1


2024-04-13 16:52:42.298148: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Accuracy over epoch 0.9812000393867493

Start of Training Epoch 2


2024-04-13 16:52:57.358792: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Accuracy over epoch 0.985366702079773

Start of Training Epoch 3


2024-04-13 16:53:12.284693: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Accuracy over epoch 0.9875500202178955

Start of Training Epoch 4
Accuracy over epoch 0.9899500012397766


2024-04-13 16:53:26.707469: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [15]:
# Test loop
for batch_idx, (x_batch, y_batch) in enumerate(ds_test):
    y_pred = model(x_batch, training=True)
    acc_metric.update_state(y_batch, y_pred)

train_acc = acc_metric.result()
print(f'Accuracy over Test Set {train_acc}')
acc_metric.reset_state()

Accuracy over Test Set 0.9919999837875366


2024-04-13 16:57:43.991605: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


# 15. Complete TensorBoard Guide

# 16. Custom Dataset For Images

# 17. Custom Dataset for text

# 18. Classifying Skin Cancer

# The End

It was a quick highlevel TensorFlow overview. It is a highlevel part of my knowledge about Neural Networks implementaion. I also have knowledge of how things work under the hood