# Custom A loss function

## Imports

In [116]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
import tensorflow_datasets as tfds

## Define data and its loss function

- inputs: xs
- outputs: ys
- formula: y = 2x - 1

- root mean square error (RMSE) of  (y_true - y_pred)

In [21]:
#inputs
xs = np.array([-1.0,  0.0, 1.0, 2.0, 3.0, 4.0], dtype='float32')

#outputs
ys = np.array(2*xs - 1)

print(ys)

[-3. -1.  1.  3.  5.  7.]


In [22]:
def my_rmse(y_true, y_pred):
    err = y_true - y_pred
    err_sq = K.square(err)
    mean_err_sq = K.mean(err_sq)
    rmse = K.sqrt(mean_err_sq)
    return rmse

## Define a model

In [23]:
model = tf.keras.Sequential([tf.keras.layers.Dense(units=1, input_shape = [1])])
model.compile(optimizer='sgd',
             loss=my_rmse)

In [33]:
model.fit(xs, ys, epochs=500, verbose=0)

<keras.src.callbacks.history.History at 0x2f312a840>

In [34]:
test = np.array([float(10)])

In [35]:
print(model.predict(test))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[[18.97395]]


# Custom a quadratic layer
- build a custom quadratic layer which computes $y = ax^2 + bx + c$.
- plugged into a model that will be trained on the MNIST dataset.
- ## `__init__`
- call `super(my_fun, self)` to access the base class of `my_fun`, and call the `__init__()` function to initialize that base class.  In this case, `my_fun` is `SimpleQuadratic` and its base class is `Layer`.
- self.units: set this using one of the function parameters.
- self.activation: The function parameter `activation` will be passed in as a string.  To get the tensorflow object associated with the string, please use `tf.keras.activations.get()` 


## `build`
The following are suggested steps for writing your code.  If you prefer to use fewer lines to implement it, feel free to do so.  Either way, you'll want to set `self.a`, `self.b` and `self.c`.

- a_init: set this to tensorflow's `random_normal_initializer()`
- a_init_val: Use the `random_normal_initializer()` that you just created and invoke it, setting the `shape` and `dtype`.
    - The `shape` of `a` should have its row dimension equal to the last dimension of `input_shape`, and its column dimension equal to the number of units in the layer.  
    - This is because you'll be matrix multiplying x^2 * a, so the dimensions should be compatible.
    - set the dtype to 'float32'
- self.a: create a tensor using tf.Variable, setting the initial_value and set trainable to True.

- b_init, b_init_val, and self.b: these will be set in the same way that you implemented a_init, a_init_val and self.a
- c_init: set this to `tf.zeros_initializer`.
- c_init_val: Set this by calling the tf.zeros_initializer that you just instantiated, and set the `shape` and `dtype`
  - shape: This will be a vector equal to the number of units.  This expects a tuple, and remember that a tuple `(9,)` includes a comma.
  - dtype: set to 'float32'.
- self.c: create a tensor using tf.Variable, and set the parameters `initial_value` and `trainable`.

## `call`
The following section performs the multiplication x^2*a + x*b + c.  The steps are broken down for clarity, but you can also perform this calculation in fewer lines if you prefer.
- x_squared: use tf.math.square()
- x_squared_times_a: use tf.matmul().  
  - If you see an error saying `InvalidArgumentError: Matrix size-incompatible`, please check the order of the matrix multiplication to make sure that the matrix dimensions line up.
- x_times_b: use tf.matmul().
- x2a_plus_xb_plus_c: add the three terms together.
- activated_x2a_plus_xb_plus_c: apply the class's `activation` to the sum of the three terms.


## Define a quadratic layer

In [76]:
class SimpleQuadratic(tf.keras.layers.Layer):

    def __init__(self, units=32, activation=None):

        super(SimpleQuadratic, self).__init__()
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        a_init = tf.random_normal_initializer()
        a_init_val = a_init(shape=(input_shape[-1], self.units), dtype='float32')
        self.a = tf.Variable(initial_value=a_init_val, trainable=True)

        b_init = tf.random_normal_initializer()
        b_init_val = b_init(shape=(input_shape[-1], self.units), dtype='float32')
        self.b = tf.Variable(initial_value=b_init_val, trainable=True)

        c_init = tf.zeros_initializer()
        c_init_val = c_init(shape=(self.units,), dtype='float32') #注意shape=（x,)
        self.c = tf.Variable(initial_value=c_init_val, trainable=True)

    def call(self, inputs):
        x_square = tf.math.square(inputs)
        x_a = tf.matmul(x_square, self.a)
        x_b = tf.matmul(inputs, self.b)
        x_result = x_a + x_b + self.c
        return self.activation(x_result)

## Try MNIST dataset to apply custom layer

In [77]:
mnist = tf.keras.datasets.mnist

In [78]:
(x_train, y_train),(x_test, y_test) = mnist.load_data()

In [79]:
x_train, x_test = x_train/255., x_test/255.

In [80]:
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(shape=[28,28]),
    tf.keras.layers.Flatten(),
    SimpleQuadratic(units=128, activation='relu'), #Apply custom layer
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')])
    

In [81]:
model.compile(optimizer='adam',
             loss = 'sparse_categorical_crossentropy',
             metrics=['accuracy'])

In [82]:
model.fit(x_train, y_train, epochs=8)

Epoch 1/8
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 323us/step - accuracy: 0.5679 - loss: 1.4259
Epoch 2/8
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 324us/step - accuracy: 0.7958 - loss: 0.6766
Epoch 3/8
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 322us/step - accuracy: 0.8099 - loss: 0.6115
Epoch 4/8
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 333us/step - accuracy: 0.8161 - loss: 0.5895
Epoch 5/8
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 318us/step - accuracy: 0.8203 - loss: 0.5757
Epoch 6/8
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 317us/step - accuracy: 0.8208 - loss: 0.5680
Epoch 7/8
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 317us/step - accuracy: 0.8197 - loss: 0.5678
Epoch 8/8
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 322us/step - accuracy: 0.8191 - loss: 0.5698


<keras.src.callbacks.history.History at 0x33b202690>

In [83]:
model.evaluate(x_test, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 304us/step - accuracy: 0.8497 - loss: 0.4891


[0.42426013946533203, 0.8718000054359436]

# Custom a model

## Create named-variables dynamically

In [84]:
class MyClass:
    def __init__(self):
        self.var1=1

In [85]:
obj1 = MyClass()

In [86]:
obj1.__dict__

{'var1': 1}

In [89]:
obj1.var2 = 2

In [90]:
obj1.__dict__

{'var1': 1, 'var2': 2}

In [91]:
obj1.var3 = 3
obj1.__dict__

{'var1': 1, 'var2': 2, 'var3': 3}

### vars() function 
If you call vars() and pass in an object, it will call the object's __dict__ attribute, which is a Python dictionary containing the object's instance variables and their values as ke

In [92]:
vars(obj1)

{'var1': 1, 'var2': 2, 'var3': 3}

In [93]:
vars(obj1)['var3']=5

In [94]:
vars(obj1)

{'var1': 1, 'var2': 2, 'var3': 5}

In [95]:
for i in range(4,10):
    vars(obj1)[f'var{i}'] = 0
vars(obj1)

{'var1': 1,
 'var2': 2,
 'var3': 5,
 'var4': 0,
 'var5': 0,
 'var6': 0,
 'var7': 0,
 'var8': 0,
 'var9': 0}

## Build a VGG block

### `__init__`
In the constructor `__init__`, store the conv2D parameters and also define the number of conv2D layers using the parameters passed into `__init__`.
- Store the filters, kernel_size, and repetitions as class variables so that they can be used later in the `call` function.
- Using a for loop, define a number of Conv2D [Conv2D](https://keras.io/api/layers/convolution_layers/convolution2d/) layers, based on the number of `repetitions` desired for this block.
    - You can define each conv2D layer using `vars` and string formatting to create conv2D_0, conv2D_1, conv2D_3 etc.
    - Set these four parameters of Conv2D:
        - filters
        - kernel_size
        - activation: set this to 'relu'
        - padding: set this to 'same' (default pading is 'valid').
        
- Define the [MaxPool2D](https://keras.io/api/layers/pooling_layers/max_pooling2d/) layer that follows these Conv2D layers. 
    - Set the following parameters for MaxPool2D:
        - pool_size: this will be a tuple with two values.
        - strides: this will also be a tuple with two values.

### `call`
In `call`, you will connect the layers together.
- The 0-th conv2D layer, `conv2D_0`, immediately follows the `inputs`.
- For conv2D layers 1,2 and onward, you can use a for loop to connect conv2D_1 to conv2D_0, and connect conv2D_2 to conv2D_1, and so on.
- After connecting all of the conv2D_i layers, add connect the max_pool layer and return the max_pool layer.

In [96]:
class Block(tf.keras.Model):
    def __init__(self, filters, kernel_size, repetitions, 
                pool_size=2, strides=2):
        super(Block, self).__init__()
        self.filters = filters
        self.kernel_size = kernel_size
        self.repetitions = repetitions

        for i in range(self.repetitions):
            vars(self)[f'conv2D_{i}'] = tf.keras.layers.Conv2D(filters, kernel_size,
                                                              padding='same',
                                                              activation='relu')
        self.max_pool = tf.keras.layers.MaxPool2D(pool_size, strides)

    def call(self, inputs):
        conv2D_0 = self.conv2D_0
        x = conv2D_0(inputs)
        for i in range(1, self.repetitions):
            conv2D_i = vars(self)[f'conv2D_{i}']
            x = conv2D_i(x)
        max_pool = self.max_pool(x)
        return max_pool

## Build a VGG Network

### `__init__`
- Recall that the `__init__` constructor of `Block` takes several function parameters, 
    - filters, kernel_size, repetitions: you'll set these.
    - kernel_size and strides: you can use the default values.
- For blocks a through e, build the blocks according to the following specifications:
- block_a: 64  filters, kernel_size 3, repetitions 2
- block_b: 128 filters, kernel_size 3, repetitions 2
- block_c: 256 filters, kernel_size 3, repetitions 3
- block_d: 512 filters, kernel_size 3, repetitions 3
- block_e: 512 filters, kernel_size 3, repetitions 3

After block 'e', add the following layers:
- flatten: use [Flatten](https://keras.io/api/layers/reshaping_layers/flatten/).
- fc: create a fully connected layer using [Dense](https://keras.io/api/layers/core_layers/dense/).  Give this 256 units, and a `'relu'` activation.
- classifier: create the classifier using a Dense layer.  The number of units equals the number of classes.  For multi-class classification, use a `'softmax'` activation.

### `call`
Connect these layers together using the functional API syntax:
- inputs
- block_a
- block_b
- block_c
- block_d
- block_e
- flatten
- fc
- classifier

Return the classifier layer.

In [103]:
class MyVGG(tf.keras.models.Model):
    def __init__(self, num_classes):
        super(MyVGG, self).__init__()
        self.block_a = Block(64, 3, 2)
        self.block_b = Block(128, 3, 2)
        self.block_c = Block(256, 3, 3)
        self.block_d = Block(512, 3, 3)
        self.block_e = Block(512, 3, 3)

        self.flatten = tf.keras.layers.Flatten()
        self.fc = tf.keras.layers.Dense(256, activation='relu')
        self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs):
        x = self.block_a(inputs)
        x = self.block_b(x)
        x = self.block_c(x)
        x = self.block_d(x)
        x = self.block_e(x)
        x = self.flatten(x)
        x = self.fc(x)
        x = self.classifier(x)
        return x

In [115]:

# Initialize VGG with the number of classes 
vgg = MyVGG(num_classes=2)

# Compile with losses and metrics
vgg.compile(optimizer='adam', 
            loss='sparse_categorical_crossentropy', 
            metrics=['accuracy'])


In [None]:
# # For reference only. Please do not uncomment in Coursera Labs because it might cause the grader to time out.
# # You can upload your notebook to Colab instead if you want to try the code below.

# # Download the dataset
# dataset = tfds.load('cats_vs_dogs', split=tfds.Split.TRAIN, data_dir='data/')

# # Initialize VGG with the number of classes 
# vgg = MyVGG(num_classes=2)

# # Compile with losses and metrics
# vgg.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# # Define preprocessing function
# def preprocess(features):
#     # Resize and normalize
#     image = tf.image.resize(features['image'], (224, 224))
#     return tf.cast(image, tf.float32) / 255., features['label']

# # Apply transformations to dataset
# dataset = dataset.map(preprocess).batch(32)

# # Train the custom VGG model
# vgg.fit(dataset, epochs=10)