In [1]:
# Huber loss
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
def huber_fn(y_true, y_pred):
    error = y_true-y_pred
    is_small_error = tf.abs(error)<1
    squared_loss = tf.square(error)/2
    linear_loss = tf.abs(error) - 0.5
    return tf.where(is_small_error, squared_loss, linear_loss)
# where( a, b, c)は aのTrueに対応する要素でb
#                   aのFalseに対応する要素でcを代入して新しいtensor.arrayを返す

In [22]:
# compile the model with huber_loss
model.compile(loss=huber_fn, optimizer="nadam")
model.fit(X_train, y_train, [...])


In [23]:
# How to save and load models containing custom components
# load custom_objects = {"xx":...}

# model = keras.models.load_model("my_model_with_a_custom_loss.h5",
#                                  custom_objects={"huber_fn": huber_fn})

# change the threshold
def create_huber(threshold=1.0):
    def huber_fn(y_true, y_pred):
        error = y_true-y_pred
        is_small_error = tf.abs(error)< threshold
        squared_loss = tf.square(error)/2
        linear_loss = threshold * tf.abs(error) - threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)
    return huber_fn
# threshold is not saved
# model = keras.models.load_model("my_model_with_a_custom_loss.h5",
#                                  custom=objects={"huber_fn":create_huber(2.0)})

In [26]:
create_huber(threshold=2.0)(y_true, y_pred)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.5, 1.5],
       [0.5, 0.5]], dtype=float32)>

In [None]:
class HuberLoss(keras.losses.loss):
    def __init__(self, threshold=1.0, **kwargs):
        self.threshold = threshold
        super().__init__(**kwargs)
    def call(self, y_true, y_pred):
         error = y_true-y_pred
        is_small_error = tf.abs(error)< self.threshold
        squared_loss = tf.square(error)/2
        linear_loss = self.threshold *tf.abs(error)-self.threshold**2/2
    def get_config(self):
        base_config = super.get_config()
        return {**base_config, "threshold":self.threshold}
        
# explanation of this code 
# P497-498

In [29]:
# example of kwargs
# **kwargs →assuming dictionary typed input
def print_person_info(**kwargs):
    for key, value in kwargs.items():
        print(key + ": " + str(value))

print_person_info(name="Alice", age=25, city="New York")

name: Alice
age: 25
city: New York


In [None]:
# compile the model
model.compile(loss=HuberLoss(2.), optimizer="nadam")
model = keras.models.load_models("my_model_with_a_custom_loss_class.h5",
                                  custom_objects={"HuberLoss": HuberLoss})

In [39]:
# an example of a custom activation function
#  a custom glorot_inirializer
#  a custom l1 regularizer
#  a custom constraint that ensures weights to be all positive
def my_softplus(z): # return value is tf.nn.softplus(z)
    return tf.math.log(tf.exp(z)+1.0)
def my_glorot_initializer(shape, dtype=tf.float32):
    stddev = tf.sqrt(2. /(shape[0]+shape[1]))
    return tf.random.normal(shape, stddev=stddev, dtype=dtype)
def my_l1_regularizer(weights):
    return tf.reduce_sum(tf.abs(0.01 * weights))
def my_positive_weights(weights): # return value is just tf.nn.relu(weights)
    return tf.where(weights< 0., tf.zeros_like(weights), weights)
    

In [32]:
layer = keras.layers.Dense(30, activation=my_softplus,
                           kernel_initializer= my_glorot_initializer,
                           kernel_regularizer = my_l1_regularizer,
                           kernel_constraint = my_positive_weights)
# if hyperparameters have to be saved, subclass should be used

class MyL1Regularizer(keras.regularizers.Regularizer):
    def __init__(self, factor):
        self.factor = factor
    def __call__(self, weights):
        return tf.reduce_sum(tf.abs(self.factor * weights))
    def get_config(self):
        return {"factor": self.factor}
# __call__ メソッドはclassを二回呼び出すと自動的に呼び出さる
# to use is 
# kernel_regularizer=MyL1Regularizer(factor=0.01)

In [38]:
# example:
class Adder:
    def __init__(self, num):
        self.num = num

    def __call__(self, x):
        return self.num + x

# クラスのインスタンスを関数として呼び出す
adder = Adder(5)
result = adder(3)
num=3
print(adder)
print(Adder(num)(2))

<__main__.Adder object at 0x000001DB68CFA550>
5


In [33]:
# examples for reduce_sum
x = tf.constant([[1, 2, 3],
                 [4, 5, 6]])

# 全ての要素の合計
total_sum = tf.reduce_sum(x)

# 列方向（軸=0）に要素の合計を計算
column_sum = tf.reduce_sum(x, axis=0)

# 行方向（軸=1）に要素の合計を計算
row_sum = tf.reduce_sum(x, axis=1)

# 結果の表示
print(total_sum)
print(column_sum)
print(row_sum)

tf.Tensor(21, shape=(), dtype=int32)
tf.Tensor([5 7 9], shape=(3,), dtype=int32)
tf.Tensor([ 6 15], shape=(2,), dtype=int32)


In [39]:
# Custom Metrics
# metrics：評価関数、微分不可能でもよく、モデルを評価する際だけ使う
# custom losses can be directly used as metrics
# example:
model.compile(loss="mse", optimizer="nadam", metrics=[create_huber(2.0)])

NameError: name 'model' is not defined

In [52]:
# for binary classification, the accuracy should be operated by:
precision = keras.metrics.Precision()
precision([0,1,1,1,0,1,0,1],[1,1,0,1,0,1,0,1])
# ([labels],[predictions])
# 4/5 = 0.8

<tf.Tensor: shape=(), dtype=float32, numpy=0.8>

In [53]:
precision([0, 1, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 0, 0, 0, 0])
# 4/(5+3) = 0.5
# batches are accumulated in precision

<tf.Tensor: shape=(), dtype=float32, numpy=0.5>

In [54]:
precision.result() # current value of the metric

<tf.Tensor: shape=(), dtype=float32, numpy=0.5>

In [57]:
precision.variables # tracking the number of true and false positives

[<tf.Variable 'true_positives:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>,
 <tf.Variable 'false_positives:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]

In [56]:
precision.reset_states() # reset both variables to 0

In [58]:
# subclass from keras.metrics.Metric class
def create_huber(threshold=1.0):
    def huber_fn(y_true, y_pred):
        error = y_true-y_pred
        is_small_error = tf.abs(error)< threshold
        squared_loss = tf.square(error)/2
        linear_loss = threshold * tf.abs(error) - threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)
    return huber_fn
# add_weight is from keras.metrics.Metric
# 
class HuberMetric(keras.metrics.Metric):
    def __init__(self, threshold=1.0, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold
        self.huber_fn = create_huber(theshold)
        self.total = self.add_weight("total", initializer="zeros")
        self.count=self.add_weight("count",initializer="zeros")
    def update_state(self, y_true, y_pred, sample_weight=None):
        metric = self.huber_fn(y_true, y_pred)
        self.total.assign_add(tf.reduce_sum(metric))
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))
    def result(self):
        return self.total/self.count
    def get_config(self):
        base_config = super.get_config()
        return {**base_config, "threshold":self.threshold}
# the sum of all huber losses(total) and the number of instances seen so far(count)


In [59]:
# Custom Layers
exponential_layer = keras.layers.Lambda(lambda x: tf.exp(x))

In [65]:
# example of tf.exp()
uu = tf.Variable([3,3,3], dtype=tf.float32)
result = tf.exp(uu)

[20.085537 20.085537 20.085537]


In [66]:
# this exponential_layer could be used a an layer of exponential activation function
# equivalent to
# activation=tf.exp, activation = keras.activations.exponential, or activation="exponential"

# to build a custom stateful layer with weights, use subclassing
class MyDense(keras.layers.Layer):
    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units=units
        self.activation = keras.activations.get(activation)
    def build(self, batch_input_shape):
        self.kernel=self.add_weight(name="kernel",
                                    shape=[batch_input_shape[-1], self.units],
                                    initializer="glorot_normal")
        self.bias = self.add_weight(name="bias",
                                    shape=[self.units],
                                    initializer="zeros")
        super().build(batch_input_shape) # must be at the end
    def call(self, X):
        return self.activation(X @ self.kernel +self.bias)
    def compute_output_shape(self, batch_input_shape):
        return tf.TensorShape(batch_input_shape.as_list()[:-1]+[self.units])
    def get_config(self):
        base_config = super.get_config()
        return {**base_config, "units":self.units,
                 "activation":keras.activations.serialize(self.activation)}

In [67]:
# build multiple inputs layer
# the call() method should return the list of outputs
# compute_output_shape() should return the list of batch output shapes
class MyMultiLayer(keras.layers.Layer):
    def call(self, X):
        X1, X2 = X
        return [X1 + X2, X1 * X2, X1 / X2]
    def compute_output_shape(self, batch_input_shape):
        b1, b2 = batch_input_shape
        return [b1, b1, b1] # should probably handle broadcasting rules
# this layer takes two inputs and returns three outputs

In [68]:
# to provide different behaviors during training and during testing,
# adding a training argument to the call() methos and use this argument to decide
# what to do
class MyGaussianNoise(keras.layers.Layer):
    def __init__(self, stddev, **kwargs):
        super().__init__(**kwargs)
        self.stddev = stddev
    def call(self, X, training=None):
        if training:
            noise = tf.random.normal(tf.shape(X),stddev = self.stddev)
            return X +noise
        else:
            return X
    def compute_output_shape(self, batch_input_shape):
        return batch_input_shape

In [69]:
#Custom models
# P508
# build a ResidualBlockLayer
class ResidualBlock(keras.layers.Layer):
    def __init__(self, n_layers, n_neurons, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(n_neurons, activation="elu",
                       kernel_initializer="he_normal" )for _ in range(n_layers)]
    def call(self, inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)  # this calculate the output (when inputs=Z )
        return inputs+Z  # add outputs of inputs and inputs together

In [None]:
class ResidualRegressor(keras.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden1 = keras.layers.Dense(30, activation="elu",
                                          kernel_initializer="he_normal")
        self.block1 = ResidualBlock(2, 30)
        self.block2 = ResidualBlock(2, 30)
        self.out = keras.layers.Dense(output_dim)
    def call(self, inputs):
        Z = self.hidden1(inputs)
        for _ in range(1+3):    #最初のResidualBlockで3回繰り返す構造を作りたい
            Z = self.block1(Z)
        Z = self.block2(Z)
        return self.out(Z)
# implement get_config() method to make the model to be able to be saved

In [4]:
# custom model with a custom reconstruction loss:
class ReconstructingRegressor(keras.Model):
    def __init__(self, output_dim, **kwargs):
        super.__init__(**kwargs)
        self.hidden = [keras.layers.Dense(30, activation="selu",
                       kernel_initializer = "lecun_normal") for _ in range(5)]
        self.out = keras.layers.Dense(output_dim)
    def build(self, batch_input_shape):
        n_inputs=batch_input_shape[-1]
        self.reconstruct = keras.layers.Dense(n_inputs)
        super().build(batch_input_shape)
    def call(self, inputs):
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        reconstruction = self.reconstruct(Z) # ここでbuildメソッドが起動されて,batch_input_shape = Zで入る
        recon_loss = tf.reduce_mean(tf.square(reconstruction - inputs))
        self.add_loss(0.05 * recon_loss)
        return self.out(Z)
# interpretations 
# P511-512

In [5]:
#Autodiff

In [37]:
def f(w1, w2):
    return 3*w1**2 +2*w1*w2

In [7]:
#　近似的手法では
w1, w2=5,3
eps=1e-6 #微小量の変化
(f(w1+eps,w2)-f(w1,w2))/eps

36.000003007075065

In [8]:
(f(w1,w2+eps)-f(w1,w2))/eps

10.000000003174137

In [10]:
# using tensorflow's autodiff
w1, w2 = tf.Variable(5.), tf.Variable(3.)
with tf.GradientTape() as tape:
    z=f(w1,w2)
gradients = tape.gradient(z, [w1,w2])

In [11]:
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [35]:
# tape.gradients get delected after one autodiff
with tf.GradientTape(persistent=True) as tape:
    z=f(w1,w2)
dzdw1 = tape.gradient(z,w1)
dzdw2 = tape.gradient(z,w2)
del tape #persistent後、free resources with del tape
print(dzdw1, dzdw2)

tf.Tensor(30.0, shape=(), dtype=float32) None


In [33]:
# tape will only track operations involving variables, 
# with c1 ,c2 = tf.constant(5.), tf.constant(3.)
# returns [None, None]
c1 ,c2 = tf.constant(5.), tf.constant(3.)
with tf.GradientTape() as tape:
    z=f(c1,c2)
gradients = tape.gradient(z, [c1,c2])
print(gradients)

[None, None]


In [36]:
# to make none-variables able to do autodiff
# set tape.watch(xx)
with tf.GradientTape() as tape:
    tape.watch(c1)
    tape.watch(c2)
    z = f(c1,c2)
gradients = tape.gradient(z,[c1,c2])
gradients
# 実応用例：inputsを基にしたloss計算(inputs はconstantだからtapeの必要がある)


[<tf.Tensor: shape=(), dtype=float32, numpy=30.0>, None]

In [19]:
# Second order derivatives
# P 515 and Github

In [20]:
# to stop gradients from backpropagating through some part of the neural network
def f(w1, w2):
    return 3*w1**2 +tf.stop_gradient(2*w1*w2)
with tf.GradientTape() as tape:
    z=f(w1,w2)
gradients = tape.gradient(z, [w1,w2])
gradients# returns [tensor=30, None]

[<tf.Tensor: shape=(), dtype=float32, numpy=30.0>, None]

In [30]:
# numerical issues like my_softplus() function for large inputs
with tf.GradientTape() as tape:
    z = f(w1, w2)

dz_dw1 = tape.gradient(z, w1)
try:
    dz_dw2 = tape.gradient(z, w2)
except RuntimeError as ex:
    print(ex)
# this is due to a floating-point precision errors

A non-persistent GradientTape can only be used to compute one set of gradients (or jacobians)


In [28]:
with tf.GradientTape(persistent=True) as tape:
    z = f(w1, w2)

dz_dw1 = tape.gradient(z, w1)
dz_dw2 = tape.gradient(z, w2) # works now!
del tape
print(dz_dw1, dz_dw2)

tf.Tensor(30.0, shape=(), dtype=float32) None


In [40]:
x = tf.Variable([100.])
with tf.GradientTape() as tape:
    z = my_softplus(x)

tape.gradient(z, [x])

[<tf.Tensor: shape=(1,), dtype=float32, numpy=array([nan], dtype=float32)>]

In [41]:
# dixed softplus function to solve the numerical problem
@tf.custom_gradient
def my_better_softplus(z):
    exp = tf.exp(z)
    def my_softplus_gradients(grad):
        return grad / (1 + 1 / exp)
    return tf.math.log(exp + 1), my_softplus_gradients


In [42]:
x = tf.Variable([1000.])
with tf.GradientTape() as tape:
    z = my_better_softplus(x)

z, tape.gradient(z, [x])

(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([inf], dtype=float32)>,
 [<tf.Tensor: shape=(1,), dtype=float32, numpy=array([1.], dtype=float32)>])

In [43]:
# Custom Training Loops
# example: implement two different optimizers for Wide & Deep neural network
# first, build a simple model
l2_reg = keras.regularizers.l2(0.05)
model = keras.models.Sequential([
    keras.layers.Dense(30, activation="elu", kernel_initializer="he_normal",
                       kernel_regularizer=l2_reg),
    keras.layers.Dense(1, kernel_regularizer=l2_reg)
])


In [44]:
# a function that randomly sample a batch of instances from the training set
def random_batch(X, y, batch_size=32):
    idx = np.random.randint(len(X), size=batch_size)
    return X[idx], y[idx]


In [49]:
# a function that will display the training status
def print_status_bar(iteration, total, loss, metrics=None):
    metrics= " - ".join(["{}:{:.4f}".format(m.name, m.result())
                         for m in [loss]+(metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total)+metrics, end=end)
    

In [51]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target.reshape(-1, 1), random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

In [52]:
# define some hyperparameters and choose the optimizer, the loss function, and the metrics
n_epochs=5
batch_size = 32
n_steps=len(X_train)//batch_size
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
loss_fn = keras.losses.mean_squared_error
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.MeanAbsoluteError()]

# build the custom loop ( equivalent to .fit)
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train_scaled, y_train)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            loss = tf.add_n([main_loss] + model.losses)#要素ごと加算する
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        for variable in model.variables:
            if variable.constraint is not None:
                variable.assign(variable.constraint(variable))
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    for metric in [mean_loss] + metrics:
        metric.reset_states()
# tqdm method to build the custom loop
# Github In[204]
            
            

Epoch 1/5
11610/11610 - mean:1.4502 - mean_absolute_error:0.5950
Epoch 2/5
11610/11610 - mean:0.6594 - mean_absolute_error:0.5270
Epoch 3/5
11610/11610 - mean:0.6385 - mean_absolute_error:0.5215
Epoch 4/5
11610/11610 - mean:0.6509 - mean_absolute_error:0.5261
Epoch 5/5
11610/11610 - mean:0.6554 - mean_absolute_error:0.5266


In [53]:
# Tensorflow functions and Graphs
def cube(x):
    return x**3
cube(2)

8

In [54]:
cube(tf.constant(2.0))

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [56]:
tf_cube = tf.function(cube)
tf_cube

<tensorflow.python.eager.polymorphic_function.polymorphic_function.Function at 0x272d4a922e0>

In [57]:
tf_cube(2)

<tf.Tensor: shape=(), dtype=int32, numpy=8>

In [58]:
tf_cube(tf.constant(2.0))

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [75]:
# @tf.function as a decoration

@tf.function
def tf_cube(x):
    return x**3

In [76]:
tf_cube.python_function(2)

8

In [77]:
tf_cube(2)



<tf.Tensor: shape=(), dtype=int32, numpy=8>

In [78]:
tf_cube(tf.constant([10,20])) # if tf_cube([10, 20]), error



<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1000, 8000])>

In [79]:
# more details and tips P522-523
# TF function is most of the time faster especially if it performs complex computations
concrete_function = tf_cube.get_concrete_function(tf.constant(2.0))

In [80]:
concrete_function.graph

<tensorflow.python.framework.func_graph.FuncGraph at 0x272d6d8aee0>

In [81]:
tf_cube(10)

<tf.Tensor: shape=(), dtype=int32, numpy=1000>

In [82]:
tf.autograph.to_code(sum_squares.python_function)

NameError: name 'sum_squares' is not defined