# Keras Customization: Loss and Activation Functions

Your functions must be defined with TensorFlow graph commands. The derivative will be taken automatically, assuming all components of your function are differentiable.

# TensorFlow for Calculation

In [4]:
import tensorflow as tf

# Multiply two scalar integer constants; the recorded result is an int32
# scalar tensor with value 10.
tf.multiply(tf.constant(2),tf.constant(5))

<tf.Tensor: id=16, shape=(), dtype=int32, numpy=10>

In [5]:
import numpy as np
# NumPy arrays can be passed directly; the product is element-wise
# (recorded output: [4, 16] with dtype int64).
tf.multiply(np.array([2,4]),np.array([2,4]))

<tf.Tensor: id=20, shape=(2,), dtype=int64, numpy=array([ 4, 16])>

In [7]:
# Plain Python floats are accepted as well; the recorded result is a
# float32 scalar tensor with value 8.0.
tf.multiply(2.0,4.0)

<tf.Tensor: id=28, shape=(), dtype=float32, numpy=8.0>

In [8]:
# NOTE(review): the recorded output of this cell is a bare 0.5 rather than a
# tf.Tensor -- presumably because both arguments are plain Python ints.
# Other TensorFlow versions may return a tensor here instead; TODO confirm.
tf.divide(2,4)

0.5

In [10]:
# 2 raised to the 4th power; the recorded result is an int32 scalar tensor
# with value 16.
tf.pow(2,4)

<tf.Tensor: id=36, shape=(), dtype=int32, numpy=16>

In [11]:
# Logistic (sigmoid) function 1 / (1 + e^(-x)) evaluated at x = 5,
# built step by step from TensorFlow primitives.
x = 5.0
exp_neg_x = tf.exp(tf.negative(x))
y = tf.divide(1.0, tf.add(1, exp_neg_x))
y

<tf.Tensor: id=44, shape=(), dtype=float32, numpy=0.9933072>

# Calculus with TensorFlow

How do we take derivatives?

* [Symbolic differentiation](http://tutorial.math.lamar.edu/pdf/common_derivatives_integrals.pdf)
* [Numerical differentiation](https://en.wikipedia.org/wiki/Finite_difference) (the method of finite differences)
* [Automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation)

Take the derivative of $f(x) = x^2$.

Symbolic derivative (power rule): for $f(x) = x^r$, $f'(x) = rx^{r-1}$, so here $f'(x) = 2x$.

$f(4) = 4^2 = 16$

$f'(4) = 2 \cdot 4 = 8$

This can be done in TensorFlow:

In [12]:
# Point at which the derivative of f(x) = x^2 is evaluated.
x = tf.constant(4.0)

# Record z = x * x on the tape; x is registered explicitly via t.watch so
# that operations on this constant are tracked.
with tf.GradientTape() as t:
  t.watch(x)
  z = tf.multiply(x, x)

# Derivative of z with respect to the original input tensor x
# (recorded output is 8.0, matching f'(4) = 2 * 4).
dz_dx = t.gradient(z, x)
print(dz_dx)

tf.Tensor(8.0, shape=(), dtype=float32)


Let's express the [Logistic function](https://en.wikipedia.org/wiki/Logistic_function) in TensorFlow. This is also called the Sigmoid Activation function in neural network literature.

$f(x) = \frac{1}{1 + e^{-x}}$

Written in TensorFlow:

In [13]:
# Evaluate the logistic function and its derivative at x = 5.
x = tf.constant([5.0])

# Watch x explicitly so the tape records the operations applied to it.
with tf.GradientTape() as t:
    t.watch(x)
    exp_neg_x = tf.exp(tf.negative(x))
    y = tf.divide(1.0, tf.add(1, exp_neg_x))

# Function value first, then dy/dx obtained from the tape.
print(y)
dy_dx = t.gradient(y, x)
print(dy_dx)

tf.Tensor([0.9933072], shape=(1,), dtype=float32)
tf.Tensor([0.00664806], shape=(1,), dtype=float32)


Let's check the regular function.

In [14]:
import math

# Logistic function 1 / (1 + e^(-x)) at x = 5, computed with the standard
# library as a cross-check of the TensorFlow value printed earlier.
1/(1+math.exp(-5))

0.9933071490757153

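And let's check the derivative (the two forms below are equal; the code that follows uses the $e^{-x}$ form):

$f'(x) = \frac{e^{-x}}{(1 + e^{-x})^2} = \frac{e^x}{(e^x + 1)^2}$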


In [15]:
# Closed-form derivative of the logistic function at x = 5, written in the
# e^(-x) form: e^(-x) / (e^(-x) + 1)^2.
math.exp(-5)/(math.exp(-5)+1)**2

0.006648056670790156

In [None]:
# 2x2 tensor filled with ones.
x = tf.ones((2, 2))
# Sum over all elements of x.
y = tf.reduce_sum(x)
# NOTE(review): z (y multiplied by itself) is computed but never displayed or
# used in this cell -- confirm whether z.numpy() was the intended output.
z = tf.multiply(y, y)
# Show y as a NumPy value.
y.numpy()

How to take second (and beyond) derivatives:

In [None]:
# Point at which the derivatives of y = x * x are evaluated.
x = tf.constant(3.0)
# The outer tape g wraps the computation of the first derivative so that the
# first derivative can itself be differentiated.
with tf.GradientTape() as g:
  g.watch(x)
  # The inner tape gg records y = x * x.
  with tf.GradientTape() as gg:
    gg.watch(x)
    y = x * x
  # First derivative, taken while still inside g's context.
  dy_dx = gg.gradient(y, x)     # Will compute to 6.0
# Second derivative, taken from the outer tape.
d2y_dx2 = g.gradient(dy_dx, x)  # Will compute to 2.0

# Custom Loss (Objective) Function

$ \operatorname{RMSE}=\sqrt{\frac{\sum_{t=1}^T (\hat y_t - y_t)^2}{T}} $

In [16]:
def mean_pred(y_true, y_pred):
    """Root-mean-square error (RMSE) loss built from TensorFlow ops.

    Computes sqrt(sum((y_true - y_pred)^2) / T), where T is the number of
    elements in ``y_true``.

    Args:
        y_true: Ground-truth target values (tensor or array-like).
        y_pred: Predicted values (tensor or array-like).

    Returns:
        A scalar tensor holding the RMSE, in the dtype of ``y_pred``.
    """
    # Align dtypes up front: labels may arrive as float64 (e.g. NumPy arrays)
    # while predictions are float32, and TensorFlow does not mix them
    # implicitly in arithmetic.
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    squared_error = tf.square(tf.subtract(y_true, y_pred))
    # Cast the element count to the working dtype rather than a hard-coded
    # float32 so the division also works for float64/float16 predictions.
    count = tf.cast(tf.size(y_true), y_pred.dtype)
    return tf.sqrt(tf.divide(tf.reduce_sum(squared_error), count))

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
import pandas as pd
import io
import os
import requests
import numpy as np
from sklearn import metrics

# Load the Auto MPG data set; both 'NA' and '?' are read as missing values.
df = pd.read_csv(
    "https://data.heatonresearch.com/data/t81-558/auto-mpg.csv",
    na_values=['NA', '?'],
)

cars = df['name']

# Fill missing horsepower entries with the column median.
median_horsepower = df['horsepower'].median()
df['horsepower'] = df['horsepower'].fillna(median_horsepower)

# Convert the predictor columns and the regression target to NumPy arrays.
feature_columns = ['cylinders', 'displacement', 'horsepower', 'weight',
                   'acceleration', 'year', 'origin']
x = df[feature_columns].values
y = df['mpg'].values

# Two hidden ReLU layers followed by a single linear output neuron,
# trained with the custom loss function mean_pred.
model = Sequential([
    Dense(25, input_dim=x.shape[1], activation='relu'),
    Dense(10, activation='relu'),
    Dense(1),
])
model.compile(loss=mean_pred, optimizer='adam')
model.fit(x, y, verbose=2, epochs=100)

W0913 08:17:40.330691 140735774155648 deprecation.py:323] From /Users/jheaton/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1205: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 398 samples
Epoch 1/100
398/398 - 0s - loss: 824.3803
Epoch 2/100
398/398 - 0s - loss: 489.1181
Epoch 3/100
398/398 - 0s - loss: 156.0873
Epoch 4/100
398/398 - 0s - loss: 60.6038
Epoch 5/100
398/398 - 0s - loss: 26.5777
Epoch 6/100
398/398 - 0s - loss: 19.7083
Epoch 7/100
398/398 - 0s - loss: 14.9978
Epoch 8/100
398/398 - 0s - loss: 14.2451
Epoch 9/100
398/398 - 0s - loss: 14.1308
Epoch 10/100
398/398 - 0s - loss: 13.8209
Epoch 11/100
398/398 - 0s - loss: 13.5140
Epoch 12/100
398/398 - 0s - loss: 13.1198
Epoch 13/100
398/398 - 0s - loss: 13.1923
Epoch 14/100
398/398 - 0s - loss: 13.2130
Epoch 15/100
398/398 - 0s - loss: 12.4092
Epoch 16/100
398/398 - 0s - loss: 11.9362
Epoch 17/100
398/398 - 0s - loss: 11.7038
Epoch 18/100
398/398 - 0s - loss: 10.8634
Epoch 19/100
398/398 - 0s - loss: 10.4383
Epoch 20/100
398/398 - 0s - loss: 10.1428
Epoch 21/100
398/398 - 0s - loss: 10.1045
Epoch 22/100
398/398 - 0s - loss: 9.5447
Epoch 23/100
398/398 - 0s - loss: 9.1451
Epoch 24/100
398/398 

<tensorflow.python.keras.callbacks.History at 0x1a2da148d0>

# Custom Activation (Transfer) Functions

In [18]:
import tensorflow as tf
def elliot_sym(x):
    """Symmetric Elliott activation: x / (1 + |x|).

    Args:
        x: Input tensor of pre-activation values.

    Returns:
        The element-wise result of dividing x by (1 + |x|).
    """
    denominator = tf.add(1.0, tf.abs(x))
    return tf.divide(x, denominator)

In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
import pandas as pd
import io
import os
import requests
import numpy as np
from sklearn import metrics

# Load the Auto MPG data set; both 'NA' and '?' are read as missing values.
df = pd.read_csv(
    "https://data.heatonresearch.com/data/t81-558/auto-mpg.csv", 
    na_values=['NA', '?'])

cars = df['name']

# Handle missing value
df['horsepower'] = df['horsepower'].fillna(df['horsepower'].median())

# Pandas to Numpy
x = df[['cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'year', 'origin']].values
y = df['mpg'].values # regression

# Build the neural network.
# Both hidden layers use the custom elliot_sym activation. The model is
# trained with the Adam optimizer; no separate optimizer object is built,
# because compile() receives the optimizer by name.
model = Sequential()
model.add(Dense(25, input_dim=x.shape[1], activation=elliot_sym)) # Hidden 1
model.add(Dense(10, activation=elliot_sym)) # Hidden 2
model.add(Dense(1)) # Output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x,y,verbose=2,epochs=400)

Train on 398 samples
Epoch 1/400
398/398 - 0s - loss: 577.1784
Epoch 2/400
398/398 - 0s - loss: 554.4562
Epoch 3/400
398/398 - 0s - loss: 522.0445
Epoch 4/400
398/398 - 0s - loss: 504.1360
Epoch 5/400
398/398 - 0s - loss: 490.4333
Epoch 6/400
398/398 - 0s - loss: 480.1240
Epoch 7/400
398/398 - 0s - loss: 471.5591
Epoch 8/400
398/398 - 0s - loss: 463.3780
Epoch 9/400
398/398 - 0s - loss: 456.1621
Epoch 10/400
398/398 - 0s - loss: 449.1736
Epoch 11/400
398/398 - 0s - loss: 442.3812
Epoch 12/400
398/398 - 0s - loss: 435.8225
Epoch 13/400
398/398 - 0s - loss: 429.3853
Epoch 14/400
398/398 - 0s - loss: 423.1167
Epoch 15/400
398/398 - 0s - loss: 417.0223
Epoch 16/400
398/398 - 0s - loss: 410.9635
Epoch 17/400
398/398 - 0s - loss: 405.0465
Epoch 18/400
398/398 - 0s - loss: 399.3166
Epoch 19/400
398/398 - 0s - loss: 393.6151
Epoch 20/400
398/398 - 0s - loss: 387.9807
Epoch 21/400
398/398 - 0s - loss: 382.5052
Epoch 22/400
398/398 - 0s - loss: 377.0686
Epoch 23/400
398/398 - 0s - loss: 371.8103

Epoch 191/400
398/398 - 0s - loss: 65.3442
Epoch 192/400
398/398 - 0s - loss: 65.1779
Epoch 193/400
398/398 - 0s - loss: 65.0019
Epoch 194/400
398/398 - 0s - loss: 64.8380
Epoch 195/400
398/398 - 0s - loss: 64.6965
Epoch 196/400
398/398 - 0s - loss: 64.5372
Epoch 197/400
398/398 - 0s - loss: 64.3971
Epoch 198/400
398/398 - 0s - loss: 64.2553
Epoch 199/400
398/398 - 0s - loss: 64.1287
Epoch 200/400
398/398 - 0s - loss: 63.9927
Epoch 201/400
398/398 - 0s - loss: 63.8708
Epoch 202/400
398/398 - 0s - loss: 63.7433
Epoch 203/400
398/398 - 0s - loss: 63.6317
Epoch 204/400
398/398 - 0s - loss: 63.5124
Epoch 205/400
398/398 - 0s - loss: 63.4164
Epoch 206/400
398/398 - 0s - loss: 63.3110
Epoch 207/400
398/398 - 0s - loss: 63.2145
Epoch 208/400
398/398 - 0s - loss: 63.1251
Epoch 209/400
398/398 - 0s - loss: 63.0278
Epoch 210/400
398/398 - 0s - loss: 62.9317
Epoch 211/400
398/398 - 0s - loss: 62.8447
Epoch 212/400
398/398 - 0s - loss: 62.7620
Epoch 213/400
398/398 - 0s - loss: 62.6805
Epoch 214/4

Epoch 382/400
398/398 - 0s - loss: 60.9457
Epoch 383/400
398/398 - 0s - loss: 60.9450
Epoch 384/400
398/398 - 0s - loss: 60.9547
Epoch 385/400
398/398 - 0s - loss: 60.9406
Epoch 386/400
398/398 - 0s - loss: 60.9410
Epoch 387/400
398/398 - 0s - loss: 60.9428
Epoch 388/400
398/398 - 0s - loss: 60.9422
Epoch 389/400
398/398 - 0s - loss: 60.9430
Epoch 390/400
398/398 - 0s - loss: 60.9412
Epoch 391/400
398/398 - 0s - loss: 60.9531
Epoch 392/400
398/398 - 0s - loss: 60.9425
Epoch 393/400
398/398 - 0s - loss: 60.9444
Epoch 394/400
398/398 - 0s - loss: 60.9410
Epoch 395/400
398/398 - 0s - loss: 60.9455
Epoch 396/400
398/398 - 0s - loss: 60.9508
Epoch 397/400
398/398 - 0s - loss: 60.9427
Epoch 398/400
398/398 - 0s - loss: 60.9479
Epoch 399/400
398/398 - 0s - loss: 60.9447
Epoch 400/400
398/398 - 0s - loss: 60.9440


<tensorflow.python.keras.callbacks.History at 0x1a2e325b38>