# Tensorflow

In [None]:
%matplotlib inline

In [None]:
import warnings
warnings.simplefilter('ignore', RuntimeWarning)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
data = fetch_california_housing()

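With TF1, import `tensorflow.compat.v2` and ask it to emulate TF2 behavior (the commented-out lines below); with TF2 installed, a plain `import tensorflow as tf` is enough.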

In [None]:
# import tensorflow.compat.v2 as tf
# tf.enable_v2_behavior()
import tensorflow as tf

In [None]:
%%capture
import tensorflow_probability as tfp
tfd = tfp.distributions

## Working with tensors

Almost exactly like numpy arrays.

In [None]:
tf.constant([1., 2., 3.])

In [None]:
x = tf.Variable([[1.,2.,3.], [4.,5.,6.]])

In [None]:
x.shape

In [None]:
x.dtype

### Conversion to numpy

In [None]:
x.numpy()

### Indexing

In [None]:
x[:, :2]

### Assignment

In [None]:
x[0,:].assign([3.,2.,1.])

In [None]:
x

### Reductions

In [None]:
tf.reduce_mean(x, axis=0)

In [None]:
tf.reduce_sum(x, axis=1)

### Broadcasting

In [None]:
x + 10

In [None]:
x * 10

In [None]:
x - tf.reduce_mean(x, axis=1)[:, tf.newaxis]

### Matrix operations

In [None]:
x @ tf.transpose(x)

### Ufuncs

In [None]:
tf.exp(x)

In [None]:
tf.sqrt(x)

### Random numbers

In [None]:
X = tf.random.normal(shape=(10,4))
y = tf.random.normal(shape=(10,1))

### Linear algebra

In [None]:
tf.linalg.lstsq(X, y)

### Vectorization

In [None]:
X = tf.random.normal(shape=(1000,10,4))
y = tf.random.normal(shape=(1000,10,1))

In [None]:
tf.linalg.lstsq(X, y)

### Automatic differentiation

In [None]:
def f(x,y):
    """Evaluate the quadratic form x^2 + 2*y^2 + 3*x*y.

    Works on anything that supports `**`, `*` and `+` (plain numbers as well
    as the tensors/variables used with the gradient tapes in this notebook).
    """
    x_squared = x**2
    y_term = 2*y**2
    cross_term = 3*x*y
    # Same left-to-right summation order as the single-expression form.
    return x_squared + y_term + cross_term

#### Gradient

In [None]:
x, y = tf.Variable(1.0), tf.Variable(2.0)

In [None]:
with tf.GradientTape() as tape:
    z = f(x, y)

In [None]:
tape.gradient(z, [x,y])

#### Hessian

In [None]:
# Nested tapes: the inner tape (J_tape) records f so its first derivatives can
# be taken; the outer tape (H_tape) records that derivative computation so it
# can be differentiated a second time.
with tf.GradientTape(persistent=True) as H_tape:
    with tf.GradientTape() as J_tape:
        z = f(x, y)
    # First derivatives [dz/dx, dz/dy]; computed inside H_tape's context so
    # that H_tape sees them.
    Js = J_tape.gradient(z, [x,y])
# One H_tape.gradient call per first derivative gives one row of second
# derivatives each; the tape is persistent because it is queried more than once.
Hs = [H_tape.gradient(J, [x,y]) for J in Js]
# Drop the reference to the persistent tape once the second derivatives exist.
del H_tape                    

In [None]:
np.array(Hs)

## Keras

In [None]:
# Fix the shuffle seed so the train/test partition -- and every model fit and
# metric computed from it further down -- is identical on each
# Restart & Run All instead of changing from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=0
)

In [None]:
y_train.min(), y_train.max()

In [None]:
# Fit the standardization on the training split only, then apply that same
# fitted transform to the test split.
scalar = StandardScaler()
X_train_s = scalar.fit_transform(X_train)
X_test_s = scalar.transform(X_test)

In [None]:
import tensorflow.keras as keras

In [None]:
Dense = keras.layers.Dense

We can consider a DL model as just a black box with a bunch of unknown parameters. For example, when the output is a Dense layer with just one node, the entire network model is just doing some form of regression. Hence we can replace a linear regression model with such a neural network model and run MCMC or VI as usual.

In [None]:
# Two-layer network: a 30-unit hidden layer with ELU activation, followed by
# a single-unit output layer (no activation specified). The input width is
# taken from the number of feature columns in X_train.
model = keras.models.Sequential([
    Dense(30, 
          activation='elu', 
          input_shape=X_train.shape[1:]),
    Dense(1)
])

In [None]:
model.compile(loss="mse", optimizer="nadam", metrics=["mae"])

In [None]:
model.summary()

In [None]:
model.layers

In [None]:
model.layers[0].name

In [None]:
model.layers[0].activation

In [None]:
# Train on the standardized features for 10 epochs, with validation_split=0.2
# reserving a fifth of the training data for validation. The return value is
# kept because its `.history` is tabulated and plotted in the following cells.
hist = model.fit(X_train_s, 
                 y_train,
                 epochs=10,
                 validation_split=0.2)

In [None]:
import pandas as pd

In [None]:
df = pd.DataFrame(hist.history)

In [None]:
df.head()

In [None]:
df.plot()
pass

In [None]:
model.evaluate(X_test_s, y_test)

In [None]:
np.c_[model.predict(X_test_s[:3, :]), y_test[:3]]

In [None]:
model.save('housing.h5')

In [None]:
model = keras.models.load_model('housing.h5')

## Tensorflow probability

### Distributions

In [None]:
# Names of the direct subclasses of Distribution. str(cls) looks like
# "<class 'pkg.module.Name'>", so keep the last dotted component and drop the
# trailing "'>" (two characters).
[str(x).split('.')[-1][:-2] for x in tfd.distribution.Distribution.__subclasses__()]

In [None]:
dist = tfd.Normal(loc=100, scale=15)

In [None]:
x = dist.sample((3,4))
x

In [None]:
# Compare a density-normalized histogram of n draws from `dist` with the
# distribution's own density evaluated on a grid from 50 to 150.
n = 100
xs = dist.sample(n)
plt.hist(xs, density=True)
xp = tf.linspace(50., 150., 100)
plt.plot(xp, dist.prob(xp))
# Trailing statement so the cell does not echo the return value of plt.plot.
pass

### Broadcasting

In [None]:
dist = tfd.Normal(loc=[3,4,5,6], scale=0.5)

In [None]:
dist.sample(5)

In [None]:
xp = tf.linspace(0., 9., 100)[:, tf.newaxis]
plt.plot(np.tile(xp, dist.batch_shape), dist.prob(xp))
pass

### Mixtures

In [None]:
tfd.MixtureSameFamily?

In [None]:
# Four-component Gaussian mixture: the Categorical supplies one mixing
# probability per component, and the Normal is given four (loc, scale) pairs,
# one per component.
gmm = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(
        probs=[0.4, 0.1, 0.2, 0.3]
    ),
    components_distribution=tfd.Normal(
      loc=[3., 4., 5., 6.],      
      scale=[0.1, 0.5, 0.5, .1])
)

In [None]:
n = 10000
xs = gmm.sample(n)

In [None]:
sns.distplot(xs)
pass

### Transformations

In [None]:
[x for x in dir(tfp.bijectors) if x[0].isupper()]

In [None]:
lognormal = tfp.bijectors.Exp()(tfd.Normal(0, 0.5))

In [None]:
xs = lognormal.sample(1000)
sns.distplot(xs)
xp = np.linspace(tf.reduce_min(xs), tf.reduce_max(xs), 100)
plt.plot(xp, tfd.LogNormal(loc=0, scale=0.5).prob(xp))
pass

## Regression

In [None]:
xs = tf.Variable([0., 1., 2., 5., 6., 8.])
ys = tf.sin(xs) + tfd.Normal(loc=0, scale=0.5).sample(xs.shape[0])

In [None]:
xs.shape, ys.shape

In [None]:
xs.numpy()

In [None]:
ys.numpy()

In [None]:
xp = tf.linspace(-1., 9., 100)[:, None]
plt.scatter(xs.numpy(), ys.numpy())
plt.plot(xp, tf.sin(xp))
pass

In [None]:
# Gaussian-process regression with an exponentiated-quadratic kernel:
# predictions are requested at the grid `xp`, conditioned on the noisy
# observations (xs, ys) generated above.
# NOTE(review): `xp` was already given a trailing axis when it was created
# (`tf.linspace(...)[:, None]`), so `xp[:, tf.newaxis]` adds a second one.
# Presumably this makes the grid a batch of 100 single index points rather
# than one set of 100 points -- confirm that this is intended.
kernel = tfp.math.psd_kernels.ExponentiatedQuadratic(length_scale=1.5)
reg = tfd.GaussianProcessRegressionModel(
    kernel, xp[:, tf.newaxis], xs[:, tf.newaxis], ys
)

In [None]:
# This cell previously held the incomplete expression `lb.` -- a syntax error
# that also referenced `lb` before the next cell defines it, which stopped
# Restart & Run All. Show the shapes of the quantities plotted next instead.
reg.mean().shape, reg.stddev().shape

In [None]:
# Upper and lower band edges: the model mean plus and minus two standard
# deviations. Adding the two-element list yields a stacked result that is
# unpacked along its first axis into `ub` and `lb`.
ub, lb = reg.mean() + [2*reg.stddev(), -2*reg.stddev()]
# np.ravel flattens each array to 1-D, as fill_between expects.
plt.fill_between(np.ravel(xp), np.ravel(ub), np.ravel(lb), alpha=0.2)
plt.plot(xp, reg.mean(), c='red', linewidth=2)
plt.scatter(xs[:], ys[:], s=50, c='k')
# Trailing statement so the cell does not echo the return value of plt.scatter.
pass

### Modeling

Sampling from a normal distribution using HMC (prior predictive samples)

In [None]:
[x for x in dir(tfp.mcmc) if x[0].isupper()]

In [None]:
dir(tfp.vi)

In [None]:
from tensorflow_probability import edward2 as ed

In [None]:
# From example in help docs

def unnormalized_log_prob(x):
    """Return the unnormalized log-density -x - x**2 evaluated at `x`."""
    linear_part = -x
    quadratic_part = x**2.
    return linear_part - quadratic_part
 
# Initialize the HMC transition kernel.
# 100 retained draws after 100 burn-in steps.
num_results = int(1e2)
num_burnin_steps = int(1e2)
# HMC (3 leapfrog steps, initial step size 1.0) wrapped in step-size
# adaptation; the number of adaptation steps is 80% of the burn-in count.
adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
    tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=unnormalized_log_prob,
        num_leapfrog_steps=3,
        step_size=1.),
    num_adaptation_steps=int(num_burnin_steps * 0.8))
 
# Run the chain (with burn-in).
# The chain starts at 1.0. trace_fn reads `is_accepted` from
# `inner_results`, i.e. from the HMC kernel wrapped by the adaptation kernel.
samples, is_accepted = tfp.mcmc.sample_chain(
    num_results=num_results,
    num_burnin_steps=num_burnin_steps,
    current_state=1.,
    kernel=adaptive_hmc,
    trace_fn=lambda _, pkr: pkr.inner_results.is_accepted)
 
# Summary statistics of the returned draws.
sample_mean = tf.reduce_mean(samples)
sample_stddev = tf.math.reduce_std(samples)

In [None]:
sample_mean

In [None]:
sample_stddev

In [None]:
# Distribution of the HMC draws, with the sample mean marked by a red vertical
# line and a black horizontal bar spanning mean +/- 2 standard deviations
# (drawn at height 0.01).
# NOTE(review): `sns.distplot` is deprecated in recent seaborn releases --
# consider `histplot`/`displot` when the environment is upgraded.
sns.distplot(samples)
plt.axvline(sample_mean.numpy(), c='red')
plt.plot([sample_mean - 2*sample_stddev, sample_mean + 2*sample_stddev], 
         [0.01, 0.01], c='k', linewidth=3)
# Trailing statement so the cell does not echo the return value of plt.plot.
pass