# Tensorflow

Mac OS Monterey can now fully utilize GPUs in Tensorflow. See https://developer.apple.com/metal/tensorflow-plugin/

In [None]:
%matplotlib inline

In [None]:
import warnings
warnings.simplefilter('ignore', RuntimeWarning)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
import tensorflow as tf

In [None]:
tf.__version__

In [None]:
import tensorflow_probability as tfp
tfd = tfp.distributions

## Working with tensors

Almost exactly like numpy arrays.

In [None]:
tf.constant([1., 2., 3.])

Variables are often used as weights in networks, as they need to be updated.

In [None]:
x = tf.Variable([[1.,2.,3.], [4.,5.,6.]])

In [None]:
x.shape

In [None]:
x.dtype

### Conversion to numpy

In [None]:
x.numpy()

### Indexing

In [None]:
x[:, :2]

### Assignment

In [None]:
x[0,:].assign([3.,2.,1.])

In [None]:
x

### Reductions

In [None]:
tf.reduce_mean(x, axis=0)

In [None]:
tf.reduce_sum(x, axis=1)

### Broadcasting

In [None]:
x + 10

In [None]:
x * 10

In [None]:
x - tf.reduce_mean(x, axis=1)[:, tf.newaxis]

### Matrix operations

In [None]:
x @ tf.transpose(x)

### Ufuncs

In [None]:
tf.exp(x)

In [None]:
tf.sqrt(x)

### Random numbers

In [None]:
X = tf.random.normal(shape=(10,4))
y = tf.random.normal(shape=(10,1))

In [None]:
X

In [None]:
y

### Linear algebra

In [None]:
tf.linalg.lstsq(X, y)

### Vectorization

In [None]:
X = tf.random.normal(shape=(1000,10,4))
y = tf.random.normal(shape=(1000,10,1))

In [None]:
tf.linalg.lstsq(X, y)

### Automatic differentiation

Consider the simple function
$$
f(x, y) = x^2 + 2y^2 + 3xy
$$

What are the partial derivatives with respect to $x$ and $y$ at $(1,2)$?

We have 
$$
\frac{\partial f}{\partial x} = 2x + 3y
$$

and 
$$
\frac{\partial f}{\partial y} = 4y+ 3x
$$

Evaluated at $(1,2)$, this gives $\frac{\partial f}{\partial x} = 8$ and $\frac{\partial f}{\partial y} = 11$.

We can also calculate the Hessian, which in this case is the constant matrix
$$
\begin{bmatrix}
2 & 3 \\
3 & 4
\end{bmatrix}
$$

In [None]:
def f(x, y):
    """Evaluate the quadratic form x^2 + 2*y^2 + 3*x*y.

    Only the ``**``, ``*`` and ``+`` operators are applied to the
    arguments, so any operands that support them can be passed in.
    """
    square_x = x**2
    scaled_square_y = 2 * y**2
    cross_term = 3 * x * y
    return square_x + scaled_square_y + cross_term

#### Gradient

In [None]:
x, y = tf.Variable(1.0), tf.Variable(2.0)

In [None]:
with tf.GradientTape() as tape:
    z = f(x, y)

In [None]:
tape.gradient(z, [x,y])

#### Hessian

In [None]:
# Nested tapes: the inner tape (J_tape) differentiates f once, and the outer
# tape (H_tape) differentiates those first derivatives a second time.
# persistent=True lets H_tape.gradient be called more than once; it is
# invoked once per entry of Js below.
with tf.GradientTape(persistent=True) as H_tape:
    with tf.GradientTape() as J_tape:
        z = f(x, y)
    # Evaluated inside the H_tape context so the outer tape records the
    # first-derivative computation.
    Js = J_tape.gradient(z, [x,y])
# One list per first derivative J, holding its gradient w.r.t. [x, y].
Hs = [H_tape.gradient(J, [x,y]) for J in Js]
del H_tape  # drop the reference to the persistent tape once finished

In [None]:
np.array(Hs)

## Regression

In [None]:
xs = tf.Variable([0., 1., 2., 5., 6., 8.])
ys = tf.sin(xs) + tfd.Normal(loc=0, scale=0.5).sample(xs.shape[0])

In [None]:
xs.shape, ys.shape

In [None]:
xs.numpy()

In [None]:
ys.numpy()

In [None]:
xp = tf.linspace(-1., 9., 100)[:, None]
plt.scatter(xs.numpy(), ys.numpy())
plt.plot(xp, tf.sin(xp))
pass

In [None]:
# Squared-exponential kernel with a fixed length scale.
kernel = tfp.math.psd_kernels.ExponentiatedQuadratic(length_scale=1.5)
# `xp` was created with a trailing feature axis (shape (100, 1)), which is
# already the (num_points, num_features) layout expected for index points.
# Adding another axis would turn it into a batch of 100 single-point GPs
# instead of one GP evaluated jointly at 100 points.
# `xs` is 1-D, so it does need the feature axis added.
reg = tfd.GaussianProcessRegressionModel(
    kernel=kernel,
    index_points=xp,
    observation_index_points=xs[:, tf.newaxis],
    observations=ys,
)

In [None]:
ub, lb = reg.mean() + [2*reg.stddev(), -2*reg.stddev()]
plt.fill_between(np.ravel(xp), np.ravel(ub), np.ravel(lb), alpha=0.2)
plt.plot(xp, reg.mean(), c='red', linewidth=2)
plt.scatter(xs[:], ys[:], s=50, c='k')
pass

## Tensorflow Data

Tensorflow provides a data API to allow it to work seamlessly with large data sets that may not fit into memory. This results in `Tensorflow Dataset (TFDS)` objects that handle multi-threading, queuing, batching and pre-fetching.

You can think of TFDS as being a smart generator from data. Generally, you first create a TFDS from data using `from_tensor_slices` or from data in the file system or a relational database. Then you apply `transforms` to the data to process it, before handing it off to, say, a deep learning method.

### Using `from_tensor_slices`

You can pass in a list, dict, `numpy` array, or Tensorflow tensor.

In [None]:
x = np.arange(6)
ds = tf.data.Dataset.from_tensor_slices(x)
ds

In [None]:
for item in ds.take(3):
    print(item)

### Transformations

Once you have a TFDS, you can chain its transformation methods to process the data. We will cover functional programming next week, but most of this should be comprehensible even without a deep understanding of functional programming.

In [None]:
ds = ds.map(lambda x: x**2).repeat(3)

In [None]:
for item in ds.take(3):
    print(item)

In [None]:
ds = ds.shuffle(buffer_size=4, seed=0).batch(5)

In [None]:
for item in ds.take(3):
    print(item)

#### Prefetching is an optimization to preload data in parallel

In [None]:
# `prefetch` returns a new dataset rather than modifying `ds` in place, so
# rebind `ds` to keep the prefetching stage in the pipeline.
# `tf.data.AUTOTUNE` is the current name of `tf.data.experimental.AUTOTUNE`.
ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)
ds

### Reading from files

You can also read from CSV files, text files or a SQLite database and transform the data in the same way.

In [None]:
# Count the columns by splitting the first line of the CSV on commas.
# The handle is named `csv_file` rather than `f` so that the function `f`
# defined earlier in this notebook is not overwritten.
# An empty file raises StopIteration here instead of leaving `n_fields` unset.
with open('data/X_train_unscaled.csv') as csv_file:
    n_fields = len(next(csv_file).split(','))

In [None]:
ds = tf.data.experimental.CsvDataset(
    'data/X_train_unscaled.csv',
    record_defaults=[tf.float32]*n_fields,
    header=True
)

In [None]:
for item in ds.take(1):
    print(item)

## Tensorflow probability

### Distributions

In [None]:
# Names of the classes that directly subclass `tfd.Distribution`.
# `__name__` gives the class name without parsing the class repr string.
[cls.__name__ for cls in tfd.Distribution.__subclasses__()]

In [None]:
dist = tfd.Normal(loc=100, scale=15)

In [None]:
x = dist.sample((3,4))
x

In [None]:
n = 100
xs = dist.sample(n)
plt.hist(xs, density=True)
xp = tf.linspace(50., 150., 100)
plt.plot(xp, dist.prob(xp))
pass

### Broadcasting

In [None]:
dist = tfd.Normal(loc=[3,4,5,6], scale=0.5)

In [None]:
dist.sample(5)

In [None]:
xp = tf.linspace(0., 9., 100)[:, tf.newaxis]
plt.plot(np.tile(xp, dist.batch_shape), dist.prob(xp))
pass

### Mixtures

In [None]:
# Gaussian mixture with four components: `probs` holds one mixing weight
# per component, and `loc` / `scale` hold one mean / standard deviation each.
gmm = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(probs=[0.4, 0.1, 0.2, 0.3]),
    components_distribution=tfd.Normal(
        loc=[3., 4., 5., 6.],
        scale=[0.1, 0.5, 0.5, .1],
    ),
)

In [None]:
n = 10000
xs = gmm.sample(n)

In [None]:
# `sns.distplot` is deprecated and slated for removal from seaborn.
# `histplot` on a density scale with a KDE overlay draws the equivalent figure;
# `cut=3` lets the KDE curve extend past the data range as distplot did.
sns.histplot(xs.numpy(), kde=True, stat='density', kde_kws=dict(cut=3))
pass

### Transformations

In [None]:
[x for x in dir(tfp.bijectors) if x[0].isupper()]

In [None]:
lognormal = tfp.bijectors.Exp()(tfd.Normal(0, 0.5))

In [None]:
xs = lognormal.sample(1000)
# `sns.distplot` is deprecated and slated for removal from seaborn.
# `histplot` with stat='density' keeps the histogram on the same scale as
# the analytic density plotted over it below.
sns.histplot(xs.numpy(), kde=True, stat='density', kde_kws=dict(cut=3))
# Overlay the exact LogNormal density across the sampled range for comparison.
xp = np.linspace(tf.reduce_min(xs), tf.reduce_max(xs), 100)
plt.plot(xp, tfd.LogNormal(loc=0, scale=0.5).prob(xp))
pass