<a href="https://colab.research.google.com/github/afairley/ColaboratoryNotebooks/blob/main/FlaxBasics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --upgrade  -q pip jax jaxlib
!pip install --upgrade -q git+https://github.com/google/flax.git
import jax
from typing import Any, Callable, Sequence
from jax import random, numpy as jnp
import flax
from flax import linen as nn

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.2/79.2 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for flax (pyproject.toml) ... [?25l[?25hdone
[0m

In [None]:
model = nn.Dense(features=5)
key1, key2 = random.split(random.key(0))
x = random.normal(key1, (10,))
params = model.init(key2,x)
jax.tree_util.tree_map(lambda x: x.shape,params)
model.apply(params,x)

n_samples = 20
x_dim = 10
y_dim = 5

nextKey = random.key(0)
k1, k2 = random.split(nextKey)
W = random.normal(k1, (x_dim, y_dim))
b = random.normal(k2,(y_dim,))

true_params = flax.core.freeze({'params':{'bias': b, 'kernel': W}})
key_sample, key_noise = random.split(k1)
x_samples = random.normal(key_sample, (n_samples, x_dim))
y_samples = jnp.dot(x_samples,W) + b + 0.1 *\
 random.normal(key_noise,(n_samples, y_dim))
print('x shape:', x_samples.shape, '; y shape:', y_samples.shape)
print('x:', x_samples, '; y:', y_samples)

In [16]:
#@jax.jit
def mean_squared_error(params, model, x_batched, y_batched):
  def squared_error(x, y):
    pred = model.apply(params, x)
    return jnp.inner(y-pred, y-pred) / 2.0
  return jnp.mean(jax.vmap(squared_error)(x_batched, y_batched), axis=0)

In [24]:
learning_rate = 0.3
print('Loss for "true" W, b : ', mean_squared_error(true_params, model, x_samples, y_samples))
loss_grad_fn = jax.value_and_grad(mean_squared_error)

@jax.jit
def update_params(params, learning_rate, grads):
  params = jax.tree_util.tree_map(
      lambda p, g: p - learning_rate * g, params, grads)
  return params
print("Reinitializing parameters")
params = model.init(key2,x)
print("\nParams\n", params, "\n")
for i in range(1001):
  loss_val, grads = loss_grad_fn(params, model, x_samples, y_samples)
  params = update_params(params, learning_rate, grads)
  if i % 10 == 0:
    print(f'Loss step {i}:', loss_val)
print("\nParams\n", params, "\n")

Loss for "true" W, b :  0.023639789
Reinitializing parameters

Params
 {'params': {'kernel': Array([[ 2.35571519e-01, -1.71652585e-01, -4.45728786e-02,
        -4.68043566e-01,  4.54595268e-01],
       [-6.87736452e-01,  3.67835373e-01, -1.79262087e-01,
         1.29276231e-01, -2.42580160e-01],
       [ 2.02303097e-01, -2.49465615e-01,  2.74955630e-01,
         4.73488361e-01, -1.98002517e-01],
       [ 2.74478316e-01, -1.21369645e-01, -2.25361675e-01,
        -4.78193641e-01, -9.63979885e-02],
       [-6.19886033e-02, -1.72743499e-01,  2.96945305e-04,
        -7.17593372e-01,  2.00894207e-01],
       [-5.60321152e-01,  3.27208370e-01,  1.06281497e-01,
         1.28758654e-01,  1.16973236e-01],
       [ 1.82218999e-01,  1.11444063e-01, -1.62924141e-01,
         3.24953087e-02, -1.67053342e-01],
       [ 4.31294113e-01,  2.08004564e-01,  1.47714227e-01,
        -8.51502866e-02, -1.26487061e-01],
       [ 3.29497308e-01,  1.08470365e-01, -4.01340067e-01,
         1.66956007e-01,  5.7472

In [23]:
import optax
tx = optax.adam(learning_rate=learning_rate)
print("Reinitializing parameters")
params = model.init(key2,x)
opt_state = tx.init(params)
loss_grad_fn = jax.value_and_grad(mean_squared_error)#this isn't actually needed

for i in range(1001):
  loss_val, grads = loss_grad_fn(params, model, x_samples, y_samples)
  updates, opt_state = tx.update(grads, opt_state)
  params = optax.apply_updates(params, updates)
  if i % 10 == 0:
    print('Loss step {}: '.format(i), loss_val)


Reinitializing parameters
Loss step 0:  35.343876
Loss step 10:  4.3842506
Loss step 20:  1.277389
Loss step 30:  0.3606207
Loss step 40:  0.16952783
Loss step 50:  0.080962725
Loss step 60:  0.03698138
Loss step 70:  0.019477855
Loss step 80:  0.014700154
Loss step 90:  0.01265764
Loss step 100:  0.011839735
Loss step 110:  0.011675058
Loss step 120:  0.011621475
Loss step 130:  0.011586293
Loss step 140:  0.011573903
Loss step 150:  0.011570497
Loss step 160:  0.011568826
Loss step 170:  0.011568704
Loss step 180:  0.011568473
Loss step 190:  0.0115683945
Loss step 200:  0.011568376
Loss step 210:  0.01156837
Loss step 220:  0.011568373
Loss step 230:  0.011568373
Loss step 240:  0.01156837
Loss step 250:  0.011568378
Loss step 260:  0.0115683675
Loss step 270:  0.011568369
Loss step 280:  0.011568366
Loss step 290:  0.011568363
Loss step 300:  0.011568369
Loss step 310:  0.011568374
Loss step 320:  0.011568375
Loss step 330:  0.011568363
Loss step 340:  0.01156837
Loss step 350:  0.