In [1]:
%load_ext autoreload
%autoreload 2
#%matplotlib notebook
%matplotlib inline

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from scipy.spatial.distance import euclidean

import sys

# Put ../src on the module search path so the project-local imports in the
# following cell can be resolved. The membership test keeps repeated runs of
# this cell from appending the same relative path more than once.
if "../src" not in sys.path:
    sys.path.append("../src")

In [3]:
from Agent import Agent, trajectory_generator, batch_trajectory_generator
from Environment import Rectangle
from Models import SorscherRNN, UnitPathIntegrator

from ctimeit import ctimeit # for timing

# Initialise Environment

In [4]:
# Environment params
boxsize = (2.2, 2.2)  # forwarded to Rectangle as `boxsize`; units are not shown here — TODO confirm
origo = (0,0)  # NOTE(review): not referenced by any cell visible in this chunk
soft_boundary = 0.2  # forwarded to Rectangle as `soft_boundary`

# Init Environment
# NOTE(review): the saved output of this cell shows "Singular matrix" printed
# twice; which statement emits it cannot be told from this notebook alone.
env = Rectangle(boxsize=boxsize, soft_boundary=soft_boundary)
# Overwrites the instance attribute `avoid_walls` with None.
# NOTE(review): presumably this switches wall avoidance off — confirm against
# Environment.Rectangle.
env.avoid_walls = None

Singular matrix
Singular matrix


# Initialise Agent / Data generator

In [5]:
# Training data (Agent) params
batch_size = 32  # also reused as the leading dimension of the model's input_shape
# data_generator only reads indices 0 and 1 along axis 1 of each batch, so a
# sequence length of 2 is all that this notebook consumes.
seq_len = 2
angle0 = None # random
p0 = None     # random
# As used by Sorscher
# NOTE(review): sigma and b are written as a base value times a factor
# (2 and 2*pi respectively); the meaning of these parameters is defined by
# batch_trajectory_generator, which is not visible here — confirm there.
dt = 0.02
sigma = 5.76 * 2
b = 0.13 * 2 * np.pi
mu = 0

# Init training data
# `btg` is consumed with next() inside data_generator, which unpacks each item
# as a (pos, vel) pair.
btg = batch_trajectory_generator(batch_size, env, seq_len, angle0, p0, dt=dt, sigma=sigma, b=b, mu=mu)

# Format data-generator

In [6]:
def to_one_hot(x):
    """Return a strictly one-hot encoding of the last-axis maximum of ``x``.

    For every slice along the last axis, the entry at the position of the
    maximum is set to 1 and all other entries are set to 0. When the maximum
    is attained more than once, only its first occurrence is marked, so each
    slice always sums to exactly 1 (an equality mask against the maximum would
    instead give a k-hot result for ties).

    Parameters
    ----------
    x : array_like
        Input with at least one dimension and a non-empty last axis.

    Returns
    -------
    numpy.ndarray
        Integer array of zeros and ones with the same shape as ``x``.
    """
    x = np.asarray(x)
    # argmax returns the first index of the maximum, which breaks ties.
    winners = np.expand_dims(np.argmax(x, axis=-1), axis=-1)
    one_hot = np.zeros(x.shape, dtype=int)
    # Scatter a single 1 into every slice along the last axis.
    np.put_along_axis(one_hot, winners, 1, axis=-1)
    return one_hot
    
def data_generator(btg):
    """Turn ``(pos, vel)`` batches into ``((vel, init_pos), y_true)`` samples.

    Parameters
    ----------
    btg : iterator
        Iterator whose items unpack into ``(pos, vel)``; both must support
        ``[:, k]`` indexing (e.g. numpy arrays).

    Yields
    ------
    tuple
        ``((vel, init_pos), y_true)`` where ``init_pos`` is ``pos[:, 0]``,
        ``y_true`` is ``pos[:, 1]`` and ``vel`` is ``vel[:, 1]``.

    Notes
    -----
    The generator finishes normally once ``btg`` is exhausted. Without the
    explicit handling below, the ``StopIteration`` from ``next(btg)`` would
    surface to the consumer as a ``RuntimeError`` (PEP 479).
    """
    while True:
        try:
            pos, vel = next(btg)
        except StopIteration:
            # Source ran dry: end this generator instead of leaking the error.
            return
        init_pos, y_true = pos[:,0], pos[:,1] # "next" pos is label
        vel = vel[:,1] # first velocity (idx 0) is always 0 (initial vel)
        yield (vel,init_pos), y_true

# Initialise data generator
# `dg` wraps `btg`; it is the `x` argument of model.fit and is also sampled
# directly with next(dg) further down.
dg = data_generator(btg)

# Specify and instantiate Model

In [7]:
# Model params
# NOTE(review): Ng and Np are presumably the hidden and output layer widths —
# confirm in Models.UnitPathIntegrator. With both set to 2, the saved summary
# lists three Dense layers of 4 parameters each.
Ng = 2
Np = 2
weight_decay = 1e-4
activation = None  # alternative left by the author: "relu"
lr = 1e-3  # 1e-3 is default for Adam()

# model init
model = UnitPathIntegrator(Ng, Np, weight_decay, activation)

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases warn about or reject.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    loss=tf.keras.losses.MeanAbsoluteError(),
)

# Build (by specifying input_shape) and summarize model
# Two inputs, in the order data_generator yields them: velocity, then initial state.
input_shape = [(batch_size, 2), (batch_size, Np)]
model.build(input_shape)
model.summary()

Model: "unit_path_integrator"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
velocity_encoder (Dense)     multiple                  4         
_________________________________________________________________
init_position_encoder (Dense multiple                  4         
_________________________________________________________________
decoder (Dense)              multiple                  4         
_________________________________________________________________
activation (Activation)      multiple                  0         
Total params: 12
Trainable params: 12
Non-trainable params: 0
_________________________________________________________________


# Train model

In [8]:
# train model
epochs = 15
# `dg` is a plain Python generator without a length, so the number of batches
# that make up one epoch is given explicitly.
steps_per_epoch = 200
# The return value (a History object, see the saved output) is not stored.
model.fit(x=dg, epochs=epochs, steps_per_epoch=steps_per_epoch)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7f4958609bb0>

In [9]:
# Hypothesis: for an ideal path integrator in Euclidean coordinates, modelled as
#     p = C @ activation(A @ v + B @ p0),
# should all three weight matrices come out as identity matrices?
# Display the trained weights to check.
model.weights

[<tf.Variable 'velocity_encoder/kernel:0' shape=(2, 2) dtype=float32, numpy=
 array([[-1.1223828 , -0.41783872],
        [ 0.04182081,  1.2343307 ]], dtype=float32)>,
 <tf.Variable 'init_position_encoder/kernel:0' shape=(2, 2) dtype=float32, numpy=
 array([[-1.1253455 , -0.4160894 ],
        [ 0.03968569,  1.233881  ]], dtype=float32)>,
 <tf.Variable 'decoder/kernel:0' shape=(2, 2) dtype=float32, numpy=
 array([[-0.89914596, -0.30311564],
        [ 0.02876061,  0.81992716]], dtype=float32)>]

# Interesting observations

In [24]:
# The weights do not have to be identity matrices. The model also works when
# A = B and C = A^-1: A and B then apply the same linear change of basis to
# v and p0 respectively, so both encoded vectors live in the same coordinate
# system and their sum is well-defined. The decoder C, being the inverse
# transform, maps that sum back into the original coordinate system.

# Pull the three kernels out as numpy arrays and invert the decoder once.
A, B, C = (weight.numpy() for weight in model.weights)
C_inv = np.linalg.inv(C)

# Check if decoder weight is the inverse of the encoder weights
print("--- Are the three weight matricies approximately equal? ---\n")
print("A = ",A)
print("B = ",B)
print("C^-1 = ",C_inv)

# Summed absolute element-wise differences; values near 0 mean the pair matches.
print("\nsum(abs(A-B)) = {}".format(np.abs(A-B).sum()))
print("sum(abs(A-C^-1)) = {}".format(np.abs(A-C_inv).sum()))
print("sum(abs(B-C^-1)) = {}".format(np.abs(B-C_inv).sum()))

--- Are the three weight matricies approximately equal? ---

A =  [[-1.1223828  -0.41783872]
 [ 0.04182081  1.2343307 ]]
B =  [[-1.1253455  -0.4160894 ]
 [ 0.03968569  1.233881  ]]
C^-1 =  [[-1.1254752  -0.4160725 ]
 [ 0.03947833  1.2342151 ]]

sum(abs(A-B)) = 0.007296815514564514
sum(abs(A-C^-1)) = 0.007316630333662033
sum(abs(B-C^-1)) = 0.0006881020963191986


In [11]:
(vel,init_pos), y_true = next(dg)
idx = 0
y_pred = model((vel,init_pos))