In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Alternative matplotlib backend, kept for reference:
#%matplotlib notebook
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# custom packages
from ratsimulator import Agent, trajectory_generator, batch_trajectory_generator
from ratsimulator.Environment import Rectangle
from ctimeit import ctimeit # for timing

import sys

# Make modules in ../src importable. The membership guard keeps re-runs of this
# cell from appending the same relative path to sys.path more than once.
if "../src" not in sys.path:
    sys.path.append("../src")

from Brain import Brain
from Models import SorscherRNN

# Initialise Environment

In [3]:
# Settings for the rectangular environment
boxsize = (2.2, 2.2)
origo = (0, 0)  # defined here but not passed to Rectangle below
soft_boundary = 0.2

# Build the environment from the settings above
env = Rectangle(soft_boundary=soft_boundary, boxsize=boxsize)

Singular matrix
Singular matrix


# Initialise Brain

In [4]:
# Brain params
npcs = 512 # as used in Sorscher model; reused later as the model's Np
sigma = 0.12 # third positional argument to Brain -- presumably a place-cell width, TODO confirm against Brain

# Init brain: constructed from the environment, npcs and sigma (positional order matters)
brain = Brain(env, npcs, sigma)

# Initialise Agent / Data generator

In [5]:
# Training data (Agent) params
batch_size = 64
seq_len = 20
angle0 = None  # random
p0 = None      # random
# As used by Sorscher
dt = 0.02
# Deliberately NOT called `sigma`: that name already holds the value passed to
# Brain in the previous cell. Rebinding it here would make a re-run of the Brain
# cell silently pick up this (much larger) value instead.
traj_sigma = 5.76 * 2
b = 0.13 * 2 * np.pi
mu = 0

# Init training data: a generator of trajectory mini-batches in `env`
btg = batch_trajectory_generator(
    batch_size, env, seq_len, angle0, p0, dt=dt, sigma=traj_sigma, b=b, mu=mu
)

In [6]:
# Draw one item from the batch generator and unpack it into `pos` and `vel`
# (the item must contain exactly two elements).
pos, vel = next(btg)

# Test how long it takes to generate one mini-batch of training data

##### It is unfortunate if the most expensive part of training the network is the data-generating process. If that is the case, we should invest more time in optimizing data generation to speed up training. One direction is to implement `keras.utils.Sequence` so that the data generator can be sampled in parallel.

In [7]:
@ctimeit
def f(x):
    """Advance the iterator ``x`` by one item and throw the item away.

    The item is unpacked into exactly two values, so this raises if ``x``
    yields anything that is not a 2-element iterable. Always returns ``None``;
    the function exists only to be timed through the ``ctimeit`` decorator.
    """
    _first, _second = next(x)

# Uncomment the next line to run the timing test:
# f(btg)

# Format data-generator

In [8]:
def to_one_hot(x):
    """Return a strict one-hot encoding of the maximum along the last axis.

    Parameters
    ----------
    x : array_like
        Input with at least one dimension and a non-empty last axis.

    Returns
    -------
    numpy.ndarray
        Integer array of the same shape as ``x`` holding exactly one ``1`` per
        slice along the last axis and ``0`` everywhere else.

    Notes
    -----
    Ties are broken in favour of the first maximal entry (``np.argmax``
    semantics). Comparing every element against the maximum instead would mark
    all tied entries and produce a k-hot, not a one-hot, result.
    """
    x = np.asarray(x)
    winners = np.argmax(x, axis=-1)
    # Broadcast the winning index against the class indices of the last axis.
    is_winner = np.arange(x.shape[-1]) == np.expand_dims(winners, axis=-1)
    return np.where(is_winner, 1, 0)
    
def data_generator(btg, brain):
    """Convert raw ``(pos, vel)`` batches into ``((vel, init_pos), labels)`` samples.

    Parameters
    ----------
    btg : iterator
        Source of ``(pos, vel)`` pairs, advanced with ``next``. Both elements
        are indexed along axis 1 (presumably the time axis -- confirm against
        the trajectory generator).
    brain : callable
        Applied to ``pos``; its result is split along axis 1 into the first
        entry (initial state) and the remaining entries (labels).

    Yields
    ------
    tuple
        ``((vel[:, 1:], encoded[:, 0]), encoded[:, 1:])`` with
        ``encoded = brain(pos)``.

    Notes
    -----
    The generator ends cleanly when ``btg`` is exhausted. Letting the
    ``StopIteration`` from ``next(btg)`` escape a generator body would be
    re-raised as ``RuntimeError`` (PEP 479).
    """
    while True:
        try:
            pos, vel = next(btg)
        except StopIteration:
            return

        encoded = brain(pos)  # from euclidean positions to place-cell positions
        init_pos, labels = encoded[:, 0], encoded[:, 1:]  # "next" pos is label

        # Discard the first velocity; per the original author it is always 0
        # (the initial velocity).
        vel = vel[:, 1:]
        # Optional hardening of the targets: labels = to_one_hot(labels)

        yield (vel, init_pos), labels

# Wrap the trajectory generator and the brain into the training-sample generator
dg = data_generator(btg=btg, brain=brain)

# Specify and instantiate Model

In [9]:
# Model params
Ng = 4096
Np = npcs  # defined for Brain already
weight_decay = 1e-4
activation = "relu"
lr = 1e-3  # 1e-3 is default for Adam()

# model init
model = SorscherRNN(Ng, Np, weight_decay, activation)

model.compile(
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated
    # and is rejected by newer Keras releases.
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    # Keras calls the loss as loss(y_true, y_pred), which lines up with this
    # function's (labels, logits) parameter order.
    loss=tf.nn.softmax_cross_entropy_with_logits,
    #metrics=[tf.keras.metrics.SparseCategoricalAccuracy()], # requires dims: (batch_size,N)
)

# Build (by specifying input_shape) and summarize model
# NOTE(review): data_generator drops the first velocity step (vel[:, 1:]), so the
# time dimension actually fed during training may be seq_len - 1 rather than
# seq_len -- confirm against the trajectory generator's output shape.
input_shape = [(batch_size, seq_len, 2), (batch_size, Np)]  # velocity-input UNION initial-state
model.build(input_shape)
model.summary()

Model: "sorscher_rnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Dense)              multiple                  2097152   
_________________________________________________________________
RNN (SimpleRNN)              multiple                  16785408  
_________________________________________________________________
decoder (Dense)              multiple                  2097152   
Total params: 20,979,712
Trainable params: 20,979,712
Non-trainable params: 0
_________________________________________________________________


# Train model

In [10]:
# Fit the model on the sample generator. Because the input is a generator, the
# length of an epoch is set explicitly through steps_per_epoch.
epochs = 10
steps_per_epoch = 200
model.fit(
    x=dg,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
)

Epoch 1/10
  5/200 [..............................] - ETA: 3:02 - loss: 6.6238

KeyboardInterrupt: 

In [None]:
# Display the weights held by the model's `RNN` layer (shown as a SimpleRNN in
# the model summary).
model.RNN.weights