In [None]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM
import scipy.io as sci
import andi 
# Shared generator object; its andi_dataset() method is called in the data-generation cell
# below to produce the X / Y datasets that are written to disk.
AD = andi.andi_datasets()

In [None]:
########### Generating data for training neural network for inference #########
'''
Define dimension, number of trajectories for each dataset, trajectory length and number of datasets to be generated
'''
dimension = 1                # 1, 2 or 3 Dimensions
N = 100000                   # Number of trajectories per datasets
traj_length = 225            # Length of the trajectories
number_dataset = 30          # Number of datasets to be saved

# Constant part of every output file name; only the running index and the extension follow.
# The training cell rebuilds exactly the same names, so the two must stay in sync.
dataset_prefix = r'data\inference\ ' + str(dimension) + 'D_' + str(traj_length) + '_'

n = 0
for dataset_index in range(number_dataset):
    # Only the first two returned items (X and Y) are stored; the rest are discarded.
    # NOTE(review): min_T/max_T presumably bound the length as [min_T, max_T), so that every
    # trajectory has exactly traj_length points -- confirm against the andi documentation.
    X, Y, *_unused_outputs = AD.andi_dataset(N=N,
                                             tasks=1,
                                             dimensions=dimension,
                                             min_T=traj_length,
                                             max_T=traj_length + 1)
    sci.savemat(dataset_prefix + str(dataset_index) + '.mat', {'X': X, 'Y': Y})
    n = dataset_index + 1    # number of datasets written so far

In [None]:
###### Building the recurrent neural network #####

# Width of one input block: the last axis of every sample fed to the first LSTM layer.
block_size = 4*dimension

# Layer stack, in order:
#   1. LSTM with 250 units that returns the full sequence so the next LSTM can consume it;
#      the sequence length is left open (None) and each step carries block_size values.
#   2. LSTM with 50 units that returns only its final state.
#   3. Dense layer with a single output unit.
# Both LSTM layers use a recurrent dropout of 0.2 to limit overtraining.
model_inference = Sequential([
    LSTM(250,
         return_sequences=True,
         recurrent_dropout=0.2,
         input_shape=(None, block_size)),
    LSTM(50,
         dropout=0,
         recurrent_dropout=0.2),
    Dense(1),
])

# Mean squared error is minimized with Adam; mean absolute error is reported alongside.
model_inference.compile(loss='mse', optimizer='adam', metrics=['mae'])

# Print a layer-by-layer summary of the network that was just built.
model_inference.summary()

In [None]:
##### Define the function for preprocessing the data

def data_prepare(X,Y,N,traj_length,dimension):
    """Convert raw trajectories into standardized increments for training.

    X is reshaped to (N, dimension, traj_length) and differenced along the time
    axis. For every trajectory and coordinate the increments are shifted to zero
    mean and divided by their standard deviation; when that standard deviation is
    not above 1e-10 the divisor is 1, so such increments are only centered. One
    zero step is then appended so the time axis has traj_length entries again.

    Returns:
        (x, label): x has shape (N, traj_length, dimension); label is Y unchanged.
    """
    import numpy as np

    min_std = 1e-10
    positions = np.array(X).reshape(N, dimension, traj_length)
    increments = np.diff(positions, axis=2)                      # (N, dimension, traj_length-1)

    # Statistics are taken over time, separately for each trajectory and coordinate.
    center = np.mean(increments, axis=2, keepdims=True)
    spread = np.std(increments, axis=2, keepdims=True)
    scale = np.where(spread > min_std, spread, 1)                # avoid dividing by ~0
    standardized = (increments - center) / scale

    # Pad one zero step per coordinate, then put time before the coordinate axis.
    zero_step = np.zeros((N, dimension, 1))
    padded = np.concatenate((standardized, zero_step), axis=2)   # (N, dimension, traj_length)
    x = np.transpose(padded, axes=[0, 2, 1])

    return (x, Y)

In [None]:
###### Training the recurrent neural network #####

# Training schedule: stage i fits the network on dataset_used[i] of the saved datasets,
# running number_epochs[i] epochs on each one with mini-batches of batch_sizes[i].
batch_sizes = [32, 128, 512, 2048]
dataset_used = [1, 4, 5, 20]
number_epochs = [5, 4, 3, 2]
n = 0                                                       # index of the next dataset file to load

# data_prepare returns traj_length time steps per trajectory (the last one is zero padding),
# each holding `dimension` values. One network input block of block_size values therefore
# spans block_size // dimension time steps. Only whole blocks can be fed to the network, so
# trailing steps that do not fill a block are dropped before reshaping; without this the
# reshape raises ValueError whenever traj_length is not a multiple of the block length
# (e.g. traj_length = 225 with blocks of 4 steps). When traj_length already is a multiple,
# nothing is dropped.
steps_per_block = block_size // dimension
blocks_per_traj = traj_length // steps_per_block
usable_steps = blocks_per_traj * steps_per_block

for batch in range(len(batch_sizes)):
    for repeat in range(dataset_used[batch]):
        # File names must match the ones written by the data-generation cell exactly.
        data = sci.loadmat(r'data\inference\ ' + str(dimension) + 'D_' + str(traj_length) + '_' + str(n) + '.mat')
        n += 1
        X = data['X'][0][dimension-1]
        Y = data['Y'][0][dimension-1].reshape(N,)
        x, label = data_prepare(X,Y,N,traj_length,dimension)
        blocks = x[:, :usable_steps, :].reshape(N, blocks_per_traj, block_size)
        model_inference.fit(blocks,
                            label,
                            epochs=number_epochs[batch],
                            batch_size=batch_sizes[batch],
                            validation_split=0.1,           # hold out 10% of each dataset for validation
                            shuffle=True)

# Raw string on purpose: in an ordinary literal '\u' starts a unicode escape, so
# 'nets\user_trained\...' is rejected with a SyntaxError before the cell can run.
model_inference.save(r'nets\user_trained\inference_' + str(traj_length) + '.h5')     # Save the network