In [7]:
import time
import gym
import numpy as np
import control as ct
import tensorflow as tf
import tensorflow.keras.layers as layers
from tensorflow_probability import distributions as tfd
import matplotlib.pyplot as plt
import os
tf.enable_eager_execution()

In [137]:
class MVNFull():
    """Multivariate normal density with a full (dense) covariance matrix.

    The constructor asserts that ``loc`` has shape [..., n, 1]-like (its
    second-to-last dimension must equal n) and that ``covariance_matrix``
    has shape [..., n, n].
    """
    def __init__(self, loc,covariance_matrix):
        self.loc = loc
        self.cov = covariance_matrix
        # n stored as float64 so it can be used directly as the exponent of (2*pi).
        self.shape = tf.cast(self.cov.shape[-1],dtype=tf.float64)
        n_from_loc = self.loc.shape[-2]
        n_cols = self.cov.shape[-1]
        n_rows = self.cov.shape[-2]
        assert(n_from_loc==n_cols), "mean and covariance must have same n"
        assert(n_cols==n_rows),'covariance must have shape [...,n,n]'

    def prob(self, value):
        """Evaluate the Gaussian density at ``value``.

        ``value`` must agree with ``loc`` on its last two dimensions.
        Returns exp(-0.5 * d^T cov^-1 d) / sqrt((2*pi)^n * det(cov)) with
        d = value - loc; the quadratic form is squeezed before the division.
        """
        assert(value.shape[-1]==self.loc.shape[-1]
               and value.shape[-2]==self.loc.shape[-2]),'value must have same last 2 dimensions as loc'
        precision = tf.linalg.inv(self.cov)
        determinant = tf.linalg.det(self.cov)
        two_pi = tf.cast(2*np.pi,dtype = tf.float64)
        normalizer = tf.math.sqrt(tf.math.pow(two_pi,self.shape)*determinant)
        centered = value-self.loc
        # d^T * cov^-1 * d, built in two steps.
        left_product = tf.matmul(centered,precision, transpose_a=True)
        mahalanobis_sq = tf.matmul(left_product,centered)
        unnormalized = tf.squeeze(tf.math.exp((-0.5)*mahalanobis_sq))
        return tf.math.divide(unnormalized,normalizer)

In [138]:
def forward_filter_fn(A,B,u,g,C,sigma,l_a_posteriori,P_a_posteriori,z):
    '''Run one Kalman filter step: predict from the previous posterior, then
    correct with the new observation.

    Args:
        A: transition matrix applied to the previous posterior mean.
        B: control matrix applied to ``u``.
        u: control input; ``tf.matmul(B, u)`` must be addable to ``A @ l``.
        g: process-noise factor; the process covariance is ``g @ g^T``.
        C: emission matrix mapping latent state to observation space.
        sigma: observation-noise factor; the noise covariance is ``sigma @ sigma^T``.
        l_a_posteriori: previous posterior mean.
        P_a_posteriori: previous posterior covariance.
        z: current observation; a trailing axis is added before use.

    Returns:
        Tuple ``(A, B, u, g, C, sigma, l_a_posteriori, P_a_posteriori, z)``
        where the mean and covariance are the updated posterior and ``z`` is
        the observation with its trailing axis added. The other entries are
        passed through unchanged.
    '''
    identity = tf.eye(int(A.shape[0]), dtype = tf.float64)
    z = tf.expand_dims(z,-1)

    # Predict: propagate the previous posterior through the dynamics.
    l_a_priori = tf.matmul(A,l_a_posteriori) + tf.matmul(B,u)
    P_a_priori = tf.matmul(tf.matmul(A,P_a_posteriori), A, transpose_b = True) + tf.matmul(g,g, transpose_b=True)

    # Innovation (pre-fit residual) and its covariance. The observation-noise
    # covariance is computed once and reused in the covariance update below.
    y_pre = z - tf.matmul(C,l_a_priori)
    obs_noise_cov = tf.matmul(sigma, sigma, transpose_b=True)
    S = obs_noise_cov + \
        tf.matmul(tf.matmul(C, P_a_priori), C, transpose_b=True)

    # Kalman gain.
    S_inv = tf.linalg.inv(S)
    K = tf.matmul(tf.matmul(P_a_priori, C, transpose_b=True), S_inv)

    # Update: corrected mean and covariance, written as
    # (I - KC) P (I - KC)^T + K R K^T.
    l_a_posteriori = l_a_priori + tf.matmul(K,y_pre)
    I_KC = identity-tf.matmul(K,C)
    P_a_posteriori = tf.matmul(tf.matmul(I_KC, P_a_priori), I_KC, transpose_b=True) + \
                        tf.matmul(tf.matmul(K,obs_noise_cov),
                                K, transpose_b=True)

    return A,B,u,g,C,sigma,l_a_posteriori,P_a_posteriori,z

In [152]:
def build_LSTM(lstm_sizes):
    """Build a stacked recurrent cell with one LSTM layer per entry of ``lstm_sizes``.

    Each LSTM cell is wrapped in a DropoutWrapper with output keep
    probability 0.5, and the wrapped cells are combined into a MultiRNNCell.
    """
    # First pass: create the raw LSTM cells.
    base_cells = []
    for units in lstm_sizes:
        base_cells.append(tf.contrib.rnn.LSTMCell(units, reuse=tf.get_variable_scope().reuse))

    # Second pass: wrap every cell with output dropout.
    wrapped_cells = []
    for base_cell in base_cells:
        wrapped_cells.append(tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob = 0.5))

    return tf.contrib.rnn.MultiRNNCell(wrapped_cells)

# Dense layers used by affine_transformation, keyed by (name, units).
# Constructing layers.Dense(...) inside the function on every call would give
# each time step its own freshly initialised weights; caching the layer objects
# makes every call reuse the same parameters.
_AFFINE_LAYERS = {}


def _get_dense(units, name):
    """Return the cached Dense layer for (name, units), creating it on first use."""
    key = (name, units)
    if key not in _AFFINE_LAYERS:
        _AFFINE_LAYERS[key] = layers.Dense(units, name = name)
    return _AFFINE_LAYERS[key]


def _squash(raw, lower, upper):
    """Map ``raw`` element-wise into (lower, upper) with a scaled logistic sigmoid."""
    return (upper-lower)*tf.sigmoid(raw)+lower


def affine_transformation(lstm_output, first=False):
    """Map an LSTM output to state-space model parameters.

    Reads the module-level sizes ``m``, ``n``, ``r``, ``dim_z`` and the
    ``*_lower_bound`` / ``*_upper_bound`` constants.

    Args:
        lstm_output: output of the recurrent cell for a single step; it is
            reshaped assuming a batch of one.
        first: when True, produce the initial-state quantities instead of
            the per-step parameters.

    Returns:
        If ``first``: ``(mu_0, Sigma_0, l_0)`` where ``mu_0`` is (m, 1),
        ``Sigma_0`` is (m, m) and ``l_0`` is a sample from N(mu_0, Sigma_0)
        with a trailing axis added.
        Otherwise: ``(A, B, g, sigma)`` with shapes (m, n), (m, r), (m, 1)
        and (dim_z, 1); ``g`` and ``sigma`` are squashed into their bounds.
    """
    if first:
        mu_0 = tf.reshape(_get_dense(m, 'mu_0dense')(lstm_output), shape=(m,1))
        mu_0 = _squash(mu_0, mu_0_lower_bound, mu_0_upper_bound)

        Sigma_0 = tf.reshape(_get_dense(m, 'Sigma_0dense')(lstm_output),shape = (m,1))
        Sigma_0 = _squash(Sigma_0, Sigma_0_lower_bound, Sigma_0_upper_bound)
        # The rank-one outer product is singular on its own; a small diagonal
        # jitter makes it a valid covariance. The identity is sized by m (not
        # a literal) so other latent dimensions work as well.
        jitter = 0.001
        Sigma_0 = tf.matmul(Sigma_0,Sigma_0,transpose_b=True)+tf.eye(m, dtype=Sigma_0.dtype)*jitter

        l_0_distribution = tfd.MultivariateNormalFullCovariance(loc = tf.squeeze(mu_0),
                                                                covariance_matrix= Sigma_0,
                                                                validate_args=True)
        l_0 = tf.expand_dims(l_0_distribution.sample(),1)
        return mu_0,Sigma_0,l_0

    A = tf.reshape(_get_dense(m*n, 'A_dense')(lstm_output),shape=(m,n))
    B = tf.reshape(_get_dense(m*r, 'B_dense')(lstm_output),shape=(m,r))

    g = tf.reshape(_get_dense(m, 'g_dense')(lstm_output),shape = (m,1))
    g = _squash(g, g_lower_bound, g_upper_bound)

    sigma = tf.reshape(_get_dense(dim_z, 'sigma_dense')(lstm_output),shape=(dim_z,1))
    sigma = _squash(sigma, sigma_lower_bound, sigma_upper_bound)

    return A,B,g,sigma


def likelihood_fn(params, inputs):
    """Build the predictive observation distributions for a sequence of steps.

    Args:
        params: ``(mu_1, Sigma_1)``, mean and covariance for the first
            observation; they become element 0 of the returned lists.
        inputs: ``(A, B, u, g, C, sigma, l_filtered, P_filtered)``, eight
            equally long sequences of per-step tensors.

    Returns:
        ``(mu, Sigma)``: two lists of length ``len(A) + 1``. For each step,
        mu    = C (A l_filtered + B u) and
        Sigma = C (A P_filtered A^T + g g^T) C^T + sigma sigma^T.

    Raises:
        AssertionError: if the eight sequences do not all have the same length.
    """
    A, B, u, g, C, sigma, l_filtered, P_filtered = inputs
    mu_1, Sigma_1 = params

    # Every sequence, including g, must line up with A.
    steps = len(A)
    assert all(len(seq)==steps for seq in (B, u, g, C, sigma, l_filtered, P_filtered)),"Not all sequences are same length"

    mu = [mu_1]
    Sigma = [Sigma_1]
    for A_t, B_t, u_t, g_t, C_t, sigma_t, l_t, P_t in zip(A, B, u, g, C, sigma,
                                                          l_filtered, P_filtered):
        # One-step latent prediction.
        l_pred = tf.add(tf.matmul(A_t,l_t), tf.matmul(B_t,u_t))
        P_pred = tf.matmul(tf.matmul(A_t, P_t), A_t, transpose_b=True) + \
                    tf.matmul(g_t, g_t, transpose_b=True)
        # Project into observation space and add observation noise.
        mu.append(tf.matmul(C_t, l_pred))
        Sigma.append(tf.matmul(tf.matmul(C_t, P_pred), C_t, transpose_b=True) + \
                     tf.matmul(sigma_t,sigma_t,transpose_b=True))
    return mu,Sigma

def calculate_loss():
    """Compute the negative log-likelihood of the recorded observations.

    Reads the module-level rollout history (``A_all``, ``B_all``, ``u_all``,
    ``g_all``, ``C_all``, ``sigma_all``, ``l_a_posteriori``,
    ``P_a_posteriori``, ``env_states``), the initial-state parameters
    ``mu_0`` / ``Sigma_0`` and the step counter ``rewards``.

    Returns:
        Scalar tensor: minus the sum of the log densities of ``env_states[t]``
        under the predictive distribution for step t.
    """
    # NOTE(review): mu_1 is not projected through C and Sigma_1 does not
    # propagate through A or add g g^T, unlike the later steps in
    # likelihood_fn. Confirm this is the intended first-step distribution.
    mu_1 = tf.add(tf.matmul(A_all[0], mu_0),tf.matmul(B_all[0],u_all[0]))
    Sigma_1 = tf.add(tf.matmul(tf.matmul(C_all[0],Sigma_0),C_all[0], transpose_b=True),tf.matmul(sigma_all[0],sigma_all[0],
                                                                            transpose_b=True))

    # Defaults cover a single-step rollout, where there are no later steps.
    mu, Sigma = [mu_1], [Sigma_1]
    if rewards > 1:
        # l_a_posteriori[k] is the posterior available *before* step k, since
        # index 0 holds the initial state. Steps 1..T-1 therefore pair with
        # entries 1..T-1, i.e. the slice [1:-1]. Using [2:] would condition
        # each prediction on the observation it is meant to predict.
        mu,Sigma = likelihood_fn((mu_1,Sigma_1),(A_all[1:],B_all[1:],u_all[1:],g_all[1:],
                                                 C_all[1:],sigma_all[1:],
                                                 l_a_posteriori[1:-1],P_a_posteriori[1:-1]))

    print('mu',len(mu))
    print('Sigma',len(Sigma))
    print('A_all',len(A_all))
    print('env_states',len(env_states))

    # mu[0] / Sigma[0] already describe the first observation, so each
    # observation is scored exactly once.
    likelihoods = []
    for i in range(len(mu)):
        z_distribution = MVNFull(loc = mu[i], covariance_matrix = Sigma[i])
        likelihoods.append(z_distribution.prob(env_states[i]))

    return -tf.reduce_sum(tf.math.log(tf.stack(likelihoods)))

# --- Model dimensions (read as globals by affine_transformation) -------------
# m: size of the latent state (mu_0 is reshaped to (m, 1), A to (m, n)).
m = 4
# dim_z: size of the observation vector (C is eye(dim_z), sigma is (dim_z, 1)).
dim_z = m
# n: second dimension of the transition matrix A.
n = 4
# r: size of the control input (B is (m, r)).
r = 1
# Not referenced elsewhere in the visible cells.
lstm_input_dim = m+4
# --- Output ranges for the sigmoid-squashed parameters -----------------------
sigma_upper_bound = 1
sigma_lower_bound = 0
g_upper_bound = 1
g_lower_bound = 0.1
mu_0_upper_bound = 1
mu_0_lower_bound = 0
Sigma_0_upper_bound = 1
Sigma_0_lower_bound = 0
# beta and the b_* bounds are not referenced elsewhere in the visible cells.
beta = 0.01
b_upper_bound = 0.25
b_lower_bound = -0.25
# Passed to gym.make below as keyword arguments of the custom environment.
thetaacc_error = 0
initial_state_variation = 0.1

# Scale of the initial posterior covariance (initial_variance * I).
initial_variance = 5

# Not referenced elsewhere in the visible cells.
count = 0
# --- Rollout history: one entry appended per environment step ----------------
A_all = []
B_all = []
u_all = []
g_all = []
C_all = []
sigma_all = []
l_a_posteriori = []
P_a_posteriori = []
env_states = []
# Order must match the tuple returned by forward_filter_fn: the rollout loop
# zips the two together and appends element-wise.
all_KF_params = [A_all,B_all,u_all,g_all,C_all,sigma_all,l_a_posteriori,P_a_posteriori,env_states]
# Incremented once per environment step; calculate_loss compares it with 1.
rewards = 0
lstm_sizes = [128,64]
env = gym.make('Custom_CartPole-v0', thetaacc_error=thetaacc_error, initial_state=initial_state_variation)
gravity = env.gravity
cart_mass = env.masscart
pole_mass = env.masspole
pole_length = env.length
# Physical constants of the environment as a (1, 4) float64 row; they form the
# first half of every LSTM input.
env_params = tf.expand_dims(np.array([gravity, cart_mass,pole_mass,pole_length],
                                     dtype=np.float64),0)



# --- Recurrent cell and initial latent state ----------------------------------
cell1 = build_LSTM(lstm_sizes)
initial_state = cell1.get_initial_state(batch_size=1,dtype = tf.float64)
# First LSTM input: environment constants followed by four zeros.
initial_input = tf.concat((env_params, np.zeros(shape = [1,4])),axis=1)

output_single, state_single = cell1(inputs=initial_input, state=initial_state)
mu_0, Sigma_0,l_0= affine_transformation(output_single,first=True)

# u_all.append(tf.zeros(shape = [1,r], dtype=tf.float64))
# Seed the filter history: index 0 of both lists holds the initial state, so
# these two lists end up one entry longer than the per-step lists.
l_a_posteriori.append(l_0)
P_a_posteriori.append(initial_variance*tf.eye(m, dtype = tf.float64))


# --- Roll out one episode with zero control, filtering every observation -----
observation=env.reset()
env.render()

# Loop-invariant tensors: zero control input and identity emission matrix.
no_control = tf.zeros(shape = [1,r], dtype=tf.float64)
C = tf.eye(dim_z, dtype = tf.float64)

done = False
try:
    while not done:
        env.render()
        A, B, g, sigma = affine_transformation(output_single)

        observation, reward, done, info = env.step(tf.squeeze(no_control))

        KF_update = forward_filter_fn(A, B, no_control,g, C, sigma,l_a_posteriori[-1],P_a_posteriori[-1],
                                      tf.convert_to_tensor(observation,dtype=tf.float64))
        # forward_filter_fn returns its values in the same order as all_KF_params.
        for KF_single,KF_param  in zip(KF_update,all_KF_params):
            KF_param.append(KF_single)

        rewards+=1

        # Feed the latest observation back into the LSTM. env_states[-1] is the
        # column vector stored by forward_filter_fn, so it is reshaped to a
        # (1, dim_z) row to sit next to env_params.
        latest_observation = tf.reshape(env_states[-1],shape=(1,dim_z))
        next_input = tf.concat((env_params,latest_observation),axis=1)
        output_single,state_single=cell1(inputs=next_input,state=state_single)
finally:
    # Release the environment even if a step fails part-way through.
    env.close()

param_names = ['A_all','B_all','u_all','g_all','C_all','sigma_all','l_a_posteriori','P_a_posteriori','env_states']
for name,KF_param in zip(param_names,all_KF_params):
    print(name,len(KF_param), KF_param[0].shape)
calculate_loss()

A_all 48 (4, 4)
B_all 48 (4, 1)
u_all 48 (1, 1)
g_all 48 (4, 1)
C_all 48 (4, 4)
sigma_all 48 (4, 1)
l_a_posteriori 49 (4, 1)
P_a_posteriori 49 (4, 4)
env_states 48 (4, 1)
mu 48
Sigma 48
A_all 48
env_states 48


In [104]:
# Summarise each recorded history: its name, number of entries and the shape
# of its first entry.
param_names = ['A_all','B_all','u_all','g_all','C_all','sigma_all','l_a_posteriori','P_a_posteriori','env_states']
for position, name in enumerate(param_names[:len(all_KF_params)]):
    KF_param = all_KF_params[position]
    print(name,len(KF_param), KF_param[0].shape)

A_all 54 (4, 4)
B_all 54 (4, 1)
u_all 55 (1, 1)
g_all 54 (4, 1)
C_all 54 (4, 4)
sigma_all 54 (4, 1)
l_a_posteriori 55 (4, 1)
P_a_posteriori 55 (4, 4)
env_states 55 (4, 1)
