In [1]:
import time
import gym
import numpy as np
import control
import tensorflow as tf
from tensorflow_probability import distributions as tfd
import matplotlib.pyplot as plt
import os


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



In [2]:
class MVNFull():
    def __init__(self, loc,covariance_matrix):
        self.loc = tf.expand_dims(loc,-1)
        self.cov = covariance_matrix
        self.shape = tf.cast(self.cov.shape[-1],dtype=tf.float64)
        assert(self.loc.shape[-2]==self.cov.shape[-1]), "mean and covariance must have same n"
        assert(self.cov.shape[-1]==self.cov.shape[-2]),'covariance must have shape [...,n,n]'
        
    def prob(self, value):
        value = tf.expand_dims(value,-1)
#         print('Inside prob function')
#         print('value',value.shape)
        assert(value.shape[-1]==self.loc.shape[-1] and value.shape[-2]==self.loc.shape[-2]),'value must have same last 2 dimensions as loc'
#         print('cov',self.cov.shape)
        cov_inv = tf.linalg.inv(self.cov)
#         print('cov_inv',cov_inv.shape)
#         print(cov_inv.eval())
        cov_det = tf.linalg.det(self.cov)
#         print('cov_det', cov_det.shape)
#         print(cov_det.eval())
        denomenator = tf.math.sqrt(tf.math.pow((tf.cast(2*np.pi,dtype = tf.float64)),self.shape)*cov_det)
#         print('denomenator', denomenator.shape)
        diff = value-self.loc
#         print('diff', diff.shape)
        numerator = tf.squeeze(tf.math.exp((-0.5)*tf.matmul(tf.matmul(diff,cov_inv, transpose_a=True),diff)))
#         print('numerator', numerator.shape)
#         print('final value', tf.math.divide(numerator,denomenator).shape)
#         print('leaving prob function')
        return tf.math.divide(numerator,denomenator)

def forward_filter_fn(A,B,u,C,l_a_posteriori,P_a_posteriori,g,sigma,z):
    '''Calculates prior distributions based on the given posterior distributions
        and the current residual updates posterior distributions based on the new
        prior distributions
    '''
#     print('z',z.shape)
#     print('A', A.shape)
#     print('B',B.shape)
#     print('u',u.shape)
#     print('g',g.shape)
#     print('sigma',sigma.shape)
#     print('C', C.shape)
#     print('l_a_posteriori', l_a_posteriori.shape)
#     print('P_a_posteriori', P_a_posteriori.shape)
    _I = tf.eye(int(A.shape[0]), dtype = tf.float32)
    
    z = tf.expand_dims(z,-1)
    l_a_priori = tf.matmul(A,l_a_posteriori) + tf.matmul(B,u)
#     print('l_a_priori',l_a_priori.shape)
    P_a_priori = tf.matmul(tf.matmul(A,P_a_posteriori), A, transpose_b = True) + tf.matmul(g,g, transpose_b=True)
#     print('P_a_priori',P_a_priori.shape)
    y_pre = z - tf.matmul(C,l_a_priori)
#     print('y_pre', y_pre.shape)
    S = tf.matmul(sigma, sigma, transpose_b=True) + \
        tf.matmul(tf.matmul(C, P_a_priori), C, transpose_b=True)
#     print('S', S.shape)
    
    S_inv = tf.linalg.inv(S)
    K = tf.matmul(tf.matmul(P_a_priori, C, transpose_b=True), S_inv)
#     print('K', K.shape)
    l_a_posteriori = l_a_priori + tf.matmul(K,y_pre)
#     print('l_a_posteriori', l_a_posteriori.shape)
    I_KC = _I-tf.matmul(K,C)
#     print('I-KC', I_KC.shape)
    P_a_posteriori = tf.matmul(tf.matmul(I_KC, P_a_priori), I_KC, transpose_b=True) + \
                        tf.matmul(tf.matmul(K,tf.matmul(sigma, sigma, transpose_b = True)),
                                K, transpose_b=True)
#     print('P_a_posteriori',P_a_posteriori.shape)
    y_post = z-tf.matmul(C,l_a_posteriori)
#     print('y_post', y_post.shape)
    pred = tf.matmul(C, l_a_posteriori)
#     print('pred', pred.shape)
        
    return A,B,u,C,l_a_posteriori,P_a_posteriori,g,sigma,z

def dict_append(d,new_items,vals):
            for idx,item in enumerate(vals):
                d[item].append(new_items[idx])
def ct(x):
    return tf.convert_to_tensor(x,dtype=tf.float32)

In [42]:
class LSTM_SSM_model(object):
    def __init__(self, sess, name, m = 4, n=4, r=1,
                 learning_rate = 0.0001, lr_decay = 0.95, sigma_upper_bound = 1,
                 sigma_lower_bound = 0, g_upper_bound = 1,
                 g_lower_bound = 0.1, mu_0_upper_bound = 1,mu_0_lower_bound = 0,
                 Sigma_0_upper_bound = 1, Sigma_0_lower_bound = 0, beta = 0.00001,
                 b_upper_bound = 0.25, b_lower_bound = -0.25,thetaacc_error=0,initial_state=1.0
                ):
        
        '''thetaacc_error gives the amount of random angular acceleration that can be put on the pendulum,
        initial_state gives the amount of variation in the initial state
        '''
        if name == '':
            raise NameError("A model has no name")

        '''This functions assumes the state space model:
            l_(t+1) = A_(t)l_(t)+B(t)u_(t)
            z_(t+1) = C_(t)l_(t)
            where:
            l has dim m
            u has dim r
            z has dim m
            A has dim mxn
            B has dim mxr
            C has dim mxm
            '''
            
        self.sess = sess
        
        '''nn model hyperparameters'''
        self.learning_rate = learning_rate
        self.max_sample_len = tf.Variable(200, name = 'sample_len', trainable=False)
        max_sample_len = 200
        self.global_step = tf.Variable(0, name='global_step',trainable=False)
        self.increment_global_step = tf.assign_add(self.global_step,1, name = 'increment_global_step')
        self.lr_decay = lr_decay
        self.lstm_sizes = [128,128]
        last_lstm = self.lstm_sizes[-1]
        self.lstm_input_size = 8 # number of covariates to feed the lstm

        '''dims'''
        self.m = m # size of the state space
        self.dim_z = m
        self.n = n
        self.r = r
        self.lstm_input_dim = m+4 # previous states plus physical parameters
        
        
        self.initial_variance = 1
        
        '''error bounds'''
        self.sigma_upper_bound = sigma_upper_bound
        self.g_lower_bound = g_lower_bound
        self.g_upper_bound = g_upper_bound
        self.mu_0_upper_bound = mu_0_upper_bound
        self.mu_0_lower_bound = mu_0_lower_bound
        self.Sigma_0_upper_bound = Sigma_0_upper_bound
        self.Sigma_0_lower_bound= Sigma_0_lower_bound
        self.b_upper_bound = b_upper_bound
        self.b_lower_bound = b_lower_bound
        self.beta = beta
        
        '''LQR parameters'''
        self.initial_state = tf.Variable(1.0, name = 'initial_state',trainable=False)
#         self.
        self.sess.run(self.initial_state.initializer)
        self.env = gym.make('Custom_CartPole-v0', thetaacc_error=thetaacc_error, initial_state=self.initial_state.eval())
        self.gravity = self.env.gravity
        self.cart_mass = self.env.masscart
        self.pole_mass = self.env.masspole
        self.pole_length = self.env.length
        self.env_params = tf.expand_dims(np.array([self.gravity, self.cart_mass,
                                                   self.pole_mass,self.pole_length], dtype=np.float32),0)
        self.Q = np.eye(4)*[1,1,1,1]
        self.R = 1
        
        self.KF_states = {'A':[tf.Variable(tf.zeros([m,n],dtype=tf.float32,trainable=False)) for _ in range(max_sample_len)],
                          'B':[tf.Variable(tf.zeros([m,r],dtype=tf.float32,trainable=False)) for _ in range(max_sample_len)],
                          'u':[tf.Variable(tf.zeros([r,1],dtype=tf.float32,trainable=False)) for _ in range(max_sample_len)],
                          'C':[tf.Variable(tf.zeros([m,m],dtype=tf.float32,trainable=False)) for _ in range(max_sample_len)],
                          'l_a_posteriori':[tf.Variable(tf.zeros([m,n],dtype=tf.float32,trainable=False)) for _ in range(max_sample_len)],
                          'P_a_posteriori':[tf.Variable(tf.zeros([m,m],dtype=tf.float32,trainable=False)) for _ in range(max_sample_len)],
                          'g':[tf.Variable(tf.zeros([m,1],dtype=tf.float32,trainable=False)) for _ in range(max_sample_len)],
                          'sigma':[tf.Variable(tf.zeros([m,n],dtype=tf.float32,trainable=False)) for _ in range(max_sample_len)],
                          'z':[tf.Variable(tf.zeros([m,n],dtype=tf.float32,trainable=False)) for _ in range(max_sample_len)]}
        self.vals = ['A','B','u','C','l_a_posteriori','P_a_posteriori','g','sigma','z']
        
        self.y_0 = tf.Variable(tf.zeros([self.dim_z]), dtype = tf.float32, name = 'y_0', trainable = False)
#         A,B,u,C,l_a_posteriori,P_a_posteriori,g,sigma,z
        
        '''Saving model stuff, don"t need for now'''
        self.model_folder = 'development_trials/{}'.format(name)
        if not os.path.isdir(self.model_folder):
            print('This model has no folder')
            os.makedirs(self.model_folder)
        self.saved_model_location = '{}/model.ckpt'.format(self.model_folder)

        self.losses = []
        
        with tf.variable_scope('KF', reuse = tf.AUTO_REUSE):
            self.C = tf.get_variable(initializer = tf.eye(self.dim_z),dtype = tf.float32,
                                     name = 'C', trainable = False)
            self.W_A = tf.get_variable(initializer = tf.random.normal([last_lstm, m*n]),
                                       dtype = tf.float32, name = 'W_A')
            self.bias_A = tf.get_variable(initializer = tf.zeros([1, m*n]),
                                          dtype = tf.float32, name = 'bias_A')
            
            self.W_B = tf.get_variable(initializer = tf.random.normal([last_lstm, m*r]),
                                       dtype = tf.float32, name = 'W_B')
            self.bias_B = tf.get_variable(initializer = tf.zeros([1, m*r]),
                                         dtype = tf.float32, name = 'bias_B')

            self.W_g = tf.get_variable(initializer = tf.random.normal([last_lstm, m]),
                                       dtype = tf.float32, name = 'W_g')
            self.bias_g = tf.get_variable(initializer = tf.zeros([1, m]),
                                          dtype = tf.float32, name = 'bias_g')

            self.W_sigma = tf.get_variable(initializer = tf.random.normal([last_lstm, self.dim_z]),
                                           dtype = tf.float32, name = 'W_sigma')
            self.bias_sigma = tf.get_variable(initializer = tf.zeros([1, self.dim_z]),
                                              dtype = tf.float32, name = 'bias_sigma')

            self.W_mu_0 = tf.get_variable(initializer = tf.random.normal([last_lstm, self.m]),
                                          dtype = tf.float32, name = 'W_mu_0')
            self.bias_mu_0 = tf.get_variable(initializer = tf.zeros([1, self.m]),
                                             dtype = tf.float32, name = 'bias_mu_0')
            
            self.W_Sigma_0 = tf.get_variable(initializer = tf.random.normal([last_lstm, self.m]),
                                             dtype = tf.float32, name = 'W_Sigma_0')
            self.bias_Sigma_0 = tf.get_variable(initializer = tf.zeros([1, self.m]),
                                                dtype = tf.float32, name = 'bias_Sigma_0')
            
            self.P_0 = tf.Variable(self.initial_variance*tf.eye(self.m,dtype = tf.float32),
                                   name = 'P_0', trainable = False)

            self.y_0 = tf.Variable(tf.zeros([self.dim_z]), dtype = tf.float32, name = 'y_0', trainable = False)
            self.z_0 = tf.Variable(tf.zeros([self.dim_z, self.dim_z]), dtype = tf.float32, name = 'z_0', trainable = False)
            self.pred_0 = tf.Variable(tf.zeros([self.dim_z]), dtype = tf.float32, name = 'pred_0', trainable = False)
            
            
            '''Variables for test range in LQE only'''
#             self.A_test = tf.placeholder(tf.float32, shape = [self.batch_size, self.test_range, m, n], name = 'A_test')
#             self.B_test = tf.placeholder(tf.float32, shape = [self.batch_size, self.test_range, m, r], name = 'B_test')
#             self.g_test = tf.placeholder(tf.float32, shape = [self.batch_size, self.test_range, self.m, 1], name = 'g_test')
#             self.sigma_test = tf.placeholder(tf.float32, shape = [self.batch_size, self.test_range, self.dim_z, 1], name = 'sigma_test')
#             self.l_0_test = tf.placeholder(tf.float32, shape = [self.batch_size, self.m, 1], name = 'l_0_test')
#             self.final_z = tf.placeholder(tf.float32, shape = [self.batch_size, 1, self.dim_z], name = 'final_z')

#         self.z = tf.placeholder(tf.float32, shape = [self.dim_z],name = 'z')
        self.reward = tf.placeholder(tf.float32,shape = [])
#         self.A = tf.placeholder(tf.float32, shape = [None, m,n])
#         self.B = tf.placeholder(tf.float32, shape = [None, m,r])
#         self.u = tf.placeholder(tf.float32, shape = [None, r,1])
#         self.C = tf.placeholder(tf.float32, shape = [None, self.dim_z,m])
#         self.g = tf.placeholder(tf.float32, shape = [None, m,1])
#         self.sigma = tf.placeholder(tf.float32, shape = [None, dim_z,dim_z])
#         with tf.variable_scope('LSTM', reuse = tf.AUTO_REUSE):
#             self.lstm_input = tf.placeholder(tf.float32, shape = [None, self.lstm_input_dim],
#                                             name = 'lstm_input')

#         self.state_placeholder = tf.placeholder(tf.float32, [len(self.lstm_sizes), 2, 1, self.lstm_sizes[0]])
#         l = tf.unstack(self.state_placeholder, axis=0)
#         self.rnn_tuple_state = tuple([tf.nn.rnn_cell.LSTMStateTuple(l[idx][0], l[idx][1]) for idx in range(len(self.lstm_sizes))])

        self.lstm_state = None
        self.lstm_input_placeholder = tf.placeholder(tf.float32, [1,self.lstm_input_size])
    
    def build_LSTM(self):
        with tf.name_scope('LSTM'):
            with tf.variable_scope('LSTM', reuse=tf.AUTO_REUSE):

                lstms = [tf.contrib.rnn.LSTMCell(size, reuse=tf.get_variable_scope().reuse) for size in self.lstm_sizes]
                dropouts = [tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob = 0.5) for lstm in lstms]

                self.cell = tf.contrib.rnn.MultiRNNCell(dropouts)
                if self.lstm_state:
                    state = self.lstm_state
                else:
                    state = self.cell.zero_state(batch_size=1, dtype=tf.float32)
                self.lstm_output,self.lstm_state = self.cell(inputs = self.lstm_input_placeholder, state= state)
        return self
    
    def build_initial_affine(self):
        with tf.variable_scope('affine_transformations'):
            mu_0 = tf.transpose(tf.add(tf.matmul(self.lstm_output, self.W_mu_0), self.bias_mu_0))
            self.mu_0 = ((self.mu_0_upper_bound-self.mu_0_lower_bound)/(1+tf.exp(-mu_0)))+self.mu_0_lower_bound

            Sigma_0 = tf.transpose(tf.add(tf.matmul(self.lstm_output, self.W_Sigma_0), self.bias_Sigma_0))
            self.Sigma_0 = ((self.Sigma_0_upper_bound-self.Sigma_0_lower_bound)/(1+tf.exp(-Sigma_0)))+self.Sigma_0_lower_bound

            l_0_distribution = tfd.MultivariateNormalDiag(loc = self.mu_0, scale_diag = self.Sigma_0)
            self.l_0 = l_0_distribution.sample()
            return self
    
    def build_affine(self):
        with tf.variable_scope('affine_transformations'):
            self.A = tf.reshape(tf.add(tf.matmul(self.lstm_output, self.W_A), self.bias_A),shape=(self.m,self.n))
            self.B = tf.reshape(tf.add(tf.matmul(self.lstm_output, self.W_B), self.bias_B),shape=(self.m,self.r))
            
            g = tf.transpose(tf.add(tf.matmul(self.lstm_output, self.W_g), self.bias_g))
            self.g = ((self.g_upper_bound-self.g_lower_bound)/(1+tf.exp(-g)))+self.g_lower_bound

            sigma = tf.transpose(tf.add(tf.matmul(self.lstm_output, self.W_sigma), self.bias_sigma))
            self.sigma = (self.sigma_upper_bound)/(1+tf.exp(-sigma))
            
        return self
    
    def likelihood_fn(self, params, inputs):
        '''Compute likelihood over a batch
        params contains: mu, Sigma - the parameters of the likelihood distribution
        inputs contains: calculations of mu: F, a, l_filtered==l_a_posteriori
                        calculations of Sigma: a, F, P_a_posteriori, g, sigma
        '''
        A, B, u, C, g, sigma, l_filtered, P_filtered = inputs
        mu, Sigma = params
        '''
        a (bs, m, 1)
        b (bs, 1)
        F (bs, m, m)
        g (bs, m, 1)
        sigma (bs, 1, 1)
        f (bs, m, 1)
        S (bs, m, m)
        mu (bs, 1, 1)
        Sigma (bs, 1, 1)
        '''
        print('A',A.shape)
        print('B',B.shape)
        print('u',u.shape)
        print('C',C.shape)
        print('g',g.shape)
        print('sigma',sigma.shape)
        print('l_filtered',l_filtered.shape)
        print('p_filtered',P_filtered.shape)
        print('mu',mu.shape)
        print('Sigma',Sigma.shape)
        mu = tf.matmul(C, tf.add(tf.matmul(A,l_filtered), tf.matmul(B,u)))
#         mu = tf.add(tf.matmul(tf.matmul(a, F, transpose_a=True), f), b)

        temp = tf.matmul(tf.matmul(A, P_filtered), A, transpose_b=True) + tf.matmul(g, g, transpose_b=True)
        Sigma = tf.matmul(tf.matmul(C, temp), C, transpose_b=True) + tf.square(sigma)

#         temp = tf.matmul(tf.matmul(F, S), F, transpose_b=True) + tf.matmul(g, g, transpose_b=True)
#         Sigma = tf.matmul(tf.matmul(a, temp, transpose_a=True), a) + tf.square(sigma)
        
        return mu, Sigma
    
    def build_loss(self):
        with tf.variable_scope('loss', reuse = tf.AUTO_REUSE):
#             for key,value in self.KF_states.items():
#                 print(key,value[0].shape)
#             print('mu_0',self.mu_0.shape)
#             print('Sigma_0',self.Sigma_0.shape)
            mu_1 = tf.matmul(self.KF_states['A'][0], self.mu_0)+\
                    tf.matmul(self.KF_states['B'][0],self.KF_states['u'][0])
            Sigma_1 = tf.matmul(tf.matmul(self.KF_states['C'][0],
                                          tf.linalg.diag(tf.squeeze(self.Sigma_0,-1))),self.KF_states['C'][0],
                               transpose_b=True)+tf.square(self.KF_states['sigma'][0])
#             print('mu_1',mu_1.shape)
#             print('Sigma_1', Sigma_1.shape)
#             print(len(self.KF_states['A'][1:]),
#                                              self.KF_states['B'][1:],
#                                              self.KF_states['u'][1:],
#                                              self.KF_states['C'][1:],
#                                              self.KF_states['g'][1:],
#                                              self.KF_states['sigma'][1:],
#                                              self.KF_states['l_a_posteriori'][:-1],
#                                              self.KF_states['P_a_posteriori'][1:])
            if self.reward.eval()>1:
                mu, Sigma = tf.scan(self.likelihood_fn,
                                    elems = (ct(self.KF_states['A'][1:]),
                                             ct(self.KF_states['B'][1:]),
                                             ct(self.KF_states['u'][1:]),
                                             ct(self.KF_states['C'][1:]),
                                             ct(self.KF_states['g'][1:]),
                                             ct(self.KF_states['sigma'][1:]),
                                             ct(self.KF_states['l_a_posteriori'][:-1]),
                                             ct(self.KF_states['P_a_posteriori'][1:])),
                                    initializer = (mu_1, Sigma_1))

                self.mu = tf.concat([tf.expand_dims(mu_1,0), mu], 0)
                self.Sigma = tf.concat([tf.expand_dims(Sigma_1,0),Sigma], 0)
#             print(self.mu.shape)
#             print(self.Sigma.shape)
#             print(self.Sigma.eval())
            else:
                self.mu = tf.expand_dims(mu_1,0)
                self.Sigma=tf.expand_dims(Sigma_1,0)
            
            '''TODO:
                Find a legitimate way to get a symmetric covariance here. Needs to be decomposable by
                cholesky decomposition. Does that mean is must be positive definite?
            '''
            z_distribution = tfd.MultivariateNormalFullCovariance(loc = tf.squeeze(self.mu,-1), \
                                covariance_matrix = tf.add(tf.eye(self.m, batch_shape=[self.Sigma.shape[0]]), \
                                                           tf.matmul(self.Sigma,self.Sigma,transpose_a=True)), \
                                                           validate_args=True)

            self.z_probability = z_distribution.prob(tf.squeeze(self.KF_states['z'],-1))
            regularizers = tf.nn.l2_loss(self.W_g) + tf.nn.l2_loss(self.W_mu_0) + \
                        tf.nn.l2_loss(self.W_sigma) + tf.nn.l2_loss(self.W_Sigma_0) + \
                        tf.nn.l2_loss(self.W_A) + tf.nn.l2_loss(self.W_B)
            print(self.z_probability.eval())
#             self.loss = tf.reduce_mean(self.beta*regularizers)- \
#                             (tf.math.square(tf.reciprocal(self.reward))* \
#                              tf.reduce_sum(tf.log(self.z_probability+1e-8)))
            self.loss = tf.reduce_mean(self.beta*regularizers)-tf.math.square(reward)* \
                                tf.reduce_sum(tf.log(self.z_probability+1e-8))
            tf.summary.scalar('loss', self.loss)

#             self.optimizer = tf.train.AdamOptimizer(decayed_learning_rate)
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
            grads_and_vars = self.optimizer.compute_gradients(self.loss)
#             for grad,var in grads_and_vars:
#                 print(grad,var)
#             capped_grads_and_vars = [(tf.clip_by_norm(grad, 1.), var) for grad, var in grads_and_vars]
            capped_grads_and_vars = [gv if gv[0] is None else [tf.clip_by_value(gv[0], -10., 10.), gv[1]] for gv in grads_and_vars]
            self.train_op = self.optimizer.apply_gradients(capped_grads_and_vars)
        return self
    
    def train(self, epochs):
        '''In this method, we require each epoch as 1 training sample
            We will generate single steps at a time for input to the LSTM,
            and then single step calculations of the optimal control
        '''
        Q = np.eye(4)*[10,1,1,1]
        R = 1
        
        

        self.rewards = [0 for _ in range(epochs)]
        
        for i in range(epochs):
            start = time.time()
            self.environment_states=[]
#             self.KF_states = {'A':[],'B':[],'u':[],'C':[],'l_a_posteriori':[],'P_a_posteriori':[],
#                              'g':[],'sigma':[],'z':[]}
            
            done = False
            observation = self.env.reset()
            self.environment_states.append(ct(observation))
            preds = []
            epoch_loss = []
            initial_state = self.sess.run(self.cell.zero_state(batch_size=1,dtype=tf.float32))
            initial_input = tf.concat((self.env_params,np.zeros(shape=[1,4])),axis=1)

            with tf.variable_scope('LSTM', reuse=tf.AUTO_REUSE):
                feed_dict = {self.lstm_input_placeholder:initial_input.eval()}
                mu_0, Sigma_0, l_0,A,B,g,sigma = self.sess.run([self.mu_0, self.Sigma_0, self.l_0,self.A,self.B,self.g,self.sigma], 
                                                               feed_dict=feed_dict)
            u = tf.zeros(shape = [1,self.r], dtype=tf.float32)
            l_a_posteriori = self.l_0
            P_a_posteriori = self.P_0
            
            uninitialized_vars= []
            for var in tf.global_variables():
                try:
                    sess.run(var)
                except tf.errors.FailedPreconditionError:
                    uninitialized_vars.append(var)
                    
#             print(uninitialized_vars)
#             print(tf.global_variables())
            while not done:
                dict_append(self.KF_states,
                            forward_filter_fn(ct(A),ct(B),u,self.C,l_a_posteriori,P_a_posteriori,ct(g),ct(sigma),
                                              self.environment_states[-1]),self.vals)

                K,S,E = control.lqr(self.KF_states['A'][-1].eval(),self.KF_states['B'][-1].eval(),Q,R)
                u = -tf.matmul(K.astype(np.float32),tf.expand_dims(self.environment_states[-1],-1))
                observation,reward,done,info = self.env.step(np.squeeze(u.eval()))
                self.environment_states.append(ct(observation))
#                 print(self.lstm_state)
                self.rewards[i] += 1
                next_input = tf.concat((self.env_params,tf.expand_dims(self.environment_states[-1],0)),axis=1)
                feed_dict = {self.lstm_input_placeholder:next_input.eval()}
                A,B,g,sigma = self.sess.run([self.A,self.B,self.g,self.sigma], feed_dict=feed_dict)
                
            feed_dict = {self.reward:self.rewards[i]}
            loss_,_,_ = self.sess.run([self.loss,self.train_op,self.increment_global_step],feed_dict=feed_dict)
            
            print(self.rewards)
            for state in self.KF_states:
                print(state,len(self.KF_states[state]))
            for item in tf.global_variables():
                print(item)

In [43]:
tf.reset_default_graph()
with tf.Session() as sess:
    testing = LSTM_SSM_model(sess, name = 'deleteme')
    testing.build_LSTM().build_initial_affine().build_affine().build_loss()
    testing.sess.run(tf.global_variables_initializer())
    testing.train(1)
#     lstm_sizes=[128,64]
#     placeholders = [tf.placeholder(tf.float32, [2, 1, state_size]) for state_size in lstm_sizes]
#     rnn_tuple_state = \
#             tuple([tf.nn.rnn_cell.LSTMStateTuple(placeholders[idx][0],
#                                                  placeholders[idx][1]) for idx in range(len(lstm_sizes))])

TypeError: Input 'b' of 'MatMul' Op has type float32 that does not match type int32 of argument 'a'.