In [3]:
import numpy as np

In [5]:
def generate_transition_matrix(num_st, transition_multiplier):
    # by setting specific design we give more probability to self-loops
    diag_matrix = np.eye(num_st) * transition_multiplier
    markov_chain = np.random.random_integers(
        low=1, high=15, size=(num_st, num_st))
    markov_chain = markov_chain + diag_matrix
    transition_matrix = markov_chain / markov_chain.sum(axis=1)[:, None]
    return transition_matrix

def generate_state_action_reward_dist(num_st, num_ac, num_ob, observation_multiplier):
    state_action_reward_matrix = np.empty(
        shape=(num_st, num_ac, num_ob))
    perturbation_matrix = np.zeros(shape=(num_ac, num_ob))

    if num_ac >= num_ob:
        for i in range(int(num_ac / num_ob)):
            perturbation_matrix[num_ob*i:num_ob*(i+1), :] = observation_multiplier * np.eye(num_ob)
    else:
        for i in range(int(num_ob / num_ac)):
            perturbation_matrix[:, num_ac*i:num_ac*(i+1)] = observation_multiplier * np.eye(num_ac)

    for state in range(num_st):
        action_reward = np.random.random((num_ac, num_ob))
        if num_ac >= num_ob:
            permutation = np.random.permutation(num_ac)
            permuted_matrix = perturbation_matrix[permutation, :]
        else:
            permutation = np.random.permutation(num_ob)
            permuted_matrix = perturbation_matrix[:, permutation]

        action_reward += permuted_matrix
        action_reward = action_reward / action_reward.sum(axis=1)[:, None]
        state_action_reward_matrix[state] = action_reward

        #for action in range(num_ac):
        #    categorical = np.random.random(size=num_ob)
        #    categorical = categorical / categorical.sum()
        #    state_action_reward_matrix[state, action, :] = \
        #        np.array(categorical)
    return state_action_reward_matrix

def generate_reference_matrix(state_action_reward_matrix, num_st, num_ac, num_ob):
    reference_matrix = np.empty(
        shape=(num_ac ** 2 * num_ob ** 2,
               num_st ** 2))

    for starting_state in range(num_st):
        for arriving_state in range(num_st):
            # print(f"From state {starting_state} to {arriving_state}")
            column = starting_state * num_st + arriving_state
            for first_action in range(num_ac):
                for second_action in range(num_ac):
                    starting_row = first_action * num_ac * num_ob ** 2 + second_action * num_ob ** 2
                    ending_row = starting_row + num_ob ** 2
                    first_obs_prob = state_action_reward_matrix[starting_state, first_action]
                    second_obs_prob = state_action_reward_matrix[arriving_state, second_action]
                    obs_probabilities = np.outer(first_obs_prob, second_obs_prob).reshape(-1)
                    reference_matrix[starting_row:ending_row, column] = obs_probabilities
                    # print(obs_probabilities)
    return reference_matrix

In [6]:
num_states = 5
num_actions = 10
num_obs = 5
trans_multiplier = 20
obs_multiplier = 10

In [7]:
trans_matrix = generate_transition_matrix(num_st=num_states, transition_multiplier=trans_multiplier)
observation_matrix = generate_state_action_reward_dist(num_st=num_states, num_ac=num_actions, num_ob=num_obs, observation_multiplier=obs_multiplier)
ref_matrix = generate_reference_matrix(observation_matrix, num_st=num_states, num_ob=num_obs, num_ac=num_actions)

  markov_chain = np.random.random_integers(


In [14]:
O = np.empty((num_obs*num_actions, num_states))
for state in range(num_states):
    curr_state_ac_obs = observation_matrix[state, :, :].reshape(-1)
    O[:, state] = curr_state_ac_obs

In [16]:
u, s, vh = np.linalg.svd(O, full_matrices=True)
# print(s)
print(f"Min singular {s.min()} \t Rank {len(s)}")
u, s, vh = np.linalg.svd(ref_matrix, full_matrices=True)
# print(s)
print(f"Min singular {s.min()} \t\t Rank {len(s)}")

Min singular 1.5539760639142857 	 Rank 5
Min singular 1.0537996294624188 	 Rank 5
Min singular 2.414841607218538 		 Rank 25


In [13]:
print(observation_matrix.shape)

(5, 10, 5)
