In [None]:
# Generate a random MDP

import numpy as np

# Problem dimensions and RNG seed
S = 3
U = 3
seed = 20

# Seed NumPy's global generator so the draws below are repeatable
np.random.seed(seed)

# Draw the array c first, then the unnormalized weights Pp (both S x S x U);
# the order of these draws fixes the values obtained for a given seed
c = np.random.rand(S, S, U) * 10
Pp = np.random.rand(S, S, U)

# Binary mask Q: 0 where a fresh uniform draw falls below the threshold,
# 1 everywhere else
threshold = 0.25
Q = np.random.rand(S, S, U)
Q = np.where(Q >= threshold, 1, 0)

# Masked weights: entries with Q == 0 are dropped
P = Q * Pp

# Turn every slice P[s, :, u] into a distribution over its middle axis.
# np.ndindex(U, S) walks u in the outer position and s in the inner one.
for u, s in np.ndindex(U, S):
    # A slice that was masked out completely is refilled with uniform draws
    if P[s, :, u].sum() == 0:
        P[s, :, u] = np.random.rand(S)

    # Rescale the slice in place so that it sums to 1
    P[s, :, u] /= P[s, :, u].sum()

In [None]:
# Generate a random Unichain MDP

import numpy as np

# Initialize parameters
S = 3  # number of states (first two axes of c and P)
U = 3  # size of the last axis of c and P
       # NOTE(review): u is presumably the action/control index - confirm
seed = 20

# Set random seed
np.random.seed(seed)

# Generate random arrays: c first, then the unnormalized transition weights.
# The order of the draws fixes the values obtained for a given seed.
c = 10 * np.random.rand(S, S, U)
Pp = np.random.rand(S, S, U)

# Create binary mask Q based on threshold (0 = transition removed)
Q = np.random.rand(*Pp.shape)
threshold = 0.2
Q = np.where(Q < threshold, 0, 1)

# Index of the first state of the closed set. Despite the name, the states
# s >= recurrent are the ones that can never be left: every transition from
# them back into the leading states s < recurrent is removed below.
recurrent = int(np.ceil(S/2))

# Forbid transitions from the closed set back into the leading states
Q[recurrent:, :recurrent, :] = 0

# Keep the transition into the last state available from every state and for
# every u. Closing off states >= recurrent is not enough on its own: the
# random mask could still cut every link from some states into the closed
# set (or split the closed set), producing a second closed class for some
# policy. With the last state reachable from everywhere in one step, any
# stationary policy has exactly one recurrent class.
Q[:, S - 1, :] = 1

# Apply the mask to Pp to get P
P = Pp * Q

# Normalize P[s, :, u] for each u and state s
for u in range(U):
    for s in range(S):
        # If row sums to zero, replace with random values. After the step
        # above this only happens if the kept weight was drawn as exactly 0.
        if np.sum(P[s, :, u]) == 0:
            P[s, :, u] = np.random.rand(S)

        # Re-apply the closed-set restriction, since the refill above may
        # have put weight on the forbidden transitions
        if s >= recurrent:
            P[s, :recurrent, u] = 0

        # Normalize the row to sum to 1
        P[s, :, u] = P[s, :, u] / np.sum(P[s, :, u])

# np.random.rand samples from [0, 1), so a weight of exactly 0.0 is possible
# (if extremely unlikely); fail loudly rather than return a non-unichain MDP.
if not (P[:, S - 1, :] > 0).all():
    raise RuntimeError(
        "unichain construction failed: the last state is not reachable "
        "from every state under every u; retry with a different seed"
    )