In [1]:
import numpy as np

def read_mdp(file_path):
    """Parse an MDP description from a whitespace-separated text file.

    Each line is dispatched on its first token:

    * ``states N``  - number of states
    * ``actions N`` - number of actions
    * ``tran s a s2 r p`` - one transition: from state ``s`` under action
      ``a`` to state ``s2`` with reward ``r`` and probability ``p``
    * ``gamma g``   - discount factor

    Blank lines and lines starting with any other token are ignored. The
    declarations may appear in any order, because the transition table is
    assembled only after the whole file has been read.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A tuple ``(states, actions, transitions, gamma)``. ``transitions`` is
        a nested dict where ``transitions[s][a]`` is a list of
        ``(probability, next_state, reward, False)`` tuples, with an (initially
        empty) list for every ``s`` in ``range(states)`` and ``a`` in
        ``range(actions)``.

    Raises:
        ValueError: If a ``tran`` line does not carry exactly five numeric
            fields, or a numeric field cannot be parsed.
        KeyError: If a ``tran`` line names a state or action outside the
            declared ranges.
    """
    states = 0
    actions = 0
    gamma = 0.0
    pending = []  # parsed 'tran' records, applied once the table sizes are known

    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split()
            if not parts:
                # split() yields [] for blank/whitespace-only lines; indexing
                # parts[0] on those would raise IndexError.
                continue
            keyword = parts[0]
            if keyword == 'states':
                states = int(parts[1])
            elif keyword == 'actions':
                actions = int(parts[1])
            elif keyword == 'tran':
                # Parse everything as float first so indices written as "1.0"
                # are still accepted, then narrow the indices to int.
                initial_state, action_taken, final_state, reward, transition_prob = map(float, parts[1:])
                pending.append((int(initial_state), int(action_taken), int(final_state), reward, transition_prob))
            elif keyword == 'gamma':
                gamma = float(parts[1])

    transitions = {s: {a: [] for a in range(actions)} for s in range(states)}
    for initial_state, action_taken, final_state, reward, transition_prob in pending:
        transitions[initial_state][action_taken].append((transition_prob, final_state, reward, False))

    return states, actions, transitions, gamma


def value_iteration(P, gamma, theta=1e-10):
    """Compute state values by repeated Bellman optimality backups.

    Args:
        P: Transition model indexed as ``P[s][a]`` for ``s`` in
            ``range(len(P))``; each entry is an iterable of
            ``(prob, next_state, reward, done)`` tuples. The width of the
            action-value table is taken from ``len(P[0])``.
        gamma: Discount factor applied to the value of the next state.
        theta: Convergence threshold. Iteration stops once the largest
            absolute change between the current estimate and the next backup
            is below this value.

    Returns:
        A 1-D ``float64`` array with one value per state. An empty model
        yields an empty array.

    Note:
        There is no iteration cap: the loop ends only when the change between
        sweeps drops below ``theta``.
    """
    n_states = len(P)
    if n_states == 0:
        # Without this guard the P[0] lookup below fails on an empty model.
        return np.zeros(0, dtype=np.float64)
    n_actions = len(P[0])

    V = np.zeros(n_states, dtype=np.float64)
    while True:
        Q = np.zeros((n_states, n_actions), dtype=np.float64)
        for s in range(n_states):
            for a in range(len(P[s])):
                for prob, next_state, reward, done in P[s][a]:
                    # (not done) zeroes the bootstrap term for terminal transitions.
                    Q[s][a] += prob * (reward + gamma * V[int(next_state)] * (not done))
        # Greedy backup, computed once and reused for both the test and the update.
        backed_up = np.max(Q, axis=1)
        if np.max(np.abs(V - backed_up)) < theta:
            break
        V = backed_up
    return V


def extract_policy(P, V, gamma):
    """Derive the greedy policy implied by a value function.

    For every state the expected one-step return of each action is computed
    from ``P`` and ``V``, and the index of the largest one is selected (the
    first such index on ties, as returned by ``np.argmax``).

    Args:
        P: Transition model indexed as ``P[s][a]``; each entry is an iterable
            of ``(prob, next_state, reward, done)`` tuples.
        V: Indexable collection of state values.
        gamma: Discount factor applied to the value of the next state.

    Returns:
        A dict mapping each state index in ``range(len(P))`` to the chosen
        action index.
    """
    policy = {}
    for state in range(len(P)):
        action_returns = []
        for action in range(len(P[state])):
            expected = 0
            for prob, next_state, reward, done in P[state][action]:
                # (not done) drops the bootstrap term for terminal transitions.
                expected = expected + prob * (reward + gamma * V[int(next_state)] * (not done))
            action_returns.append(expected)
        policy[state] = np.argmax(action_returns)
    return policy


def write_output(output_file, values, policy):
    """Write one ``"<value> <action>"`` line per state to a text file.

    ``values`` is paired positionally with ``policy.values()``, so the policy
    mapping is expected to iterate in the same order as ``values``. Each value
    is rounded to six decimal places before being written.

    Args:
        output_file: Path of the file to create or overwrite.
        values: Iterable of numeric state values.
        policy: Mapping whose values are the chosen actions.
    """
    with open(output_file, 'w') as file:
        for pair in zip(values, policy.values()):
            rounded_v = round(pair[0], 6)
            p = pair[1]
            file.write(f"{rounded_v} {p}\n")


def process_mdp_file(file_path, output_file_name, output_dir="./MDP"):
    """Solve the MDP stored in ``file_path`` and write the result to disk.

    The file is parsed with ``read_mdp``, solved with ``value_iteration``, a
    policy is derived with ``extract_policy``, and both are written with
    ``write_output``. ``"successful"`` is printed once the file is written.

    Args:
        file_path: Path of the MDP description to read.
        output_file_name: File name (not a full path) of the result file.
        output_dir: Directory the result file is written into. Defaults to
            ``"./MDP"``, the location that was previously hard-coded.
    """
    # The state/action counts returned by read_mdp are not needed here.
    _, _, transitions, gamma = read_mdp(file_path)
    optimal_values = value_iteration(transitions, gamma)
    optimal_policy = extract_policy(transitions, optimal_values, gamma)

    output_file_path = f"{output_dir}/{output_file_name}"
    write_output(output_file_path, optimal_values, optimal_policy)
    print("successful")


In [2]:
# Solve the MDP described in mdp-10-5.txt; the result file is written next to it in ./MDP.
mdp_file_path_1 = "./MDP/mdp-10-5.txt"
process_mdp_file(file_path=mdp_file_path_1, output_file_name="output_file_mdp-10-5.txt")

successful


In [3]:
# Solve the MDP described in mdp-2-2.txt; the result file is written next to it in ./MDP.
mdp_file_path_2 = "./MDP/mdp-2-2.txt"
process_mdp_file(file_path=mdp_file_path_2, output_file_name="output_file_mdp-2-2.txt")

successful
