In [None]:
def value_function(
    states,
    actions,
    rewards,
    transition_function,
    discount_factor=0.9,
    theta=0.0001,
    policy=None,
):
    """
    Evaluate a policy with iterative policy evaluation.

    Repeatedly sweeps over all states, replacing each state's value with the
    expected one-step return under the policy, until the largest change seen
    in a sweep drops below ``theta``.

    Args:
        states: Iterable of all states in the environment.
        actions: Container indexed by state that yields the actions available
            in that state (e.g. a dict state -> list of actions). States with
            no entry, or with an empty entry, are treated as terminal and
            keep a value of 0.
        rewards: Not used by the computation; rewards are taken from the
            tuples returned by ``transition_function``. Kept so existing
            callers keep working.
        transition_function: Callable ``(state, action)`` returning an
            iterable of ``(prob, next_state, reward, done)`` tuples. A
            ``None`` result is treated as "no transitions defined".
        discount_factor: Discount factor for future rewards.
        theta: Convergence threshold on the largest per-sweep value change.
        policy: Optional mapping state -> {action: probability}. States
            missing from it (or the whole argument being ``None``) fall back
            to a uniform random policy over the available actions.

    Returns:
        Value function: A dictionary mapping state -> value.
    """
    # Materialise once so a one-shot iterable survives repeated sweeps.
    states = list(states)
    V = {s: 0.0 for s in states}

    while True:
        delta = 0.0
        for s in states:
            try:
                available = actions[s]
            except (KeyError, IndexError):
                # No actions listed for this state: treat it as terminal.
                available = ()

            if policy is not None and s in policy:
                action_probs = policy[s]
            elif available:
                uniform = 1.0 / len(available)
                action_probs = {a: uniform for a in available}
            else:
                action_probs = {}

            v = 0.0
            for a in available:
                # Weight each action by its probability under the policy;
                # summing unweighted returns over actions is not an
                # expectation and can make the iteration diverge.
                action_prob = action_probs.get(a, 0.0)
                if not action_prob:
                    continue
                outcomes = transition_function(s, a)
                if outcomes is None:
                    continue
                for prob, next_state, reward, _done in outcomes:
                    v += action_prob * prob * (
                        reward + discount_factor * V[next_state]
                    )

            # Track the largest change across all states in this sweep.
            delta = max(delta, abs(v - V[s]))
            V[s] = v

        # Stop once a full sweep changes no value by theta or more.
        if delta < theta:
            break

    return V
# Example MDP: seven states, of which only s1-s3 have actions and rewards listed.
states = ['s1', 's2', 's3', 's4', 's5', 's6', 'Home']
actions = {state: ['a1', 'a2'] for state in ('s1', 's2', 's3')}
# Reward table keyed by (state, action).
rewards = {
    ('s1', 'a1'): 1,
    ('s1', 'a2'): 2,
    ('s2', 'a1'): 2,
    ('s2', 'a2'): 1,
    ('s3', 'a1'): 2,
    ('s3', 'a2'): 1,
}

def transition_function(s, a):
    """
    Return the possible outcomes of taking action ``a`` in state ``s``.

    Args:
        s: Current state.
        a: Action taken in ``s``.

    Returns:
        A list of ``(probability, next_state, reward, done)`` tuples. The
        list is empty for state-action pairs that have no transitions
        defined, so callers can always iterate over the result.
    """
    if s == 's1' and a == 'a1':
        return [(0.5, 's2', 1, False), (0.5, 's3', 2, False)]
    if s == 's1' and a == 'a2':
        return [(1, 's2', 2, False)]
    # TODO: continue for all state-action pairs

    # Undefined pairs yield no outcomes instead of an implicit None, which
    # would break callers that loop over the result.
    return []

# Settings for the run: discount applied to future rewards, and the
# convergence threshold on the per-sweep value change.
discount_factor = 0.9
theta = 1e-4

# Compute the state values for the example MDP defined above.
V = value_function(
    states,
    actions,
    rewards,
    transition_function,
    discount_factor=discount_factor,
    theta=theta,
)
