# In [1]:
import numpy as np

def print_to_file(content, filename="output.txt", mode="a"):
    """
    Print any content to a file

    1-D and 2-D numpy arrays are written with np.savetxt using eight decimal
    places per value. Everything else, including 0-D arrays and arrays with
    three or more dimensions (which np.savetxt rejects), is written as
    str(content) followed by a newline.

    Args:
        content: Any type of content (will be converted to string)
        filename: Name of the output file (default: output.txt)
        mode: File opening mode (default: append)
    """
    with open(filename, mode) as f:
        # np.savetxt only accepts 1-D or 2-D input and raises ValueError for
        # anything else, so other shapes take the plain-text path.
        if isinstance(content, np.ndarray) and content.ndim in (1, 2):
            np.savetxt(f, content, fmt='%.8f')
        else:
            f.write(str(content) + "\n")

def value_iteration(grid_size=4, gamma=1.0, theta=1e-4, *, log_file="output.txt"):
    """
    Evaluate the equiprobable random policy on a square gridworld.

    NOTE: despite the name, the update averages over the four moves (weight
    0.25 each) instead of maximising over them, so the returned table is the
    value of the uniform random policy rather than the optimal value function.

    Every non-terminal state yields a reward of -1 per move. The bottom-right
    cell (N-1, N-1) is the only terminal state and keeps the value 0. A move
    that would leave the grid keeps the agent in its current cell.

    Args:
        grid_size: Side length N of the N x N grid (must be at least 1)
        gamma: Discount factor applied to the successor state's value
        theta: Convergence threshold; sweeps stop once the largest per-state
            change within a sweep is below this value
        log_file: File that receives the per-state trace through
            print_to_file (default: output.txt). Pass None to disable logging.

    Returns:
        N x N numpy array holding the value of each state

    Raises:
        ValueError: If grid_size is smaller than 1
    """
    N = grid_size
    if N < 1:
        raise ValueError(f"grid_size must be at least 1, got {grid_size}")

    rewards = np.full((N, N), -1.0)  # Reward of -1 for each move
    rewards[N-1, N-1] = 0  # Terminal state has reward 0

    V = np.zeros((N, N))  # Initialize value function to 0 for all states
    actions = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # Up, Down, Left, Right
    terminal = (N - 1, N - 1)

    while True:
        delta = 0  # Track max change in value function during this sweep
        V_new = np.copy(V)  # Synchronous update: read from V, write to V_new
        log_lines = []  # Trace for this sweep, flushed with one file write

        for i in range(N):
            for j in range(N):
                if (i, j) == terminal:
                    continue  # Terminal state keeps its value

                # Accumulate in the fixed order Up, Down, Left, Right so the
                # floating-point result does not depend on iteration details.
                expected = 0
                for di, dj in actions:
                    ni, nj = i + di, j + dj
                    if not (0 <= ni < N and 0 <= nj < N):
                        ni, nj = i, j  # Stay in place if out of bounds
                    expected += 0.25 * (rewards[i, j] + gamma * V[ni, nj])

                V_new[i, j] = expected
                change = abs(V_new[i, j] - V[i, j])
                delta = max(delta, change)

                if log_file is not None:
                    log_lines.extend((
                        "--------------------------------",
                        str(change),
                        str(V_new[i, j]),
                        str(V[i, j]),
                        f"Position: ({i}, {j})",
                        f"New Value: {V_new[i, j]:.4f}",
                        f"Old Value: {V[i, j]:.4f}",
                        f"Delta: {delta:.4f}",
                    ))

        # One append per sweep instead of eight file opens per state; the
        # text written is the same because print_to_file adds the final "\n".
        if log_lines:
            print_to_file("\n".join(log_lines), filename=log_file)

        V = V_new  # Update value function
        if delta < theta:
            break  # Stop when values converge

    return V

# Run Value Iteration

def print_grid(str, filename="grid.txt"):
    """
    Append text to a grid log file

    The text is written exactly as given; no newline is added.

    Args:
        str: Text to append. The name shadows the builtin but is kept so
            existing keyword callers (print_grid(str=...)) keep working.
        filename: Name of the output file (default: grid.txt)
    """
    # The context manager closes the file even if write() raises.
    with open(filename, "a") as f:
        f.write(str)

# Run the evaluation with its default settings, append the resulting value
# table to the log file under a heading, and echo the table to stdout.
optimal_values = value_iteration()
print_to_file("\nFinal Value Function:")
print_to_file(content=optimal_values, mode="a")
print(optimal_values)


# Output:
# [[-59.42367735 -57.42387125 -54.2813141  -51.71012579]
#  [-57.42387125 -54.56699476 -49.71029394 -45.13926711]
#  [-54.2813141  -49.71029394 -40.85391609 -29.99766609]
#  [-51.71012579 -45.13926711 -29.99766609   0.        ]]
