# In [None]:
import numpy as np

class RoutePlannerMDP:
    """Route planner modelled as a finite Markov decision process (MDP).

    Attributes:
        states: Iterable of state labels.
        actions: Sequence of action labels.
        P: Nested mapping ``P[s][a][s_next]`` -> probability of reaching
            ``s_next`` when taking action ``a`` in state ``s``.
        R: Nested mapping ``R[s][a]`` -> immediate reward for taking
            action ``a`` in state ``s``.
        gamma: Discount factor applied to future rewards.
        policy: Mapping state -> chosen action. Random until
            ``value_iteration`` has been run.
    """

    def __init__(self, states, actions, transition_probabilities, rewards, gamma=0.9):
        self.states = states
        self.actions = actions
        self.P = transition_probabilities  # Transition probabilities P(s' | s, a)
        self.R = rewards  # Immediate rewards R(s, a)
        self.gamma = gamma  # Discount factor for future rewards
        # Start from a random policy; value_iteration() overwrites it.
        self.policy = {s: np.random.choice(actions) for s in states}

    def _action_value(self, s, a, V):
        """Return the one-step lookahead value of taking action ``a`` in ``s``.

        Missing actions or successor states are treated as probability 0 /
        reward 0, so an action absent from ``P[s]`` evaluates to 0.
        """
        outcomes = self.P[s].get(a, {})
        reward = self.R[s].get(a, 0)
        return sum(
            outcomes.get(s_next, 0) * (reward + self.gamma * V.get(s_next, 0))
            for s_next in self.states
        )

    def value_iteration(self, theta=1e-6):
        """Solve the MDP with in-place value iteration and update ``policy``.

        Args:
            theta: Convergence threshold; iteration stops once the largest
                change of any state value in a sweep drops below it.

        Returns:
            The converged value function as a dict ``state -> value``.
        """
        V = {s: 0 for s in self.states}  # Initialize value function
        while True:
            delta = 0
            for s in self.states:
                previous = V[s]
                # Values are updated in place, so later states in the same
                # sweep already see the refreshed estimates.
                V[s] = max(
                    (self._action_value(s, a, V) for a in self.actions),
                    default=float('-inf'),
                )
                delta = max(delta, abs(previous - V[s]))
            if delta < theta:
                break

        # Greedy policy extraction; max() keeps the first action on ties.
        for s in self.states:
            self.policy[s] = max(
                self.actions,
                key=lambda a: self._action_value(s, a, V),
                default=None,
            )
        return V

    def get_optimal_route(self, start_state, max_steps=100, goal_state="S10"):
        """Sample a route from ``start_state`` by following the current policy.

        Args:
            start_state: State the route starts in.
            max_steps: Upper bound on transitions, to avoid infinite loops
                when the policy never reaches the goal.
            goal_state: State at which the route ends (defaults to "S10").

        Returns:
            List of visited states, beginning with ``start_state``. If the
            goal was not reached within ``max_steps`` transitions, the
            string "Max Steps Exceeded" is appended as a final marker.
        """
        route = [start_state]
        current_state = start_state
        for _ in range(max_steps):
            if current_state == goal_state:
                break
            outcomes = self.P[current_state][self.policy[current_state]]
            next_states = list(outcomes)
            probabilities = list(outcomes.values())
            # Sample an index rather than the label itself so the route holds
            # the original state objects instead of numpy scalars (np.str_).
            chosen = np.random.choice(len(next_states), p=probabilities)
            current_state = next_states[chosen]
            route.append(current_state)
        if current_state != goal_state:
            # Indicate that the route did not reach the goal within max_steps
            route.append("Max Steps Exceeded")
        return route



# Route states, labelled "S1" through "S10"
states = [f"S{index}" for index in range(1, 11)]

# Actions available to the planner
actions = list(("Continue", "Re-route", "Adjust Speed", "Choose Alternate"))


class RoutePlannerMDP:
    """Finite MDP over route states, solved with value iteration.

    ``transition_probabilities`` is read as ``P[s][a][s_next]`` and
    ``rewards`` as ``R[s][a]``; ``gamma`` discounts future rewards.
    ``policy`` maps each state to an action: random after construction,
    greedy with respect to the value function after ``value_iteration``.
    """

    def __init__(self, states, actions, transition_probabilities, rewards, gamma=0.9):
        self.states = states
        self.actions = actions
        self.P = transition_probabilities
        self.R = rewards
        self.gamma = gamma
        # Placeholder policy until value_iteration() computes the greedy one.
        self.policy = {s: np.random.choice(actions) for s in states}

    def _action_value(self, s, a, V):
        """Expected discounted return of action ``a`` in state ``s`` under ``V``.

        Actions or successors missing from the tables count as
        probability 0 / reward 0.
        """
        successors = self.P[s].get(a, {})
        immediate = self.R[s].get(a, 0)
        return sum(
            successors.get(s_next, 0) * (immediate + self.gamma * V.get(s_next, 0))
            for s_next in self.states
        )

    def value_iteration(self, theta=1e-6):
        """Run value iteration until values change by less than ``theta``.

        Updates ``self.policy`` to the greedy policy and returns the
        converged value function as a dict ``state -> value``.
        """
        V = {s: 0 for s in self.states}
        while True:
            delta = 0
            for s in self.states:
                old_value = V[s]
                # In-place sweep: states later in the loop use values already
                # refreshed earlier in the same sweep.
                V[s] = max(
                    (self._action_value(s, a, V) for a in self.actions),
                    default=float('-inf'),
                )
                delta = max(delta, abs(old_value - V[s]))
            if delta < theta:
                break

        for s in self.states:
            # max() returns the first best action, matching a strict ">" scan.
            self.policy[s] = max(
                self.actions,
                key=lambda a: self._action_value(s, a, V),
                default=None,
            )
        return V

    def get_optimal_route(self, start_state, max_steps=100, goal_state="S10"):
        """Follow the current policy from ``start_state`` towards ``goal_state``.

        Transitions are sampled from ``P``; at most ``max_steps`` are taken.
        Returns the list of visited states. When the goal is not reached the
        marker string "Max Steps Exceeded" is appended at the end.
        """
        route = [start_state]
        current_state = start_state
        for _ in range(max_steps):
            if current_state == goal_state:
                break
            successors = self.P[current_state][self.policy[current_state]]
            next_states = list(successors)
            probabilities = list(successors.values())
            # Draw an index so the route keeps the original state objects
            # rather than numpy scalars such as np.str_.
            pick = np.random.choice(len(next_states), p=probabilities)
            current_state = next_states[pick]
            route.append(current_state)
        if current_state != goal_state:
            route.append("Max Steps Exceeded")
        return route


# State labels "S1".."S10" and the action labels used by the tables below.
states = ["S" + str(number) for number in range(1, 11)]
actions = [*("Continue", "Re-route", "Adjust Speed", "Choose Alternate")]

# Transition model: transition_probabilities[state][action] maps each possible
# next state to its probability. For S1-S9 every action's probabilities sum
# to 1, and "Adjust Speed" always stays in the current state.
transition_probabilities = {
    "S1": {
        "Continue": {"S2": 0.7, "S3": 0.3},
        "Re-route": {"S6": 1.0},
        "Adjust Speed": {"S1": 1.0},
        "Choose Alternate": {"S4": 0.6, "S5": 0.4}
    },
    "S2": {
        "Continue": {"S3": 0.8, "S4": 0.2},
        "Re-route": {"S6": 0.7, "S5": 0.3},
        "Adjust Speed": {"S2": 1.0},
        "Choose Alternate": {"S7": 0.5, "S4": 0.5}
    },
    "S3": {
        "Continue": {"S5": 0.6, "S7": 0.4},
        "Re-route": {"S8": 0.6, "S6": 0.4},
        "Adjust Speed": {"S3": 1.0},
        "Choose Alternate": {"S4": 0.5, "S9": 0.5}
    },
    "S4": {
        "Continue": {"S5": 0.7, "S6": 0.3},
        "Re-route": {"S8": 0.5, "S9": 0.5},
        "Adjust Speed": {"S4": 1.0},
        "Choose Alternate": {"S7": 0.5, "S10": 0.5}
    },
    "S5": {
        "Continue": {"S6": 0.8, "S7": 0.2},
        "Re-route": {"S9": 0.7, "S10": 0.3},
        "Adjust Speed": {"S5": 1.0},
        "Choose Alternate": {"S4": 0.5, "S8": 0.5}
    },
    "S6": {
        "Continue": {"S7": 0.9, "S8": 0.1},
        # "Re-route" and "Adjust Speed" are both self-loops in S6.
        "Re-route": {"S6": 1.0},
        "Adjust Speed": {"S6": 1.0},
        "Choose Alternate": {"S4": 0.3, "S10": 0.7}
    },
    "S7": {
        "Continue": {"S10": 0.95, "S6": 0.05},
        "Re-route": {"S4": 0.7, "S9": 0.3},
        "Adjust Speed": {"S7": 1.0},
        "Choose Alternate": {"S6": 0.3, "S3": 0.7}
    },
    "S8": {
        "Continue": {"S9": 0.8, "S6": 0.2},
        "Re-route": {"S7": 0.6, "S10": 0.4},
        "Adjust Speed": {"S8": 1.0},
        "Choose Alternate": {"S7": 0.3, "S4": 0.7}
    },
    "S9": {
        "Continue": {"S10": 1.0},
        "Re-route": {"S7": 0.5, "S4": 0.5},
        "Adjust Speed": {"S9": 1.0},
        "Choose Alternate": {"S3": 0.6, "S7": 0.4}
    },
    # NOTE(review): every S10 row has total probability 0.0, not 1.0. With the
    # value_iteration in this file that makes each S10 action worth exactly 0,
    # and np.random.choice would reject these rows if S10 were ever sampled
    # from (get_optimal_route stops at "S10", so it is not). Presumably this
    # marks S10 as terminal -- confirm.
    "S10": {
        "Continue": {"S10": 0.0},
        "Re-route": {"S10": 0.0},
        "Adjust Speed": {"S10": 0.0},
        "Choose Alternate": {"S10": 0.0}
    }
}

# Immediate rewards: rewards[state][action] is the reward for taking `action`
# in `state`; it does not depend on which next state is reached.
#
# NOTE(review): S6 "Re-route" pays 10 while its transition is a pure self-loop
# (S6 -> S6 with probability 1.0), so with gamma=0.9 looping there is worth
# 10 / (1 - 0.9) = 100. The recorded run output shows the route staying in S6
# until the step limit, which looks like a consequence -- confirm whether this
# reward is intended.
# NOTE(review): the S10 rewards are multiplied by the all-zero S10 transition
# probabilities in value_iteration, so they appear to have no effect -- confirm.
rewards = {
    "S1": {"Continue": 5, "Re-route": 2, "Adjust Speed": 3, "Choose Alternate": 4},
    "S2": {"Continue": 6, "Re-route": 3, "Adjust Speed": 4, "Choose Alternate": 5},
    "S3": {"Continue": 4, "Re-route": 5, "Adjust Speed": 2, "Choose Alternate": 6},
    "S4": {"Continue": 7, "Re-route": 3, "Adjust Speed": 4, "Choose Alternate": 8},
    "S5": {"Continue": 8, "Re-route": 2, "Adjust Speed": 3, "Choose Alternate": 7},
    "S6": {"Continue": 2, "Re-route": 10, "Adjust Speed": 1, "Choose Alternate": 3},
    "S7": {"Continue": 9, "Re-route": 4, "Adjust Speed": 3, "Choose Alternate": 6},
    "S8": {"Continue": -1, "Re-route": 5, "Adjust Speed": 2, "Choose Alternate": -2},
    "S9": {"Continue": -3, "Re-route": 6, "Adjust Speed": 3, "Choose Alternate": 5},
    "S10": {"Continue": 10, "Re-route": 0, "Adjust Speed": 1, "Choose Alternate": 2}
}

# Build the planner from the tables above, solve for the greedy policy,
# then sample one route starting at "S1" and print it.
route_planner = RoutePlannerMDP(
    states=states,
    actions=actions,
    transition_probabilities=transition_probabilities,
    rewards=rewards,
)
route_planner.value_iteration()

optimal_route = route_planner.get_optimal_route(start_state="S1")
print("Optimal Route:", optimal_route)

Optimal Route: ['S1', np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.str_('S6'), np.