# **Environment Setup (simple_rl_environment Function):**<br>
The environment consists of 2 states (State1, State2) and 2 actions (Action1, Action2).<br>
A rewards dictionary is defined, specifying the reward for each state-action pair.<br>

In [1]:
import numpy as np

def simple_rl_environment():
    """
    Creates a simple environment with 2 states and 2 actions.
    """
    states = ['State1', 'State2']
    actions = ['Action1', 'Action2']
    rewards = {
        'State1': {'Action1': 10, 'Action2': 0},
        'State2': {'Action1': 0, 'Action2': 5}
    }
    return states, actions, rewards

# **Simple Agent Class (SimpleAgent):<br>**
**Initialization (__init__):** The agent is initialized with the available states, actions, and the rewards structure. It starts in a random state.<br>
**Choose Action (choose_action):** The agent selects an action randomly from the available actions.<br>
**Take Action (take_action):** The agent receives a reward based on the current state and the chosen action. It then randomly transitions to a new state.<br>
**Run (run):** The agent interacts with the environment for a specified number of episodes, prints each step, and accumulates the total reward.



In [2]:
class SimpleAgent:
    def __init__(self, states, actions, rewards):
        """
        Initialize the agent with available states, actions, and the reward structure.
        """
        self.states = states
        self.actions = actions
        self.rewards = rewards
        self.current_state = np.random.choice(states)  # Start in a random state

    def choose_action(self):
        """
        Choose an action randomly.
        """
        return np.random.choice(self.actions)

    def take_action(self, action):
        """
        Simulate taking an action in the environment and receive a reward.
        """
        reward = self.rewards[self.current_state][action]  # Get the reward for the current state and action
        # Transition to a new state (randomly for simplicity)
        self.current_state = np.random.choice(self.states)
        return reward

    def run(self, episodes=10):
        """
        Run the agent in the environment for a specified number of episodes.
        """
        total_reward = 0
        for episode in range(episodes):
            action = self.choose_action()
            reward = self.take_action(action)
            total_reward += reward
            print(f"Episode {episode+1}: State={self.current_state}, Action={action}, Reward={reward}")
        print(f"Total Reward after {episodes} episodes: {total_reward}")

# Initialize the environment
states, actions, rewards = simple_rl_environment()

# Create the agent
agent = SimpleAgent(states, actions, rewards)

# Run the agent
agent.run(episodes=10)

Episode 1: State=State2, Action=Action1, Reward=0
Episode 2: State=State2, Action=Action2, Reward=5
Episode 3: State=State1, Action=Action1, Reward=0
Episode 4: State=State1, Action=Action2, Reward=0
Episode 5: State=State2, Action=Action1, Reward=10
Episode 6: State=State1, Action=Action1, Reward=0
Episode 7: State=State1, Action=Action1, Reward=10
Episode 8: State=State1, Action=Action2, Reward=0
Episode 9: State=State1, Action=Action2, Reward=0
Episode 10: State=State1, Action=Action1, Reward=10
Total Reward after 10 episodes: 35
