In [42]:
import random

# Define states and actions
# The two states are battery levels; "recharge" is only offered in "low".
states = ["high", "low"]
actions_high = ["search", "wait"]
actions_low = ["search", "wait", "recharge"]

# Transition probabilities and rewards
alpha = 0.9  # Probability of staying high after search in high
beta = 0.1   # Probability of staying low after search in low
r_search = 1  # Reward for searching
r_wait = 0.5  # Reward for waiting
r_depleted = -3  # Reward when a search in low depletes the battery (state returns to high)
r_recharge = 0  # Reward for recharging

# Transition dynamics and rewards
# Layout: transition_table[state][action] is a list of possible outcomes, each
# a (next_state, probability, reward) tuple. The probabilities of the outcomes
# listed for one (state, action) pair sum to 1.
transition_table = {
    "high": {
        "search": [("high", alpha, r_search), ("low", 1 - alpha, r_search)],
        "wait": [("high", 1.0, r_wait)],
    },
    "low": {
        # Depletion transitions back to high
        "search": [("low", beta, r_search), ("high", 1 - beta, r_depleted)],
        "wait": [("low", 1.0, r_wait)],
        # Recharge transitions to high with no reward
        "recharge": [("high", 1.0, r_recharge)],
    },
}

# Function to simulate one step
def simulate_step(current_state):
    """Simulate one step of the process under a uniformly random policy.

    An action is drawn uniformly from the actions listed for the current
    state, then one outcome of that action is drawn from ``transition_table``
    according to the outcome probabilities.

    Args:
        current_state: The state to act from, either "high" or "low".

    Returns:
        A ``(current_state, action, reward, next_state)`` tuple.

    Raises:
        ValueError: If ``current_state`` is neither "high" nor "low".
    """
    # Choose action based on current state
    if current_state == "high":
        action = random.choice(actions_high)
    elif current_state == "low":
        action = random.choice(actions_low)
    else:
        # Fail with a clear message instead of an UnboundLocalError on `action`.
        raise ValueError(f"Unknown state: {current_state!r}")

    # Possible outcomes for the chosen action: (next_state, probability, reward)
    transitions = transition_table[current_state][action]

    # Sample one whole outcome so the reward always comes from the same tuple
    # as the next state, even if several outcomes share a next state.
    next_state, _, reward = random.choices(
        transitions, weights=[prob for _, prob, _ in transitions], k=1
    )[0]

    return current_state, action, reward, next_state

# Run the simulation for 10 time steps and print one table row per step.
current_state = "high"  # The battery starts out high
print(
    "{:<5} {:<10} {:<10} {:<10} {:<10}".format(
        "Time", "State", "Action", "Reward", "Next State"
    )
)
for t in range(1, 11):
    state, action, reward, next_state = simulate_step(current_state)
    print(
        "{:<5} {:<10} {:<10} {:<10} {:<10}".format(
            t, state, action, reward, next_state
        )
    )
    # The sampled next state is where the following step starts from.
    current_state = next_state


Time  State      Action     Reward     Next State
1     high       wait       0.5        high      
2     high       search     1          high      
3     high       wait       0.5        high      
4     high       wait       0.5        high      
5     high       search     1          high      
6     high       search     1          low       
7     low        search     -3         high      
8     high       wait       0.5        high      
9     high       wait       0.5        high      
10    high       wait       0.5        high      
