In [1]:
import time
import matplotlib.pyplot as plt
from IPython.display import clear_output

# Define the grid world states
states = ['Start', 'Middle', 'Goal']
actions = ['Left', 'Right']

# Transition model: (state, action) -> (next_state, reward)
transitions = {
    ('Start', 'Right'): ('Middle', -1),
    ('Start', 'Left'): ('Start', -1),
    ('Middle', 'Right'): ('Goal', 10),
    ('Middle', 'Left'): ('Middle', -1),
    ('Goal', 'Right'): ('Goal', 0),
    ('Goal', 'Left'): ('Goal', 0),
}

# A simple fixed policy that chooses Right unless in Goal
def policy(state):
    if state == 'Goal':
        return None
    return 'Right'

# Visualize the robot on the grid
def visualize_state(state):
    clear_output(wait=True)
    grid = ['   '] * 3
    state_idx = states.index(state)
    grid[state_idx] = '[R]'
    print("Grid World:")
    print(" | ".join(grid))
    time.sleep(1)

# Simulate the robot using the fixed policy
def simulate():
    state = 'Start'
    total_reward = 0
    step = 0

    while state != 'Goal':
        visualize_state(state)
        action = policy(state)
        next_state, reward = transitions[(state, action)]
        print(f"Step {step}: {state} --{action}--> {next_state} (Reward: {reward})")
        total_reward += reward
        state = next_state
        step += 1

    visualize_state(state)
    print(f"Reached Goal in {step} steps. Total reward: {total_reward}")

simulate()


Grid World:
    |     | [R]
Reached Goal in 2 steps. Total reward: 9


In [3]:
# Define the desired transition probabilities from the image
import numpy as np

states = ["HOT", "COLD", "WARM"]
transition_matrix = {
    "HOT": {"HOT": 0.5, "COLD": 0.2, "WARM": 0.3},
    "COLD": {"HOT": 0.2, "COLD": 0.5, "WARM": 0.3},
    "WARM": {"HOT": 0.3, "COLD": 0.1, "WARM": 0.6}
}

initial_probs = {"HOT": 0.5, "COLD": 0.3, "WARM": 0.2}

# Generate synthetic sequences to match transition and initial probabilities
# We'll simulate a Markov chain using these probabilities
def simulate_markov_chain(init_probs, trans_probs, n_sequences=100, sequence_length=10):
    sequences = []
    for _ in range(n_sequences):
        # Choose initial state based on initial probabilities
        state = np.random.choice(states, p=[init_probs[s] for s in states])
        seq = [state]
        for _ in range(sequence_length - 1):
            next_state = np.random.choice(states, p=[trans_probs[state][s] for s in states])
            seq.append(next_state)
            state = next_state
        sequences.append(seq)
    return sequences

simulated_sequences = simulate_markov_chain(initial_probs, transition_matrix)

# Count transitions from these simulated sequences
from collections import defaultdict
import pandas as pd

transition_counts = defaultdict(lambda: defaultdict(int))

for seq in simulated_sequences:
    for i in range(len(seq) - 1):
        curr_state = seq[i]
        next_state = seq[i + 1]
        transition_counts[curr_state][next_state] += 1

# Create DataFrame for transition counts
df_counts = pd.DataFrame(index=states, columns=states).fillna(0)

for from_state in transition_counts:
    for to_state in transition_counts[from_state]:
        df_counts.loc[from_state, to_state] = transition_counts[from_state][to_state]

# Also count initial state frequencies
initial_counts = defaultdict(int)
for seq in simulated_sequences:
    initial_counts[seq[0]] += 1

df_initial = pd.DataFrame.from_dict(initial_counts, orient='index', columns=['Initial Count'])

# import ace_tools as tools; tools.display_dataframe_to_user(name="Transition Counts Table", dataframe=df_counts)
df_counts, df_initial

  df_counts = pd.DataFrame(index=states, columns=states).fillna(0)


(      HOT  COLD  WARM
 HOT   173    68    99
 COLD   49   117    70
 WARM  101    41   182,
       Initial Count
 COLD             29
 WARM             19
 HOT              52)

In [4]:
simulated_sequences

[[np.str_('COLD'),
  np.str_('COLD'),
  np.str_('WARM'),
  np.str_('HOT'),
  np.str_('COLD'),
  np.str_('COLD'),
  np.str_('HOT'),
  np.str_('COLD'),
  np.str_('HOT'),
  np.str_('WARM')],
 [np.str_('WARM'),
  np.str_('COLD'),
  np.str_('COLD'),
  np.str_('COLD'),
  np.str_('COLD'),
  np.str_('HOT'),
  np.str_('COLD'),
  np.str_('HOT'),
  np.str_('HOT'),
  np.str_('HOT')],
 [np.str_('HOT'),
  np.str_('WARM'),
  np.str_('WARM'),
  np.str_('COLD'),
  np.str_('WARM'),
  np.str_('HOT'),
  np.str_('COLD'),
  np.str_('COLD'),
  np.str_('WARM'),
  np.str_('WARM')],
 [np.str_('HOT'),
  np.str_('WARM'),
  np.str_('WARM'),
  np.str_('HOT'),
  np.str_('WARM'),
  np.str_('WARM'),
  np.str_('COLD'),
  np.str_('COLD'),
  np.str_('COLD'),
  np.str_('WARM')],
 [np.str_('COLD'),
  np.str_('COLD'),
  np.str_('COLD'),
  np.str_('WARM'),
  np.str_('HOT'),
  np.str_('HOT'),
  np.str_('HOT'),
  np.str_('COLD'),
  np.str_('WARM'),
  np.str_('WARM')],
 [np.str_('WARM'),
  np.str_('WARM'),
  np.str_('WARM'),
  