In [5]:
import numpy as np

# David Silver's Student Markov Chain
# States: C1 (Class1), C2 (Class2), C3 (Class3), FB (Facebook), Pub, Pass, Sleep


class StudentMarkovChain:
    """Sampler for a simplified version of David Silver's Student Markov Chain.

    The chain has seven states: three classes (``C1``, ``C2``, ``C3``),
    ``FB`` (Facebook), ``Pub``, ``Pass`` and ``Sleep``.  In this model both
    ``Pass`` and ``Sleep`` are treated as terminal (absorbing) states.

    Args:
        seed: Optional seed for reproducible sampling.  When ``None``
            (the default) samples are drawn from NumPy's global random
            state, so ``np.random.seed(...)`` keeps working as before.
            Any other value is forwarded to ``np.random.default_rng`` and
            gives this instance its own independent random stream.
    """

    def __init__(self, seed=None):
        self.states = ["C1", "C2", "C3", "FB", "Pub", "Pass", "Sleep"]
        self.terminal_states = ["Pass", "Sleep"]

        # Source of randomness used by step(): the global np.random module
        # by default, or a private Generator when a seed is supplied.
        self._rng = np.random if seed is None else np.random.default_rng(seed)

        # Transition probabilities: {state: [(prob, next_state), ...]}
        # Each non-terminal state's probabilities sum to 1.
        self.transitions = {
            "C1": [
                (0.5, "C2"),  # Study
                (0.5, "FB"),  # Facebook
            ],
            "C2": [
                (0.8, "C3"),  # Study
                (0.2, "Sleep"),  # Sleep
            ],
            "C3": [
                (0.6, "Pass"),
                (0.4, "Pub"),
            ],
            "FB": [
                (0.9, "FB"),  # Keep browsing
                (0.1, "C1"),  # Quit
            ],
            "Pub": [
                (0.2, "C1"),  # Leave pub, back to Class 1
                (0.4, "C2"),  # Leave pub, back to Class 2
                (0.4, "C3"),  # Leave pub, back to Class 3
            ],
            "Pass": [],  # Terminal state
            "Sleep": [],  # Terminal state
        }

    def step(self, state):
        """Sample the successor of ``state``.

        Args:
            state: Name of the current state.

        Returns:
            The sampled next state.  A terminal state, or any state with no
            outgoing transitions, is returned unchanged.

        Raises:
            ValueError: If ``state`` is neither terminal nor a key of
                ``self.transitions``.
        """
        if state in self.terminal_states:
            return state

        if state not in self.transitions:
            raise ValueError(f"Invalid state: {state}")

        outgoing = self.transitions[state]

        # A state without outgoing edges behaves like a terminal state.
        if not outgoing:
            return state

        # Split [(prob, next_state), ...] into parallel tuples and draw an
        # index according to the probabilities.
        probs, successors = zip(*outgoing)
        idx = self._rng.choice(len(successors), p=probs)
        return successors[idx]

    def generate_trajectory(self, start_state, max_steps=20):
        """Generate a sample trajectory starting from ``start_state``.

        Args:
            start_state: State the trajectory begins in.
            max_steps: Maximum number of transitions to sample.

        Returns:
            A list of visited states, beginning with ``start_state``.  The
            walk stops as soon as a terminal state is reached, or after
            ``max_steps`` transitions, so the list has at most
            ``max_steps + 1`` entries and may end in a non-terminal state.

        Raises:
            ValueError: Propagated from ``step`` when an invalid state is
                encountered.
        """
        trajectory = [start_state]
        state = start_state

        for _ in range(max_steps):
            if state in self.terminal_states:
                break

            state = self.step(state)
            trajectory.append(state)

        return trajectory


# Instantiate the chain that every sample below is drawn from
mc = StudentMarkovChain()

# Horizontal rule reused for the title banner and each per-state header
rule = "=" * 70

# Title banner
print(rule)
print("SAMPLE TRAJECTORIES FROM DAVID SILVER'S STUDENT MARKOV CHAIN")
print(rule)

# Non-terminal states to start sampling from
starting_states = ["C1", "C2", "C3", "FB", "Pub"]

for start_state in starting_states:
    print(f"\n{rule}")
    print(f"Starting from: {start_state}")
    print(rule)

    # Draw three independent sample trajectories per starting state
    for i in range(3):
        trajectory = mc.generate_trajectory(start_state)
        path = " -> ".join(trajectory)
        print(f"\nTrajectory {i + 1}: {path}")

        # Only announce the ending when the walk actually reached a terminal
        # state; a walk cut off by the step limit prints no note.
        last_state = trajectory[-1]
        if last_state in mc.terminal_states:
            print(f"  Ended in terminal state: {last_state}")


SAMPLE TRAJECTORIES FROM DAVID SILVER'S STUDENT MARKOV CHAIN

Starting from: C1

Trajectory 1: C1 -> C2 -> C3 -> Pub -> C2 -> C3 -> Pub -> C2 -> C3 -> Pub -> C2 -> C3 -> Pass
  Ended in terminal state: Pass

Trajectory 2: C1 -> FB -> FB -> FB -> FB -> FB -> FB -> C1 -> C2 -> C3 -> Pass
  Ended in terminal state: Pass

Trajectory 3: C1 -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> C1 -> C2 -> C3 -> Pass
  Ended in terminal state: Pass

Starting from: C2

Trajectory 1: C2 -> C3 -> Pub -> C2 -> C3 -> Pub -> C2 -> C3 -> Pass
  Ended in terminal state: Pass

Trajectory 2: C2 -> C3 -> Pub -> C1 -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> FB -> C1

Trajectory 3: C2 -> C3 -> Pub -> C3 -> Pass
  Ended in terminal state: Pass

Starting from: C3

Trajectory 1: C3 -> Pub -> C3 -> Pass
  Ended in terminal state: Pass

Trajectory 2: C3 -> Pass
  Ended in terminal state: Pass

Trajectory 3: C3 -> Pub -> C2