# Passenger pick up MDP
In this part you will formulate our Minicity as an MDP and solve the passenger pick-up problem using Value Iteration and Policy Iteration.

In [None]:
# @markdown Run this cell to install dependencies.
%%capture

% cd /content
! git clone https://github.com/buzi-princeton/MDP.git

In [None]:
from MDP.mdp import MDP
from MDP.visualizer.minicity import MinicityVisualizer
import numpy as np
import os

In [None]:
# @markdown Run this cell to test our dependencies.
# TEST
# Smoke test for the fetched MDP package: every flat index must survive a
# round trip through get_real_state_value() and get_index().
print("Test MDP class")
a = [1, 2, 3, 4]
b = [5, 6, 7, 8]
c = [23, 24, 25]

mdp = MDP(states=[a, b, c], actions=[1, 2, 3, 4])

# One flat index per combination of the three state variables (4 * 4 * 3).
num_states = len(a) * len(b) * len(c)

for i in range(num_states):
  state = mdp.get_state(i)
  real_state = mdp.get_real_state_value(i)
  index = mdp.get_index(real_state)
  if index != i:
    raise ValueError("Something is wrong")
  # Print a sample of the mapping (every 10th index) rather than all of it.
  if i % 10 == 0:
    print(i, state, real_state, index)

print("Everything is correct!")
print("\nTest Minicity Visualizer")

import os

# Output directory handed to the visualizer: /content/figure/minicity.
folder, sub_folder = "figure", "minicity"
fig_folder = os.path.join("/content", folder)
fig_prog_folder = os.path.join(fig_folder, sub_folder)
os.makedirs(fig_prog_folder, exist_ok=True)

# Start at position 0 with the goal at position 6, draw the initial scene,
# then move the car through positions 1 to 6 one step at a time.
visualizer = MinicityVisualizer(fig_prog_folder=fig_prog_folder)
visualizer.reset(current_pos=0, goal=6)
visualizer.plot()
for i in range(1, 7):
    visualizer.update_pos(i)

import imageio
from IPython.display import Image
from tqdm.notebook import tqdm

# Stitch the numbered frames (0.png, 1.png, ...) found in fig_prog_folder
# into a single animated GIF and display it.
gif_path = os.path.join(fig_prog_folder, 'result.gif')
# Count only files whose name actually ends in ".png"; a substring match would
# also count names such as "0.png.bak" and make the loop below read a frame
# that does not exist.
length = len([name for name in os.listdir(fig_prog_folder) if name.endswith(".png")])

with imageio.get_writer(gif_path, mode='I') as writer:
  for i in tqdm(range(length)):
    print(i, end='\r')
    filename = os.path.join(fig_prog_folder, str(i)+".png")
    image = imageio.imread(filename)
    writer.append_data(image)

# Read the GIF through a context manager so the file handle is closed, then
# leave the Image as the last expression so the notebook renders it.
with open(gif_path, 'rb') as gif_file:
  gif_bytes = gif_file.read()
Image(gif_bytes, width=400)

## MDP formulation of Minicity

Below is the sample 2-state MDP discussed in the lab 3 handout. Run the code below and play around with it to understand how the MDP class works.

In [None]:
class TwoStateMDP(MDP):
  """Sample MDP with two states ("s1", "s2") and two actions ("a0", "a1").

  Routes: from s1, a0 stays in s1 and a1 moves to s2; from s2, both actions
  stay in s2. Rewards: 0.5 for any action taken in s1, 1.5 for any action
  taken in s2.
  """

  def __init__(self):
    self.states = ["s1", "s2"]
    self.actions = ["a0", "a1"]
    self.gam = 0.9  # presumably the discount factor -- confirm against MDP

    # The parent class takes a list of state variables; this MDP has a single
    # state variable, hence the one-element outer list.
    super().__init__(states=[self.states], actions=self.actions)
    self.populate_data()

  def populate_data(self):
    """Register every route and reward of the two-state example."""
    # (source state, action, destination state), listed per source state.
    routes = (
      (["s1"], "a0", ["s1"]),
      (["s1"], "a1", ["s2"]),
      (["s2"], "a0", ["s2"]),
      (["s2"], "a1", ["s2"]),
    )
    for source, action, destination in routes:
      self.add_route(source, action, destination)

    # The reward depends only on the state, so it is added for every action.
    # NOTE(review): self.a is not set in this class; presumably the MDP base
    # class stores the action list there -- confirm.
    for a in self.a:
      for state, reward in ((["s1"], 0.5), (["s2"], 1.5)):
        self.add_reward(state, a, reward)

# Try the lookup helpers on the sample MDP: state value -> index, then
# index 0 -> state and index 0 -> real state value.
twoStateMDP = TwoStateMDP()
s1_index = twoStateMDP.get_index(["s1"])
print(s1_index)
state_at_zero = twoStateMDP.get_state(0)
print(state_at_zero)
real_state_at_zero = twoStateMDP.get_real_state_value(0)
print(real_state_at_zero)

Now let's try to build our Minicity MDP.
We have 1 positional state variable $p_{cur}$, 1 directional state variable $d$ and 1 goal state variable $p_{goal}$.
$s = \{p_{cur}, d,  p_{goal}\}$

$p_{cur} \in \{0, \dots, 6\}$, $p_{goal} \in \{3, 4, 5, 6\}$, and
$d \in \{cw, ccw\}$

In [None]:
class Minicity(MDP):
  """Passenger pick-up MDP of the Minicity.

  A state is made of three state variables, in this order: current position
  (0 to 6), driving direction ("cw" or "ccw") and goal position (3 to 6).
  The available actions are "forward", "left", "right" and "switch".
  """

  def __init__(self):
    self.positional_states = [0, 1, 2, 3, 4, 5, 6]
    self.goal_states = [3, 4, 5, 6]
    self.directional_states = ["cw", "ccw"]
    self.actions = ["forward", "left", "right", "switch"]
    self.gam = 0.9  # presumably the discount factor -- confirm against MDP

    # The order of the state variables fixes the layout of every state list:
    # [position, direction, goal].
    state_variables = [
      self.positional_states,
      self.directional_states,
      self.goal_states,
    ]
    super().__init__(states=state_variables, actions=self.actions)

    self.populate_data()

  def populate_data(self):
    """Populate the state transition function and the reward function.

    Raises:
      NotImplementedError: always, until the exercise below is completed.
    """
    ####
    ## YOUR CODE HERE
    raise NotImplementedError("Your Minicity MDP is empty!")
    ####

## Value and Policy Iteration
Let's now write the value iteration and policy iteration methods.

In [None]:
def value_iteration(threshold = .001, mdp=None):
  """Scaffold for Value Iteration on the given MDP.

  Args:
    threshold: not read by the scaffold; presumably the convergence tolerance
      the implementation should use -- confirm with the lab handout.
    mdp: object whose get_mdp() returns the tuple (numa, nums, R, P). The
      layout of R and P is defined by the MDP class and is not shown here.

  Returns:
    (V_star, pi_star): two arrays of length nums. V_star holds floats;
    pi_star holds integers, intended as one action index per state.

  Raises:
    ValueError: if mdp is None.
    NotImplementedError: until the exercise below is completed.
  """
  if mdp is None:
    raise ValueError("MDP cannot be None")
  numa, nums, R, P = mdp.get_mdp()
  V_star = np.zeros(nums)
  # Integer dtype so the entries can be used directly as action indices and
  # match the integer policy array created in policy_iteration.
  pi_star = np.zeros(nums, dtype=int)
  
  ####
  ## YOUR CODE HERE
  raise NotImplementedError("You have not written Value Iteration")
  ####

  return V_star, pi_star

def policy_eval(policy, threshold = .001, mdp=None):
  """Scaffold for evaluating a fixed policy on the given MDP.

  Args:
    policy: the policy to evaluate; presumably one action index per state,
      like pi_star in policy_iteration -- confirm.
    threshold: not read by the scaffold; presumably the convergence tolerance
      the implementation should use -- confirm with the lab handout.
    mdp: object whose get_mdp() returns the tuple (numa, nums, R, P). The
      layout of R and P is defined by the MDP class and is not shown here.

  Returns:
    V: array of length nums, initialised to zeros.

  Raises:
    ValueError: if mdp is None.
    NotImplementedError: until the exercise below is completed.
  """
  if mdp is None:
    raise ValueError("MDP cannot be None")
  numa, nums, R, P = mdp.get_mdp()
  V = np.zeros(nums)
  
  ####
  ## YOUR CODE HERE
  raise NotImplementedError("You have not written Policy Evaluation")
  ####

  return V

def policy_iteration(threshold = .001, mdp=None):
  """Scaffold for Policy Iteration on the given MDP.

  Args:
    threshold: not read by the scaffold; presumably the convergence tolerance
      the implementation should use -- confirm with the lab handout.
    mdp: object whose get_mdp() returns the tuple (numa, nums, R, P). The
      layout of R and P is defined by the MDP class and is not shown here.

  Returns:
    (V_star, pi_star): two arrays of length nums.

  Raises:
    ValueError: if mdp is None.
    NotImplementedError: until the exercise below is completed.
  """
  if mdp is None:
    raise ValueError("MDP cannot be None")
  numa, nums, R, P = mdp.get_mdp()
  # Start from a random policy: one integer in [0, numa) for each of the
  # nums states.
  pi_star = np.random.randint(0, numa, nums)
  V_star = np.zeros(nums)
  
  ####
  ## YOUR CODE HERE
  raise NotImplementedError("You have not written Policy Iteration")
  ####
  
  return V_star, pi_star

Run the code below to test your Value Iteration and Policy Iteration implementations. The two methods should produce similar optimal policies.

In [None]:
# Solve the same Minicity MDP with both methods and compare the policies.
minicity = Minicity()

V_star_value, pi_star_value = value_iteration(mdp=minicity)
print("Value iteration")
print("V_star: ", V_star_value)
print("pi_star: ", pi_star_value)

V_star_policy, pi_star_policy = policy_iteration(mdp=minicity)
print("Policy iteration")
print("V_star: ", V_star_policy)
print("pi_star: ", pi_star_policy)

# Differing policies only trigger a warning, not an error.
policies_agree = np.array_equal(pi_star_value, pi_star_policy)
if not policies_agree:
  for warning_line in (
      "Warning: Your pi_star between value iteration and policy iteration is different!",
      "Try to run these two different policies in the next test case to see if it makes sense",
  ):
    print(warning_line)

Let's now use the computed policy to solve our Minicity passenger pick-up MDP. Given the initial state $s=[1, cw, 6]$, what sequence of actions should be taken, and what is the cumulative reward?

In [None]:
# Test pi_star
# Build the Minicity MDP and compute its policy with value iteration.
minicity = Minicity()
V_star, pi_star = value_iteration(mdp=minicity)
print("V_star: ", V_star)
print("pi_star: ", pi_star)

# Test calculated pi_star
# Initial state, ordered as [current position, direction, goal position].
state = [1, "cw", 6]

while True:
  # get the next action from pi_star
  # get next state, and continue until we get to the goal
  # display the reward, and the actions taken so far 
  # to solve initial state [1, "cw", 6]
  ####
  ## YOUR CODE HERE
  raise NotImplementedError(
      "You have not written the test loop for your calculated pi_star")
  ####
  
  # Stop once the current position (state[0]) equals the goal (state[2]).
  if state[0] == state[2]:
    break

In [None]:
# Figure folder: /content/figure/minicity, created if it does not exist yet.
folder, sub_folder = "figure", "minicity"
fig_folder = os.path.join("/content", folder)
fig_prog_folder = os.path.join(fig_folder, sub_folder)
os.makedirs(fig_prog_folder, exist_ok=True)

## Checkpoint 1
Putting everything together, let's run 5 consecutive random test cases and make a beautiful GIF of our car moving through the Minicity to pick up passengers!

In [None]:
import random
from tqdm.notebook import tqdm

visualizer = MinicityVisualizer(fig_prog_folder=fig_prog_folder)

# Run 5 episodes, each with a random start position, direction and goal.
for i in tqdm(range(5)):
  converged = False
  goal = random.choice([3, 4, 5, 6])
  pos = random.choice([0, 1, 2, 3, 4, 5, 6])
  
  # Resample both values until the start position differs from the goal.
  while goal == pos:
    goal = random.choice([3, 4, 5, 6])
    pos = random.choice([0, 1, 2, 3, 4, 5, 6])

  direction = random.choice(["cw", "ccw"])
  # State layout: [current position, direction, goal position].
  state = [pos, direction, goal]

  visualizer.reset(current_pos=state[0], goal = state[2])
  visualizer.plot()

  while not converged:
    # Show the car at its current position and direction.
    visualizer.update_pos(state[0], dir=state[1])

    # The episode ends once the current position equals the goal.
    if state[0] == state[2]:
      converged = True

    ## YOUR CODE HERE
    raise NotImplementedError(
        "You have not written the while loop for pi_star rollout")
    ####

In [None]:
import imageio
from IPython.display import Image
from tqdm.notebook import tqdm

# Stitch the numbered frames (0.png, 1.png, ...) found in fig_prog_folder
# into a single animated GIF and display it.
gif_path = os.path.join(fig_prog_folder, 'result.gif')
# Count only files whose name actually ends in ".png"; a substring match would
# also count names such as "0.png.bak" and make the loop below read a frame
# that does not exist.
length = len([name for name in os.listdir(fig_prog_folder) if name.endswith(".png")])

with imageio.get_writer(gif_path, mode='I') as writer:
  for i in tqdm(range(length)):
    print(i, end='\r')
    filename = os.path.join(fig_prog_folder, str(i)+".png")
    image = imageio.imread(filename)
    writer.append_data(image)

# Read the GIF through a context manager so the file handle is closed, then
# leave the Image as the last expression so the notebook renders it.
with open(gif_path, 'rb') as gif_file:
  gif_bytes = gif_file.read()
Image(gif_bytes, width=400)