In [1]:
import itertools
import random
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

class TaxiEnvironment:
    def __init__(self, customers_info,start=(3,1)):
        self.grid_size = 5
        self.customers_info = customers_info
        self.hazardous = [(0,2),(1,2),(2,2)]
        self.discount_factor = 0.9
        self.initial_prob = 0.8
        self.pickup_prob = 0.9
        self.pickedup=[]
        self.premium_preference = 0
        self.start = start
        self.pickups = [i[0] for i in customers_info]
        self.is_different_customers = any(x[1] for x in customers_info) and any(not x[1] for x in customers_info)
        self.values = self.value_iteration()
        self.policy = self.extract_policy(self.values)
        print("Pick Up points : ",self.pickups)
        self.display_grid()
    def get_states(self):
        taxi_positions = [(taxi_x, taxi_y) for taxi_x in range(self.grid_size) for taxi_y in range(self.grid_size)]
        return taxi_positions

    def get_actions(self):
        return [(0, 1), (0, -1), (1, 0), (-1, 0)]  # Right, Left, Down, Up

    def is_valid_position(self, position):
        return 0 <= position[0] < self.grid_size and 0 <= position[1] < self.grid_size

    def get_reward(self, current_state):
        taxi_position = current_state
        customer_position=self.customers_info
        reward = -0.1
        for customer_position, is_premium in self.customers_info:
            if taxi_position == customer_position:
                if is_premium and len(self.pickedup)==0:
                  self.premium_preference+=1
                self.pickedup.append(taxi_position)
                reward += 30 if is_premium else 20

        # Check for hazardours areas
        if taxi_position in self.hazardous:
            reward -= 10

        return reward

    def Trans_Prob(self,state,action,values):
      next_value = 0
      next_taxi_position = (state[0] + action[0], state[1] + action[1])
      actions = [(0, 1), (0, -1), (1, 0), (-1, 0)]  # Right, Left, Down, Up
      # Ensure the next position stays within the grid boundaries
      next_taxi_position = (max(0, min(next_taxi_position[0], self.grid_size - 1)),max(0, min(next_taxi_position[1], self.grid_size - 1)))
      # Check for opposite direction movement
      if action != (0, 0) and (action[0] != -state[0] or action[1] != -state[1]):
        next_state = next_taxi_position
        reward = self.get_reward(next_state)
        next_value = reward + self.discount_factor * values[next_state]
       # Adjust probability based on pickup
        prob = self.pickup_prob if reward > 0 else self.initial_prob
        # Stochasticity: Adjusted probability of going in the intended direction
        for random_action in actions:
          if random_action != action:
            next_taxi_position_random = (state[0] + random_action[0],state[1] + random_action[1])
          # Ensure the random position stays within the grid boundaries
            next_taxi_position_random = (max(0, min(next_taxi_position_random[0], self.grid_size - 1)),max(0, min(next_taxi_position_random[1], self.grid_size - 1)))
            next_state_random = next_taxi_position_random
            reward_random = self.get_reward(next_state_random)
            value_random = reward_random + self.discount_factor * values[next_state_random]
            next_value += prob * (value_random - next_value)
      return next_value
    def value_iteration(self, epsilon=1e-7):
        states = self.get_states()
        actions = [(0, 1), (0, -1), (1, 0), (-1, 0)]  # Right, Left, Down, Up
        max_iterations = 500
        # Initialize values arbitrarily
        values = {state: 0 for state in states}
        k=0
        while True:
            delta = 0
            for state in states:
                current_value = values[state]
                max_value = float('-inf')

                # Iterate over possible actions
                for action in actions:
                  next_value = self.Trans_Prob(state,action,values)
                  max_value = max(max_value, next_value)

                values[state] = max_value
                delta = max(delta, abs(current_value - max_value))
            k+=1
            if delta < epsilon :
                break

        return values

    def extract_policy(self, values):
        policy = {}
        actions = self.get_actions()

        for state in values:
            taxi_position = state
            customers_info=self.customers_info.copy()

            reward = 0
            for customer_position, is_premium in customers_info:
                if taxi_position == customer_position:
                    reward += 30 if is_premium else 20

            # Check for Hazardous zones
            if taxi_position in self.hazardous:
                reward -= 10

            if reward > 0:
                continue  # Taxi already at a customer, no need to move

            max_action = None
            max_value = float('-inf')

            for action in actions:
                next_taxi_position = (taxi_position[0] + action[0], taxi_position[1] + action[1])

                # Ensure the next position stays within the grid boundaries
                next_taxi_position = (
                    max(0, min(next_taxi_position[0], self.grid_size - 1)),
                    max(0, min(next_taxi_position[1], self.grid_size - 1))
                )

                # Check for opposite direction movement
                if action != (0, 0) and (
                        action[0] != -taxi_position[0] or action[1] != -taxi_position[1]):
                    next_state = next_taxi_position
                    customers_info=self.customers_info
                    reward = self.get_reward(next_state)
                    next_value = reward + self.discount_factor * values[next_state]

                    # Adjust probability based on pickup
                    prob = self.pickup_prob if reward > 0 else self.initial_prob

                    # Stochasticity: Adjusted probability of going in the intended direction
                    for random_action in actions:
                        if random_action != action:
                            next_taxi_position_random = (
                                taxi_position[0] + random_action[0],
                                taxi_position[1] + random_action[1]
                            )

                            # Ensure the random position stays within the grid boundaries
                            next_taxi_position_random = (
                                max(0, min(next_taxi_position_random[0], self.grid_size - 1)),
                                max(0, min(next_taxi_position_random[1], self.grid_size - 1))
                            )

                            next_state_random = next_taxi_position_random
                            reward_random = self.get_reward(next_state_random)
                            value_random = reward_random + self.discount_factor * values[next_state_random]

                            next_value += prob * (value_random - next_value)

                    if next_value > max_value:
                        max_value = next_value
                        max_action = action

            policy[state] = max_value

        return policy

    def display_grid (self):
      grid = [['' for _ in range(self.grid_size)] for _ in range(self.grid_size)]
      for state, action in self.values.items():
        grid[state[0]][state[1]]=action
      grid[self.start[0]][self.start[1]]="   Taxi   "
      k=1
      self.pickedup=set(self.pickedup)
      print(self.pickedup,"pickedup")
      for i in self.pickedup:
        grid[i[0]][i[1]]="  Pickup"+str(k)+"  "
        k+=1
      df = pd.DataFrame(grid)
      styles = [
            dict(selector="th", props=[("border", "1px solid #cccccc")]),
            dict(selector="td", props=[("border", "1px solid #cccccc")]),
        ]
      display(df.style.set_table_styles(styles))

def random_customers():
    start_position_r4 = random.choice([(i, j) for i in range(5) for j in range(5) if (i, j) not in [(0,2),(1,2),(2,2)]])
    # Generate first customer request
    customer_a_r4 = (random.choice([i for i in [(0,1),(0,3),(4,0),(4,4)] if i != start_position_r4]),False)
    # Decide whether to generate a second request
    if random.random() <= 0.6:
      # Generate second customer request at a different location
      customer_b_r4 = (random.choice([i for i in [(0,1),(0,3),(4,0),(4,4)] if (i != customer_a_r4[0] and i !=start_position_r4)]),False)
      # Decide whether one of the requests is from a premium customer
      if random.random() <= 0.3:
        # Make one of the requests from a premium customer
        premium_customer_choice = random.choice([customer_a_r4, customer_b_r4])
        premium_customer_choice = (premium_customer_choice[0], True)
        if premium_customer_choice[0] == customer_a_r4[0]:
          customer_a_r4 = premium_customer_choice
        else:
          customer_b_r4 = premium_customer_choice
    else:
      customer_b_r4 = None

    return customer_a_r4, customer_b_r4, start_position_r4


**R1: The agent should pick up a regular customer from position A. In this scenario only one customer has made a request to the agent. The taxi starts in position (2,2). To demonstrate that your code is working, produce the optimal policy**

In [2]:
#R1  -> Regular customer at pick up pint A (0,1)

taxi_env = TaxiEnvironment(customers_info=[((0,1), False)])

Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,Taxi,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197


**R2: The agent should pick up a premium customer from position B. In this scenario only one customer has made a request to the agent**

In [3]:
#R2 -> Premium customer at pick up point B (0,3)
print("R2 -> Optimal Policy for premium customer at (0,3) :\n")
taxi_env = TaxiEnvironment( customers_info=[((0,3), True)])

R2 -> Optimal Policy for premium customer at (0,3) :

Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.102624,0.447674,6.100299,Pickup1,136.866211
1,0.0,0.977139,10.367558,194.934526,131.172204
2,-0.066538,1.521291,17.706685,163.752488,122.250113
3,-0.106912,Taxi,17.884585,138.412070,111.800676
4,-0.134003,2.195657,18.97785,119.872568,101.305121


**R3: The agent should pick up a premium customer from position B. In this
scenario two customers have made requests to the agent, a regular customer at point A and a premium customer at position B**

In [4]:

#R3 -> Regular customer at pickup point A (0,1) and  Premium customer at pick up point B (0,3)
print("R3 -> Optimal Policy for regular  customer at (0,1) and premium customer at (0,3):\n")
taxi_env = TaxiEnvironment( customers_info=[((0,1), False),((0,3), True)])

R3 -> Optimal Policy for regular  customer at (0,1) and premium customer at (0,3):

Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,Taxi,56.23307,141.801282,113.978739
4,26.109623,73.893449,54.96944,123.096978,103.413545


**R4: Run 1000 episodes with random locations of customers. At any given  episode, a customer request originating at any of the 4 pickup points is
generated at random. A further request (at a point also chosen at random but different from the first chosen point) may happen with probability 0.6. When two requests are received, one of these two (randomly chosen) requests happen to be from a premium customer with probability 0.3. The taxi position should also be chosen randomly from any location in the grid except for position R**

In [5]:
# R4: Run 1000 episodes with random locations of customers
premium_preference_count = 0
for episode in range(1000):
  customer_a_r4, customer_b_r4,start_position_r4 = random_customers()
  customers_info =[]
  customers_info.append(customer_a_r4)
  if customer_b_r4:
    customers_info.append(customer_b_r4)
  print(f"\nR4 -> Optimal Policy for episode - {episode+1}:")
  env_r4= TaxiEnvironment(customers_info,start_position_r4)
  premium_preference_count += env_r4.premium_preference
  print()


R4 -> Optimal Policy for episode - 1:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,Taxi
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 2:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.862270,60.604278
1,97.754167,123.233892,75.826565,53.309090,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,Taxi,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 3:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,Taxi,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 4:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,92.820734,Pickup1,72.977174,42.812832,27.649014
1,103.031934,142.257746,77.614576,45.693855,28.59528
2,218.487168,148.127999,88.150059,Taxi,29.295483
3,252.940416,157.670652,90.329694,50.782818,29.623299
4,Pickup2,163.443354,91.260965,50.932411,29.647909




R4 -> Optimal Policy for episode - 5:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,Taxi
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 6:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,Taxi,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 7:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,Taxi,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 8:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,Taxi,34.185877,Pickup2




R4 -> Optimal Policy for episode - 9:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,Taxi,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 10:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,Taxi,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 11:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,Taxi,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 12:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.259810
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,Taxi
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 13:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,Taxi
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 14:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,Taxi,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 15:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,Taxi,23.578512,13.522197




R4 -> Optimal Policy for episode - 16:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,Taxi,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 17:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,Taxi,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 18:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,Taxi,34.228066,19.790236




R4 -> Optimal Policy for episode - 19:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,Taxi,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 20:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,Taxi,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 21:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,Taxi,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 22:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,Taxi,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 23:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,Taxi,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 24:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,Taxi
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 25:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,Taxi,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 26:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,Taxi,79.271361,66.96289




R4 -> Optimal Policy for episode - 27:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 28:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,Taxi,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.848030,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 29:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.269420,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,Taxi,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 30:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,Taxi
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 31:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,Taxi
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 32:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,Taxi,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 33:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,Taxi,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 34:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,Taxi
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 35:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,92.820734,Pickup1,72.977174,42.812832,27.649014
1,103.031934,142.257746,77.614576,45.693855,28.59528
2,218.487168,148.127999,88.150059,48.729943,29.295483
3,252.940416,157.670652,90.329694,50.782818,29.623299
4,Pickup2,163.443354,91.260965,Taxi,29.647909




R4 -> Optimal Policy for episode - 36:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,Taxi,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 37:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,Taxi,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 38:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,Taxi,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 39:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,80.537793,Pickup1,94.323416,46.383045,26.96021
1,86.024375,189.343150,88.900079,45.587051,26.839746
2,153.227124,162.030179,81.791483,43.980844,26.510472
3,174.967463,145.008830,78.795012,Taxi,26.265538
4,Pickup2,136.456902,78.108432,43.883632,26.068911




R4 -> Optimal Policy for episode - 40:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,Taxi
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 41:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 42:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,Taxi,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 43:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,92.820734,Pickup1,72.977174,42.812832,27.649014
1,103.031934,142.257746,77.614576,45.693855,28.59528
2,218.487168,148.127999,88.150059,48.729943,29.295483
3,252.940416,157.670652,90.329694,50.782818,29.623299
4,Pickup2,163.443354,91.260965,Taxi,29.647909




R4 -> Optimal Policy for episode - 44:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,Taxi,113.978739
4,26.109623,73.893449,54.96944,123.096978,103.413545




R4 -> Optimal Policy for episode - 45:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,Taxi,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 46:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,Taxi,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 47:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,Taxi,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 48:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,Taxi




R4 -> Optimal Policy for episode - 49:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,Taxi,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 50:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,Taxi,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 51:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,Taxi,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 52:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 53:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,Taxi,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 54:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,Taxi,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 55:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,Taxi,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 56:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,Taxi,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 57:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,Taxi,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 58:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,Taxi,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 59:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,Taxi
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 60:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,Taxi,67.268405
1,64.809821,81.65748,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 61:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,Taxi,69.036577




R4 -> Optimal Policy for episode - 62:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,Taxi,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 63:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,Taxi,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 64:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,Taxi,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 65:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,Taxi,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 66:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,Taxi,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 67:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,Taxi,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 68:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,Taxi,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 69:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,Taxi,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 70:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,Taxi,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 71:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,Taxi,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 72:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,Taxi
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 73:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.430843,69.482578,52.41298,Pickup2,138.543684
1,65.064169,82.809400,60.306932,197.809772,132.854667
2,145.559645,99.476453,70.122339,166.675334,123.943436
3,168.563434,106.000156,70.476764,141.373933,113.507935
4,Pickup1,Taxi,70.578182,122.855771,103.025876




R4 -> Optimal Policy for episode - 74:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.2033,Pickup1,90.674462,Taxi,55.214400
1,46.932966,182.138509,85.238669,46.925166,64.575437
2,44.81299,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 75:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,Taxi,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 76:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,Taxi,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 77:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,Taxi,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 78:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,Taxi,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 79:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.068927,Pickup2,113.165964
1,0.0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,115.424107,133.481447
3,-0.149726,1.294357,13.002945,100.701833,155.908685
4,-0.196035,1.437771,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 80:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,Taxi,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 81:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,Taxi,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 82:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,Taxi,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 83:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,Taxi




R4 -> Optimal Policy for episode - 84:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,Taxi,79.271361,66.96289




R4 -> Optimal Policy for episode - 85:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,Taxi,33.512984,19.413184
4,Pickup1,108.628708,60.506390,33.620854,19.43144




R4 -> Optimal Policy for episode - 86:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,Taxi,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 87:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 88:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,90.674462,47.446994,55.214400
1,46.932966,182.138509,85.238669,46.925166,64.575437
2,44.812990,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 89:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,Taxi,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 90:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,Taxi,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 91:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,Taxi,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 92:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.65748,48.246968,Taxi,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 93:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,Taxi
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 94:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,Taxi,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 95:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,Taxi,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 96:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,Taxi




R4 -> Optimal Policy for episode - 97:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,Taxi,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 98:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,Taxi
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 99:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,Taxi,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 100:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.068927,Pickup2,113.165964
1,0.0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,115.424107,Taxi
3,-0.149726,1.294357,13.002945,100.701833,155.908685
4,-0.196035,1.437771,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 101:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,Taxi,82.322978,68.715045




R4 -> Optimal Policy for episode - 102:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,Taxi,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 103:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,Taxi,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 104:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.068927,Pickup2,113.165964
1,0.0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,115.424107,133.481447
3,-0.149726,1.294357,13.002945,100.701833,155.908685
4,-0.196035,1.437771,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 105:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.54913,62.669241
1,30.696452,119.688790,46.135958,33.419992,77.736214
2,29.27604,99.329006,47.880669,36.125636,97.685713
3,27.52354,83.217543,44.978743,38.717843,Taxi
4,25.657834,71.577093,44.049447,39.600523,Pickup2




R4 -> Optimal Policy for episode - 106:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,Taxi,63.123016,47.86227,60.604278
1,97.754167,123.233892,75.826565,53.30909,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 107:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,Taxi,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 108:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,Taxi,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 109:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,Taxi,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 110:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.86227,60.604278
1,97.754167,123.233892,75.826565,53.30909,71.708829
2,218.469730,148.124636,89.265413,57.889224,Taxi
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 111:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,Taxi,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 112:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.657480,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,Taxi,61.660276,48.768065,129.938081
4,Pickup2,108.848030,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 113:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,141.801282,113.978739
4,26.109623,73.893449,54.96944,Taxi,103.413545




R4 -> Optimal Policy for episode - 114:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,Taxi,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 115:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,Taxi,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 116:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,90.674462,47.446994,55.214400
1,46.932966,182.138509,85.238669,46.925166,64.575437
2,44.812990,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 117:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.549130,62.669241
1,30.696452,119.688790,46.135958,33.419992,77.736214
2,29.27604,99.329006,47.880669,36.125636,97.685713
3,27.52354,83.217543,44.978743,38.717843,123.903572
4,25.657834,71.577093,44.049447,Taxi,Pickup2




R4 -> Optimal Policy for episode - 118:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,Taxi,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 119:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.862270,60.604278
1,97.754167,123.233892,75.826565,53.309090,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,Taxi,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 120:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,Taxi,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 121:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,Taxi,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 122:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.65748,48.246968,40.536083,Taxi
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 123:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,Taxi
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 124:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,Taxi
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 125:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,Taxi,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 126:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,Taxi
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 127:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,Taxi,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 128:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,Taxi
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 129:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,Taxi,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 130:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,Taxi,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 131:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,Taxi,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 132:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,Taxi,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 133:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,Taxi
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 134:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,Taxi
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 135:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,Taxi,33.512984,19.413184
4,Pickup1,108.628708,60.506390,33.620854,19.43144




R4 -> Optimal Policy for episode - 136:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,Taxi
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 137:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 138:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,92.820734,Pickup1,72.977174,42.812832,27.649014
1,Taxi,142.257746,77.614576,45.693855,28.59528
2,218.487168,148.127999,88.150059,48.729943,29.295483
3,252.940416,157.670652,90.329694,50.782818,29.623299
4,Pickup2,163.443354,91.260965,50.932411,29.647909




R4 -> Optimal Policy for episode - 139:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.068927,Pickup2,113.165964
1,0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,115.424107,133.481447
3,-0.149726,1.294357,13.002945,100.701833,155.908685
4,Taxi,1.437771,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 140:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,Taxi,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 141:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,Taxi,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 142:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,Taxi,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 143:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,Taxi,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 144:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,Taxi,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 145:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,Taxi,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 146:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,Taxi,33.512984,19.413184
4,Pickup1,108.628708,60.506390,33.620854,19.43144




R4 -> Optimal Policy for episode - 147:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,Taxi,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 148:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.718860,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,Taxi,12.76418,Pickup1




R4 -> Optimal Policy for episode - 149:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,Taxi,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 150:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,Taxi,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 151:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,Taxi,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 152:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,80.537793,Pickup1,94.323416,46.383045,26.96021
1,86.024375,189.343150,88.900079,45.587051,26.839746
2,Taxi,162.030179,81.791483,43.980844,26.510472
3,174.967463,145.008830,78.795012,43.985099,26.265538
4,Pickup2,136.456902,78.108432,43.883632,26.068911




R4 -> Optimal Policy for episode - 153:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,Taxi,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 154:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.718860,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,Taxi,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 155:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,Taxi,69.036577




R4 -> Optimal Policy for episode - 156:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,Taxi
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 157:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,Taxi,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 158:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,Taxi
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 159:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.203300,Pickup1,90.674462,47.446994,55.214400
1,46.932966,182.138509,85.238669,46.925166,64.575437
2,Taxi,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 160:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,Taxi
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 161:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,Taxi,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 162:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,Taxi
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 163:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,Taxi
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 164:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,Taxi,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 165:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,Taxi,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 166:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,Taxi,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 167:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,Taxi,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 168:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,Taxi,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 169:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,Taxi,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 170:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.203300,Pickup1,90.674462,47.446994,55.214400
1,46.932966,182.138509,85.238669,46.925166,64.575437
2,44.812990,151.544655,76.898016,45.720768,77.306801
3,Taxi,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 171:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,Taxi,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 172:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,Taxi,23.578512,13.522197




R4 -> Optimal Policy for episode - 173:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,Taxi




R4 -> Optimal Policy for episode - 174:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,Taxi,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 175:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.430843,69.482578,52.41298,Pickup2,138.543684
1,65.064169,Taxi,60.306932,197.809772,132.854667
2,145.559645,99.476453,70.122339,166.675334,123.943436
3,168.563434,106.000156,70.476764,141.373933,113.507935
4,Pickup1,109.834807,70.578182,122.855771,103.025876




R4 -> Optimal Policy for episode - 176:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,Taxi,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 177:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,Taxi
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 178:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,Taxi,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 179:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,Taxi,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 180:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.068927,Pickup2,113.165964
1,0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,115.424107,133.481447
3,-0.149726,1.294357,13.002945,100.701833,155.908685
4,Taxi,1.437771,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 181:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,Taxi,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 182:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,Taxi
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 183:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 184:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.2033,Pickup1,90.674462,47.446994,55.214400
1,46.932966,182.138509,85.238669,46.925166,64.575437
2,44.81299,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,Taxi,Pickup2




R4 -> Optimal Policy for episode - 185:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 186:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,Taxi,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 187:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,Taxi,66.96289




R4 -> Optimal Policy for episode - 188:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,Taxi,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 189:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,Taxi
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 190:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,Taxi,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 191:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,Taxi,34.228066,19.790236




R4 -> Optimal Policy for episode - 192:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,Taxi,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 193:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,Taxi,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 194:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,Taxi,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 195:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 196:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,Taxi,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 197:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,Taxi,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 198:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,Taxi,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 199:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,Taxi,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 200:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,Taxi,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 201:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,Taxi,82.322978,68.715045




R4 -> Optimal Policy for episode - 202:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,Taxi,Pickup1




R4 -> Optimal Policy for episode - 203:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,Taxi
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 204:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,Taxi,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 205:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,Taxi,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 206:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,Taxi,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 207:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,Taxi,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 208:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,Taxi,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 209:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,Taxi,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 210:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,Taxi,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 211:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,Taxi
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 212:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,Taxi
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 213:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,Taxi,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 214:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,Taxi,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 215:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,Taxi,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 216:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,Taxi,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 217:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,Taxi
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 218:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,Taxi,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 219:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,Taxi,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 220:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,Taxi,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 221:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 222:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.86227,60.604278
1,97.754167,123.233892,75.826565,53.30909,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,Taxi,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 223:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,Taxi,34.228066,19.790236




R4 -> Optimal Policy for episode - 224:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,Taxi,23.578512,13.522197




R4 -> Optimal Policy for episode - 225:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,Taxi,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 226:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,Taxi,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 227:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,Taxi,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 228:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,Taxi,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 229:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,Taxi,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 230:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,Taxi,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 231:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,Taxi,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 232:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,Taxi,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 233:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,Taxi,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 234:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,Taxi,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 235:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,Taxi,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 236:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 237:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.430843,Taxi,52.41298,Pickup2,138.543684
1,65.064169,82.809400,60.306932,197.809772,132.854667
2,145.559645,99.476453,70.122339,166.675334,123.943436
3,168.563434,106.000156,70.476764,141.373933,113.507935
4,Pickup1,109.834807,70.578182,122.855771,103.025876




R4 -> Optimal Policy for episode - 238:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,Taxi
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 239:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,Taxi
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 240:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,Taxi,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 241:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,Taxi,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 242:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,Taxi,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 243:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,Taxi,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 244:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,Taxi,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 245:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.718860,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,Taxi,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 246:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,Taxi,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 247:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.430843,69.482578,52.41298,Pickup2,138.543684
1,65.064169,Taxi,60.306932,197.809772,132.854667
2,145.559645,99.476453,70.122339,166.675334,123.943436
3,168.563434,106.000156,70.476764,141.373933,113.507935
4,Pickup1,109.834807,70.578182,122.855771,103.025876




R4 -> Optimal Policy for episode - 248:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,Taxi
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 249:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,Taxi
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 250:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,Taxi,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 251:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,Taxi




R4 -> Optimal Policy for episode - 252:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,Taxi,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 253:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,Taxi,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 254:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.430843,69.482578,52.41298,Pickup2,Taxi
1,65.064169,82.8094,60.306932,197.809772,132.854667
2,145.559645,99.476453,70.122339,166.675334,123.943436
3,168.563434,106.000156,70.476764,141.373933,113.507935
4,Pickup1,109.834807,70.578182,122.855771,103.025876




R4 -> Optimal Policy for episode - 255:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 256:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,Taxi,Pickup1




R4 -> Optimal Policy for episode - 257:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,Taxi,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 258:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,Taxi
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 259:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,Taxi,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 260:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,Taxi,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 261:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,Taxi,Pickup1




R4 -> Optimal Policy for episode - 262:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,Taxi
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 263:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,Taxi,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 264:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,Taxi,13.522197




R4 -> Optimal Policy for episode - 265:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,Taxi,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 266:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,Taxi,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 267:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,Taxi,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 268:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,Taxi,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 269:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.203300,Pickup1,90.674462,47.446994,55.214400
1,46.932966,182.138509,85.238669,46.925166,64.575437
2,44.812990,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,Taxi,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 270:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,Taxi,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 271:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,Taxi




R4 -> Optimal Policy for episode - 272:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,Taxi,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 273:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,Taxi
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 274:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 275:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,Taxi,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 276:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,Taxi,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 277:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,Taxi,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 278:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,Taxi,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 279:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,Taxi




R4 -> Optimal Policy for episode - 280:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,Taxi
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 281:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,Taxi,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 282:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.86227,60.604278
1,97.754167,123.233892,75.826565,53.30909,Taxi
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 283:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,Taxi,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 284:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,Taxi
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 285:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,Taxi
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 286:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,Taxi,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 287:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,Taxi,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 288:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,Taxi
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 289:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,Taxi,34.185877,Pickup2




R4 -> Optimal Policy for episode - 290:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,Taxi,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 291:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,Taxi,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 292:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,Taxi
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 293:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,Taxi,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 294:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,Taxi,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 295:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,Taxi,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 296:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,Taxi
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 297:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,Taxi,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 298:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-0.068927,Pickup2,113.165964
1,0.0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,115.424107,133.481447
3,-0.149726,1.294357,13.002945,100.701833,155.908685
4,-0.196035,Taxi,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 299:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.2033,Pickup1,90.674462,Taxi,55.214400
1,46.932966,182.138509,85.238669,46.925166,64.575437
2,44.81299,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 300:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,Taxi,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 301:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 302:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,Taxi,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 303:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,Taxi
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 304:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,Taxi,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 305:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,Taxi,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 306:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,Taxi,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 307:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,Taxi,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 308:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,Taxi,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 309:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.862270,60.604278
1,97.754167,123.233892,75.826565,53.309090,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,Taxi,Pickup1




R4 -> Optimal Policy for episode - 310:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,Taxi,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 311:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,Taxi
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 312:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,Taxi,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 313:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,Taxi,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 314:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,Taxi,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 315:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,Taxi,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 316:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.54913,62.669241
1,30.696452,119.688790,46.135958,33.419992,77.736214
2,29.27604,99.329006,47.880669,36.125636,97.685713
3,27.52354,83.217543,44.978743,38.717843,123.903572
4,25.657834,71.577093,Taxi,39.600523,Pickup2




R4 -> Optimal Policy for episode - 317:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.269420,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,Taxi,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 318:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,Taxi
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 319:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.86227,60.604278
1,97.754167,123.233892,75.826565,53.30909,71.708829
2,Taxi,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 320:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,Taxi,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 321:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,Taxi,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 322:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.54913,62.669241
1,30.696452,119.688790,46.135958,33.419992,77.736214
2,29.27604,99.329006,47.880669,36.125636,97.685713
3,27.52354,83.217543,44.978743,38.717843,Taxi
4,25.657834,71.577093,44.049447,39.600523,Pickup2




R4 -> Optimal Policy for episode - 323:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,Taxi
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 324:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,Taxi,81.65748,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 325:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,Taxi,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 326:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,Taxi,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 327:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,Taxi,19.43144




R4 -> Optimal Policy for episode - 328:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,Taxi




R4 -> Optimal Policy for episode - 329:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,Taxi
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 330:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,Taxi,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 331:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,Taxi,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 332:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,Taxi
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 333:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,Taxi,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 334:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,Taxi,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 335:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,Taxi
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 336:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 337:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,Taxi,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 338:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,Taxi
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 339:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,Taxi,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 340:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,Taxi,Pickup1




R4 -> Optimal Policy for episode - 341:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,Taxi,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 342:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,Taxi




R4 -> Optimal Policy for episode - 343:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,Taxi,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 344:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,Taxi,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 345:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,Taxi,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 346:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,Taxi,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 347:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,Taxi,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 348:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,Taxi
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 349:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.718860,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,Taxi,12.76418,Pickup1




R4 -> Optimal Policy for episode - 350:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,Taxi
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 351:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-0.653572,Pickup2,96.384091
1,0.0,Taxi,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 352:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,Taxi
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 353:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,Taxi
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 354:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,Taxi
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 355:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.232930
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.573330
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,Taxi




R4 -> Optimal Policy for episode - 356:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,Taxi
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 357:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 358:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,Taxi,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 359:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 360:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,Taxi
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 361:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,Taxi,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 362:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,Taxi,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 363:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,Taxi,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 364:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,Taxi,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 365:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,Taxi
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 366:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,Taxi,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 367:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,80.537793,Pickup1,94.323416,46.383045,26.960210
1,86.024375,189.343150,88.900079,45.587051,Taxi
2,153.227124,162.030179,81.791483,43.980844,26.510472
3,174.967463,145.008830,78.795012,43.985099,26.265538
4,Pickup2,136.456902,78.108432,43.883632,26.068911




R4 -> Optimal Policy for episode - 368:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,Taxi
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 369:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.65748,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,Taxi,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 370:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,Taxi,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 371:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,Taxi,82.322978,68.715045




R4 -> Optimal Policy for episode - 372:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,141.801282,113.978739
4,26.109623,73.893449,54.96944,Taxi,103.413545




R4 -> Optimal Policy for episode - 373:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,Taxi,23.578512,13.522197




R4 -> Optimal Policy for episode - 374:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,Taxi,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 375:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,Taxi,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 376:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,Taxi,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 377:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,Taxi,68.715045




R4 -> Optimal Policy for episode - 378:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,Taxi
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 379:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,Taxi,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 380:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,Taxi
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 381:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,Taxi,23.578512,13.522197




R4 -> Optimal Policy for episode - 382:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,Taxi,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 383:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 384:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,Taxi,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 385:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,Taxi,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 386:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,Taxi,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 387:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,Taxi,Pickup1




R4 -> Optimal Policy for episode - 388:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.203300,Pickup1,90.674462,47.446994,55.214400
1,Taxi,182.138509,85.238669,46.925166,64.575437
2,44.812990,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 389:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 390:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,Taxi,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 391:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,Taxi,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 392:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,Taxi
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 393:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,Taxi
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 394:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,Taxi,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 395:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,Taxi,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 396:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.203300,Pickup1,90.674462,47.446994,55.214400
1,46.932966,182.138509,85.238669,46.925166,64.575437
2,44.812990,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,Taxi,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 397:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,Taxi,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 398:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.54913,62.669241
1,30.696452,119.688790,46.135958,33.419992,77.736214
2,29.27604,Taxi,47.880669,36.125636,97.685713
3,27.52354,83.217543,44.978743,38.717843,123.903572
4,25.657834,71.577093,44.049447,39.600523,Pickup2




R4 -> Optimal Policy for episode - 399:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,Taxi,Pickup1




R4 -> Optimal Policy for episode - 400:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,Taxi
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 401:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,Taxi
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 402:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,Taxi,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 403:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 404:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,Taxi,68.715045




R4 -> Optimal Policy for episode - 405:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,Taxi,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 406:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,Taxi,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 407:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,Taxi,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 408:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,103.842143,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 409:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 410:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,Taxi,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 411:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,Taxi




R4 -> Optimal Policy for episode - 412:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,Taxi
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 413:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,Taxi,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 414:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,Taxi
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 415:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,Taxi,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 416:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,Taxi,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 417:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,Taxi,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 418:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.54913,62.669241
1,30.696452,119.688790,46.135958,33.419992,77.736214
2,29.276040,99.329006,47.880669,36.125636,97.685713
3,Taxi,83.217543,44.978743,38.717843,123.903572
4,25.657834,71.577093,44.049447,39.600523,Pickup2




R4 -> Optimal Policy for episode - 419:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,Taxi,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 420:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,Taxi,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 421:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.203300,Pickup1,90.674462,47.446994,55.214400
1,Taxi,182.138509,85.238669,46.925166,64.575437
2,44.812990,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 422:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,Taxi,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 423:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,Taxi




R4 -> Optimal Policy for episode - 424:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 425:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,Taxi,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 426:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,Taxi,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 427:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,Taxi,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 428:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,Taxi,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 429:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.430843,69.482578,52.41298,Pickup2,138.543684
1,65.064169,82.8094,60.306932,197.809772,132.854667
2,145.559645,99.476453,70.122339,166.675334,123.943436
3,Taxi,106.000156,70.476764,141.373933,113.507935
4,Pickup1,109.834807,70.578182,122.855771,103.025876




R4 -> Optimal Policy for episode - 430:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.86227,60.604278
1,97.754167,123.233892,75.826565,53.30909,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,Taxi,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 431:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,Taxi,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 432:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,Taxi,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 433:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,Taxi,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 434:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,Taxi,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 435:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,Taxi,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 436:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,Taxi
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 437:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,Taxi
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 438:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,Taxi,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 439:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.068927,Pickup2,113.165964
1,0.0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,115.424107,133.481447
3,-0.149726,1.294357,Taxi,100.701833,155.908685
4,-0.196035,1.437771,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 440:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,Taxi,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 441:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,Taxi
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 442:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,Taxi,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 443:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,Taxi,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 444:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,Taxi,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 445:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,Taxi
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 446:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,Taxi,79.271361,66.96289




R4 -> Optimal Policy for episode - 447:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,Taxi,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 448:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,Taxi,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 449:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,Taxi,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 450:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,Taxi,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 451:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,Taxi,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 452:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,Taxi,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 453:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,Taxi,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 454:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.269420,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.044770,94.929996,76.076561
4,25.929107,72.915738,Taxi,82.473170,69.036577




R4 -> Optimal Policy for episode - 455:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,Taxi,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 456:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,Taxi,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 457:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,Taxi,Pickup1




R4 -> Optimal Policy for episode - 458:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,Taxi
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 459:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,Taxi
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 460:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,Taxi,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 461:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,Taxi,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 462:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.86227,60.604278
1,Taxi,123.233892,75.826565,53.30909,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 463:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,Taxi,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 464:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,Taxi,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 465:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,Taxi
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 466:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,Taxi
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 467:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,Taxi,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 468:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,Taxi,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 469:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,Taxi,Pickup1




R4 -> Optimal Policy for episode - 470:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.86227,60.604278
1,97.754167,123.233892,75.826565,53.30909,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,Taxi,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 471:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,Taxi,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 472:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 473:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,Taxi,79.271361,66.96289




R4 -> Optimal Policy for episode - 474:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,Taxi
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 475:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,141.801282,113.978739
4,26.109623,Taxi,54.96944,123.096978,103.413545




R4 -> Optimal Policy for episode - 476:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,Taxi,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 477:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,Taxi
3,27.963495,85.530115,56.23307,141.801282,113.978739
4,26.109623,73.893449,54.96944,123.096978,103.413545




R4 -> Optimal Policy for episode - 478:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,Taxi
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 479:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,Taxi,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 480:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,Taxi
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 481:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,Taxi,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 482:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,Taxi,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 483:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,Taxi,66.96289




R4 -> Optimal Policy for episode - 484:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,Taxi,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 485:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,Taxi,34.185877,Pickup2




R4 -> Optimal Policy for episode - 486:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,Taxi,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 487:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,Taxi,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 488:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.657480,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,Taxi,61.660276,48.768065,129.938081
4,Pickup2,108.848030,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 489:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,Taxi,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 490:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,Taxi,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 491:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,Taxi,69.036577




R4 -> Optimal Policy for episode - 492:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.549130,62.669241
1,30.696452,119.688790,46.135958,33.419992,77.736214
2,29.27604,99.329006,47.880669,36.125636,97.685713
3,27.52354,83.217543,44.978743,Taxi,123.903572
4,25.657834,71.577093,44.049447,39.600523,Pickup2




R4 -> Optimal Policy for episode - 493:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.068927,Pickup2,113.165964
1,0.0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,115.424107,133.481447
3,-0.149726,1.294357,13.002945,100.701833,155.908685
4,-0.196035,1.437771,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 494:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 495:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,Taxi,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 496:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,Taxi,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 497:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,Taxi,Pickup2




R4 -> Optimal Policy for episode - 498:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,Taxi,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 499:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,Taxi,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 500:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,Taxi




R4 -> Optimal Policy for episode - 501:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,Taxi,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 502:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,Taxi,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 503:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,Taxi,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 504:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,253.048695,Taxi,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 505:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,Taxi,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 506:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,Taxi,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 507:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,80.537793,Pickup1,94.323416,46.383045,26.960210
1,86.024375,189.343150,88.900079,45.587051,26.839746
2,153.227124,162.030179,81.791483,43.980844,26.510472
3,174.967463,145.008830,78.795012,43.985099,26.265538
4,Pickup2,136.456902,78.108432,43.883632,Taxi




R4 -> Optimal Policy for episode - 508:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,Taxi,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 509:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,Taxi,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 510:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 511:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,Taxi
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 512:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,Taxi,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 513:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,Taxi,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 514:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,Taxi,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 515:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,Taxi,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 516:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 517:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,Taxi,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 518:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,Taxi,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 519:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,Taxi,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 520:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,Taxi
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 521:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,Taxi,33.512984,19.413184
4,Pickup1,108.628708,60.506390,33.620854,19.43144




R4 -> Optimal Policy for episode - 522:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,Taxi,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 523:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,Taxi,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 524:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,Taxi,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 525:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.718860,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,Taxi,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 526:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,Taxi,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 527:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,Taxi,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 528:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,Taxi
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 529:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,Taxi,19.43144




R4 -> Optimal Policy for episode - 530:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,Taxi
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 531:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,Taxi,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 532:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,Taxi,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 533:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,Taxi,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 534:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,Taxi,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 535:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,Taxi,63.123016,47.86227,60.604278
1,97.754167,123.233892,75.826565,53.30909,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 536:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.150820,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,Taxi,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 537:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,Taxi,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 538:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,Taxi,66.96289




R4 -> Optimal Policy for episode - 539:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 540:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.269420,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,Taxi,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 541:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,Taxi,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 542:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,Taxi
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 543:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,Taxi,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 544:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 545:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,Taxi,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 546:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,Taxi,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 547:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,Taxi,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 548:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,Taxi,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 549:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,Taxi,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 550:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,Taxi,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 551:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.718860,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,Taxi,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 552:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,Taxi,82.322978,68.715045




R4 -> Optimal Policy for episode - 553:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 554:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,Taxi,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 555:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,Taxi,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 556:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,Taxi
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 557:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,Taxi




R4 -> Optimal Policy for episode - 558:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,Taxi
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 559:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,Taxi,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 560:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,Taxi




R4 -> Optimal Policy for episode - 561:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,Taxi
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 562:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,Taxi,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 563:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 564:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,Taxi
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 565:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,Taxi
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 566:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,Taxi,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 567:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,Taxi,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 568:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,Taxi
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 569:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,Taxi,Pickup1




R4 -> Optimal Policy for episode - 570:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,Taxi




R4 -> Optimal Policy for episode - 571:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,Taxi,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 572:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,Taxi
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 573:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 574:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,Taxi,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 575:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,Taxi,69.036577




R4 -> Optimal Policy for episode - 576:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,Taxi




R4 -> Optimal Policy for episode - 577:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,Taxi,33.620854,19.43144




R4 -> Optimal Policy for episode - 578:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,Taxi,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 579:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,Taxi,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 580:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 581:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,Taxi




R4 -> Optimal Policy for episode - 582:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,Taxi
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 583:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,Taxi,Pickup1




R4 -> Optimal Policy for episode - 584:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,Taxi,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 585:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,Taxi,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 586:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,Taxi,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 587:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,Taxi,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 588:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,Taxi,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 589:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 590:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,Taxi,33.620854,19.43144




R4 -> Optimal Policy for episode - 591:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,Taxi,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 592:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.101247,0.488238,6.292824,Pickup2,136.927617
1,0.0,1.025096,10.620903,195.107667,131.308399
2,-0.06462,1.577359,18.040111,164.413396,131.017930
3,-0.103109,Taxi,18.307493,140.365767,142.671756
4,-0.128396,2.265851,19.471006,123.940011,Pickup1




R4 -> Optimal Policy for episode - 593:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,Taxi,34.185877,Pickup2




R4 -> Optimal Policy for episode - 594:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,Taxi,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 595:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,Taxi,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 596:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,Taxi,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 597:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,Taxi
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.573330
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 598:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,80.537793,Pickup1,94.323416,46.383045,26.96021
1,86.024375,189.343150,88.900079,45.587051,26.839746
2,153.227124,Taxi,81.791483,43.980844,26.510472
3,174.967463,145.008830,78.795012,43.985099,26.265538
4,Pickup2,136.456902,78.108432,43.883632,26.068911




R4 -> Optimal Policy for episode - 599:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,Taxi,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 600:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.068927,Pickup2,113.165964
1,0.0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,115.424107,133.481447
3,-0.149726,1.294357,13.002945,Taxi,155.908685
4,-0.196035,1.437771,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 601:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,Taxi
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 602:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.657480,48.246968,40.536083,83.184057
2,145.335939,Taxi,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.848030,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 603:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,Taxi,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 604:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.65748,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,Taxi,48.768065,129.938081
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 605:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,Taxi,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 606:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,Taxi,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 607:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,Taxi,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 608:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,Taxi,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 609:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,Taxi,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 610:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 611:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,Taxi,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 612:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,Taxi,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 613:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,Taxi
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 614:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,Taxi,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 615:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,Taxi,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 616:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.657480,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,Taxi,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 617:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,Taxi,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 618:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 619:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,Taxi,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 620:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,Taxi,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 621:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,Taxi,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 622:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,Taxi
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 623:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,Taxi,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 624:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.150820,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,Taxi,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 625:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,Taxi,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 626:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,Taxi,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 627:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,Taxi,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 628:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,Taxi,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 629:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,Taxi
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 630:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 631:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,Taxi,33.620854,19.43144




R4 -> Optimal Policy for episode - 632:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,Taxi,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 633:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 634:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,Taxi,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 635:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,Taxi,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 636:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,Taxi,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 637:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,Taxi,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 638:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.86227,60.604278
1,97.754167,123.233892,75.826565,53.30909,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,Taxi,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 639:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,Taxi,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 640:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,Taxi,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 641:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,Taxi,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 642:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,Taxi,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 643:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,Taxi,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 644:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,Taxi,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 645:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,Taxi,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 646:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,Taxi,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 647:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,Taxi,34.228066,19.790236




R4 -> Optimal Policy for episode - 648:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,Taxi,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 649:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,Taxi
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 650:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,Taxi,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 651:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,Taxi,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 652:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,Taxi




R4 -> Optimal Policy for episode - 653:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 654:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,Taxi,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 655:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,Taxi
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 656:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,141.801282,Taxi
4,26.109623,73.893449,54.96944,123.096978,103.413545




R4 -> Optimal Policy for episode - 657:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,Taxi,13.522197




R4 -> Optimal Policy for episode - 658:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,Taxi,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 659:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.101247,Taxi,6.292824,Pickup2,136.927617
1,0.0,1.025096,10.620903,195.107667,131.308399
2,-0.06462,1.577359,18.040111,164.413396,131.017930
3,-0.103109,2.055178,18.307493,140.365767,142.671756
4,-0.128396,2.265851,19.471006,123.940011,Pickup1




R4 -> Optimal Policy for episode - 660:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,Taxi,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 661:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,Taxi,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 662:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,Taxi,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 663:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,Taxi
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,141.801282,113.978739
4,26.109623,73.893449,54.96944,123.096978,103.413545




R4 -> Optimal Policy for episode - 664:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,Taxi,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 665:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,Taxi
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 666:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,Taxi,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 667:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,Taxi,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 668:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.65748,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,Taxi,129.938081
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 669:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,Taxi




R4 -> Optimal Policy for episode - 670:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,Taxi,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 671:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,Taxi,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 672:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,Taxi,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 673:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,Taxi
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 674:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,Taxi,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 675:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 676:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 677:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,Taxi,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 678:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,Taxi
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 679:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,Taxi,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 680:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,Taxi,13.522197




R4 -> Optimal Policy for episode - 681:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,Taxi




R4 -> Optimal Policy for episode - 682:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 683:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,Taxi,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 684:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,Taxi,19.790236




R4 -> Optimal Policy for episode - 685:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,Taxi,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 686:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,Taxi,33.620854,19.43144




R4 -> Optimal Policy for episode - 687:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,Taxi,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 688:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.106800,121.907553,67.554235,198.728759,133.452287
2,Taxi,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,141.801282,113.978739
4,26.109623,73.893449,54.96944,123.096978,103.413545




R4 -> Optimal Policy for episode - 689:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.430843,69.482578,52.41298,Pickup2,138.543684
1,65.064169,Taxi,60.306932,197.809772,132.854667
2,145.559645,99.476453,70.122339,166.675334,123.943436
3,168.563434,106.000156,70.476764,141.373933,113.507935
4,Pickup1,109.834807,70.578182,122.855771,103.025876




R4 -> Optimal Policy for episode - 690:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,Taxi,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 691:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,Taxi,67.268405
1,64.809821,81.65748,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 692:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,Taxi,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 693:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,Taxi,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 694:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,Taxi,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 695:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,Taxi,19.790236




R4 -> Optimal Policy for episode - 696:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,Taxi,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 697:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,Taxi
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 698:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.233070,141.801282,113.978739
4,26.109623,73.893449,Taxi,123.096978,103.413545




R4 -> Optimal Policy for episode - 699:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,Taxi,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 700:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,Taxi,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 701:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,Taxi,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 702:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,Taxi,37.657928,35.098762,67.268405
1,64.809821,81.657480,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.848030,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 703:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,Taxi
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 704:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,Taxi,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 705:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,Taxi,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 706:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 707:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,Taxi
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 708:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,Taxi
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 709:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.101247,0.488238,6.292824,Pickup2,136.927617
1,0.0,1.025096,10.620903,195.107667,Taxi
2,-0.06462,1.577359,18.040111,164.413396,131.017930
3,-0.103109,2.055178,18.307493,140.365767,142.671756
4,-0.128396,2.265851,19.471006,123.940011,Pickup1




R4 -> Optimal Policy for episode - 710:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,Taxi
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 711:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,Taxi
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 712:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,Taxi,34.185877,Pickup2




R4 -> Optimal Policy for episode - 713:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,Taxi,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 714:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,Taxi,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 715:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,Taxi,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 716:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,Taxi,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 717:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.718860,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,Taxi,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 718:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,Taxi
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 719:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,Taxi




R4 -> Optimal Policy for episode - 720:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,Taxi,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 721:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.150820,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,Taxi,44.466551,Pickup1




R4 -> Optimal Policy for episode - 722:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,Taxi,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 723:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,Taxi,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 724:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,Taxi,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 725:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,92.820734,Pickup1,72.977174,42.812832,27.649014
1,103.031934,142.257746,77.614576,45.693855,28.59528
2,218.487168,148.127999,88.150059,Taxi,29.295483
3,252.940416,157.670652,90.329694,50.782818,29.623299
4,Pickup2,163.443354,91.260965,50.932411,29.647909




R4 -> Optimal Policy for episode - 726:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,Taxi,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 727:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,Taxi,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 728:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,Taxi,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 729:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,Taxi




R4 -> Optimal Policy for episode - 730:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,141.801282,113.978739
4,26.109623,73.893449,54.96944,Taxi,103.413545




R4 -> Optimal Policy for episode - 731:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,Taxi,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 732:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.86227,60.604278
1,97.754167,123.233892,75.826565,53.30909,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,Taxi,61.778079,Pickup1




R4 -> Optimal Policy for episode - 733:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,Taxi




R4 -> Optimal Policy for episode - 734:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,Taxi,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 735:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,Taxi
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 736:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,Taxi,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 737:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,Taxi,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 738:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,Taxi,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 739:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,Taxi,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 740:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,Taxi,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 741:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,Taxi
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 742:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,141.801282,113.978739
4,26.109623,73.893449,54.96944,Taxi,103.413545




R4 -> Optimal Policy for episode - 743:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,Taxi,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 744:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.068927,Pickup2,113.165964
1,0.0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,115.424107,133.481447
3,-0.149726,1.294357,13.002945,100.701833,155.908685
4,-0.196035,1.437771,13.896039,Taxi,Pickup1




R4 -> Optimal Policy for episode - 745:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,Taxi
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 746:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,Taxi,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 747:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,Taxi,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 748:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.65748,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.84803,Taxi,49.889399,Pickup1




R4 -> Optimal Policy for episode - 749:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.65748,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,Taxi
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 750:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,Taxi,66.96289




R4 -> Optimal Policy for episode - 751:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 752:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,Taxi,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 753:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 754:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,Taxi,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 755:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,Taxi,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 756:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,Taxi,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 757:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,Taxi,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 758:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,Taxi,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 759:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,Taxi,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,141.801282,113.978739
4,26.109623,73.893449,54.96944,123.096978,103.413545




R4 -> Optimal Policy for episode - 760:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 761:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,Taxi,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 762:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,92.820734,Pickup1,72.977174,42.812832,27.649014
1,103.031934,142.257746,77.614576,45.693855,28.59528
2,218.487168,148.127999,88.150059,48.729943,29.295483
3,252.940416,157.670652,90.329694,Taxi,29.623299
4,Pickup2,163.443354,91.260965,50.932411,29.647909




R4 -> Optimal Policy for episode - 763:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,Taxi
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 764:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,Taxi,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 765:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,Taxi,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 766:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.54913,62.669241
1,Taxi,119.688790,46.135958,33.419992,77.736214
2,29.276040,99.329006,47.880669,36.125636,97.685713
3,27.523540,83.217543,44.978743,38.717843,123.903572
4,25.657834,71.577093,44.049447,39.600523,Pickup2




R4 -> Optimal Policy for episode - 767:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,Taxi,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 768:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.2033,Pickup1,90.674462,47.446994,55.214400
1,46.932966,182.138509,85.238669,46.925166,64.575437
2,44.81299,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,Taxi,Pickup2




R4 -> Optimal Policy for episode - 769:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,Taxi,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 770:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.65748,48.246968,40.536083,83.184057
2,Taxi,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 771:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,Taxi,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 772:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,Taxi,Pickup1




R4 -> Optimal Policy for episode - 773:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,Taxi,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 774:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,Taxi,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 775:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,Taxi,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 776:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,Taxi,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 777:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,Taxi,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 778:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,Taxi,34.228066,19.790236




R4 -> Optimal Policy for episode - 779:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,Taxi,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 780:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,Taxi,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 781:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,Taxi,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 782:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,Taxi,67.268405
1,64.809821,81.65748,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 783:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,Taxi,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 784:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,Taxi




R4 -> Optimal Policy for episode - 785:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,80.537793,Pickup1,94.323416,46.383045,26.960210
1,86.024375,189.343150,88.900079,45.587051,26.839746
2,153.227124,162.030179,81.791483,43.980844,Taxi
3,174.967463,145.008830,78.795012,43.985099,26.265538
4,Pickup2,136.456902,78.108432,43.883632,26.068911




R4 -> Optimal Policy for episode - 786:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,Taxi,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 787:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,Taxi
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 788:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.269420,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.044770,94.929996,76.076561
4,25.929107,72.915738,Taxi,82.473170,69.036577




R4 -> Optimal Policy for episode - 789:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,Taxi
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 790:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.430843,69.482578,52.412980,Pickup2,138.543684
1,65.064169,82.8094,60.306932,197.809772,132.854667
2,145.559645,99.476453,70.122339,166.675334,123.943436
3,168.563434,106.000156,Taxi,141.373933,113.507935
4,Pickup1,109.834807,70.578182,122.855771,103.025876




R4 -> Optimal Policy for episode - 791:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,Taxi,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 792:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,Taxi,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 793:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,Taxi,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 794:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.269420,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.044770,94.929996,76.076561
4,25.929107,72.915738,Taxi,82.473170,69.036577




R4 -> Optimal Policy for episode - 795:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,Taxi,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 796:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 797:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 798:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,Taxi,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 799:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,Taxi
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 800:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,Taxi,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 801:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,Taxi,66.96289




R4 -> Optimal Policy for episode - 802:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-0.653572,Pickup2,96.384091
1,0.0,Taxi,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 803:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,Taxi,34.228066,19.790236




R4 -> Optimal Policy for episode - 804:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,Taxi,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 805:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,Taxi,82.322978,68.715045




R4 -> Optimal Policy for episode - 806:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,Taxi
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 807:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,Taxi,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 808:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,Taxi,60.604278
1,97.754167,123.233892,75.826565,53.309090,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 809:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,Taxi,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 810:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,Taxi,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 811:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,Taxi,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 812:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,Taxi,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 813:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,Taxi,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 814:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,Taxi,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 815:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 816:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,Taxi,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 817:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,Taxi,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 818:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,Taxi,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 819:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.54913,62.669241
1,30.696452,119.688790,46.135958,33.419992,77.736214
2,29.276040,99.329006,47.880669,36.125636,97.685713
3,27.523540,83.217543,44.978743,38.717843,123.903572
4,Taxi,71.577093,44.049447,39.600523,Pickup2




R4 -> Optimal Policy for episode - 820:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,Taxi,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 821:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,Taxi
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 822:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,68.181456,37.657928,35.098762,67.268405
1,64.809821,81.65748,48.246968,Taxi,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.84803,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 823:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,Taxi,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 824:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,Taxi
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 825:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,Taxi,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 826:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,Taxi
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 827:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,Taxi,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 828:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.101247,0.488238,6.292824,Pickup2,136.927617
1,0.0,1.025096,10.620903,195.107667,131.308399
2,-0.06462,1.577359,18.040111,164.413396,131.017930
3,-0.103109,2.055178,18.307493,140.365767,142.671756
4,-0.128396,Taxi,19.471006,123.940011,Pickup1




R4 -> Optimal Policy for episode - 829:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,Taxi
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 830:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,Taxi,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 831:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,Taxi,34.185877,Pickup2




R4 -> Optimal Policy for episode - 832:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,Taxi
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 833:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,Taxi
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 834:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,Taxi,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 835:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,Taxi,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 836:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 837:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,Taxi,85.096285,Pickup1




R4 -> Optimal Policy for episode - 838:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.101247,0.488238,6.292824,Pickup2,136.927617
1,0,1.025096,10.620903,195.107667,131.308399
2,-0.064620,1.577359,18.040111,164.413396,131.017930
3,Taxi,2.055178,18.307493,140.365767,142.671756
4,-0.128396,2.265851,19.471006,123.940011,Pickup1




R4 -> Optimal Policy for episode - 839:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,Taxi
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 840:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,Taxi,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 841:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,Taxi
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 842:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 843:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,Taxi,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 844:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,92.820734,Pickup1,72.977174,42.812832,27.649014
1,103.031934,142.257746,77.614576,45.693855,28.59528
2,218.487168,148.127999,88.150059,48.729943,29.295483
3,252.940416,Taxi,90.329694,50.782818,29.623299
4,Pickup2,163.443354,91.260965,50.932411,29.647909




R4 -> Optimal Policy for episode - 845:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,Taxi,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 846:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,Taxi




R4 -> Optimal Policy for episode - 847:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,Taxi,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 848:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,Taxi,Pickup1




R4 -> Optimal Policy for episode - 849:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,Taxi,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 850:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,Taxi,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 851:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,Taxi,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 852:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,Taxi,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 853:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,Taxi,66.96289




R4 -> Optimal Policy for episode - 854:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,Taxi,85.096285,Pickup1




R4 -> Optimal Policy for episode - 855:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,Taxi
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 856:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,139.160553
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,Taxi,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,141.801282,113.978739
4,26.109623,73.893449,54.96944,123.096978,103.413545




R4 -> Optimal Policy for episode - 857:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 858:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,Taxi,23.578512,13.522197




R4 -> Optimal Policy for episode - 859:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.54913,62.669241
1,30.696452,119.688790,46.135958,33.419992,77.736214
2,29.27604,99.329006,47.880669,36.125636,97.685713
3,27.52354,83.217543,Taxi,38.717843,123.903572
4,25.657834,71.577093,44.049447,39.600523,Pickup2




R4 -> Optimal Policy for episode - 860:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,80.537793,Pickup1,94.323416,46.383045,26.96021
1,86.024375,189.343150,88.900079,45.587051,26.839746
2,153.227124,162.030179,81.791483,43.980844,26.510472
3,174.967463,145.008830,78.795012,43.985099,26.265538
4,Pickup2,136.456902,Taxi,43.883632,26.068911




R4 -> Optimal Policy for episode - 861:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,Taxi,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 862:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,Taxi,34.228066,19.790236




R4 -> Optimal Policy for episode - 863:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,Taxi,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 864:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.54913,62.669241
1,30.696452,119.688790,46.135958,33.419992,77.736214
2,Taxi,99.329006,47.880669,36.125636,97.685713
3,27.523540,83.217543,44.978743,38.717843,123.903572
4,25.657834,71.577093,44.049447,39.600523,Pickup2




R4 -> Optimal Policy for episode - 865:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,Taxi,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.350890,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 866:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,Taxi,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 867:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,Taxi,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 868:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,Taxi,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 869:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,Taxi,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 870:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.101247,0.488238,6.292824,Pickup2,136.927617
1,0.0,1.025096,10.620903,195.107667,131.308399
2,-0.06462,Taxi,18.040111,164.413396,131.017930
3,-0.103109,2.055178,18.307493,140.365767,142.671756
4,-0.128396,2.265851,19.471006,123.940011,Pickup1




R4 -> Optimal Policy for episode - 871:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,Taxi,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 872:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,Taxi,84.647714,70.373403




R4 -> Optimal Policy for episode - 873:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,Taxi,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 874:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,Taxi,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 875:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.718860,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,Taxi,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 876:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.2033,Pickup1,90.674462,47.446994,55.214400
1,46.932966,182.138509,85.238669,46.925166,Taxi
2,44.81299,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 877:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,Taxi
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 878:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,Taxi




R4 -> Optimal Policy for episode - 879:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,Taxi
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 880:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,Taxi,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 881:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.269420,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,Taxi,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 882:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,Taxi,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 883:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 884:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,84.647714,70.373403




R4 -> Optimal Policy for episode - 885:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,Taxi
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 886:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,Taxi,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 887:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,Taxi,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 888:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,Taxi,79.271361,66.96289




R4 -> Optimal Policy for episode - 889:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,Taxi,148.504791,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 890:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 891:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,Taxi
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 892:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,Taxi,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 893:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.068927,Pickup2,113.165964
1,0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,115.424107,133.481447
3,-0.149726,1.294357,13.002945,100.701833,155.908685
4,Taxi,1.437771,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 894:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,Taxi,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 895:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,Taxi
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 896:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 897:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,Taxi,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 898:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 899:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,Taxi,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 900:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,Taxi,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 901:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,Taxi,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 902:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,Taxi,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,97.907948,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 903:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,Taxi
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 904:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,Taxi,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 905:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,Taxi,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 906:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,Taxi,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 907:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,Taxi,66.96289




R4 -> Optimal Policy for episode - 908:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,Taxi
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 909:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,Taxi
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 910:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,Taxi,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 911:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.977520,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,Taxi,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 912:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.170943,Taxi,37.657928,35.098762,67.268405
1,64.809821,81.657480,48.246968,40.536083,83.184057
2,145.335939,98.422843,60.041271,45.333483,103.565038
3,168.342160,104.996313,61.660276,48.768065,129.938081
4,Pickup2,108.848030,62.338875,49.889399,Pickup1




R4 -> Optimal Policy for episode - 913:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,Taxi,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 914:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,Taxi,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 915:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 916:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,Taxi
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 917:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.348700,108.410339,80.890192
3,-0.163008,1.074303,Taxi,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 918:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 919:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,Taxi,34.185877,Pickup2




R4 -> Optimal Policy for episode - 920:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,Taxi
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 921:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,Taxi,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 922:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,Taxi
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 923:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,Taxi,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 924:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,67.97752,36.136603,24.6696,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 925:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,Taxi,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 926:
Pick Up points :  [(4, 0), (4, 4)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,Taxi,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,145.319645,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 927:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,Taxi
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 928:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,Taxi,69.036577




R4 -> Optimal Policy for episode - 929:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.2033,Pickup1,90.674462,47.446994,55.214400
1,46.932966,182.138509,85.238669,Taxi,64.575437
2,44.81299,151.544655,76.898016,45.720768,77.306801
3,42.180295,126.997302,67.636996,46.513663,94.627266
4,39.369126,109.260468,66.161285,46.971501,Pickup2




R4 -> Optimal Policy for episode - 930:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,Taxi,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 931:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,Taxi
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.962890




R4 -> Optimal Policy for episode - 932:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 933:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.155244,68.113478,37.15082,31.622375,50.622927
1,64.794032,81.587169,47.700373,36.509772,61.592889
2,Taxi,98.35089,59.462297,40.749113,75.412669
3,168.325824,104.923499,61.059377,43.683038,93.096449
4,Pickup2,108.774923,61.728046,44.466551,Pickup1




R4 -> Optimal Policy for episode - 934:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.948095,Pickup1,72.251805,Pickup2,Taxi
1,31.1068,121.907553,67.554235,198.728759,133.452287
2,29.702076,101.623068,60.397163,167.344239,124.489152
3,27.963495,85.530115,56.23307,141.801282,113.978739
4,26.109623,73.893449,54.96944,123.096978,103.413545




R4 -> Optimal Policy for episode - 935:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,Taxi
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 936:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,Taxi,34.228066,19.790236




R4 -> Optimal Policy for episode - 937:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,Taxi,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 938:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,Taxi,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 939:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.718860,5.567613,32.815022
1,0.0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,Taxi,12.76418,Pickup1




R4 -> Optimal Policy for episode - 940:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 941:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,Taxi
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 942:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.408874,103.842143,70.259467,Pickup2,93.23293
1,97.851664,123.677958,81.118223,133.666672,89.453705
2,218.547348,148.504791,93.240457,113.236992,83.57333
3,253.048695,158.164121,94.709015,Taxi,76.892102
4,Pickup1,163.928131,95.308751,86.767176,70.287123




R4 -> Optimal Policy for episode - 943:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,Taxi
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 944:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.101247,0.488238,6.292824,Pickup2,136.927617
1,0.0,1.025096,10.620903,195.107667,131.308399
2,-0.06462,1.577359,18.040111,164.413396,131.017930
3,-0.103109,2.055178,Taxi,140.365767,142.671756
4,-0.128396,2.265851,19.471006,123.940011,Pickup1




R4 -> Optimal Policy for episode - 945:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,Taxi,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 946:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,Taxi,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 947:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,Taxi,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 948:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,Taxi,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 949:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.068927,Pickup2,113.165964
1,0.0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,Taxi,133.481447
3,-0.149726,1.294357,13.002945,100.701833,155.908685
4,-0.196035,1.437771,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 950:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,Taxi
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 951:
Pick Up points :  [(0, 1), (0, 3)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,Taxi,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 952:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,Taxi,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 953:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,Taxi,69.036577




R4 -> Optimal Policy for episode - 954:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,Taxi,85.096285,Pickup1




R4 -> Optimal Policy for episode - 955:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,Taxi,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 956:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,Taxi,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 957:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.250540,99.156665,46.197079,22.522708,13.362443
3,Taxi,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 958:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,Taxi,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 959:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,Taxi,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 960:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,Taxi,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 961:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.669600,17.331972
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,Taxi,19.10793
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.43144




R4 -> Optimal Policy for episode - 962:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,Taxi,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 963:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,80.537793,Pickup1,94.323416,46.383045,26.96021
1,86.024375,189.343150,88.900079,45.587051,26.839746
2,Taxi,162.030179,81.791483,43.980844,26.510472
3,174.967463,145.008830,78.795012,43.985099,26.265538
4,Pickup2,136.456902,78.108432,43.883632,26.068911




R4 -> Optimal Policy for episode - 964:
Pick Up points :  [(4, 0), (0, 1)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,92.820734,Pickup1,72.977174,42.812832,27.649014
1,103.031934,Taxi,77.614576,45.693855,28.59528
2,218.487168,148.127999,88.150059,48.729943,29.295483
3,252.940416,157.670652,90.329694,50.782818,29.623299
4,Pickup2,163.443354,91.260965,50.932411,29.647909




R4 -> Optimal Policy for episode - 965:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.553248,Pickup1,49.685466,31.54913,62.669241
1,30.696452,119.688790,46.135958,33.419992,77.736214
2,29.27604,99.329006,47.880669,36.125636,97.685713
3,27.52354,83.217543,44.978743,38.717843,Taxi
4,25.657834,71.577093,44.049447,39.600523,Pickup2




R4 -> Optimal Policy for episode - 966:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,Taxi,46.035137
1,30.68868,119.645086,45.736145,29.420721,56.152625
2,29.26747,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.64695,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 967:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,Taxi,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 968:
Pick Up points :  [(4, 4), (0, 1)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,25.646950,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 969:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.101247,0.488238,6.292824,Pickup2,136.927617
1,0,1.025096,10.620903,195.107667,131.308399
2,-0.064620,1.577359,18.040111,164.413396,131.017930
3,Taxi,2.055178,18.307493,140.365767,142.671756
4,-0.128396,2.265851,19.471006,123.940011,Pickup1




R4 -> Optimal Policy for episode - 970:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,Taxi,68.715045




R4 -> Optimal Policy for episode - 971:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,Taxi,82.322978,68.715045




R4 -> Optimal Policy for episode - 972:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,29.288676,18.534011
1,74.401081,130.208196,56.760049,30.779163,Taxi
2,146.488785,117.019761,62.360386,32.701207,19.556280
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 973:
Pick Up points :  [(0, 3), (4, 0)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,Taxi




R4 -> Optimal Policy for episode - 974:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,Taxi,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 975:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0,-0.302113,-7.778377,6.198853,42.961035
2,Taxi,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 976:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.86227,60.604278
1,97.754167,123.233892,75.826565,53.30909,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,Taxi,61.778079,Pickup1




R4 -> Optimal Policy for episode - 977:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.101247,0.488238,6.292824,Pickup2,136.927617
1,0,1.025096,10.620903,195.107667,131.308399
2,-0.064620,1.577359,18.040111,164.413396,131.017930
3,Taxi,2.055178,18.307493,140.365767,142.671756
4,-0.128396,2.265851,19.471006,123.940011,Pickup1




R4 -> Optimal Policy for episode - 978:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.5312,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,29.25054,99.156665,46.197079,22.522708,13.362443
3,27.494805,Taxi,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 979:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,Taxi
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 980:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 981:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.068927,Pickup2,113.165964
1,0.0,0.344218,4.875437,134.954974,119.702382
2,-0.088932,0.866396,12.666199,Taxi,133.481447
3,-0.149726,1.294357,13.002945,100.701833,155.908685
4,-0.196035,1.437771,13.896039,91.060312,Pickup1




R4 -> Optimal Policy for episode - 982:
Pick Up points :  [(0, 1)]
{(0, 1)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.531200,Pickup1,48.550972,21.308604,12.855466
1,30.673316,119.558723,44.945519,21.518895,13.089208
2,Taxi,99.156665,46.197079,22.522708,13.362443
3,27.494805,83.009065,43.230913,23.653547,13.527906
4,25.625484,71.342034,42.283009,23.578512,13.522197




R4 -> Optimal Policy for episode - 983:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,64.931539,82.211609,54.579964,Taxi,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 984:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,92.820734,Pickup1,72.977174,Taxi,27.649014
1,103.031934,142.257746,77.614576,45.693855,28.59528
2,218.487168,148.127999,88.150059,48.729943,29.295483
3,252.940416,157.670652,90.329694,50.782818,29.623299
4,Pickup2,163.443354,91.260965,50.932411,29.647909




R4 -> Optimal Policy for episode - 985:
Pick Up points :  [(4, 4), (4, 0)]
{(4, 4), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,86.307721,103.298405,63.123016,47.86227,60.604278
1,Taxi,123.233892,75.826565,53.30909,71.708829
2,218.469730,148.124636,89.265413,57.889224,85.598271
3,252.972596,157.815094,91.530978,60.952708,103.306512
4,Pickup2,163.589514,92.482577,61.778079,Pickup1




R4 -> Optimal Policy for episode - 986:
Pick Up points :  [(0, 3), (4, 4)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,Taxi,-0.653572,Pickup2,96.384091
1,0.0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 987:
Pick Up points :  [(4, 0)]
{(4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.123846,67.97752,36.136603,24.6696,Taxi
1,64.762455,81.446548,46.607184,28.457149,18.410554
2,145.287056,98.206985,58.304349,31.580375,19.107930
3,168.293152,104.777873,59.857579,33.512984,19.413184
4,Pickup1,108.628708,60.50639,33.620854,19.431440




R4 -> Optimal Policy for episode - 988:
Pick Up points :  [(4, 0), (0, 3)]
{(4, 0), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,57.296550,68.840399,45.891897,Pickup2,92.324382
1,Taxi,82.211609,54.579964,132.074122,88.536881
2,145.436261,98.908214,64.982411,111.383251,82.609772
3,168.440800,105.447188,65.720577,94.603109,75.676104
4,Pickup1,109.287251,66.005193,82.322978,68.715045




R4 -> Optimal Policy for episode - 989:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,Taxi,0.0,-1.056112,Pickup1,90.622212
1,0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,1.196706,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 990:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,0,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,Taxi,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 991:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,48.425656,Pickup1,103.126169,Pickup2,94.25981
1,47.164521,183.400591,97.005245,135.204294,90.449543
2,45.058355,152.983575,87.615878,114.213688,84.460409
3,42.440327,128.523555,76.821295,97.106267,77.431513
4,39.642445,110.814986,74.140355,Taxi,70.373403




R4 -> Optimal Policy for episode - 992:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,72.915738,49.886511,82.473170,Taxi




R4 -> Optimal Policy for episode - 993:
Pick Up points :  [(0, 1), (4, 4)]
{(0, 1), (4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.545841,Pickup1,49.304559,28.110986,46.035137
1,30.688680,119.645086,45.736145,29.420721,56.152625
2,29.267470,99.271019,47.313034,31.548779,69.538046
3,27.513878,83.147333,44.389031,33.640524,87.066704
4,Taxi,71.497886,43.453215,34.185877,Pickup2




R4 -> Optimal Policy for episode - 994:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-0.653572,Pickup2,96.384091
1,Taxi,0.249026,4.221366,130.947302,97.993771
2,-0.092559,0.760348,11.942958,110.978767,105.168296
3,-0.156789,1.177676,12.204007,95.568564,118.823544
4,-0.206357,1.310144,13.022303,85.096285,Pickup1




R4 -> Optimal Policy for episode - 995:
Pick Up points :  [(0, 3), (0, 1)]
{(0, 1), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,31.795152,Pickup1,63.609344,Pickup2,92.864901
1,30.947679,121.043281,59.26942,132.862904,89.058044
2,29.535663,100.698342,54.723349,111.937961,83.081352
3,27.789795,84.572848,51.04477,94.929996,76.076561
4,25.929107,Taxi,49.886511,82.473170,69.036577




R4 -> Optimal Policy for episode - 996:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-13.71886,5.567613,32.815022
1,0.0,Taxi,-7.778377,6.198853,42.961035
2,-0.134346,-0.459090,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.76418,Pickup1




R4 -> Optimal Policy for episode - 997:
Pick Up points :  [(0, 3)]
{(0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0,-1.056112,Pickup1,90.622212
1,0.0,0.168138,3.727315,129.154253,86.829298
2,-0.095727,0.667766,11.3487,108.410339,80.890192
3,-0.163008,1.074303,11.504844,91.582841,73.940486
4,-0.215481,Taxi,12.237289,79.271361,66.96289




R4 -> Optimal Policy for episode - 998:
Pick Up points :  [(4, 4)]
{(4, 4)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.117813,0.0,-13.71886,5.567613,32.815022
1,0.0,-0.302113,-7.778377,Taxi,42.961035
2,-0.134346,-0.45909,0.507467,8.256236,56.520630
3,-0.247015,-0.426061,0.794167,11.403799,74.298082
4,-0.339642,-0.430636,0.976995,12.764180,Pickup1




R4 -> Optimal Policy for episode - 999:
Pick Up points :  [(0, 1), (4, 0)]
{(0, 1), (4, 0)} pickedup


Unnamed: 0,0,1,2,3,4
0,68.223628,Pickup1,56.132085,Taxi,18.534011
1,74.401081,130.208196,56.760049,30.779163,19.096135
2,146.488785,117.019761,62.360386,32.701207,19.55628
3,168.872977,111.041094,61.582046,34.217334,19.786935
4,Pickup2,110.499202,61.580103,34.228066,19.790236




R4 -> Optimal Policy for episode - 1000:
Pick Up points :  [(4, 4), (0, 3)]
{(4, 4), (0, 3)} pickedup


Unnamed: 0,0,1,2,3,4
0,-0.101247,0.488238,6.292824,Pickup2,136.927617
1,0.0,1.025096,10.620903,195.107667,131.308399
2,-0.06462,1.577359,18.040111,164.413396,131.017930
3,-0.103109,2.055178,18.307493,140.365767,142.671756
4,-0.128396,2.265851,19.471006,Taxi,Pickup1





**Compute the fraction of the time that the agent prefers the premium customer over the regular customers over those episodes where both types of customers have made requests for pickup**

In [6]:
# Compute the fraction of the time that the agent prefers the premium customer
premium_preference_fraction = premium_preference_count / 1000
print(f"Premium Preference Fraction (R4): {premium_preference_fraction:.2%}")

Premium Preference Fraction (R4): 7.50%


Yeshwanth created the Value Iteration algorithm and the Reinforcement Learning method to determine the best course of action for the scenarios (R1-R4) that are provided.
Using either numeric values in each cell or arrows to depict paths, he created a visual depiction of the ideal policy based on the Value Iteration outcomes.
Participated in adding comments to explain the reasoning behind decisions and any complicated parts, as well as implementing the functionality to create random episodes with clients and taxi positions based on the given probabilities.
Anisha has made sure that the activities of the reinforcement learning agent comply with the project's environmental standards by integrating it into the stated environment while considering the grid structure, client pickup locations, and dangerous zones.
She carried out the episode execution and made sure that, when needed, the best policy was shown for each situation (R1-R4). Moreover, produced outputs in the Collab environment that were ordered and comprehensible, and incorporated them into the final PDF.
Before beginning the implementation, both team members worked closely together to develop the overall project strategy.


Yeshwanth Pulapa id - 11643259
Peddireddy Anisha Reddy  id - 11649037