In [52]:
import pandas as pd
import numpy as np
import os
import json
import matplotlib.path as mplPath
import pickle
import sys
import itertools
import gmplot
import matplotlib.colors as colors
import matplotlib.cm as cmx
import matplotlib.pyplot as plt
from collections import defaultdict 
from geojson import MultiPolygon
from shapely.geometry import Polygon
from plotly.graph_objs import Scattermapbox, Marker, Layout, Data
import warnings
warnings.filterwarnings('ignore')

In [5]:
class MDP(object):
    """Markov decision process built from the trips of one taxi driver or group.

    A state is a triple ``(node, occupancy, reward_interval)``: ``node`` is the
    matrix index of a ride area, ``occupancy`` is ``'e'`` (empty) or ``'f'``
    (full), and ``reward_interval`` is a ``(low, high)`` bucket of cumulative
    earnings. An action is the node index to move to. The constructor derives
    every statistic from ``taxi_data`` and fills ``self.P`` (transition
    probabilities) and ``self.R`` (rewards), both of shape ``(n, m, n)`` for
    ``n`` states and ``m`` actions.
    """

    def __init__(self, driver_id, taxi_data, ride_areas, neighborhoods, 
                 interval_size, change_pairs, aggregate=False):
        """Initializing the MDP to have needed statistics.

        :param driver_id: Hack license from the data for the specified driver
        or list of hack licenses for a group of drivers.
        :param taxi_data: DataFrame of trips. The columns read by this class
        are hack_license, pickup_datetime, start_trip_area, end_trip_area,
        prev_trip_area, profit, trip_time, earn_rate, seek_time and day_start.
        :param ride_areas: iterable of integer ride keys that correspond to a
        neighborhood in New York.
        :param neighborhoods: list of numpy arrays of the polygon of coordinates
        that define a neighborhood. Each array is a certain dimension N x 2
        where N varies. The first column contains the latitude points and the
        second column contains the longitude points.
        :param interval_size: Size of the reward intervals in dollars.
        :param change_pairs: Stored on the instance; not otherwise used by the
        methods of this class.
        :param aggregate: Boolean variable to indicate whether to aggregate a
        group of drivers if this field is true.
        """

        self.driver_id = driver_id
        self.neighborhoods = neighborhoods
        self.interval_size = interval_size
        self.change_pairs = change_pairs

        # Materialised as a list: callers pass ``dict.keys()``, which on
        # Python 3 is a view that can be neither indexed nor pickled.
        self.ride_areas = list(ride_areas)

        # Dictionary to hold the mapping from area key to matrix index.
        self.mapping = {area: index for index, area in enumerate(self.ride_areas)}

        # Dictionary to hold the mapping from matrix index to area key.
        self.inverse_mapping = {v: k for k, v in self.mapping.items()}

        # A real list (not a key view) so the finished MDP can be pickled.
        self.nodes = list(self.inverse_mapping.keys())

        if aggregate:
            selected = taxi_data['hack_license'].isin(list(driver_id))
        else:
            selected = taxi_data['hack_license'] == driver_id

        # Work on an explicit copy: columns are rewritten and added below.
        self.data = taxi_data.loc[selected].copy()

        # Changing the areas to correspond to self.nodes.
        for column in ('start_trip_area', 'end_trip_area', 'prev_trip_area'):
            self.data[column] = self.data[column].apply(lambda x: self.mapping[x])

        n_nodes = len(self.nodes)

        # Matrix to hold the probability of starting a trip in each area.
        self.demand = np.zeros((n_nodes, 1))

        self.trans_prob = np.zeros((n_nodes, n_nodes))

        self.search_time = np.zeros((n_nodes, 1))
        self.node_earn_rate = np.zeros((n_nodes, 1))
        self.search_rewards = np.zeros((n_nodes, 1))

        self.drive_time_avg = np.zeros((n_nodes, n_nodes))
        self.drive_time_std = np.zeros((n_nodes, n_nodes))

        self.fare_avg = np.zeros((n_nodes, n_nodes))
        self.fare_std = np.zeros((n_nodes, n_nodes))

        self.earn_rate_avg = np.zeros((n_nodes, n_nodes))
        self.earn_rate_std = np.zeros((n_nodes, n_nodes))

        self.full_reward_avg = np.zeros((n_nodes, n_nodes))
        self.full_reward_std = np.zeros((n_nodes, n_nodes))

        self.empty_drive_reward = np.zeros((n_nodes, n_nodes))
        self.empty_reward_avg = np.zeros((n_nodes, n_nodes))

        self.empty_policy = []

        # Median daily earnings value for driver or set of drivers.
        self.ref = None

        # Simulation bookkeeping, (re)set by initialize() and updated by step().
        self.traj = None
        self.T = 30
        self.state = None
        self.N = None

        self.states = []
        self.final = []
        self.state2num = {}
        self.num2state = {}
        self.X = []

        self.actions = []
        self.U = []
        self.m = None

        self.reward_intervals = []

        self.transitions = {}

        self.rewards = {}

        self.create_action_space()

        self.create_state_space(aggregate)

        self.calculate_params()

        # The initial state will always be the node with highest demand, meaning
        # the most rides beginning in it, the taxi being empty, and the reward
        # in the first interval [0, interval_size).
        self.initial = (max(enumerate(self.demand.tolist()), key=lambda x: x[1])[0], 
                        'e', self.reward_intervals[0])

        self.P = np.zeros((self.n, self.m, self.n))
        self.R = np.zeros((self.n, self.m, self.n))

        self.get_trans_and_rewards()

        self.check_probabilities()

        # The per-trip frame is dropped once every statistic has been derived
        # from it (presumably to keep the pickled object small).
        del self.data


    def step(self, a):
        """Take action ``a`` from the current state and sample the next state.

        The visited (state, action) pair is appended to ``self.traj`` and
        counted in ``self.N``. ``initialize`` must have been called first.

        :param a: Index of the action to take.

        :return: The number of the sampled next state.
        """

        self.traj.append([self.state, a])
        self.N[self.state, a] += 1

        sampled = np.random.choice(self.X, 1, False, list(self.P[self.state, a, :]))

        # Index the one-element result explicitly; int() on a whole array is
        # deprecated in recent NumPy releases.
        self.state = int(sampled[0])

        return int(self.state)


    def initialize(self, state=0):
        """Reset the simulation bookkeeping.

        :param state: State number to start from (defaults to 0).

        :return: Nothing is returned but self.state, self.traj and self.N
        are reset.
        """

        self.state = state
        self.traj = []
        self.N = np.zeros((self.n, self.m))


    def create_action_space(self):
        """Creating the complete action space for the MDP. 

        The action space contains one action per node: the node to move to.

        :return: Nothing is returned but the class attributes self.actions,
        self.U and self.m are updated.
        """

        self.actions = list(self.nodes)

        # self.U is an alias of the same list object, not a copy.
        self.U = self.actions

        self.m = len(self.U)


    def create_state_space(self, aggregate):
        """Creating the full state space for the MDP.

        The complete state space is X = {N x S x R}\\X_na where N is the index
        set of the zones or nodes in the city with N nodes, S = {e, f} are the
        states indicating if the taxi is empty or full, and R is the discretized
        cumulative fare value space. The states that are not allowed are 
        X_na = {(i, f, r)|r in R_terminal, i in N}.

        :param aggregate: Boolean variable for whether the MDP is over a set of 
        drivers or to aggregate for.

        :return: Nothing is returned but the class attributes self.ref, 
        self.reward_intervals, self.states, self.num2state, self.state2num,
        self.X, self.final and self.n are updated.
        """

        self.get_ref(aggregate)
        self.create_reward_intervals()

        i = 0
        for state in itertools.product(self.nodes, ['e', 'f'], self.reward_intervals):
            in_terminal_interval = float('inf') in state[2]

            # States that are not allowed: full taxi in the terminal interval.
            if state[1] == 'f' and in_terminal_interval:
                continue

            self.states.append(state)

            self.num2state[i] = state
            self.state2num[state] = i
            self.X.append(i)

            # Empty taxi in the terminal interval is a final state.
            if state[1] == 'e' and in_terminal_interval:
                self.final.append(i)

            i += 1

        self.n = len(self.X)


    def get_ref(self, aggregate):  
        """Calculating the reference point by finding the median daily earnings.

        With ``aggregate`` set, each day's total profit is divided by the
        number of distinct drivers active that day before taking the median.

        :param aggregate: Boolean variable for whether the MDP is over a set of 
        drivers or to aggregate for.

        :return: Nothing is returned but the class attribute self.ref is
        updated (and a 'pickup_date' column is added to self.data).
        """

        self.data['pickup_date'] = self.data['pickup_datetime'].apply(lambda x: x.date())

        daily_earnings = []

        for date in sorted(self.data['pickup_date'].unique()):
            driver_day = self.data.loc[self.data['pickup_date'] == date]
            day_total = sum(driver_day['profit'])

            if aggregate:
                active_drivers = driver_day['hack_license'].unique()
                day_total = day_total/float(len(active_drivers))

            daily_earnings.append(day_total)

        self.ref = np.median(np.array(daily_earnings)) 


    def create_reward_intervals(self):
        """Creating rewards intervals until the reference point.

        Intervals of width self.interval_size are appended while their upper
        bound is below self.ref; the next interval is cut off at self.ref and
        a final (self.ref, inf) interval is added.

        :return: Nothing is returned but self.reward_intervals is set.
        """

        reward = self.interval_size

        while reward < self.ref:
            self.reward_intervals.append((reward - self.interval_size, reward))
            reward += self.interval_size

        self.reward_intervals.append((reward - self.interval_size, self.ref))

        # This reward is the terminal reward state.
        self.reward_intervals.append((self.ref, float('inf')))


    def calculate_params(self):
        """Getting parameters for the MDP.

        :return: Nothing is returned but self.demand, self.trans_prob,
        self.drive_time_avg, self.drive_time_std, self.fare_avg,
        self.fare_std, self.earn_rate_avg, self.earn_rate_std,
        self.full_reward_avg, self.full_reward_std, self.empty_drive_reward,
        self.search_rewards, self.empty_search, self.empty_drive and
        self.empty_reward_avg are all set.
        """

        for start in sorted(self.nodes):
            area_start = self.data.loc[self.data['start_trip_area'] == start]

            self.demand[start] = len(area_start)/float(len(self.data))

            # Trips found without leaving the area: previous and current trip
            # share the area, the trip does not open the day, and the seek
            # time is at most 20.
            t_search_trips = self.data.loc[(self.data['prev_trip_area'] == start)
                                           & (self.data['start_trip_area'] == start)]
            t_search_trips = t_search_trips.loc[t_search_trips['day_start'] == False]
            t_search_trips = t_search_trips.loc[t_search_trips['seek_time'] <= 20]

            for end in sorted(self.nodes):
                area_end = area_start.loc[area_start['end_trip_area'] == end]

                if len(area_end) == 0:
                    # No observed trip for this pair; the None values are
                    # stored as NaN in the float matrices below.
                    trans_prob = 0.0
                    drive_avg = None
                    drive_std = None
                    fare_avg = 0.0
                    fare_std = 0.0
                    earn_rate_avg = None
                    earn_rate_std = None
                else:
                    trans_prob = len(area_end)/float(len(area_start))
                    drive_avg = area_end['trip_time'].mean()
                    drive_std = area_end['trip_time'].std()
                    fare_avg = area_end['profit'].mean()
                    fare_std = area_end['profit'].std()
                    earn_rate_avg = area_end['earn_rate'].mean()
                    earn_rate_std = area_end['earn_rate'].std()

                self.trans_prob[start, end] = trans_prob

                self.drive_time_avg[start, end] = drive_avg
                self.drive_time_std[start, end] = drive_std

                self.fare_avg[start, end] = fare_avg
                self.fare_std[start, end] = fare_std

                self.earn_rate_avg[start, end] = earn_rate_avg
                self.earn_rate_std[start, end] = earn_rate_std

                self.full_reward_avg[start, end] = fare_avg
                self.full_reward_std[start, end] = fare_std

                if earn_rate_avg is not None:
                    # Cost of driving empty: drive time times the earn rate.
                    self.empty_drive_reward[start, end] = -drive_avg/float(earn_rate_avg**-1)
                else:
                    self.empty_drive_reward[start, end] = 0.0

            if len(t_search_trips) == 0 or np.isnan(self.earn_rate_avg[start, start]) \
                or self.earn_rate_avg[start,start] == 0:

                self.search_rewards[start] = 0
            else:
                self.search_rewards[start] = -t_search_trips['seek_time'].mean()/float(self.earn_rate_avg[start, start]**-1)

        # Entries still at 0 had no usable data: remember where they are, then
        # replace them with the smallest reward observed anywhere.
        self.empty_search = np.where(self.search_rewards == 0)[0].tolist()
        self.search_rewards[self.search_rewards == 0] = np.min(self.search_rewards)

        empty_row_index, empty_col_index = np.where(self.empty_drive_reward == 0.0)

        self.empty_drive = list(zip(empty_row_index, empty_col_index))
        self.empty_drive_reward[self.empty_drive_reward == 0] = np.min(self.empty_drive_reward)

        # Broadcasting adds the search reward of the destination (column) node.
        self.empty_reward_avg = self.empty_drive_reward + self.search_rewards.T

        for node in sorted(self.nodes):
            # In the case of the i to i transition, it is only the search time.
            # This line is removing the empty_drive time reward.
            self.empty_reward_avg[node, node] -= self.empty_drive_reward[node, node]


    @staticmethod
    def _uniform_cdf(x, low, high):
        """CDF at ``x`` of a uniform distribution on the interval [low, high)."""

        if x < low:
            return 0
        if x < high:
            return (x - low)/float(high - low)
        return 1


    def get_trans_and_rewards(self):
        """Finding the transition probabilities and rewards for the MDP.

        Every (state, action, state) triple is visited. Triples that match
        none of the cases below get probability 0 and reward 0.

        :return: Nothing is returned but self.transitions, self.P, self.rewards, 
        and self.R are updated.
        """

        for transition in itertools.product(self.states, self.actions, self.states):
            state1, action, state2 = transition

            state_num1 = self.state2num[state1]
            state_num2 = self.state2num[state2]

            # Default for every case not matched below (this includes all
            # full -> full transitions, which never happen).
            prob = 0
            reward = 0

            occupancy = (state1[1], state2[1])

            if occupancy == ('e', 'e'):

                # If in final state, you are guaranteed to stay there.
                # The reward is always 0 in this case.
                if float('inf') in state1[2] and float('inf') in state2[2] \
                                             and state1[0] == state2[0]:
                    prob = 1

            elif occupancy == ('e', 'f'):

                # This is case of driver picking someone up after a trip: the
                # reward interval is unchanged and the action names the pickup node.
                if state1[2] == state2[2] and float('inf') not in state1[2] and action == state2[0]:
                    prob = 1
                    reward = self.empty_reward_avg[state1[0], state2[0]]

            elif occupancy == ('f', 'e'):

                # This is the case some fare is gained from the trip.
                if state2[2][0] >= state1[2][0]:

                    # This piece of code is finding the probability:
                    # P((i,f,r), u, (j,e,r')) = P_dest(i,j)P(a_l - E[F(i,j)] <= r <= b_l - E[F(i,j)])
                    # The following code uses that r is assumed to be uniformly
                    # distributed on the interval of the reward state.

                    # Bounds on the reward being transitioned to.
                    a_l, b_l = state2[2]

                    # Bounds on the current reward.
                    a_i, b_i = state1[2]

                    start = state1[0]
                    end = state2[0]

                    p_dest = self.trans_prob[start, end]
                    e_fare = self.fare_avg[start, end]

                    # CDF of the upper and of the lower bound.
                    F_1 = self._uniform_cdf(b_l - e_fare, a_i, b_i)
                    F_2 = self._uniform_cdf(a_l - e_fare, a_i, b_i)

                    p_reward = F_1 - F_2

                    prob = p_dest * p_reward

                    # Reaching the terminal interval pays a fixed 1000.
                    if float('inf') in state2[2]:
                        reward = 1000
                    else:
                        reward = self.full_reward_avg[start, end]

            self.transitions[transition] = prob
            self.P[state_num1, action, state_num2] = prob

            self.rewards[transition] = reward
            self.R[state_num1, action, state_num2] = reward


    def check_probabilities(self):
        """This function is to ensure that all probability functions are valid.

        To ensure the MDP is correct this function contains checks that each of
        the probability functions are valid, for transitions and policies.

        :return: Nothing but assert error if probabilities are incorrect.
        """

        # Checking valid density function for the transition probability:
        # for every (state, action) the probabilities over next states sum to 1.
        totals = self.P.sum(axis=2)
        assert np.all(np.abs(totals - 1) < 1e-3), 'Transitions do not sum to 1'

        # Checking that there is no None values of any of the MDP.
        assert not pd.isnull(self.P).any(), 'None value in transitions'
        assert not pd.isnull(self.R).any(), 'None value in rewards'
        assert not pd.isnull(self.U).any(), 'None value in actions'

In [83]:
import time

In [84]:
dropbox_dir = '/Users/tfiez/Dropbox/riskTaxi'

# Time the data load itself: the timer has to start before load_data runs,
# otherwise the printed duration is always close to zero.
start = time.time()
taxi_data, driver_areas, neighborhoods, change_pairs = load_data(dropbox_dir)
end = time.time()

# Parenthesised form prints the same value on Python 2 and Python 3.
print(end - start)

200.497710943


In [94]:
# Write the trips without the positional index: saving it makes the index come
# back as a spurious 'Unnamed: 0' column when the file is read again.
taxi_data.to_csv('taxi_data.csv', sep=',', index=False)

In [90]:
import pickle

In [93]:
# Persist the loaded lookup structures. Each file is opened in a ``with``
# block so the handle is flushed and closed as soon as the dump finishes.
# NOTE(review): "neighorhoods.p" is spelled exactly as before (sic) so that
# existing code loading the file by that name keeps working.
for payload, file_name in ((neighborhoods, "neighorhoods.p"),
                           (driver_areas, "driver_areas.p"),
                           (change_pairs, "change_pairs.p")):
    with open(file_name, "wb") as pickle_file:
        pickle.dump(payload, pickle_file)

In [None]:
# NOTE(review): unfinished cell -- pickle.dump() is called without the object
# to serialise and the target file, so running it raises TypeError.
pickle.dump()

In [87]:
# Notebook echo: display driver_areas (the cell output is pasted below).
driver_areas

{2010001271: {69: 7.3873873873873865,
  70: 3.783783783783784,
  74: 8.82882882882883,
  75: 1.6666666666666667,
  80: 29.72972972972973,
  81: 12.117117117117116,
  162: 7.072072072072072,
  165: 18.423423423423422,
  167: 3.6036036036036037,
  171: 7.3873873873873865},
 2010002704: {70: 6.103896103896104,
  74: 9.35064935064935,
  75: 2.4025974025974026,
  80: 36.81818181818181,
  81: 8.928571428571429,
  155: 0.7142857142857143,
  162: 11.720779220779221,
  165: 13.376623376623375,
  171: 9.285714285714286,
  174: 1.2987012987012987},
 2010002920: {69: 7.648351648351648,
  70: 8.87912087912088,
  74: 5.450549450549451,
  80: 32.175824175824175,
  81: 11.296703296703296,
  83: 2.3736263736263736,
  162: 14.68131868131868,
  165: 10.593406593406593,
  171: 5.4945054945054945,
  174: 1.4065934065934065},
 2010003240: {9: 10.889645114244045,
  70: 12.348079727758872,
  74: 3.3543996110841032,
  80: 24.06417112299465,
  81: 10.06319883325231,
  83: 1.750121536217793,
  162: 21.2445308701

In [86]:
# Notebook echo: display the taxi_data frame (the cell output is pasted below).
taxi_data

Unnamed: 0,hack_license,pickup_datetime,dropoff_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,tolls_amount,total_amount,date,hour,profit,trip_time,start_trip_area,end_trip_area,earn_rate,inverse_earn_rate,prev_trip_area,seek_time,day_start
0,2010007770,2010-03-31 14:07:35,2010-03-31 14:11:41,-73.996170,40.723503,-74.005493,40.726437,0.00,5.50,2010-03-31,14,5.50,4.100000,162,162,1.341463,0.745455,162,0.000000,True
1,2010007579,2010-03-31 14:11:00,2010-03-31 14:20:00,-73.990532,40.738674,-73.959320,40.770729,0.00,10.70,2010-03-31,14,10.70,9.000000,80,165,1.188889,0.841121,80,0.000000,True
2,2010007770,2010-03-31 14:16:40,2010-03-31 14:25:36,-74.007721,40.725033,-73.988243,40.759418,0.00,10.92,2010-03-31,14,10.92,8.933333,162,9,1.222388,0.818071,162,4.983333,False
3,2010007519,2010-03-31 14:21:00,2010-03-31 14:32:00,-73.969475,40.761173,-73.989288,40.777843,0.00,9.70,2010-03-31,14,9.70,11.000000,80,74,0.881818,1.134021,80,0.000000,True
4,2010002920,2010-03-31 14:34:00,2010-03-31 14:43:00,-73.987953,40.728725,-74.006447,40.743912,0.00,10.70,2010-03-31,14,10.70,9.000000,70,80,1.188889,0.841121,70,0.000000,True
5,2010007519,2010-03-31 14:42:00,2010-03-31 14:47:00,-73.997803,40.751411,-74.006737,40.739384,0.00,5.90,2010-03-31,14,5.90,5.000000,80,162,1.180000,0.847458,74,10.000000,False
6,2010002920,2010-03-31 14:49:00,2010-03-31 14:55:00,-74.000076,40.734756,-74.004494,40.717167,0.00,7.10,2010-03-31,14,7.10,6.000000,162,171,1.183333,0.845070,80,6.000000,False
7,2010007519,2010-03-31 14:59:00,2010-03-31 15:20:00,-73.997635,40.746300,-73.964127,40.803154,0.00,19.46,2010-03-31,14,19.46,21.000000,80,74,0.926667,1.079137,162,12.000000,False
8,2010002920,2010-03-31 15:01:00,2010-03-31 15:06:00,-74.001633,40.724751,-73.989380,40.726067,0.00,6.50,2010-03-31,15,6.50,5.000000,162,70,1.300000,0.769231,171,6.000000,False
9,2010002920,2010-03-31 15:12:00,2010-03-31 15:19:00,-73.987450,40.720238,-74.004280,40.721874,0.00,8.20,2010-03-31,15,8.20,7.000000,70,162,1.171429,0.853659,70,6.000000,False


In [None]:
# Build one MDP per driver and pickle each of them under data/MDP_SIM_NEW/.
taxi_data, driver_areas, neighborhoods, change_pairs = load_data(dropbox_dir)

for interval_size in [100]:
    for driver in driver_areas:
        # interval_size and change_pairs are passed by keyword: MDP.__init__
        # takes interval_size before change_pairs, so the former positional
        # order (change_pairs, interval_size) swapped the two values.
        # The area keys are handed over as a list rather than a key view.
        driver_mdp = MDP(driver, taxi_data, list(driver_areas[driver]), neighborhoods,
                         interval_size=interval_size, change_pairs=change_pairs)

        file_name = 'r' + str(interval_size) + '_driver_' + str(driver) + '.pkl'

        with open(os.path.join(dropbox_dir + '/data/MDP_SIM_NEW/', file_name), 'wb') as f:
            pickle.dump(driver_mdp, f)

In [None]:
def main():
    """Command line entry point: build and store MDPs for the taxi data.

    Expected arguments: ``<user> <mode>``.

    * ``user`` selects the Dropbox directory (only 'tanner' is known).
    * ``mode`` 'default' builds one MDP per driver and pickles each one.
    * ``mode`` 'sample' loads aggregated data for several sample sizes and,
      for each, stores the sampled trips, the policy matrix returned by
      get_policy and the pickled MDP.

    Missing arguments or an unknown user print 'Bad Input' and exit with
    status 1 (these cases used to end in an uncaught exception). An unknown
    mode prints 'Bad Input' and exits with the default status, as before.
    """
    args = sys.argv

    # Insert path to dropbox location for riskTaxi.
    tanner_dir = '/Users/tfiez/Dropbox/riskTaxi'
    known_dirs = {'tanner': tanner_dir}

    if len(args) < 3:
        print('Bad Input')
        sys.exit(1)

    dropbox_dir = known_dirs.get(args[1].lower())

    if dropbox_dir is None:
        # Without this guard dropbox_dir would be unbound further down.
        print('Bad Input')
        sys.exit(1)

    mode = args[2]

    if mode == 'default':
        taxi_data, driver_areas, neighborhoods, change_pairs = load_data(dropbox_dir)

        for interval_size in [100]:
            for driver in driver_areas:
                # Keywords keep interval_size and change_pairs on the right
                # parameters; the former positional order swapped them.
                driver_mdp = MDP(driver, taxi_data, list(driver_areas[driver]), neighborhoods,
                                 interval_size=interval_size, change_pairs=change_pairs)

                file_name = 'r' + str(interval_size) + '_driver_' + str(driver) + '.pkl'

                with open(os.path.join(dropbox_dir + '/data/MDP_SIM_NEW/', file_name), 'wb') as f:
                    pickle.dump(driver_mdp, f)

    elif mode == 'sample':

        interval_size = 100

        for samples in [100, 250, 500, 1000, 2500, 5000, 10000]:
            taxi_data, driver_areas, neighborhoods, change_pairs = load_data(
                dropbox_dir, driver_ids=None, aggregate=True, num_samples=samples)

            taxi_data.to_csv(os.path.join(dropbox_dir + '/data/TAXI_DATA/',
                                          str(samples) + '_samples_taxi_data' + '.csv'), index=False)

            # First key of the dictionary; list(...) also works where
            # dict.keys() cannot be indexed.
            driver = list(driver_areas)[0]

            driver_mdp = MDP(driver, taxi_data, list(driver_areas[driver]), neighborhoods,
                             interval_size=interval_size, change_pairs=change_pairs,
                             aggregate=True)

            N = get_policy(mdp=driver_mdp, driver=tuple(driver_mdp.driver_id), data=taxi_data.copy())

            sample_name = 'r' + str(interval_size) + '_sample_' + str(samples)

            np.savetxt(os.path.join(dropbox_dir + '/data/MDP_SIM_POLICY_V4/', sample_name + '.csv'),
                       N, delimiter=',')

            with open(os.path.join(dropbox_dir + '/data/MDP_SIM_NEW_V4/', sample_name + '.pkl'), 'wb') as f:
                pickle.dump(driver_mdp, f)

    else:
        print('Bad Input')
        sys.exit()


# NOTE(review): get_policy is defined further down in this file. When the file
# is executed top to bottom as a script, the 'sample' mode reaches the call
# before that definition has run.
if __name__ == "__main__":
    main()

In [None]:
def get_node_counts(mdp, driver, data):
    """Counting the node-to-node moves a driver made between trips.

    For every trip that is followed by another trip of the same driver on the
    same date, the move goes from the node where the trip ended to the node
    where the next trip started.

    :param mdp: MDP object created for some set of taxi drivers; its
    ``mapping`` (area key to node index) and ``m`` attributes are used.
    :param driver: Driver id.
    :param data: Data including the driver data. The passed frame is not
    modified.

    :return: N, an (mdp.m x mdp.m) array where N[i, j] is the number of
    observed moves from node i to node j.
    """

    # Explicit copy: the columns added below must not touch the caller's frame.
    data = data.loc[data['hack_license'] == driver].copy()

    data['pickup_datetime'] = pd.to_datetime(data['pickup_datetime'])
    data['dropoff_datetime'] = pd.to_datetime(data['dropoff_datetime'])

    data['date'] = data['pickup_datetime'].apply(lambda x: x.date())

    data['next_trip_area'] = data.groupby(['hack_license', 'date'])['start_trip_area'].shift(-1)

    # Drops the last trip of each day (no next trip) and any row that has a
    # missing value in another column.
    data.dropna(inplace=True)

    # Starting and ending areas of policy decision following each transaction.
    data['start_choice'] = data['end_trip_area'].apply(lambda x: mdp.mapping[x])
    data['end_choice'] = data['next_trip_area'].apply(lambda x: mdp.mapping[x])

    N = np.zeros((mdp.m, mdp.m))

    counts = data.groupby(['start_choice', 'end_choice'])['hack_license'].count()

    # Only observed pairs appear in the grouped result; every other cell keeps
    # its initial 0. This avoids testing the truth value of a NumPy array.
    for (start_node, end_node), count in zip(counts.index, counts.values):
        N[int(start_node), int(end_node)] = count

    return N

In [None]:
def get_policy(mdp, driver=None, data=None):
    """Finding the policy for the agent.

    :param mdp: MDP object created for some set of taxi drivers.
    :param driver: Driver id, or a tuple/list/set of driver ids. Defaults to
    ``mdp.driver_id`` when omitted.
    :param data: Data including the driver data. Defaults to ``mdp.data_``
    when omitted. The passed frame is not modified.

    :return: N, an (mdp.n x mdp.m) array. For an empty, non-terminal state the
    row holds the number of observed moves to each action node; a row with no
    observations at all is set to ones. Every other state (full, or terminal
    reward interval) gets the uniform value 1/len(mdp.actions).
    """

    if data is None:
        # NOTE(review): the MDP class in this file has the line creating
        # ``data_`` commented out, so this lookup raises AttributeError unless
        # the attribute is provided some other way.
        data = mdp.data_

    if driver is None:
        driver = mdp.driver_id

    # Explicit copy: the columns added below must not touch the caller's frame.
    if isinstance(driver, (tuple, list, set)):
        data = data.loc[data['hack_license'].isin(driver)].copy()
    else:
        data = data.loc[data['hack_license'] == driver].copy()

    # A no-op for datetime columns; also accepts timestamps stored as text.
    data['pickup_datetime'] = pd.to_datetime(data['pickup_datetime'])
    data['date'] = data['pickup_datetime'].apply(lambda x: x.date())

    # Tracking the daily cumulative rewards at each transaction.
    data['cum_rewards'] = data.groupby(['hack_license', 'date'])['profit'].cumsum()

    def interval_index(total):
        """Index of the first reward interval [low, high) containing total."""
        for index, (low, high) in enumerate(mdp.reward_intervals):
            if low <= total < high:
                return index
        # Same exception type as the former ``filter(...)[0]`` lookup.
        raise IndexError('cumulative reward %r is outside every reward interval' % (total,))

    # Label indicating what reward interval earnings are at following a transaction.
    data['reward_interval'] = data['cum_rewards'].apply(interval_index)

    data['next_trip_area'] = data.groupby(['hack_license', 'date'])['start_trip_area'].shift(-1)
    data.dropna(inplace=True)

    # Starting and ending areas of policy decision following each transaction.
    data['start_choice'] = data['end_trip_area'].apply(lambda x: mdp.mapping[x])
    data['end_choice'] = data['next_trip_area'].apply(lambda x: mdp.mapping[x])

    # Count every observed (reward interval, start node, end node) triple once,
    # instead of re-filtering the whole frame for each state/action pair.
    observed = data.groupby(['reward_interval', 'start_choice', 'end_choice']).size()
    move_counts = {(int(interval), int(start), int(end)): count
                   for (interval, start, end), count in zip(observed.index, observed.values)}

    # Finding the policy for the data.
    N = np.zeros((mdp.n, mdp.m))

    final_interval = mdp.reward_intervals[-1]

    for state in mdp.states:
        state_num = mdp.state2num[state]

        # Empty and not in final reward indicates a choice is being made.
        if state[1] == 'e' and state[2] != final_interval:
            start_choice = state[0]
            reward_interval = mdp.reward_intervals.index(state[2])

            for action in mdp.actions:
                N[state_num, action] = move_counts.get((reward_interval, start_choice, action), 0)
        else:
            N[state_num, :] = 1/float(len(mdp.actions))

    # Rows without a single observation get the same weight for every action.
    N[~N.any(axis=1)] = 1

    return N