# Uber:   Given locations of customers and drivers, make an "optimal" assignment.

## Use pandas to read in the Excel file `Customers.xlsx` of customer locations

In [1]:
from pprint import pprint
import pandas as pd

In [2]:
# Load the customer spreadsheet from disk into a pandas DataFrame.
customers_df = pd.read_excel(io='Customers.xlsx')

In [3]:
# Show the DataFrame as the cell's output so we can check what was loaded.
customers_df

Unnamed: 0,name,lat,lng
0,C1,33.780775,-84.386301
1,C2,33.77692,-84.38978
2,C3,33.773154,-84.397016


## Pandas is a powerful package with many data handling capabilities. But we will stick to standard python when possible.  So we will first convert this pandas dataframe object into a python List of Dictionaries

In [4]:
# One dictionary per spreadsheet row, keyed by column name.
customer_records = customers_df.to_dict(orient='records')
pprint(customer_records)

[{'lat': 33.780775, 'lng': -84.386301, 'name': 'C1'},
 {'lat': 33.77692, 'lng': -84.38978, 'name': 'C2'},
 {'lat': 33.773154, 'lng': -84.397016, 'name': 'C3'}]


### It will be useful for us to be able to look up information about a Customer based on the `name` of the customer.  So we create a more useful data structure we will name `customers`,  where Customer `name` is a KEY in a Dictionary.  The `name` KEY will retrieve *another* dictionary of information.  To begin with, that dictionary will have one KEY, `location`, which will provide a List with two components, `[lat,lng]`

In [5]:
# Index the customer records by name; each entry holds that customer's [lat, lng] pair
# under the 'location' key.
customers = {
    record['name']: {'location': [record['lat'], record['lng']]}
    for record in customer_records
}
pprint(customers)

{'C1': {'location': [33.780775, -84.386301]},
 'C2': {'location': [33.77692, -84.38978]},
 'C3': {'location': [33.773154, -84.397016]}}


### So, for example, getting the location of customer C2 is now simple: we use `customers['C2']['location']`

In [6]:
# NOTE:  The `f` prefix makes this an f-string (literal string interpolation), so the
#        expression inside the braces is evaluated and inserted into the printed text.
print(f"The location of customer C2 is {customers['C2']['location']}")

The location of customer C2 is [33.77692, -84.38978]


### We will now read in the Driver locations from `Drivers.xlsx` and do the same as above, creating our main structure `drivers`

In [7]:
# Read the driver spreadsheet and convert its rows straight into a list of dictionaries.
driver_records = pd.read_excel(io='Drivers.xlsx').to_dict(orient='records')
pprint(driver_records)

[{'lat': 33.77845, 'lng': -84.400825, 'name': 'D1'},
 {'lat': 33.793711, 'lng': -84.317408, 'name': 'D2'},
 {'lat': 33.775306, 'lng': -84.396123, 'name': 'D3'}]


In [8]:
# Index the driver records by name; each entry holds that driver's [lat, lng] pair
# under the 'location' key.
drivers = {
    record['name']: {'location': [record['lat'], record['lng']]}
    for record in driver_records
}
pprint(drivers)

{'D1': {'location': [33.77845, -84.400825]},
 'D2': {'location': [33.793711, -84.317408]},
 'D3': {'location': [33.775306, -84.396123]}}


### We will create a new dictionary, `distances`, where the KEYS will be `CustName_DriverName` and the VALUES will be the distance between them, so for example, to get the distance between C1 and D2 we would be able to use `distances['C1_D2']`.

### We will use the `geopy` package to compute distances between two locations (it considers the curvature of the earth for us!)

In [9]:
from geopy.distance import geodesic

In [10]:
# For example, use geopy to get the distance between customer C1 and driver D2.
cust_location = customers['C1']['location']
driver_location = drivers['D2']['location']
# geodesic() takes two [lat, lng] points; `.miles` reads the result out in miles.
example_miles = geodesic(cust_location, driver_location).miles
# The message names both endpoints: this is a customer-to-driver distance,
# not a distance between two customers.
print(f'The distance between customer C1 and driver D2 is: {example_miles} miles')

The distance between the customers is: 4.063663522199328 miles


### Now let's create the `distances` dictionary

In [11]:
# Distance in miles for every customer/driver pair, keyed by 'CustomerName_DriverName'.
distances = {
    f"{cust_name}_{driver_name}": geodesic(cust_info['location'], driver_info['location']).miles
    for cust_name, cust_info in customers.items()      # outer: every customer
    for driver_name, driver_info in drivers.items()    # inner: every driver
}
pprint(distances)

{'C1_D1': 0.851121480286203,
 'C1_D2': 4.063663522199328,
 'C1_D3': 0.6794392260826512,
 'C2_D1': 0.6443747120057409,
 'C2_D2': 4.322742415347529,
 'C2_D3': 0.38164534587079507,
 'C3_D1': 0.4257844490992537,
 'C3_D2': 4.795555186316404,
 'C3_D3': 0.15697221526874605}


### We are now ready to form a Linear Program to find an optimal assignment.  

### There are a number of packages for optimization. We will use `PuLP`, which is open source and free but works much the same way as a commercial product

In [12]:
from pulp import *

### We use the PuLP function `LpProblem` to define a model instance, with an objective we will want to Minimize

In [13]:
# Create the (initially empty) optimization model; LpMinimize means the objective
# added later will be minimized.
# NOTE(review): newer PuLP releases appear to warn about spaces in problem names —
#               confirm against the installed version before renaming.
model = LpProblem(name="Uber Assignment", sense=LpMinimize)

### We will define a variable for each Customer/Driver pair.  The variable names will be of the form `X_CustName_DriverName`, so `X_C1_D2` for example.  To have PuLP do this, we first make a List of these pairs, `all_pairs`, without the `X_` prepended.

In [14]:
# Every 'CustomerName_DriverName' combination, with the customer varying slowest.
all_pairs = [f"{customer}_{driver}" for customer in customers for driver in drivers]
pprint(all_pairs)

['C1_D1',
 'C1_D2',
 'C1_D3',
 'C2_D1',
 'C2_D2',
 'C2_D3',
 'C3_D1',
 'C3_D2',
 'C3_D3']


In [15]:
# Create one PuLP decision variable per customer/driver pair (the "arcs" of the assignment).
# "X" is the name prefix, so the pair 'C1_D2' gets the variable X_C1_D2; the returned
# dictionary is keyed by the pair names in all_pairs.
# cat='Binary' restricts each variable to the values 0 or 1.
assignment_vars = LpVariable.dicts("X",all_pairs,cat='Binary')
pprint(assignment_vars)

{'C1_D1': X_C1_D1,
 'C1_D2': X_C1_D2,
 'C1_D3': X_C1_D3,
 'C2_D1': X_C2_D1,
 'C2_D2': X_C2_D2,
 'C2_D3': X_C2_D3,
 'C3_D1': X_C3_D1,
 'C3_D2': X_C3_D2,
 'C3_D3': X_C3_D3}


### We can now add our objective function to `model` which is to Minimize the sum of distance travelled

In [16]:
# Objective: total distance over all chosen pairs, i.e. the sum of distance * X for every pair.
# lpSum builds the expression directly from the terms, rather than accumulating onto an
# empty-string placeholder, which only works if PuLP's operators happen to tolerate a str.
obj = lpSum(distances[var] * assignment_vars[var] for var in all_pairs)
# Adding an expression (rather than a constraint) to the model sets it as the objective;
# the string is the label stored alongside it.
model += obj, "Cost of each Customer Driver Assignment"
pprint(model)

Uber Assignment:
MINIMIZE
0.851121480286203*X_C1_D1 + 4.063663522199328*X_C1_D2 + 0.6794392260826512*X_C1_D3 + 0.6443747120057409*X_C2_D1 + 4.322742415347529*X_C2_D2 + 0.38164534587079507*X_C2_D3 + 0.4257844490992537*X_C3_D1 + 4.795555186316404*X_C3_D2 + 0.15697221526874605*X_C3_D3 + 0.0
VARIABLES
0 <= X_C1_D1 <= 1 Integer
0 <= X_C1_D2 <= 1 Integer
0 <= X_C1_D3 <= 1 Integer
0 <= X_C2_D1 <= 1 Integer
0 <= X_C2_D2 <= 1 Integer
0 <= X_C2_D3 <= 1 Integer
0 <= X_C3_D1 <= 1 Integer
0 <= X_C3_D2 <= 1 Integer
0 <= X_C3_D3 <= 1 Integer



### We now need our "node" constraints: each customer is assigned to exactly one driver, and each driver is assigned exactly one customer.  NOTE:  If we don't have an equal number of customers and drivers, we can take care of this by adding a Dummy customer or driver, but we will ignore this side issue for now.

### To add these "node" constraints we need, for each customer, a list of all "arcs" (variables) emanating from its node.  That is, we need to collect for, say, `C1` the variables `C1_D1, C1_D2, ...`   We will collect these for each customer and add them to our `customers` dictionary.

In [17]:
# Record, for each customer, the names of the variables (arcs) leaving that customer's node.
for cust_name, cust_info in customers.items():
    cust_info['variables'] = [f"{cust_name}_{driver_name}" for driver_name in drivers]
pprint(customers)

{'C1': {'location': [33.780775, -84.386301],
        'variables': ['C1_D1', 'C1_D2', 'C1_D3']},
 'C2': {'location': [33.77692, -84.38978],
        'variables': ['C2_D1', 'C2_D2', 'C2_D3']},
 'C3': {'location': [33.773154, -84.397016],
        'variables': ['C3_D1', 'C3_D2', 'C3_D3']}}


### Now same for drivers, we need all arcs leading into each driver node

In [18]:
# Record, for each driver, the names of the variables (arcs) arriving at that driver's node.
for driver_name, driver_info in drivers.items():
    driver_info['variables'] = [f"{cust_name}_{driver_name}" for cust_name in customers]
pprint(drivers)

{'D1': {'location': [33.77845, -84.400825],
        'variables': ['C1_D1', 'C2_D1', 'C3_D1']},
 'D2': {'location': [33.793711, -84.317408],
        'variables': ['C1_D2', 'C2_D2', 'C3_D2']},
 'D3': {'location': [33.775306, -84.396123],
        'variables': ['C1_D3', 'C2_D3', 'C3_D3']}}


### Now we add these constraints to our `model`

In [19]:
# Add one constraint per customer node: the customer's arcs must sum to exactly 1,
# i.e. each customer is assigned to exactly one driver.
for cust in customers:
    # lpSum builds the left-hand side directly from the variables; there is no need to
    # seed an accumulator with None and rely on PuLP tolerating `None + variable`.
    constraint = lpSum(assignment_vars[var] for var in customers[cust]['variables'])
    # The customer name doubles as the constraint's name in the model.
    model += constraint == 1, cust

In [20]:
# Add one constraint per driver node: the driver's arcs must sum to exactly 1,
# i.e. each driver is assigned exactly one customer.
for driver in drivers:
    # lpSum builds the left-hand side directly from the variables; there is no need to
    # seed an accumulator with None and rely on PuLP tolerating `None + variable`.
    constraint = lpSum(assignment_vars[var] for var in drivers[driver]['variables'])
    # The driver name doubles as the constraint's name in the model.
    model += constraint == 1, driver

In [21]:
# Let's inspect our model: the objective, the constraints and the variables are
# shown as the cell's output.
model

Uber Assignment:
MINIMIZE
0.851121480286203*X_C1_D1 + 4.063663522199328*X_C1_D2 + 0.6794392260826512*X_C1_D3 + 0.6443747120057409*X_C2_D1 + 4.322742415347529*X_C2_D2 + 0.38164534587079507*X_C2_D3 + 0.4257844490992537*X_C3_D1 + 4.795555186316404*X_C3_D2 + 0.15697221526874605*X_C3_D3 + 0.0
SUBJECT TO
C1: X_C1_D1 + X_C1_D2 + X_C1_D3 = 1

C2: X_C2_D1 + X_C2_D2 + X_C2_D3 = 1

C3: X_C3_D1 + X_C3_D2 + X_C3_D3 = 1

D1: X_C1_D1 + X_C2_D1 + X_C3_D1 = 1

D2: X_C1_D2 + X_C2_D2 + X_C3_D2 = 1

D3: X_C1_D3 + X_C2_D3 + X_C3_D3 = 1

VARIABLES
0 <= X_C1_D1 <= 1 Integer
0 <= X_C1_D2 <= 1 Integer
0 <= X_C1_D3 <= 1 Integer
0 <= X_C2_D1 <= 1 Integer
0 <= X_C2_D2 <= 1 Integer
0 <= X_C2_D3 <= 1 Integer
0 <= X_C3_D1 <= 1 Integer
0 <= X_C3_D2 <= 1 Integer
0 <= X_C3_D3 <= 1 Integer

In [22]:
# Solve the model, then look up its status to make sure the solve went well
model.solve()
status_text = LpStatus[model.status]
print("Status:", status_text)

Status: Optimal


In [23]:
# Print the value the solver chose for every decision variable
for variable in model.variables():
    print(variable.name, "=", variable.varValue)

X_C1_D1 = 0.0
X_C1_D2 = 1.0
X_C1_D3 = 0.0
X_C2_D1 = 1.0
X_C2_D2 = 0.0
X_C2_D3 = 0.0
X_C3_D1 = 0.0
X_C3_D2 = 0.0
X_C3_D3 = 1.0


### Now that we have a solution, in the real world we would need to disseminate that information. That would likely mean an update to our database to reflect the assignment, which would lead to a number of changes:
- Send this information to both the driver and customer
- Update live maps for admins and drivers and customer

### We will display the results on a Google Map

#### If you are using a local install, then you will need to install some packages, see https://buildmedia.readthedocs.org/media/pdf/jupyter-gmaps/latest/jupyter-gmaps.pdf for details.

#### You will also need a Google API key, which is also covered in this document

In [24]:
import os

import gmaps

# Never hard-code the API key in the notebook: it leaks to anyone who can read the file.
# Read it from the GOOGLE_API_KEY environment variable instead.
google_api_key = os.environ.get("GOOGLE_API_KEY")
if not google_api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable to your Google Maps API key "
        "before running this cell."
    )
gmaps.configure(api_key=google_api_key)

### Let's plot our customers and driver locations on a Google Map.  First, we collect a List of these locations.  And along the way we make a list of information that will be displayed when a Customer or Driver is clicked on.

In [25]:
# Marker coordinates and the matching info-box labels (the names) for every customer.
customer_marker_locations = [info['location'] for info in customers.values()]
customer_info_boxes = list(customers)
pprint(customer_marker_locations)

# The same two parallel lists for the drivers.
driver_marker_locations = [info['location'] for info in drivers.values()]
driver_info_boxes = list(drivers)
pprint(driver_marker_locations)

[[33.780775, -84.386301], [33.77692, -84.38978], [33.773154, -84.397016]]
[[33.77845, -84.400825], [33.793711, -84.317408], [33.775306, -84.396123]]


In [26]:
# Start from an empty map figure.
fig = gmaps.figure()

# Customers are drawn as red symbols, labelled with the customer names.
customers_layer = gmaps.symbol_layer(
    customer_marker_locations,
    info_box_content=customer_info_boxes,
    fill_color='red',
    stroke_color='red',
    scale=6,
)
fig.add_layer(customers_layer)

# Drivers are drawn as blue symbols, labelled with the driver names.
drivers_layer = gmaps.symbol_layer(
    driver_marker_locations,
    info_box_content=driver_info_boxes,
    fill_color='blue',
    stroke_color='blue',
    scale=6,
)
fig.add_layer(drivers_layer)

# Leaving `fig` as the last expression renders the map as the cell's output.
fig

Figure(layout=FigureLayout(height='420px'))

### Now, let's draw a line between all the optimal assignments

In [27]:
# Loop through all the assignment variables
for v in model.variables():
    # Solvers work in floating point, so a chosen binary variable may come back as
    # e.g. 0.9999999 rather than exactly 1.0; treat anything above 0.5 as "assigned".
    # The None check skips variables that carry no value.
    if v.varValue is not None and v.varValue > 0.5:
        # A little fancy footwork to strip out the customer and driver name from the variable name
        var_name_split = v.name.split("_")
        print(f"Our variable name split into strings and put into a List: {var_name_split}")
        # We can now grab the customer and driver name (index 0 is the "X" prefix)
        cust_name = var_name_split[1]
        driver_name = var_name_split[2]
        # Now we create a line between the customer and driver
        assignment_line = gmaps.Line(
          start=customers[cust_name]['location'],
          end=drivers[driver_name]['location'],
          stroke_weight=3.0
        )
        # and we add the line to our map
        drawing = gmaps.drawing_layer(features=[assignment_line])
        fig.add_layer(drawing)
# Leaving `fig` as the last expression re-renders the map with the lines added
fig

Our variable name split into strings and put into a List: ['X', 'C1', 'D2']
Our variable name split into strings and put into a List: ['X', 'C2', 'D1']
Our variable name split into strings and put into a List: ['X', 'C3', 'D3']


Figure(layout=FigureLayout(height='420px'))