Welcome to modelParameterBuilder. This file functions to read in your data from the spreadsheets in which they exist. If data is not in the expected format, this file will try to save you by throwing an error so you know as early as possible that something is wrong. Not all checking is currently supported. Read markup and comments on each function for more information.

In [177]:
import abc
from abc import abstractmethod
import pandas as pd
import numpy as np
import math

In [178]:
# Global Variables
# Number of bags one box counts as when box counts are folded into bag counts.
BOX_TO_BAGS = 3
# Bag count substituted for an address whose bag/box total is NaN.
DEFAULT_CUSTOMER_BAGS = 2.0
# Sentinel for a missing bagCapacity; a capacity of -1 triggers the autofill
# in model_parameters_drivers.
DEFAULT_DRIVER_BAGS = -1
# Divisor applied to both adjacency matrices (presumably seconds -> minutes).
SECONDS_PER_MINUTE = 60.0
# Multiplier applied to timeAvailable (presumably hours -> minutes).
MINUTES_PER_HOUR = 60.0
# Sentinel for a missing timeAvailable; negative time limits trigger the
# autofill in model_parameters_drivers.
DEFAULT_TIME_LIMIT = -1
# Only referenced from commented-out code in this file; presumably a
# bike-to-car time ratio -- TODO confirm.
BIKE_CONVERSION_TIME = 1/4.26966292135

Exception throwing capabilities for modelParamsBuilder.

In [179]:
# index out of bounds error for modelParamsBuilder
class indError(Exception):
    """Signals that an index was outside the range modelParamsBuilder accepts."""

    def __init__(self, m):
        super().__init__(m)
        # __str__ reads this attribute, so the text is kept here as well
        self.message = m

    def __str__(self):
        return self.message

# a category of data was not the expected length
class SectionLengthError(Exception):
    """Signals that a column/section did not have the expected number of entries."""

    def __init__(self, m):
        super().__init__(m)
        # __str__ reads this attribute, so the text is kept here as well
        self.message = m

    def __str__(self):
        return self.message
    
# an address could not be found in the adjacency matrix
class AddressesError(Exception):
    """
    Signals a problem with one address.

    actual: the offending address (always included in the message)
    section: optional description of what was being done at the time
    currDriver: optional index of the driver being processed
    currAddress: optional index of the address within that driver's list
    """

    def __init__(self, actual, section=None, currDriver=None, currAddress=None):
        pieces = ["Error with address " + str(actual)]
        # each optional detail is appended only when it was supplied
        # (0 is a valid index, hence the explicit None comparisons)
        if section is not None:
            pieces.append(" while " + str(section) + ".")
        if currDriver is not None:
            pieces.append(" CurrDriver: " + str(currDriver) + ".")
        if currAddress is not None:
            pieces.append(" CurrAddress index: " + str(currAddress))
        self.message = "".join(pieces)

    def __str__(self):
        return self.message

Functions which convert to and from various kinds of lists. Read comments on each function for more details.

In [180]:
"""
fromarr: should be an np.array containing strings
seperator: the string seperator mark
returns array where each index contains an array of the ints
that were in the old array as strings where each of these indices separated
wherever the separator indicated
Does not modify provided array
"""
def from_string_separated_np_array(fromarr, seperator, defaultVal, isAddress):
    """
    Build a new list from fromarr, splitting string entries on seperator.

    fromarr: sequence of strings and/or numbers (not modified)
    seperator: separator string; '' means "never split"
    defaultVal: value substituted for missing entries
    isAddress: when True, NaN numbers are NOT replaced by defaultVal

    returns: a list with one entry per input entry, chosen as follows
      - NaN number and not isAddress    -> defaultVal
      - the string 'nan'                -> defaultVal
      - seperator == ''                 -> the entry unchanged
      - string without the separator    -> the entry unchanged
      - any other number                -> str(entry); note that a NaN number
                                           with isAddress=True therefore
                                           becomes the string 'nan'
      - string containing the separator -> np.array of the pieces (dtype=str)
    """
    toarr = []
    for item in fromarr:
        is_text = isinstance(item, str)
        if not is_text and np.isnan(item) and not isAddress:
            # missing numeric value
            toarr.append(defaultVal)
        elif item == 'nan':
            # item represented as nan but as a string
            toarr.append(defaultVal)
        elif seperator == '':
            # no separation requested, keep the item as it is
            toarr.append(item)
        elif is_text and seperator not in item:
            # nothing to split on, keep the item as it is
            toarr.append(item)
        elif isinstance(item, (float, int, np.number)):
            # np.number is included because NumPy integer scalars are not
            # instances of int and would otherwise reach .split() below
            toarr.append(str(item))
        else:
            # separation time!
            toarr.append(np.array(item.split(seperator), dtype=str))
    return toarr

"""
fromarr: should be an array where each index holds the required stops for a driver,
If the driver has no required stops, contains "-1"

addresses: the list of addresses that are valid stops. MUST be in the order that
they are for the adjacency matrix

return: matrix in which each row is a driver and each column is a stop in the
same order as the adjacency matrix. Contains 0 if driver does not have to stop
there and a 1 otherwise.
"""
def matrix_from_np_array(fromarr, addresses):
    """
    Turn per-driver required stops into a 0/1 matrix over addresses.

    fromarr: one entry per driver (modified in place, see below)
    addresses: the valid stops, in adjacency-matrix order

    returns: a list of rows, one per driver, each with one 0/1 value per
    address; [[]] when fromarr is empty.
    """
    # Pass 1 (in place): drop everything from ", #" onwards in each stop of
    # a non-float entry that has no '-1'; turn float entries into strings.
    for idx, entry in enumerate(fromarr):
        if isinstance(entry, float):
            if entry != '-1':
                fromarr[idx] = str(entry)
        elif '-1' not in entry:
            for pos in range(len(entry)):
                entry[pos] = entry[pos].split(", #")[0]

    # Pass 2: mark the addresses found in each driver's entry. An address
    # equal to the most recently marked one gets 0 instead of 1; that
    # "most recent" value is carried over from one driver to the next.
    rows = []
    last_marked = ""
    for stops in fromarr:
        row = []
        for addr in addresses:
            if addr in stops:
                row.append(0 if last_marked == addr else 1)
                last_marked = addr
            else:
                row.append(0)
        rows.append(row)
    return rows if rows else [[]]


"""
Takes in an address and finds its location in a list of addresses.
If there is the possibility for duplicates, prev_add should be a list of
indices that have been seen before. If it has, this function will ignore
that index and keep searching for the next occurrence of address in 
addresses that has not yet been seen.
"""
def convert_address_to_index(address: str, addresses, prev_add):
    """
    Return the position of address in addresses, skipping seen positions.

    address: the address to look up
    addresses: the adjacency-matrix labels; a label may carry a ".<digit>..."
        part, in which case the text before its first ".<digit>" is also
        compared with address (presumably the suffix added to duplicated
        column names -- TODO confirm)
    prev_add: positions that must not be returned again

    returns: the first position not in prev_add whose label equals address
    or whose text before the first ".<digit>" equals address; None when
    every such position is already in prev_add.

    raises AddressesError: when address is not in addresses at all.
    """
    import re
    if address not in addresses:
        raise AddressesError(address, "converting addresses to indices")
    # raw string: '\.' in a plain literal is an invalid escape sequence
    numbered = re.compile(r'\.[0-9]')
    for i, curr in enumerate(addresses):
        if i in prev_add:
            continue
        # exact match first, so a label that itself contains ".<digit>"
        # (e.g. "1.5 Mile Rd") can still be found
        if curr == address:
            return i
        match = numbered.search(curr)
        if match is not None and curr[:match.start()] == address:
            return i
    return None

"""
Gets the next index from the adjacency matrix from the current position in arr
and returns the updated arr along with the index just added
"""
def getNextInd(arr, prev_add, addresses_with_virtual_addresses, i: int, j: int =-1):
    """
    Replace one address in arr by its adjacency-matrix index.

    arr: the structure being converted (modified in place)
    prev_add: indices already handed out (extended in place, nested case only)
    addresses_with_virtual_addresses: the labels to search
    i: position in arr
    j: position inside arr[i]; the default -1 means arr[i] itself is the address

    returns: the tuple (arr, prev_add)
    """
    source = arr[i][j] if j > -1 else arr[i]
    found = convert_address_to_index(source, addresses_with_virtual_addresses, prev_add)
    if found is not None:
        if j != -1:
            arr[i][j] = found
            # only nested entries are recorded as seen
            prev_add.append(found)
        else:
            arr[i] = found
    return (arr, prev_add)

"""
Takes in an array of addresses to be parsed and an array of the
adjacency matrix's addresses including virtual addresses.

Returns an array of these addresses in the same order as before
but represented by their indices in the adjacency matrix instead of
their string values.
"""
def address_2d_array_init(arr, addresses_with_virtual_addresses):
    """
    Convert the addresses held in arr to adjacency-matrix indices, in place.

    arr: one entry per row; an entry is either a single address string or a
        sequence of address strings
    addresses_with_virtual_addresses: the adjacency-matrix labels

    Entries equal to the string 'nan', sequences containing 'nan', and
    sequences of length 0 or 1 are left untouched. The list of already-used
    indices is reset for every row.

    returns: arr (the same object that was passed in)
    """
    for i in range(len(arr)):
        prev_add = []
        entry = arr[i]
        if isinstance(entry, str):
            # compare for equality: a substring test would also skip real
            # addresses that merely contain the letters "nan"
            if entry != 'nan':
                (arr, prev_add) = getNextInd(arr, prev_add, addresses_with_virtual_addresses, i)
        elif len(entry) > 1 and 'nan' not in entry:
            for j in range(len(entry)):
                (arr, prev_add) = getNextInd(arr, prev_add, addresses_with_virtual_addresses, i, j)
    return arr

check_addresses is a function to check that every address in arr can be found somewhere in an adjacency matrix addresses. If it cannot be found, it raises an error immediately and indicates the address that could not be found.

NOTE: This WILL NOT check to see if there is a matching number of occurrences of an address between arr and addresses. For example, if arr contains the address "Hello" five times, but the adjacency matrix only contains this address once, this function will not catch this mismatch.

In [181]:
# checks that each address in arr can be found in addresses
# section and curr are only important for throwing errors
def check_addresses(arr, addresses, section=None, curr=None):
    """
    Raise AddressesError for the first element of arr missing from addresses.

    section and curr are passed through to the error together with the
    position of the offending element; returns None when nothing is missing.
    """
    for position, candidate in enumerate(arr):
        if candidate in addresses:
            continue
        raise AddressesError(candidate, section, curr, position)

Converts everything to bags. In the provided data, some amounts of food were listed in terms of boxes and others in terms of bags. This ensures that our model only has to work with one unit, which we have chosen to be bags.

Below that is a general purpose conversion method currently being used only to translate biker times into driver times.

In [182]:
# takes in a list of bags and boxes ordered the same as the addresses list
# returns a list where everything is counted in bags
def convert_to_bags(bags, boxes):
    """
    Combine per-address bag and box counts into whole bag counts.

    bags, boxes: equally long sequences of numbers; NaN marks a missing value

    returns: a list of ints. Each entry is bags + BOX_TO_BAGS * boxes (just
    bags when boxes is NaN); a NaN result is replaced by
    DEFAULT_CUSTOMER_BAGS. The first entry is always forced to 0
    (presumably the depot -- TODO confirm). Empty input gives [].

    raises indError: when the two sequences differ in length.
    """
    if len(bags) != len(boxes):
        raise indError("Bags and Boxes should be same length")
    totals = []
    for bag_count, box_count in zip(bags, boxes):
        if math.isnan(box_count):
            total = bag_count
        else:
            total = bag_count + BOX_TO_BAGS * box_count
        if math.isnan(total):
            total = int(DEFAULT_CUSTOMER_BAGS)
        totals.append(total)
    # guard: with no addresses there is no first entry to reset
    if totals:
        totals[0] = 0
    return [int(total) for total in totals]


def general_convert_units(fromarr, what_to_change, conversion_unit):
    """
    Multiply selected entries of fromarr by conversion_unit.

    fromarr: the values to convert (not modified)
    what_to_change: same length as fromarr; an entry equal to True/1 marks
        the value at that position for conversion
    conversion_unit: the factor applied to marked values

    returns: a new list with marked values scaled and the rest unchanged

    raises SectionLengthError: when the two sequences differ in length
    raises TypeError: when conversion_unit is NaN
    """
    if len(what_to_change) != len(fromarr):
        raise SectionLengthError("what_to_change and fromarr must be the same length")
    if math.isnan(conversion_unit):
        raise TypeError(conversion_unit)
    return [
        value * conversion_unit if (flag == True or flag == 1) else value
        for value, flag in zip(fromarr, what_to_change)
    ]

In [183]:
def mothership_gen_init(col, daysheet, expected_len):
    """
    Fetch daysheet[col] and verify that it has expected_len entries.

    returns: the column exactly as daysheet[col] provides it
    raises SectionLengthError: when the column's length differs from expected_len
    """
    column = daysheet[col]
    if len(column) == expected_len:
        return column
    raise SectionLengthError(col + " list length and provided number of drivers should match")

model_parameters_drivers:

driver_mothership: this should be a spreadsheet containing only one sheet. It should contain the following columns:
Driver	isCar	isBike	bagCapacity	timeAvailable	driverEndPreferences	requiredStops	currentRoute

"Driver" is the driver's name. "isCar" and "isBike" are both binary lists that are 1 if the driver in that row is a driver/biker (depending on the column) and 0 otherwise. bagCapacity is the number of bags that the row's driver can handle (empty if unspecified). timeAvailable is the number of hours a driver can work for (N/A if unspecified). requiredStops is the list of all stops a driver indicated they wish to continue driving to. currentRoute is the list of current stops that a driver is making.


num_drivers: this value is manually entered by the client. This value is intended to check that all lists are the correct length and that the user has not accidentally left out a driver that was intended to be included, or included one that shouldn't have been. The class WILL throw an error if this number does not match what has been found.


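b_adjacency_matrix, c_adjacency_matrix: the bike and car adjacency matrices, each containing all locations and the distances between them. The first column of each sheet is dropped when it is read (presumably it holds the location labels), and the remaining column headers should be formatted exactly as the addresses are in the driver_mothership.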


daysheet: essentially another redundant check. This should include a column called "Address" which contains the same list of addresses as in the adjacency matrices, along with "Bags" and "Boxes" columns giving the amount of food for each address.

In [184]:
class model_parameters_drivers:
    """
    Model inputs for one delivery day, read from the driver mothership,
    the two adjacency matrices and the daysheet.

    Values that need no argument are exposed as read-only properties;
    look-ups that take an argument are ordinary methods.
    """

    def __init__(self, driver_mothership, num_drivers, b_adjacency_matrix, c_adjacency_matrix, daysheet, drivers=-1):
        """
        driver_mothership: table providing the columns 'Driver', 'isCar',
            'timeAvailable', 'driverEndPreferences', 'bagCapacity' (all
            length-checked against num_drivers) plus 'requiredStops' and
            'currentRoute' (not length-checked)
        num_drivers: the number of drivers the caller expects
        b_adjacency_matrix, c_adjacency_matrix: bike and car matrices; the
            first column is dropped and the values are divided by
            SECONDS_PER_MINUTE
        daysheet: table providing the columns "Address", "Bags" and "Boxes"
        drivers: compared with each driver's isCar value while the current
            routes are converted; a negative value (the default) selects
            every driver

        Raises SectionLengthError when a length-checked column has the wrong
        length and AddressesError when an address cannot be found.
        """
        self.__onlyDrivers = drivers

        self.__addresses = daysheet["Address"].to_numpy(copy=True)

        self.__num_drivers = num_drivers

        self.__address_num_bags = convert_to_bags(daysheet["Bags"], daysheet["Boxes"])

        # initialize driver_names list and check length
        self.__driver_names = mothership_gen_init('Driver', driver_mothership, num_drivers).to_numpy(copy=True)

        # initialize carness and check length
        self.__carness = mothership_gen_init('isCar', driver_mothership, num_drivers).to_numpy(copy=True)

        # bikerness is derived from carness on demand (see the bikerness property)
        # self.__bikerness = mothership_gen_init('isBike', driver_mothership, num_drivers).to_numpy(copy=True)

        # make driver time constraints list; missing values become
        # DEFAULT_TIME_LIMIT and are autofilled at the end of __init__
        self.__driver_time_constraints = mothership_gen_init('timeAvailable', driver_mothership, num_drivers).to_numpy(dtype=str, copy=True)
        self.__driver_time_constraints = from_string_separated_np_array(self.__driver_time_constraints, '', DEFAULT_TIME_LIMIT, False)
        self.__driver_time_constraints = [float(t)*MINUTES_PER_HOUR for t in self.__driver_time_constraints]

        # TODO
        # do we need this line now that we have the two different matrices?
        # self.__driver_time_constraints = general_convert_units(self.__driver_time_constraints, self.__bikerness, BIKE_CONVERSION_TIME)

        # set up list of where drivers want to end
        # if drivers don't have a specified end destination, we have given them one
        self.__driver_end_preferences = mothership_gen_init('driverEndPreferences', driver_mothership, num_drivers).to_numpy(dtype=str, copy=True)
        self.__driver_end_preferences = from_string_separated_np_array(self.__driver_end_preferences, ';', "-1", True)

        # set up list of stops drivers want to still go to
        self.__driver_required_stops = from_string_separated_np_array(driver_mothership['requiredStops'].to_numpy(copy=True), '; ', "-1", True)

        # set up driver current routes lists
        self.__driver_current_routes = driver_mothership["currentRoute"].to_numpy(copy=True)
        self.__driver_current_routes = from_string_separated_np_array(self.__driver_current_routes, '; ', "-1", True)

        # check that every stop of every current route is a daysheet address
        for i in range(len(self.__driver_current_routes)):
            check_addresses(self.__driver_current_routes[i], self.__addresses, "processing Driver Current Routes", i)

        # set up driver bag capacity list; missing values become
        # DEFAULT_DRIVER_BAGS and are autofilled further down
        self.__driver_bag_capacity = mothership_gen_init('bagCapacity', driver_mothership, num_drivers).to_numpy(copy=True)
        self.__driver_bag_capacity = from_string_separated_np_array(self.__driver_bag_capacity, '', DEFAULT_DRIVER_BAGS, False)
        self.__driver_bag_capacity = [int(float(b)) for b in self.__driver_bag_capacity]

        self.__b_adjacency_matrix = b_adjacency_matrix.iloc[:,1:].to_numpy(copy=True)/SECONDS_PER_MINUTE
        self.__c_adjacency_matrix = c_adjacency_matrix.iloc[:,1:].to_numpy(copy=True)/SECONDS_PER_MINUTE

        addresses_with_virtual_addresses = c_adjacency_matrix.columns[1:]

        # at some point clean this up to use less space and take less time
        # (this is the same list object as __driver_current_routes, not a copy)
        self.__driver_current_routes_index = self.__driver_current_routes

        # prev_add is shared by all drivers, so an adjacency-matrix index is
        # handed out at most once across all current routes
        prev_add = []
        for i in range(len(self.__driver_current_routes)):
            if ((self.__carness[i] == self.__onlyDrivers) or self.__onlyDrivers < 0):
                for j in range(len(self.__driver_current_routes[i])):
                    stop_index = convert_address_to_index(self.__driver_current_routes[i][j], addresses_with_virtual_addresses, prev_add)
                    if (stop_index is not None):
                        self.__driver_current_routes_index[i][j] = stop_index
                        prev_add.append(stop_index)
            elif (self.__carness[i] != self.__onlyDrivers):
                # NOTE(review): popping inside an index-based loop shifts the
                # remaining routes, so later values of i no longer line up
                # with __carness and can run past the end of the list. Left
                # unchanged because the intended filtering of the other
                # per-driver lists is not visible here.
                self.__driver_current_routes_index.pop(i)

        self.__driver_current_routes_index = [
            [int(stop) for stop in route]
            for route in self.__driver_current_routes_index
        ]

        # NOTE(review): end locations are looked up by driver NAME, not from
        # __driver_end_preferences -- presumably each driver's end node is
        # labelled with the driver's name in the adjacency matrix; confirm.
        # A copy is passed because address_2d_array_init writes the indices
        # into the array it is given, which would otherwise replace the
        # stored driver names.
        self.__driver_end_preferences_index = address_2d_array_init(self.__driver_names.copy(), addresses_with_virtual_addresses)
        self.__driver_required_stops_index = address_2d_array_init(self.__driver_required_stops, addresses_with_virtual_addresses)

        for i, stops in enumerate(self.__driver_required_stops_index):
            if type(stops) == str and stops == 'nan':
                # no required stops for this driver
                self.__driver_required_stops_index[i] = -1
            else:
                self.__driver_required_stops_index[i] = [int(stop) for stop in stops]

        # a capacity of -1 means "not given": use the bags on the current route
        for i in range(len((self.__driver_bag_capacity))):
            if (self.__driver_bag_capacity[i] == -1):
                print(f"WARNING: Autofilling bag capacity for driver {i+1}.")
                self.__driver_bag_capacity[i] = sum([self.__address_num_bags[j] for j in self.__driver_current_routes_index[i]])

        # a negative time limit means "not given": use the time of the
        # current route, starting at index 0 and finishing at the end location
        for i in range(self.__num_drivers):
            if (self.__driver_time_constraints[i] < 0):
                print(f"WARNING: Autofilling time constraints for driver {i+1}.")
                route_time = 0
                route = [0] + self.__driver_current_routes_index[i] + [self.__driver_end_preferences_index[i]]
                for j in range(len(route)-1):
                    if self.__carness[i]:
                        route_time += self.__c_adjacency_matrix[route[j]][route[j+1]]
                    else:
                        route_time += self.__b_adjacency_matrix[route[j]][route[j+1]]
                self.__driver_time_constraints[i] = route_time

    # getters with no parameters

    @property
    def num_drivers(self):
        """return: the number of drivers passed to the constructor"""
        return self.__num_drivers

    @property
    def drivers_names(self):
        """return: numpy array of driver names"""
        return self.__driver_names

    @property
    def carness(self):
        """return: numpy array holding the 'isCar' column"""
        return self.__carness

    @property
    def bikerness(self):
        """return: numpy int array, the logical negation of carness"""
        return np.logical_not(self.__carness).astype(int)

    @property
    def driver_time_constraints(self):
        """return: list with one time limit per driver (timeAvailable * MINUTES_PER_HOUR, or the autofilled route time)"""
        return self.__driver_time_constraints

    @property
    def driver_bag_capacity(self):
        """return: list with one bag capacity per driver"""
        return self.__driver_bag_capacity

    @property
    def driver_end_preferences(self):
        """return: the end locations as adjacency-matrix indices, one per driver"""
        return self.__driver_end_preferences_index

    @property
    def driver_required_stops(self):
        """return: list with, per driver, a list of adjacency-matrix indices, or -1 when the driver has none"""
        return self.__driver_required_stops_index

    @property
    def driver_current_routes(self):
        """return: list with, per driver, the current stops as adjacency-matrix indices"""
        return self.__driver_current_routes_index

    @property
    def address_bag_needs(self):
        """
        return: a list of how many bags to deliver to an address
        where each index corresponds to a row of the daysheet
        """
        return self.__address_num_bags

    @property
    def b_adjacency_matrix(self):
        """
        return: bike adjacency matrix as a numpy array, without the first
        column of the sheet and with every value divided by SECONDS_PER_MINUTE
        """
        return self.__b_adjacency_matrix

    @property
    def c_adjacency_matrix(self):
        """
        return: car adjacency matrix as a numpy array, without the first
        column of the sheet and with every value divided by SECONDS_PER_MINUTE
        """
        return self.__c_adjacency_matrix

    def strtlinedist_from_adjacency_matrix(self, addressOne, addressTwo):
        """
        Deprecated. Validates both addresses and then always raises.

        raises AddressesError: when either address is not a daysheet address
        raises NotImplementedError: otherwise
        """
        if (addressOne not in self.__addresses):
            raise AddressesError(addressOne, "getting distance")
        if (addressTwo not in self.__addresses):
            raise AddressesError(addressTwo, "getting distance")
        raise NotImplementedError("This has been deprecated and shouldn't be used.")

    # getters by driver index
    # (plain methods: a property cannot receive the extra argument)

    def get_driver_name_by_ind(self, ind):
        """
        parameter ind: the driver's index in the driver array
        raises indError if ind is out of bounds
        return: the name of the driver at ind
        """
        self.__indError(ind)
        return self.__driver_names[ind]

    def get_driver_bikerness_by_ind(self, ind):
        """
        parameter ind: the driver's index in the driver array
        raises indError if ind is out of bounds
        return: the bikerness of the driver at ind
        """
        self.__indError(ind)
        return np.logical_not(self.__carness).astype(int)[ind]

    def get_driver_carness_by_ind(self, ind):
        """
        parameter ind: the driver's index in the driver array
        raises indError if ind is out of bounds
        return: the carness of the driver at ind
        """
        self.__indError(ind)
        return self.__carness[ind]

    def get_driver_required_stops_by_ind(self, ind):
        """
        parameter ind: the driver's index in the driver array
        raises indError if ind is out of bounds
        return: the required stops of the driver at ind
        """
        self.__indError(ind)
        return self.__driver_required_stops[ind]

    def get_address_index(self, address):
        """
        parameter address: the address you wish to know the index of
        return: index of address in the daysheet address list,
        or -1 if it is not found
        """
        for i in range(len(self.__addresses)):
            if (self.__addresses[i] == address):
                return i
        return -1

    def get_address_required_bags_by_address(self, address):
        """
        parameter address: the address you want bag info for
        return: the number of bags that address requires,
        or -1 if address was not found
        """
        ind = self.get_address_index(address)
        if (ind == -1):
            return -1
        return self.__address_num_bags[ind]

    # helper methods

    # errors
    def __indError(self, ind):
        """Raise indError unless 0 <= ind < number of drivers."""
        if (ind < 0):
            raise indError("must have non-negative index")
        if (ind >= self.__num_drivers):
            raise indError("must be a valid index")

In [185]:
# Load every input workbook; the paths are relative to the working directory.
# The b_/c_ matrices are the ones passed as the b_adjacency_matrix and
# c_adjacency_matrix arguments of model_parameters_drivers.
b_sat_adj_matrix = pd.read_excel("Sheets/B_SaturdayAdjacencyMatrix.xlsx")
c_sat_adj_matrix = pd.read_excel("Sheets/C_SaturdayAdjacencyMatrix.xlsx")
b_wed_adj_matrix = pd.read_excel("Sheets/B_WednesdayAdjacencyMatrix.xlsx")
c_wed_adj_matrix = pd.read_excel("Sheets/C_WednesdayAdjacencyMatrix.xlsx")
# driver motherships (one row per driver)
mothership_saturday = pd.read_excel("Sheets/SaturdayDriverMothership.xlsx")
mothership_wednesday = pd.read_excel("Sheets/WednesdayDriverMothership.xlsx")
# daysheets (passed as the daysheet argument)
wed = pd.read_excel("Sheets/Wednesday.xlsx")
sat = pd.read_excel("Sheets/Saturday.xlsx")

In [186]:
#list(c_wed_adj_matrix.columns[1:])

In [187]:
#m = model_parameters_drivers(mothership_wednesday, 14, b_wed_adj_matrix, c_wed_adj_matrix, wed)

In [188]:
# Build the Saturday parameters; 13 is the expected number of drivers and is
# checked against the length of the mothership columns.
m = model_parameters_drivers(mothership_saturday, 13, b_sat_adj_matrix, c_sat_adj_matrix, sat)



In [189]:
#m.driver_time_constraints

In [190]:
#m.driver_current_routes[7]

In [191]:
#m.driver_end_preferences[7]

In [192]:
#np.set_printoptions(precision=1)
#print(m.c_adjacency_matrix[59][205])