# **Function to compute a similarity score between routes**

In [233]:
import numpy as np
import copy
import os
import json

# the data files live in the 'data' subfolder of the current working directory
current_directory = os.getcwd()
subfolder_path = 'data'

# load the standard routes; the encoding is passed explicitly because open()
# otherwise falls back to a platform-dependent default
file_path = os.path.join(current_directory, subfolder_path, 'standard.json')
with open(file_path, 'r', encoding='utf-8') as file:
    standardroutes = json.load(file)

# load the actual routes
file_path = os.path.join(current_directory, subfolder_path, 'actual.json')
with open(file_path, 'r', encoding='utf-8') as file:
    actual_routes = json.load(file)

In [234]:
# run notebook to load function show_route() to visualize routes
%run "functions_data_generation.ipynb"

# view first standardroute 's1'
show_route(standardroutes[0])

# get all actual routes of standardroute 's1
indizes = [i['sroute'] == 's1' for i in actual_routes]
actual_routes_s1 = [route for route, index in zip(actual_routes, indizes) if index]

# view first actualroute 'a1' of 's1'
show_route(actual_routes_s1[3])

id:s1, route:
     {'from': 'Milano', 'to': 'Genova', 'merchandise': {'Tea': 16, 'Meat': 22, 'Pasta': 25, 'Water': 30}}
     {'from': 'Genova', 'to': 'Siena', 'merchandise': {'Pasta': 27, 'Pens': 17, 'Meat': 12, 'Honey': 11, 'Butter': 3, 'Bread': 8}}
     {'from': 'Siena', 'to': 'Trento', 'merchandise': {'Carrots': 6, 'Honey': 25, 'Tomatoes': 6, 'Rice': 3, 'Apples': 20}}
     {'from': 'Trento', 'to': 'Lecce', 'merchandise': {'Pasta': 7, 'Fish': 18, 'Apples': 9, 'Milk': 15}}
     {'from': 'Lecce', 'to': 'Bologna', 'merchandise': {'Water': 18, 'Tea': 23, 'Apples': 2, 'Potatoes': 19, 'Bananas': 23, 'Tomatoes': 23}}
     {'from': 'Bologna', 'to': 'Bolzano', 'merchandise': {'Potatoes': 5, 'Cheese': 23, 'Beer': 17, 'Chocolate': 17, 'Water': 16}}


id:a4, driver:E, sroute:s1, route:
     {'from': 'Milano', 'to': 'Genova', 'merchandise': {'Tea': 16, 'Meat': 22, 'Pasta': 25, 'Water': 30}}
     {'from': 'Genova', 'to': 'Siena', 'merchandise': {'Pasta': 27, 'Pens': 17, 'Meat': 12, 'Honey': 11, 'B

In [409]:
# helper function to compute the similarity between two trips
def fun_similarity_merchandise(trip1, trip2, prints=True):
    """Compute a similarity score between the merchandise of two trips.

    The score is the quantity both trips have in common divided by the
    quantity found in either trip: for every product the smaller of the two
    quantities counts towards the numerator and the larger one towards the
    denominator (a product missing from one trip counts as quantity 0 there).
    For non-negative quantities the result therefore lies between 0 and 1.

    Parameters
    ----------
    trip1 : dict
        Reference trip with the keys 'from', 'to' and 'merchandise'
        (a dict mapping product name to quantity).
    trip2 : dict
        Trip compared against trip1; only its 'merchandise' entry is read.
    prints : bool, default True
        If True, print a step-by-step breakdown of the computation.

    Returns
    -------
    int or float
        1 if both merchandise dicts are equal, otherwise numerator / denominator.
    """
    if prints:
        print('\n--------------------------new trip--------------------------')
        print('Trip from {} to {}:\n'.format(trip1['from'], trip1['to']))

    # check if driver made any modifications
    if trip1['merchandise'] == trip2['merchandise']:
        trip_similarity_score = 1
        # this message has to respect the prints flag like every other message
        if prints: print('No modification by the driver.')
    else:
        # extract merchandise dictionaries and products of both trips
        merchandise1 = trip1['merchandise']
        merchandise2 = trip2['merchandise']
        products1 = list(merchandise1.keys())
        products2 = list(merchandise2.keys())

        # get the intersection of correctly picked products by the driver
        corr_products = set(products1).intersection(products2)
        if prints: print('Correct products:', corr_products)

        ### OPTION 1: products picked with exactly the same quantity in both trips
        corr_prod_and_quant = {product for product in corr_products if merchandise1[product] == merchandise2[product]}
        corr_quantities = int(np.sum([merchandise1[product] for product in corr_prod_and_quant]))
        if corr_prod_and_quant and prints:
            print('  - Correct quantity: {} with sum: {}'.format({product: merchandise1[product] for product in corr_prod_and_quant}, corr_quantities))

        ### OPTION 2: products present in both trips whose quantities have been changed
        corr_prod_and_wrong_quant = list(set(corr_products) - set(corr_prod_and_quant))
        old_quantities = int(np.sum([merchandise1[product] for product in corr_prod_and_wrong_quant]))
        quantity_changes = [merchandise2[product] - merchandise1[product] for product in corr_prod_and_wrong_quant]
        changes_up = int(np.sum([i for i in quantity_changes if i > 0]))    # >= 0: total increase
        changes_down = int(np.sum([i for i in quantity_changes if i < 0]))  # <= 0: total reduction (negative sign)
        if corr_prod_and_wrong_quant and prints:
            print('  - Changed quantites:', {product: quantity for product, quantity in zip(corr_prod_and_wrong_quant, quantity_changes)})

        ### OPTION 3: products (and quantities) of trip 1 that were cut by the driver
        cut_products = list(set(products1) - corr_products)
        cut_quantities = int(np.sum([merchandise1[i] for i in cut_products]))
        if cut_products and prints:
            print('Driver cut products: {} with sum: {}'.format({product: merchandise1[product] for product in cut_products}, cut_quantities))

        ### OPTION 4: products (and quantities) that were added to trip 2 by the driver
        added_products = list(set(products2) - corr_products)
        added_quantities = int(np.sum([merchandise2[i] for i in added_products]))
        if added_products and prints:
            print('Driver added products: {} with sum: {}'.format({product: merchandise2[product] for product in added_products}, added_quantities))

        # compute similarity score of the trips
        # numerator: all correct quantities + (the modified quantities minus their reductions);
        # changes_down is already negative, so it has to be ADDED to subtract the reductions
        numerator = corr_quantities + (old_quantities + changes_down)
        # denominator: all correct quantities + (all modified quantities plus their increase) + cut products + added products
        denominator = corr_quantities + (old_quantities + changes_up) + cut_quantities + added_quantities
        trip_similarity_score = numerator / denominator

        # view results (the reduction is shown as a positive magnitude after the minus sign)
        if prints:
            print('\nSimilarity score formula between the trips to {}:'.format(trip1['to']))
            print('    sim(trip1, trip2) = ({} + ({}-{})) / ({} + ({}+{}) + {} + {})'.format(corr_quantities, old_quantities, abs(changes_down), corr_quantities, old_quantities, changes_up, cut_quantities, added_quantities))
    if prints: print('    sim(trip1, trip2) = {}'.format(np.round(trip_similarity_score, 4)))

    return trip_similarity_score

In [410]:
# define two example trips: trip2 lacks 'Water' and 'Tomatoes' compared to trip1
trip1 = {'from': 'Lecce', 'to': 'Bologna', 'merchandise': {'Water': 18, 'Tea': 23, 'Apples': 2, 'Potatoes': 19, 'Bananas': 23, 'Tomatoes': 23}}
trip2 = {'from': 'Lecce', 'to': 'Bologna', 'merchandise': {'Tea': 23, 'Apples': 2, 'Potatoes': 19, 'Bananas': 23}}
# alternative trip2 that is identical to trip1 (no modification by the driver)
#trip2 = {'from': 'Lecce', 'to': 'Bologna', 'merchandise': {'Water': 18, 'Tea': 23, 'Apples': 2, 'Potatoes': 19, 'Bananas': 23, 'Tomatoes': 23}}
prints=True

# forward the flag so that changing `prints` above actually toggles the output
fun_similarity_merchandise(trip1, trip2, prints=prints)


--------------------------new trip--------------------------
Trip from Lecce to Bologna:

Correct products: {'Bananas', 'Tea', 'Apples', 'Potatoes'}
  - Correct quantity: {'Bananas': 23, 'Tea': 23, 'Apples': 2, 'Potatoes': 19} with sum: 67
Driver cut products: {'Tomatoes': 23, 'Water': 18} with sum: 41

Similarity score formula between the trips to Bologna:
    sim(trip1, trip2) = (67 + (0-0)) / (67 + (0+0) + 41 + 0)
    sim(trip1, trip2) = 0.6204


0.6203703703703703

In [416]:
# main function to compute the similarity between two routes - with route1 as standard route and route2 as actual route
def fun_similarity_score(route1, route2, prints=True):
    """Compute the similarity score between a standard route and an actual route.

    Trips are paired up in two passes and every pair is scored with
    fun_similarity_merchandise():

    1. trips with the same starting city and the same destination city,
    2. of the trips left over, trips with the same destination city
       (the starting city was changed).

    Every trip that stays unpaired in either route (cut or added by the
    driver) contributes a score of 0. The result is the mean of all scores.

    Parameters
    ----------
    route1 : dict
        Standard route; 'route' holds a list of trips, each a dict with the
        keys 'from', 'to' and 'merchandise'. 'id' is read only when printing.
    route2 : dict
        Actual route with the same structure.
    prints : bool, default True
        If True, print the routes and a breakdown of the computation. The
        flag is forwarded to fun_similarity_merchandise().

    Returns
    -------
    numpy.float64
        Mean of the per-trip similarity scores.
    """
    if prints:
        print('\n--------------------------new similarity score computation--------------------------')
        show_route(route1)
        show_route(route2)

    trips1 = route1['route']
    trips2 = route2['route']

    # create a dictionary with the (from, to) city pair of every trip of route 1 and route 2
    trips = {
        'route1': [(trip['from'], trip['to']) for trip in trips1],
        'route2': [(trip['from'], trip['to']) for trip in trips2],
    }

    # generate the intersection and union of the routes' trips
    intersection = set(trips['route1']).intersection(trips['route2'])
    union = set(trips['route1']).union(trips['route2'])
    if prints:
        print('Intersection of trips:', intersection)
        print('Union of trips:', union)

    # positions of the trips that have not been paired up yet. Tracking positions
    # instead of deleting trips keeps every index valid for the whole computation
    # and leaves the routes passed in by the caller untouched.
    open1 = list(range(len(trips1)))
    open2 = list(range(len(trips2)))

    # pass 1: compare the merchandise of all the correct trips (same starting city and
    # destination city); walking route 1 in order makes the order of the scores deterministic
    trip_similarities = []
    handled = set()
    for index1, trip in enumerate(trips['route1']):
        if trip in handled or trip not in intersection:
            continue
        handled.add(trip)
        index2 = trips['route2'].index(trip)

        # compute similarity score between the two trips (compare merchandise)
        trip_similarities.append(fun_similarity_merchandise(trips1[index1], trips2[index2], prints=prints))
        open1.remove(index1)
        open2.remove(index2)

    # pass 2: pair the remaining trips of route 1 with remaining trips of route 2 that
    # lead to the same destination city (the starting city was changed)
    moved_pairs = []
    for index1 in list(open1):
        city = trips1[index1]['to']
        index2 = next((j for j in open2 if trips2[j]['to'] == city), None)
        if index2 is None:
            # no trip to this city is left in route 2: the trip was cut by the driver
            # and is counted as an error trip below
            continue
        moved_pairs.append((index1, index2))
        open1.remove(index1)
        open2.remove(index2)

    if prints and moved_pairs:
        print('Trips to {} have a new starting location.'.format([trips1[i]['to'] for i, _ in moved_pairs]))

    for index1, index2 in moved_pairs:
        # compute similarity score between the two trips (compare merchandise)
        trip_similarities.append(fun_similarity_merchandise(trips1[index1], trips2[index2], prints=prints))

    # number of added (route 2) or cut (route 1) trips: simply the trips that are still unpaired
    error_trips = len(open1) + len(open2)
    if prints: print('\n\nNumber of added or cut trips by the driver:', error_trips)

    # add zero similarity score for all added or cut trips by the driver
    trip_similarities.extend([0] * error_trips)
    if prints: print('Similarities per trip:', trip_similarities)

    # calculate the similarity score of route 1 and route 2
    similarity_score = np.sum(trip_similarities) / len(trip_similarities)
    if prints: print('\nTotal similarity score:    sim({}, {}) = {}'.format(route1['id'], route2['id'], np.round(similarity_score, 4)))

    return similarity_score

In [417]:
# example pair: the first standard route and the actual route at index 3 of the 's1' subset
route1, route2 = standardroutes[0], actual_routes_s1[3]

### EXECUTE FUNCTION ###
fun_similarity_score(route1, route2)


--------------------------new similarity score computation--------------------------
id:s1, route:
     {'from': 'Milano', 'to': 'Genova', 'merchandise': {'Tea': 16, 'Meat': 22, 'Pasta': 25, 'Water': 30}}
     {'from': 'Genova', 'to': 'Siena', 'merchandise': {'Pasta': 27, 'Pens': 17, 'Meat': 12, 'Honey': 11, 'Butter': 3, 'Bread': 8}}
     {'from': 'Siena', 'to': 'Trento', 'merchandise': {'Carrots': 6, 'Honey': 25, 'Tomatoes': 6, 'Rice': 3, 'Apples': 20}}
     {'from': 'Trento', 'to': 'Lecce', 'merchandise': {'Pasta': 7, 'Fish': 18, 'Apples': 9, 'Milk': 15}}
     {'from': 'Lecce', 'to': 'Bologna', 'merchandise': {'Water': 18, 'Tea': 23, 'Apples': 2, 'Potatoes': 19, 'Bananas': 23, 'Tomatoes': 23}}
     {'from': 'Bologna', 'to': 'Bolzano', 'merchandise': {'Potatoes': 5, 'Cheese': 23, 'Beer': 17, 'Chocolate': 17, 'Water': 16}}


id:a4, driver:E, sroute:s1, route:
     {'from': 'Milano', 'to': 'Genova', 'merchandise': {'Tea': 16, 'Meat': 22, 'Pasta': 25, 'Water': 30}}
     {'from': 'Genova

0.7957671957671958