# **Functions to compute a similarity score**

In [1]:
import numpy as np
import copy
from tqdm import tqdm

# run notebook to load function show_route() to visualize routes
%run "functions_data_generation.ipynb"

Functions for data generation successfully loaded.


In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import sys
import json

# The route files are read from the 'data' subfolder of the current working
# directory, so the notebook has to be started from the folder containing it.
current_directory = os.getcwd()
subfolder_path = 'data'

# Load the standard routes. The encoding is given explicitly because open()
# otherwise falls back to the platform's locale encoding, which is not
# guaranteed to be UTF-8 (the encoding JSON files are expected to use).
file_path = os.path.join(current_directory, subfolder_path, 'standard20.json')
with open(file_path, 'r', encoding='utf-8') as file:
    standardroutes = json.load(file)

# Load the actual routes driven by the drivers (same encoding reasoning as above).
file_path = os.path.join(current_directory, subfolder_path, 'actual20.json')
with open(file_path, 'r', encoding='utf-8') as file:
    actual_routes = json.load(file)

### **Function to compute a similarity score between two trips**

In [3]:
def fun_similarity_merchandise(trip1, trip2, prints=True):
    """Compute a quantity-weighted similarity score between the merchandise of two trips.

    trip1 is treated as the reference trip and trip2 as the trip modified by
    the driver. The products are split into four groups:

      1. products in both trips with identical quantity      -> corr_quantities
      2. products in both trips with a changed quantity      -> old_quantities,
         changes_up (sum of increases), changes_down (sum of reductions)
      3. products only in trip1 (cut by the driver)          -> cut_quantities
      4. products only in trip2 (added by the driver)        -> added_quantities

    The score is

        (corr_quantities + (old_quantities - changes_down))
        / (corr_quantities + (old_quantities + changes_up) + cut_quantities + added_quantities)

    Parameters
    ----------
    trip1, trip2 : dict
        Trips with a 'merchandise' entry mapping product -> quantity. The
        entries trip1['from'] and trip1['to'] are only read for printing.
        Quantities are assumed to be integers: every sum is truncated with
        int() -- TODO confirm that no fractional quantities occur.
    prints : bool, default True
        If truthy, print a step-by-step explanation of the computation.

    Returns
    -------
    int or float
        1 if both merchandise dictionaries are equal (or differ only in
        entries with zero quantity), otherwise the ratio described above.
    """
    if prints:
        print('--------------------------new trip--------------------------')
        print('Trip from {} to {}:\n'.format(trip1['from'], trip1['to']))

    merchandise1 = trip1['merchandise']
    merchandise2 = trip2['merchandise']

    # check if driver made any modifications
    if merchandise1 == merchandise2:
        trip_similarity_score = 1
        if prints: print('No modification by the driver.')
    else:
        products1 = set(merchandise1)
        products2 = set(merchandise2)

        # products that occur in both trips (quantities may still differ)
        corr_products = products1 & products2
        if prints: print('Correct products:', corr_products)

        ### OPTION 1: products whose quantity is identical in both trips
        corr_prod_and_quant = {product for product in corr_products if merchandise1[product] == merchandise2[product]}
        corr_quantities = int(np.sum([merchandise1[product] for product in corr_prod_and_quant]))
        if corr_prod_and_quant and prints:
            print('  - Correct quantity: {} with sum: {}'.format({product: merchandise1[product] for product in corr_prod_and_quant}, corr_quantities))

        ### OPTION 2: products whose quantity was changed and the respective quantity modifications
        corr_prod_and_wrong_quant = list(corr_products - corr_prod_and_quant)
        old_quantities = int(np.sum([merchandise1[product] for product in corr_prod_and_wrong_quant]))
        quantity_changes = [merchandise2[product] - merchandise1[product] for product in corr_prod_and_wrong_quant]
        changes_up = int(np.sum([change for change in quantity_changes if change > 0]))
        changes_down = int(np.abs(np.sum([change for change in quantity_changes if change < 0])))
        if corr_prod_and_wrong_quant and prints:
            print('  - Changed quantites (+/-):', dict(zip(corr_prod_and_wrong_quant, quantity_changes)))

        ### OPTION 3: products (and their quantities) that the driver cut from trip 1
        cut_products = list(products1 - corr_products)
        cut_quantities = int(np.sum([merchandise1[product] for product in cut_products]))
        if cut_products and prints:
            print('Driver cut products: {} with sum: {}'.format({product: merchandise1[product] for product in cut_products}, cut_quantities))

        ### OPTION 4: products (and their quantities) that the driver added in trip 2
        added_products = list(products2 - corr_products)
        added_quantities = int(np.sum([merchandise2[product] for product in added_products]))
        if added_products and prints:
            print('Driver added products: {} with sum: {}'.format({product: merchandise2[product] for product in added_products}, added_quantities))

        # numerator: all correct quantities + (the modified quantities minus their reductions)
        numerator = corr_quantities + (old_quantities - changes_down)
        # denominator: all correct quantities + (all modified quantities plus their increases) + cut products + added products
        denominator = corr_quantities + (old_quantities + changes_up) + cut_quantities + added_quantities

        if denominator == 0:
            # The dictionaries differ only in entries whose quantities sum to zero
            # (e.g. {} vs {'x': 0}). No quantity was actually changed, so treat the
            # trips as identical instead of raising ZeroDivisionError.
            trip_similarity_score = 1
        else:
            trip_similarity_score = numerator / denominator

        # view results
        if prints:
            print('\nSimilarity score formula between the trips to {}:'.format(trip1['to']))
            print('    sim(trip1, trip2) = ({} + ({}-{})) / ({} + ({}+{}) + {} + {})'.format(corr_quantities, old_quantities, changes_down, corr_quantities, old_quantities, changes_up, cut_quantities, added_quantities))
    if prints: print('    sim(trip1, trip2) = {}\n'.format(np.round(trip_similarity_score, 4)))

    return trip_similarity_score

In [4]:
# Example: compared to trip1, the driver dropped 'Water' and 'Tomatoes' in trip2
# and left the quantities of all other products untouched.
trip1 = {
    'from': 'Lecce',
    'to': 'Bologna',
    'merchandise': {'Water': 18, 'Tea': 23, 'Apples': 2, 'Potatoes': 19, 'Bananas': 23, 'Tomatoes': 23},
}
trip2 = {
    'from': 'Lecce',
    'to': 'Bologna',
    'merchandise': {'Tea': 23, 'Apples': 2, 'Potatoes': 19, 'Bananas': 23},
}

# last expression of the cell, so the score is displayed as the cell output
fun_similarity_merchandise(trip1, trip2, prints=False)

0.6203703703703703

### **Function to compute a similarity score between two routes**

In [5]:
# route 1 as standardroute and route 2 as actual route
def fun_similarity_score(route1, route2, prints=True):
    """Compute the similarity score between a standard route and an actual route.

    Trips of both routes are paired by their destination city ('to') and each
    pair is scored with fun_similarity_merchandise(). If a city occurs several
    times as destination, the pairs are chosen greedily by highest merchandise
    similarity until the trips to that city are exhausted in one of the routes.
    Every trip that ends up without a partner (cut from route 1 or added to
    route 2 by the driver) contributes a score of 0. The route score is the
    mean over all trip scores.

    Neither input route is modified.

    Parameters
    ----------
    route1 : dict
        Standard route; read keys: 'id' (printing only) and 'route', a list of
        trips with the keys 'to' and 'merchandise'.
    route2 : dict
        Actual route with the same structure.
    prints : bool, default True
        If truthy, print the routes (via show_route) and all intermediate results.

    Returns
    -------
    float
        Mean of the per-trip similarities. If both routes contain no trips at
        all there is nothing that differs, so 1.0 is returned (instead of the
        nan that a mean over an empty list would give).
    """
    if prints:
        print('\n--------------------------new similarity score computation--------------------------')
        for route in [route1, route2]: show_route(route)

    trips1 = route1['route']
    trips2 = route2['route']

    # destinations of both routes and the cities they have in common
    destinations1 = {trip['to'] for trip in trips1}
    destinations2 = {trip['to'] for trip in trips2}
    destinations = destinations1 & destinations2
    if prints:
        print('Trips to {} occuring in both routes.'.format(destinations))
        if (destinations != destinations1): print('Trips to {} not found in route 2.'.format(destinations1 - destinations))
        if (destinations != destinations2): print('Found additional trips to {} in route 2.\n'.format(destinations2 - destinations))

    # positions of the trips that have not been paired yet; tracking indices
    # (instead of removing trips from copied lists) keeps the inputs untouched
    # and cannot confuse two trips that happen to be equal
    unmatched1 = set(range(len(trips1)))
    unmatched2 = set(range(len(trips2)))

    # visit the shared cities in order of first appearance in route 1 so that
    # the printed output is reproducible between runs
    shared_cities = [city for city in dict.fromkeys(trip['to'] for trip in trips1) if city in destinations2]

    trip_similarities = []
    for city in shared_cities:

        indizes1 = [i for i, trip in enumerate(trips1) if trip['to'] == city]
        indizes2 = [j for j, trip in enumerate(trips2) if trip['to'] == city]

        # merchandise similarity of every combination of trips to this city
        scores = [
            {'index route 1': i, 'index route 2': j, 'score': fun_similarity_merchandise(trips1[i], trips2[j], prints=False)}
            for i in indizes1
            for j in indizes2
        ]

        if (len(scores) > 1) and prints:
            print('\n{} appeared multiple times in a trip as destination. The trip combination with highest similarity score is taken:\n{}'.format(city, scores))

        # Greedy pairing: repeatedly take the best remaining combination and
        # drop all combinations that reuse one of its trips. This continues
        # until one route has no unpaired trip to this city left, so a second
        # trip to the same city is paired too instead of being counted as
        # "cut" in route 1 and "added" in route 2.
        while scores:
            best = max(scores, key=lambda entry: entry['score'])  # first maximum, like np.argmax
            index1 = best['index route 1']
            index2 = best['index route 2']

            # re-run the comparison only to print its details
            if prints: fun_similarity_merchandise(trips1[index1], trips2[index2], prints)
            trip_similarities.append(best['score'])

            unmatched1.remove(index1)
            unmatched2.remove(index2)
            scores = [entry for entry in scores if entry['index route 1'] != index1 and entry['index route 2'] != index2]

    # trips without a partner: cut from route 1 or added to route 2 by the driver
    remaining1 = [trip for i, trip in enumerate(trips1) if i in unmatched1]
    remaining2 = [trip for j, trip in enumerate(trips2) if j in unmatched2]

    # view remaining trips in both routes
    if prints:
        print('Remaining trips per route (cut or added by the driver):')
        for route, remaining in [(route1, remaining1), (route2, remaining2)]:
            # shallow copy of the route that only contains the unpaired trips
            if remaining: show_route(dict(route, route=remaining))

    # number of added (route 2) or cut (route 1) trips
    error_trips = len(remaining1) + len(remaining2)
    if prints: print('Number of cut or added trips by the driver:', error_trips)

    # every cut or added trip counts with zero similarity
    trip_similarities.extend([0] * error_trips)
    if prints: print('Similarities per trip:', [np.round(value, 4) for value in trip_similarities])

    # similarity score of route 1 and route 2
    if trip_similarities:
        similarity_score = np.mean(trip_similarities)
    else:
        # both routes are empty: identical by definition, avoid nan from np.mean([])
        similarity_score = 1.0
    if prints: print('\nTotal similarity score (Mean of trips similarities):    sim({}, {}) = {}'.format(route1['id'], route2['id'], np.round(similarity_score, 4)))

    return similarity_score

In [6]:
# # define two example routes
# route1 = standardroutes[13]
# route2 = actual_routes[69817]

# ### EXECUTE FUNCTION ###
# fun_similarity_score(route1, route2, prints=False)

0.7159090909090909

### **Function to compute all similarity scores between standardroutes and their actual routes** ###

In [7]:
# this function will be executed in the evaluation notebook
def compute_similarites(input_sr, input_ar, prints=True, bar=True):
    """Compute the similarity score of every actual route to its standard route.

    Parameters
    ----------
    input_sr : iterable of dict
        Standard routes; each needs an 'id' plus whatever fun_similarity_score()
        reads from a route. Ids are used as dictionary keys and are therefore
        assumed to be hashable (e.g. strings) -- TODO confirm.
    input_ar : iterable of dict
        Actual routes with the keys 'id', 'driver' and 'sroute' (id of the
        standard route the actual route belongs to).
    prints : bool, default True
        Passed on to fun_similarity_score() to print the details of every comparison.
    bar : bool, default True
        Passed to tqdm as ``leave`` (keep the progress bar after the loop) and
        also controls the summary printed at the end.

    Returns
    -------
    list of dict
        One entry per actual route with the keys 'id', 'driver', 'sroute' and 'score'.

    Raises
    ------
    IndexError
        If an actual route references a standard route id that is not in input_sr.
    """
    # Index the standard routes by id once instead of scanning the whole list
    # for every actual route. setdefault keeps the first route of a duplicated
    # id, which is the route a first-match scan would pick.
    standardroutes_by_id = {}
    for standardroute in input_sr:
        standardroutes_by_id.setdefault(standardroute['id'], standardroute)

    # list collecting one result dictionary per actual route
    similarity_scores = []

    # compute similarity score for each standardroute-actual-route pair
    for actual_route in tqdm(input_ar, desc='Processing', unit='iteration', leave=bar):

        actual_route_id = actual_route['id']
        driver = actual_route['driver']
        standardroute_id = actual_route['sroute']

        # get the corresponding standard route
        if standardroute_id not in standardroutes_by_id:
            # IndexError is kept as exception type for callers that already
            # catch it; the message now names the offending ids.
            raise IndexError('Standard route {!r} referenced by actual route {!r} not found.'.format(standardroute_id, actual_route_id))
        standardroute = standardroutes_by_id[standardroute_id]

        similarity_score = fun_similarity_score(route1=standardroute, route2=actual_route, prints=prints)

        similarity_scores.append({'id': actual_route_id, 'driver': driver, 'sroute': standardroute_id, 'score': similarity_score})

    if bar:
        print('\nAll scores successfully computed.')
        # a mean over zero scores would only yield nan and a RuntimeWarning
        if similarity_scores:
            print('Mean similarity score:', np.round(np.mean([entry['score'] for entry in similarity_scores]), 4))

    return similarity_scores

In [8]:
# Confirmation message printed once all cells above have run; presumably meant
# as feedback when this notebook is executed from another notebook via %run.
print('Functions for similarity score computations successfully loaded.')

Functions for similarity score computations successfully loaded.
