## The Clarke-Wright Savings Algorithm

The savings algorithm aims to find a solution to the Vehicle Routing Problem (VRP). It starts off with a trivial solution in which $n$ vehicles each deliver to one of the $n$ nodes. It then merges routes, considering the merges with the highest savings first. The idea is to start with the image on the left and move to the image on the right. 

<img src=img/clark.png width="300" height="200" />


Basically the algorithm can be understood as follows: 
<ol>
  <li>Start off with a trivial solution to the problem. That is, have one vehicle travel from the depot to each node.</li>
  <li>Create a savings matrix such that s(i, j) = distance(i, depot) + distance(depot, j) - distance(i, j)</li>
  <li>Sort your savings matrix so you consider the pairs with the highest savings first</li>
  <li>Merge those nodes if they meet all the feasibility constraints</li>
</ol>         


In [None]:
import pandas as pd
import numpy  as np
from graph import Graph

In [34]:
# Load the three input tables.
#   timedf             -> CSV read with its first column as the row index
#   prep_carry_matches -> prep/carry-in matches; missing cells are replaced with 0
#   delivery_times     -> delivery-time table scanned by time_lookup()
timedf = pd.read_csv('data/PrepSiteTimeMatrix.csv', header=0, index_col=0)
prep_carry_matches = pd.read_csv('data/prep-carry-matches2.csv').fillna(0)
delivery_times = pd.read_csv('data/delivery-times2.csv')

In [23]:
# Defining all the segments. Each segment has one prep site, one/two carry-in sites, and one time window.
# A segment is stored as a tuple of the first three columns of prep_carry_matches, cast to int.
#
# Ex: Segment 0 -> Prep Site = 450 | Carry-Ins = [604,] | Time-Window = (6:15, 7:15)
#     Segment 1 -> Prep Site = 450 | Carry-Ins = [604,] | Time-Window = (9:30, 10:30)
segments = [
    (int(row[0]), int(row[1]), int(row[2]))
    for row in prep_carry_matches.values.tolist()
]

# Creating Time Windows

In [48]:
def time_lookup(carryin_):
    """Return, as a list, the first row of `delivery_times` whose first column equals `carryin_`.

    Raises:
        Exception: if no row's first column equals `carryin_`.
    """
    hit = next((row for row in delivery_times.values if row[0] == carryin_), None)
    if hit is None:
        raise Exception(f'carryin site {carryin_} not found')
    return list(hit)

In [29]:
# Builds a time window from the carry-in sites (one or two per segment) of the given segments.
# The start of the window is considered the earliest arrival time, and the end of the window is considered the
# earliest departure time.

def window_builder(segments_):
    """Build a (start, end) time window from the carry-in sites of `segments_`.

    NOTE(review): this function looks like it is mid-refactor and mixes two
    approaches -- confirm the intended design before relying on it:
      * Passes 1 and 2 count site occurrences and read times via
        `time_lookup`, but `windows` is never filled and the `start`/`end`
        they compute are overwritten by pass 3.
      * Pass 3 reads `time_window_df` and `seen_`, neither of which is
        defined in this function; unless they exist as globals this raises
        NameError.
      * The cell that fills `timewindow` calls this function with two
        arguments (a single segment and a `seen` dict), which does not match
        this one-parameter signature.

    Args:
        segments_: iterable of segments; each is indexed as segment[1] and
            segment[2] (the carry-in site codes). segment[2] == 0 is treated
            as "no second carry-in site".

    Returns:
        A (start, end) tuple of `pd.to_datetime` values.
    """
    windows = {}   # never written to or returned
    seen = {}
    visited = {}
    # Pass 1: count how many times each value appears in positions 1 and 2
    # across all segments (a 0 in position 2 is counted like any other value).
    for segment_ in segments_:
        if segment_[1] in visited:
            visited[segment_[1]] += 1
        else:
            visited[segment_[1]] = 1

        if segment_[2] in visited:
            visited[segment_[2]] += 1
        else:
            visited[segment_[2]] = 1

    # Pass 2: for the first carry-in of each segment, pick columns 3/4 of its
    # time_lookup row when the site was counted exactly once, otherwise
    # columns 1/2. Only the "counted once" branch marks the site as seen.
    # NOTE(review): `indx` and `carry2` are unused, and `start`/`end` are not
    # stored anywhere before the next pass reassigns them.
    for indx, segment_ in enumerate(segments_):
        carry1 = segment_[1]
        carry2 = segment_[2]
        if carry1 not in seen:
            if visited[carry1] == 1:
                time = time_lookup(carry1)
                start = time[3]
                end   = time[4]
                seen[carry1] = True
            else:
                time = time_lookup(carry1)
                start = time[1]
                end   = time[2]



    # Pass 3: per-segment lookup in `time_window_df`.
    for segment_ in segments_:
        # Find the row for the first carry-in site. After the `break`, `i`
        # is the matching row; if nothing matches, `first` is not (re)assigned
        # and `i` is simply the last row scanned.
        for i in time_window_df.values.tolist():
            if i[0] == segment_[1]:
                first = i
                break
        # Use columns 3/4 when column 1 is 'DIA' or NaN, or the site is
        # already in `seen_`; otherwise use columns 1/2 and record the site.
        if (first[1] == 'DIA') or (pd.isna(first[1])) or (first[0] in seen_):
            start = i[3]
            end   = i[4]
        else:
            start = i[1]
            end = i[2]
            seen_[i[0]] = True
        # NOTE(review): this returns from inside the loop, so the first
        # segment without a second carry-in ends the whole function.
        if segment_[2] == 0:
            return (pd.to_datetime(start), pd.to_datetime(end))

        # If there is a second carry-in site, explore that one as well
        # NOTE(review): the match below tests segment_[1] again, not
        # segment_[2] -- presumably a copy/paste slip; confirm.
        for i in time_window_df.values.tolist():
            if i[0] == segment_[1]:
                second = i
                break
        # NOTE(review): this branch checks the local `seen` while the first
        # lookup checks `seen_`; the else-branch assigns `start`/`end` rather
        # than `start2`/`end2`, leaving `start2`/`end2` unset on that path.
        if (second[1] == 'DIA') or (pd.isna(second[1])) or (second[0] in seen):
            start2 = i[3]
            end2   = i[4]
        else:
            start = i[1]
            end = i[2]
            seen[i[0]] = True

    # Compare between the two start and end times: keep the earlier start and
    # the earlier end. This runs once, after the loop, on whatever values the
    # last iteration left behind.
    if pd.to_datetime(start) < pd.to_datetime(start2):
        start = pd.to_datetime(start)
    else:
        start = pd.to_datetime(start2)

    if pd.to_datetime(end) < pd.to_datetime(end2):
        end = pd.to_datetime(end)
    else:
        end = pd.to_datetime(end2)

    return (start, end)
        

In [31]:
segments

[(450, 604, 0),
 (450, 604, 0),
 (292, 181, 0),
 (292, 181, 0),
 (423, 490, 0),
 (423, 490, 0),
 (423, 477, 801),
 (423, 477, 801),
 (214, 192, 0),
 (214, 192, 0),
 (437, 252, 0),
 (218, 999, 0),
 (984, 394, 0),
 (682, 891, 0),
 (682, 532, 602),
 (682, 515, 0),
 (682, 602, 0),
 (682, 497, 515),
 (682, 532, 0),
 (461, 479, 478),
 (461, 479, 478),
 (451, 213, 0),
 (451, 212, 0),
 (451, 212, 213),
 (981, 110, 0),
 (981, 110, 0),
 (405, 264, 0),
 (258, 179, 0),
 (258, 179, 0),
 (248, 168, 0),
 (301, 438, 0),
 (408, 316, 158),
 (408, 158, 316),
 (982, 516, 0),
 (982, 516, 0),
 (464, 499, 0),
 (464, 499, 0),
 (464, 436, 0),
 (412, 223, 203),
 (419, 522, 0),
 (419, 522, 0),
 (424, 509, 0),
 (455, 383, 0),
 (455, 605, 383),
 (455, 488, 0),
 (455, 488, 605),
 (150, 161, 0),
 (150, 161, 0),
 (190, 750, 0),
 (415, 426, 0),
 (415, 426, 473),
 (971, 117, 0),
 (971, 117, 0),
 (275, 533, 0),
 (278, 178, 0),
 (457, 328, 0),
 (457, 328, 0)]

In [30]:
# Map each segment's position in `segments` to the window returned by
# window_builder; the same `seen` dict is handed to every call.
# NOTE(review): window_builder as defined in this file takes one parameter,
# while this call passes two -- confirm which signature is current.
seen = {}
timewindow = {
    position: window_builder(segment, seen)
    for position, segment in enumerate(segments)
}
timewindow

{0: (Timestamp('2020-05-13 06:15:00'), Timestamp('2020-05-13 07:15:00')),
 1: (Timestamp('2020-05-13 09:30:00'), Timestamp('2020-05-13 10:30:00')),
 2: (Timestamp('2020-05-13 06:10:00'), Timestamp('2020-05-13 07:10:00')),
 3: (Timestamp('2020-05-13 09:45:00'), Timestamp('2020-05-13 10:45:00')),
 4: (Timestamp('2020-05-13 07:00:00'), Timestamp('2020-05-13 08:00:00')),
 5: (Timestamp('2020-05-13 10:15:00'), Timestamp('2020-05-13 11:15:00')),
 6: (Timestamp('2020-05-13 09:15:00'), Timestamp('2020-05-13 11:15:00')),
 7: (Timestamp('2020-05-13 09:15:00'), Timestamp('2020-05-13 11:15:00')),
 8: (Timestamp('2020-05-13 08:45:00'), Timestamp('2020-05-13 10:45:00')),
 9: (Timestamp('2020-05-13 08:45:00'), Timestamp('2020-05-13 10:45:00')),
 10: (Timestamp('2020-05-13 03:45:00'), Timestamp('2020-05-13 11:45:00')),
 11: (Timestamp('2020-05-13 09:45:00'), Timestamp('2020-05-13 10:45:00')),
 12: (Timestamp('2020-05-13 06:30:00'), Timestamp('2020-05-13 06:31:00')),
 13: (Timestamp('2020-05-13 08:15:0

# Creating Service Times

In [11]:
len(segments)

49

In [None]:
# Prep sites are the row labels of timedf.
prep = timedf.index.values
# The distance matrix, expressed as a list of row lists.
dist = [list(row) for row in timedf.values]
# The depot is marked as having code 0.
depot = 0

In [None]:
## Building a dictionary which maps a prep site's 3 digit code to its node in the graph,
## i.e. its position in `prep`.
## site_to_indx[174] -> 0
site_to_indx = {code: position for position, code in enumerate(prep)}

In [10]:
## Takes two prep sites and returns the distance between them. Distances are not symmetrical.
## (174, 1109) -> 14.74
## (1109, 174) -> 15.59
def distance(i, j):
    """Look up the entry of `dist` at (row of site i, column of site j)."""
    row = site_to_indx[i]
    col = site_to_indx[j]
    return dist[row][col]

In [None]:
## Calculates the savings from merging two prep sites i, j.
def savings(prep_, depot_, distance_):
    """Return the savings for every ordered pair of distinct non-depot sites.

    For each pair (i, j) the saving is

        s(i, j) = distance(i, depot) + distance(depot, j) - distance(i, j)

    Distances are not assumed symmetric, so (i, j) and (j, i) are both listed.

    Args:
        prep_: iterable of site codes (may include the depot, which is skipped).
        depot_: code of the depot.
        distance_: either a callable ``(a, b) -> cost``, which is then used for
            every lookup, or any non-callable value (e.g. the raw distance
            matrix), in which case the module-level ``distance`` function is
            used.

    Returns:
        A list of ``((i, j), saving)`` tuples, in iteration order of ``prep_``.
    """
    cost = distance_ if callable(distance_) else distance
    pair_savings = []
    for i in prep_:
        if i == depot_:
            continue
        # Cost of the leg i -> depot does not depend on j.
        i_cost = cost(i, depot_)
        for j in prep_:
            if (j == depot_) or (i == j):
                continue
            # The leg that disappears on the j side is depot -> j (not depot -> i).
            j_cost = cost(depot_, j)
            ij_cost = cost(i, j)
            pair_savings.append(((i, j), i_cost + j_cost - ij_cost))
    return pair_savings

In [None]:
## Order the savings ascending, so the pair with the highest savings is at the end of the list
savings_ls = sorted(savings(prep, depot, dist), key=lambda entry: entry[1])

In [None]:
## Creating a graph and applying the savings algorithm.
## Candidate pairs are popped from the end of savings_ls. A pair is merged only
## when graph.is_node_interior() is false for both nodes and
## graph.on_same_route() is false for the pair.
graph = Graph(prep, depot)
MAX_CANDIDATES = 1000  # at most this many savings pairs are examined
i = 0
# Also stop once the list is exhausted: list.pop() on an empty list raises
# IndexError, which the fixed 1000-iteration loop hit whenever fewer than
# 1000 pairs were available.
while i < MAX_CANDIDATES and savings_ls:
    x = savings_ls.pop()
    node1 = x[0][0]
    node2 = x[0][1]
    if not graph.is_node_interior(node1) and not graph.is_node_interior(node2):
        if not graph.on_same_route(x[0]):
            graph.merge(x[0])
    i += 1

In [None]:
# libraries
import networkx as nx
import matplotlib.pyplot as plt
 
# Turn the arcs held by the graph into a two-column edge table
selected_arcs = list(graph.arcs)
start = [arc[0] for arc in selected_arcs]
end = [arc[1] for arc in selected_arcs]
df = pd.DataFrame({'from': start, 'to': end})

# Build the networkx graph from the edge table
G = nx.from_pandas_edgelist(df, source='from', target='to')

# Draw it with node labels and show the figure
nx.draw(G, with_labels=True)
plt.show()
