import pandas as pd
import re
# ^^^ pyforest auto-imports - don't write above this line
# Imports

In [206]:
# pip install dijkstar

In [207]:
import folium
from collections import Counter
import postman_problems
from postman_problems.solver import cpp
from postman_problems.stats import calculate_postman_solution_stats
import networkx as nx
import itertools
import dwave_networkx as dnx
import dimod
import time
import dijkstar

## My Data

In [208]:
stations_df = pd.read_csv("./saved_data/final_station_df.csv", index_col = 0)

<IPython.core.display.Javascript object>

In [209]:
non_unique_stations_df = pd.read_csv("./saved_data/non_unique_mta_stations.csv", index_col=0)

<IPython.core.display.Javascript object>

In [210]:
my_edgelist = pd.read_csv('./saved_data/edge_list_df_no_req.csv', index_col=0)

<IPython.core.display.Javascript object>

In [211]:
node_list_df = pd.read_csv("./saved_data/nodelist_nyc_subway.csv", index_col=0).reset_index()

<IPython.core.display.Javascript object>

In [212]:
node_list_df

Unnamed: 0,station_id,X,Y
0,101,40.889248,-73.898583
1,103,40.884667,-73.900870
2,104,40.878856,-73.904834
3,106,40.874561,-73.909831
4,107,40.869444,-73.915279
...,...,...,...
441,R42,40.634967,-74.023377
442,R43,40.629742,-74.025510
443,R44,40.622687,-74.028398
444,S03,40.674772,-73.957624


# Mini Dijkstar Attempt

## Getting A,C,E Edges

In [215]:
good_indices_ace = []
for idx, x in enumerate(my_edgelist['node1']):
    for letter in 'A':
        if letter in x and letter in my_edgelist['node2'][idx]:
            if idx not in good_indices_ace:
                good_indices_ace.append(idx)

In [216]:
A_edgelist = my_edgelist.iloc[good_indices_ace]

In [217]:
A_edgelist.reset_index(inplace=True, drop=True)

In [218]:
A_edgelist[A_edgelist['node1'] == '112_A09']

Unnamed: 0,node1,node2,trail,color,distance
5,112_A09,A10,nyc subway,red,90
6,112_A09,A12_D13,nyc subway,red,210


## Getting A,C,E Nodes

In [219]:
A_nodelist = node_list_df[node_list_df.station_id.str.contains("A")]
# C_nodelist = node_list_df[node_list_df.station_id.str.contains("C")]
# E_nodelist = node_list_df[node_list_df.station_id.str.contains("E")]
# A_nodelist = pd.concat([A_nodelist, C_nodelist])

In [220]:
A_nodelist.reset_index(inplace=True, drop=True)

In [221]:
A_nodelist.shape

(52, 3)

In [222]:
A_nodelist.head()

Unnamed: 0,station_id,X,Y
0,112_A09,40.840556,-73.940133
1,125_A24,40.768247,-73.981929
2,A02,40.868072,-73.919899
3,A03,40.865491,-73.927271
4,A05,40.859022,-73.93418


## Making TSP Matrix

In [223]:
dijk_ace_graph = dijkstar.Graph(undirected=True)

In [224]:
zipped_edges = list(zip(A_edgelist['node1'], A_edgelist['node2'], A_edgelist['distance']))
for x in zipped_edges:
    dijk_ace_graph.add_edge(x[0], x[1], x[2])

In [225]:
len(zipped_edges)

62

### Find distances between all pairs

In [226]:
def get_indirect_distance(dijkstar_graph, nodelist):
    distance_matrix = []
    for i in range(len(nodelist)):
        start_node = nodelist['station_id'][i]
        one_node_tree = []
        for num in range(i, len(nodelist)):
            dest_node = nodelist['station_id'][num]
            path = dijkstar.find_path(dijkstar_graph, start_node, dest_node)
            path_distance = path.total_cost
            one_node_tree.append(path_distance)
        distance_matrix.append(one_node_tree)  # top right triangle of matrix
    for i in range(len(distance_matrix)):
#         print(f"i = {i}")
        for x in range(i, len(distance_matrix)):
#             print(f"x = {x}")
            if distance_matrix[i][x] != 0:
                distance_matrix[x].insert(i, distance_matrix[i][x])
    return distance_matrix

In [227]:
A_dist_matrix = get_indirect_distance(dijk_ace_graph, A_nodelist)

In [228]:
len(A_dist_matrix)

52

In [229]:
len(A_dist_matrix[0])

52

In [230]:
len(A_dist_matrix[-1])

52

## Using OR Tools

**Note**: This code was adapted from the above python file developed by Google OR-Tools. License can be found here: https://www.apache.org/licenses/LICENSE-2.0

- This is trying to reach every stop and return to the original node

In [231]:
# [START import]
from __future__ import print_function
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
# [END import]

# [START data_model]
def create_data_model(A_dist_matrix):
    """Stores the data for the problem."""
    data = {}
    data['distance_matrix'] = A_dist_matrix
    data['num_vehicles'] = 1
    data['depot'] = 0
    return data
    # [END data_model]


# [START solution_printer]
def print_solution(manager, routing, solution):
    """Prints solution on console."""
    total_seconds = solution.ObjectiveValue()
    print('Objective: {} seconds'.format(total_seconds))
    index = routing.Start(0)
    plan_output = 'Route for vehicle 0:\n'
    route_distance = 0
    while not routing.IsEnd(index):
        plan_output += ' {} ->'.format(manager.IndexToNode(index))
        previous_index = index
        index = solution.Value(routing.NextVar(index))
        route_distance += routing.GetArcCostForVehicle(previous_index, index, 0)
    plan_output += ' {}\n'.format(manager.IndexToNode(index))
    print(plan_output)
    plan_output += 'Route distance: {}miles\n'.format(route_distance)
    return total_seconds, plan_output
    # [END solution_printer]


def main():
    """Entry point of the program."""
    # Instantiate the data problem.
    # [START data]
    data = create_data_model(A_dist_matrix)
    # [END data]

    # Create the routing index manager.
    # [START index_manager]
    manager = pywrapcp.RoutingIndexManager(len(data['distance_matrix']),
                                           data['num_vehicles'], data['depot'])
    # [END index_manager]

    # Create Routing Model.
    # [START routing_model]
    routing = pywrapcp.RoutingModel(manager)

    # [END routing_model]

    # [START transit_callback]
    def distance_callback(from_index, to_index):
        """Returns the distance between the two nodes."""
        # Convert from routing variable Index to distance matrix NodeIndex.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return data['distance_matrix'][from_node][to_node]

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)
    # [END transit_callback]

    # Define cost of each arc.
    # [START arc_cost]
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
    # [END arc_cost]

    # Setting first solution heuristic.
    # [START parameters]
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)
    # [END parameters]

    # Solve the problem.
    # [START solve]
    solution = routing.SolveWithParameters(search_parameters)
    # [END solve]

    # Print solution on console.
    # [START print_solution]
    if solution:
        seconds, route_list = print_solution(manager, routing, solution)
    route_list_text_match = re.match(".+vehicle\s0:\\n\s(.+0)\\n", route_list)
    stops_as_text = route_list_text_match.group(1)
    list_of_stops = stops_as_text.split(" -> ")
    
    return seconds, list_of_stops
    # [END print_solution]


if __name__ == '__main__':
    seconds, list_of_stops = main()
    
# [END program]

Objective: 9150 seconds
Route for vehicle 0:
 0 -> 7 -> 8 -> 9 -> 10 -> 11 -> 12 -> 13 -> 14 -> 15 -> 16 -> 17 -> 18 -> 1 -> 19 -> 20 -> 21 -> 22 -> 23 -> 24 -> 25 -> 26 -> 27 -> 28 -> 29 -> 30 -> 31 -> 32 -> 33 -> 34 -> 35 -> 36 -> 37 -> 38 -> 39 -> 40 -> 41 -> 42 -> 43 -> 44 -> 45 -> 46 -> 47 -> 48 -> 49 -> 50 -> 51 -> 6 -> 5 -> 4 -> 3 -> 2 -> 0



<IPython.core.display.Javascript object>

In [232]:
.13 * 60

7.800000000000001

In [233]:
def get_time_in_hrs(seconds):
    minutes = seconds / 60
    hours = minutes / 60
    remainder_hour = hours % 1
    final_minutes = round((remainder_hour * 60), 2)
    return f"The entire route takes {hours - remainder_hour} hours, {final_minutes} minutes"

### Translating list of stops to station_ids

In [234]:
def tsp_station_ids(list_of_stops, nodelist):
    nodes = nodelist['station_id']
    list_of_nodes = [nodes[int(x)] for x in list_of_stops]
    return list_of_nodes

In [235]:
x = tsp_station_ids(list_of_stops, A_nodelist)

In [236]:
# x

### Getting station names in order

In [237]:
non_unique_stations_df[non_unique_stations_df['stop_id'] == '101']

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,location_type,parent_station
0,101,Van Cortlandt Park - 242 St,40.889248,-73.898583,1,


In [238]:
non_unique_stations_df['stop_name'][0]

'Van Cortlandt Park - 242 St'

In [239]:
def get_station_names(stations_df, tsp_station_ids):
    full_list = []
    for mta_station in tsp_station_ids:
        single_stations = mta_station.split("_")
        station_string = ""
        counter = 0
        for station in single_stations:
            row = stations_df.index[stations_df['stop_id'] == station].tolist()
            for item in row:
                name = stations_df['stop_name'][item]
                if counter >= 1:
                    station_string += " / " + name
                    counter += 1
                elif counter == 0:
                    station_string += name
                    counter += 1
        full_list.append(station_string)
    return full_list

# Applying to All the Data

In [240]:
nyc_graph = dijkstar.Graph(undirected=True)

In [241]:
nyc_zipped_edges = list(zip(my_edgelist['node1'], my_edgelist['node2'], my_edgelist['distance']))
for x in nyc_zipped_edges:
    nyc_graph.add_edge(x[0], x[1], x[2])

In [242]:
non_unique_stations_df[non_unique_stations_df['stop_id'] == '101']

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,location_type,parent_station
0,101,Van Cortlandt Park - 242 St,40.889248,-73.898583,1,


In [243]:
non_unique_stations_df[non_unique_stations_df['stop_id'] == 'D17']

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,location_type,parent_station
786,D17,34 St - Herald Sq,40.749719,-73.987823,1,


In [244]:
my_edgelist[my_edgelist['node1'].str.contains("D17")]

Unnamed: 0,node1,node2,trail,color,distance
301,D17_R17,D18,nyc subway,red,90
302,D17_R17,A32_D20,nyc subway,red,180
514,D17_R17,635_L03_R20,nyc subway,red,150
515,D17_R17,R18,nyc subway,red,60


In [245]:
my_edgelist[my_edgelist['node2'].str.contains("D17")]

Unnamed: 0,node1,node2,trail,color,distance
300,D16,D17_R17,nyc subway,red,120
513,127_725_902_R16,D17_R17,nyc subway,red,90


In [246]:
nyc_dist_matrix = get_indirect_distance(nyc_graph, node_list_df)

NoPathError: Could not find a path from 101 to D17

In [247]:
len(nyc_dist_matrix)

451

## Running Algorithm

In [248]:
# [START import]
from __future__ import print_function
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
# [END import]

# [START data_model]
def create_data_model(nyc_dist_matrix):
    """Stores the data for the problem."""
    data = {}
    data['distance_matrix'] = nyc_dist_matrix
    data['num_vehicles'] = 1
    data['depot'] = 0
    return data
    # [END data_model]


# [START solution_printer]
def print_solution(manager, routing, solution):
    """Prints solution on console."""
    total_seconds = solution.ObjectiveValue()
    print('Objective: {} seconds'.format(total_seconds))
    index = routing.Start(0)
    plan_output = 'Route for vehicle 0:\n'
    route_distance = 0
    while not routing.IsEnd(index):
        plan_output += ' {} ->'.format(manager.IndexToNode(index))
        previous_index = index
        index = solution.Value(routing.NextVar(index))
        route_distance += routing.GetArcCostForVehicle(previous_index, index, 0)
    plan_output += ' {}\n'.format(manager.IndexToNode(index))
    print(plan_output)
    plan_output += 'Route distance: {}miles\n'.format(route_distance)
    return total_seconds, plan_output
    # [END solution_printer]


def main():
    """Entry point of the program."""
    # Instantiate the data problem.
    # [START data]
    data = create_data_model(nyc_dist_matrix)
    # [END data]

    # Create the routing index manager.
    # [START index_manager]
    manager = pywrapcp.RoutingIndexManager(len(data['distance_matrix']),
                                           data['num_vehicles'], data['depot'])
    # [END index_manager]

    # Create Routing Model.
    # [START routing_model]
    routing = pywrapcp.RoutingModel(manager)

    # [END routing_model]

    # [START transit_callback]
    def distance_callback(from_index, to_index):
        """Returns the distance between the two nodes."""
        # Convert from routing variable Index to distance matrix NodeIndex.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return data['distance_matrix'][from_node][to_node]

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)
    # [END transit_callback]

    # Define cost of each arc.
    # [START arc_cost]
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
    # [END arc_cost]

    # Setting first solution heuristic.
    # [START parameters]
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)
    # [END parameters]

    # Solve the problem.
    # [START solve]
    solution = routing.SolveWithParameters(search_parameters)
    # [END solve]

    # Print solution on console.
    # [START print_solution]
    if solution:
        seconds, route_list = print_solution(manager, routing, solution)
    route_list_text_match = re.match(".+vehicle\s0:\\n\s(.+0)\\n", route_list)
    stops_as_text = route_list_text_match.group(1)
    list_of_stops = stops_as_text.split(" -> ")
    
    return seconds, list_of_stops
    # [END print_solution]


if __name__ == '__main__':
    seconds, list_of_stops = main()
    
# [END program]

Objective: 76080 seconds
Route for vehicle 0:
 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9 -> 173 -> 174 -> 175 -> 176 -> 177 -> 10 -> 11 -> 12 -> 13 -> 14 -> 15 -> 16 -> 20 -> 19 -> 18 -> 17 -> 61 -> 60 -> 59 -> 58 -> 92 -> 91 -> 56 -> 55 -> 54 -> 53 -> 52 -> 51 -> 50 -> 49 -> 48 -> 47 -> 46 -> 45 -> 44 -> 43 -> 42 -> 41 -> 40 -> 39 -> 38 -> 114 -> 113 -> 112 -> 111 -> 110 -> 57 -> 105 -> 132 -> 131 -> 130 -> 129 -> 128 -> 127 -> 126 -> 125 -> 124 -> 123 -> 122 -> 121 -> 120 -> 119 -> 118 -> 117 -> 116 -> 115 -> 133 -> 134 -> 135 -> 136 -> 137 -> 138 -> 139 -> 140 -> 141 -> 142 -> 144 -> 145 -> 146 -> 80 -> 79 -> 78 -> 77 -> 76 -> 75 -> 74 -> 90 -> 89 -> 88 -> 87 -> 86 -> 85 -> 84 -> 83 -> 82 -> 81 -> 73 -> 72 -> 71 -> 70 -> 69 -> 37 -> 36 -> 35 -> 25 -> 26 -> 27 -> 28 -> 29 -> 30 -> 31 -> 32 -> 33 -> 34 -> 62 -> 63 -> 64 -> 65 -> 66 -> 67 -> 68 -> 109 -> 108 -> 107 -> 106 -> 152 -> 151 -> 150 -> 149 -> 148 -> 366 -> 194 -> 193 -> 192 -> 191 -> 190 -> 249 -> 286 -> 285 -> 284 -> 32

<IPython.core.display.Javascript object>

In [249]:
hours = get_time_in_hrs(seconds)
hours

'The entire route takes 21.0 hours, 8.0 minutes'

In [250]:
full_nyc_tsp_stations = tsp_station_ids(list_of_stops, node_list_df)

KeyError: 446

In [251]:
# full_nyc_tsp_stations

In [253]:
tsp_names = get_station_names(non_unique_stations_df, full_nyc_tsp_stations)

In [256]:
# tsp_names