In [174]:
import pandas as pd
import numpy as np
from geopy.distance import geodesic
from itertools import combinations
from tqdm.auto import tqdm

In [92]:
vanuatu = pd.read_csv('cities/vanuatu.csv', header=None, names=['city', 'latitude', 'longitude'])
italy = pd.read_csv('cities/italy.csv', header=None, names=['city', 'latitude', 'longitude'])
russia = pd.read_csv('cities/russia.csv', header=None, names=['city', 'latitude', 'longitude'])
us = pd.read_csv('cities/us.csv', header=None, names=['city', 'latitude', 'longitude'])
china = pd.read_csv('cities/china.csv', header=None, names=['city', 'latitude', 'longitude'])

In [93]:
def compute_distance(city1, city2):
    return geodesic(
        (city1.latitude, city1.longitude), (city2.latitude, city2.longitude)
        ).km

## Greedy algorithm:
* start from the first line -> no need to choose the starting point as we'd need to go through all of them in a closed cycle anyway
* the cost of moving is the euclidian distance
* once the city is "visited", remove it from df
* add the cost from the last city in a route to the first one, closing the cycle

In [94]:
def greedy_tsp(cities):
    route = [cities.iloc[0]['city']]
    current_city = cities.iloc[0]
    cost = 0
    unvisited = cities.copy()
    unvisited = unvisited.drop(index=0)

    while not unvisited.empty:
        shortest_dist = 1000000
        closest_city_index = None
        for i in unvisited.index:
            dist = compute_distance(current_city, unvisited.loc[i])
            if dist < shortest_dist:
                shortest_dist = dist
                closest_city_index = i
        route.append(unvisited.loc[closest_city_index]['city'])
        cost += shortest_dist
        # print('current_city: ', current_city)
        # print('going to', unvisited.loc[closest_city_index])
        # print('travelling distance', shortest_dist)
        current_city = unvisited.loc[closest_city_index]
        unvisited = unvisited.drop(index=closest_city_index)

    #print(current_city)
    #print(cities.iloc[0])
    distance_return = compute_distance(current_city, cities.iloc[0])
    #print(distance_return)
    cost += distance_return
    
    return route, cost

In [95]:
print(greedy_tsp(vanuatu))

(['Isangel', 'Vila', 'Lakatoro', 'Norsup', 'Luganville', 'Port Olry', 'Longana', 'Sola'], 1475.528091104531)


In [96]:
print(greedy_tsp(italy)[1])

4436.03176952516


In [97]:
print(greedy_tsp(russia)[1])

42334.16465744784


In [98]:
print(greedy_tsp(us)[1])

48050.02586446137


In [99]:
print(greedy_tsp(china)[1])

63962.9184294552


## Greedier algorithm

In [106]:
def create_dist_matrix(cities):
    dist_matrix = np.zeros((cities.shape[0], cities.shape[0]))
    for c1, c2 in combinations(cities.itertuples(), 2):
        dist_matrix[c1.Index, c2.Index] = dist_matrix[c2.Index, c1.Index] = compute_distance(c1,c2)
    return dist_matrix

In [176]:
def greedier_tsp(cities):

    min_cost = np.inf
    min_route = None
    num_cities = cities.shape[0]
    max_iter = np.min((num_cities, 15))

    for _ in tqdm(range(max_iter)):
        dist_matrix = create_dist_matrix(cities)
        route = []
        current_city_index = np.random.randint(num_cities)
        route.append(current_city_index)
        cost = 0

        while not len(route)==num_cities:
            dist_matrix[:, current_city_index] = np.inf
            closest_city_index = np.argmin(dist_matrix[current_city_index])
            route.append(closest_city_index)
            cost += dist_matrix[current_city_index, closest_city_index]
            current_city_index = closest_city_index
        
        cost += compute_distance(cities.iloc[route[-1]], cities.iloc[route[0]])
        route.append(route[0])

        if cost < min_cost:
            min_cost = cost
            min_route = route


    return min_route, min_cost

In [177]:
print(greedier_tsp(vanuatu))

  0%|          | 0/8 [00:00<?, ?it/s]

([0, np.int64(7), np.int64(1), np.int64(4), np.int64(3), np.int64(5), np.int64(2), np.int64(6), 0], np.float64(1475.528091104531))


In [173]:
print(greedier_tsp(italy))

([35, np.int64(21), np.int64(14), np.int64(15), np.int64(34), np.int64(39), np.int64(26), np.int64(0), np.int64(33), np.int64(12), np.int64(30), np.int64(9), np.int64(4), np.int64(19), np.int64(32), np.int64(25), np.int64(28), np.int64(18), np.int64(20), np.int64(3), np.int64(6), np.int64(44), np.int64(45), np.int64(23), np.int64(43), np.int64(41), np.int64(5), np.int64(40), np.int64(22), np.int64(42), np.int64(13), np.int64(16), np.int64(29), np.int64(10), np.int64(27), np.int64(11), np.int64(1), np.int64(2), np.int64(38), np.int64(17), np.int64(31), np.int64(8), np.int64(37), np.int64(24), np.int64(7), np.int64(36), 35], np.float64(4628.352240990477))


In [167]:
print(greedier_tsp(russia)[1])

41526.35101041394


In [168]:
print(greedier_tsp(us)[1])

47538.6574768417


In [169]:
# print(greedier_tsp(china)[1])
# This computation took 12m36s on my machine. Computationally heavy and totally not optimal for large set of cities

63057.19111098851
