In [102]:
import pandas as pd
import numpy as np
import codecs
import math
import copy
import random

In [103]:
with codecs.open("data/countries.csv", "r", "utf-8", "ignore") as file:
    df = pd.read_table(file,encoding = "utf-8", delimiter=",")

# remove UM because it has no latitude or longitude
df = df[df['country']!='UM']
df = df.dropna(how='any')
df

Unnamed: 0,country,latitude,longitude,name
0,AD,42.546245,1.601554,Andorra
1,AE,23.424076,53.847818,United Arab Emirates
2,AF,33.939110,67.709953,Afghanistan
3,AG,17.060816,-61.796428,Antigua and Barbuda
4,AI,18.220554,-63.068615,Anguilla
...,...,...,...,...
240,YE,15.552727,48.516388,Yemen
241,YT,-12.827500,45.166244,Mayotte
242,ZA,-30.559482,22.937506,South Africa
243,ZM,-13.133897,27.849332,Zambia


In [104]:
# calculate distance
def calc_distance(latitudeA, longitudeA, latitudeB, longitudeB, ):
    rlaA = math.radians(latitudeA)
    rloA = math.radians(longitudeA)
    
    rlaB = math.radians(latitudeB)
    rloB = math.radians(longitudeB)

    # 北極からの角度
    a = math.pi/2-rlaA
    b = math.pi/2-rlaB
    # 経度の差
    C = rloA - rloB

    return math.acos(max(min(math.cos(a) * math.cos(b) + math.sin(a) * math.sin(b) * math.cos(C), 1), -1))

In [105]:
country2lola = {}
for country, latitude, longitude in zip(list(df['country']), list(df['latitude']), list(df['longitude'])):
    country2lola[country] = (latitude, longitude)

In [130]:
distance_matrix = {}
for country_i in list(country2lola.keys()):
    distance_matrix_i = {}
    for country_j in list(country2lola.keys()):
        distance_matrix_i[country_j] = calc_distance(country2lola[country_i][0], country2lola[country_i][1], country2lola[country_j][0], country2lola[country_j][1])
    distance_matrix[country_i] = distance_matrix_i

In [131]:
class JOURNEY():
    route = []
    Ndestinations = 0
    def __init__(self, route):
        self.route         = route
        self.Ndestinations = len(route)
    def get_similar_route(self, distance_matrix): # return {route: distance}
        route2distance = {}
        self_distance  = self.get_total_distance(distance_matrix)
        for i in list(range(self.Ndestinations-1)):
            for j in list(range(self.Ndestinations))[i+1:]:
                #j = i + 1
                #if True:                                    # 出発点と目的点も変える
                if (i != 0 and j != self.Ndestinations - 1): # 出発点と目的点は変えない
                    route_        = copy.deepcopy(self.route)
                    # swap i-th and j-th destinations
                    tmp       = route_[i]
                    route_[i] = route_[j]
                    route_[j] = tmp
                    # calculate total distance
                    similar_journey = JOURNEY(route_)
                    route2distance[similar_journey] = similar_journey.get_total_distance(distance_matrix)
        return route2distance
    def get_total_distance(self, distance_matrix):
        total_distance = 0
        for i in range(self.Ndestinations-1):
            total_distance += distance_matrix[self.route[i]][self.route[i+1]]
        return total_distance

In [138]:
def optimize_journey(route):
    journey = JOURNEY(route)

    T = 1000 # temperature

    while T > 0:
        journey2distance = journey.get_similar_route(distance_matrix)
        min_journey = min(journey2distance, key=journey2distance.get)

        score_difference = min_journey.get_total_distance(distance_matrix) - journey.get_total_distance(distance_matrix)

        if (score_difference < 0):
            journey = min_journey
        elif random.random() < np.exp(-(score_difference)/T):
            journey = min_journey
        print(T, journey.get_total_distance(distance_matrix))
        T -= 1
    return journey.route

In [139]:
route = list(df['country'])

for i in range(len(route)):
    if ( route[i] == 'FI' ):
        route[i] = route[0]
        route[0] = 'FI'
    
    if ( route[i] == 'JP' ):
        route[i] = route[-1]
        route[-1] = 'JP'

journey = JOURNEY(route)

In [140]:
journey.get_total_distance(distance_matrix)

293.1369651628344

In [141]:
optimized_route = optimize_journey(journey.route)

8
915 113.40209441320103
914 112.84842849582674
913 112.47421549493475
912 112.11436134815942
911 111.75693497624279
910 111.31503381798031
909 110.97137830612535
908 110.39471257789829
907 109.410721738649
906 109.06797490628308
905 108.64617074065161
904 108.30875394861133
903 107.98451428165255
902 107.68115257476835
901 107.38429957802951
900 107.03644250560585
899 106.74492886129879
898 106.4547144506702
897 106.18966522327737
896 105.94929294254952
895 105.40890899568605
894 105.08597973992964
893 104.84693120567228
892 104.62530157855345
891 104.39977285413495
890 104.07956631236137
889 103.8636071318151
888 103.6484592890759
887 103.43361264205944
886 103.22014696098977
885 103.01000792671941
884 102.80594150803275
883 102.60618548016748
882 102.23100024208537
881 102.031711674903
880 101.80572271422474
879 101.45958980382034
878 101.23585333058216
877 101.04829133922074
876 100.86350951222734
875 100.68209736252906
874 100.47368391836731
873 100.1067787438567
872 99.8042226097

In [142]:
with codecs.open("data/countries.csv", "r", "utf-8", "ignore") as file:
    df2 = pd.read_table(file,encoding = "utf-8", delimiter=",", index_col = 0)

In [143]:
optimized_df = df2.reindex(optimized_route)
optimized_df.to_csv("output/output.csv", sep = ',')