In [102]:
import pandas as pd
import numpy as np
import codecs
import math
import copy
import random

In [103]:
with codecs.open("data/countries.csv", "r", "utf-8", "ignore") as file:
    df = pd.read_table(file,encoding = "utf-8", delimiter=",")

# remove UM because it has no latitude or longitude
df = df[df['country']!='UM']
df = df.dropna(how='any')
df

Unnamed: 0,country,latitude,longitude,name
0,AD,42.546245,1.601554,Andorra
1,AE,23.424076,53.847818,United Arab Emirates
2,AF,33.939110,67.709953,Afghanistan
3,AG,17.060816,-61.796428,Antigua and Barbuda
4,AI,18.220554,-63.068615,Anguilla
...,...,...,...,...
240,YE,15.552727,48.516388,Yemen
241,YT,-12.827500,45.166244,Mayotte
242,ZA,-30.559482,22.937506,South Africa
243,ZM,-13.133897,27.849332,Zambia


In [104]:
# calculate distance
def calc_distance(latitudeA, longitudeA, latitudeB, longitudeB, ):
    rlaA = math.radians(latitudeA)
    rloA = math.radians(longitudeA)
    
    rlaB = math.radians(latitudeB)
    rloB = math.radians(longitudeB)

    # 北極からの角度
    a = math.pi/2-rlaA
    b = math.pi/2-rlaB
    # 経度の差
    C = rloA - rloB

    return math.acos(max(min(math.cos(a) * math.cos(b) + math.sin(a) * math.sin(b) * math.cos(C), 1), -1))

In [105]:
country2lola = {}
for country, latitude, longitude in zip(list(df['country']), list(df['latitude']), list(df['longitude'])):
    country2lola[country] = (latitude, longitude)

In [130]:
distance_matrix = {}
for country_i in list(country2lola.keys()):
    distance_matrix_i = {}
    for country_j in list(country2lola.keys()):
        distance_matrix_i[country_j] = calc_distance(country2lola[country_i][0], country2lola[country_i][1], country2lola[country_j][0], country2lola[country_j][1])
    distance_matrix[country_i] = distance_matrix_i

In [131]:
class JOURNEY():
    route = []
    Ndestinations = 0
    def __init__(self, route):
        self.route         = route
        self.Ndestinations = len(route)
    def get_similar_route(self, distance_matrix): # return {route: distance}
        route2distance = {}
        self_distance  = self.get_total_distance(distance_matrix)
        for i in list(range(self.Ndestinations-1)):
            for j in list(range(self.Ndestinations))[i+1:]:
                #j = i + 1
                #if True:                                    # 出発点と目的点も変える
                if (i != 0 and j != self.Ndestinations - 1): # 出発点と目的点は変えない
                    route_        = copy.deepcopy(self.route)
                    # swap i-th and j-th destinations
                    tmp       = route_[i]
                    route_[i] = route_[j]
                    route_[j] = tmp
                    # calculate total distance
                    similar_journey = JOURNEY(route_)
                    route2distance[similar_journey] = similar_journey.get_total_distance(distance_matrix)
        return route2distance
    def get_total_distance(self, distance_matrix):
        total_distance = 0
        for i in range(self.Ndestinations-1):
            total_distance += distance_matrix[self.route[i]][self.route[i+1]]
        return total_distance

In [132]:
def optimize_journey(route):
    journey = JOURNEY(route)

    T = 1000 # temperature

    while T > 0:
        journey2distance = journey.get_similar_route(distance_matrix)
        min_journey = min(journey2distance, key=journey2distance.get)

        score_difference = min_journey.get_total_distance(distance_matrix) - journey.get_total_distance(distance_matrix)

        if (score_difference < 0):
            journey = min_journey
        elif random.random() < np.exp(-(score_difference)/T):
            journey = min_journey
        print(T, journey.get_total_distance(distance_matrix))
        T -= 1
    return journey.route

In [133]:
route = list(df['country'])

for i in range(len(route)):
    if ( route[i] == 'FI' ):
        route[i] = route[0]
        route[0] = 'FI'
    
    if ( route[i] == 'JP' ):
        route[i] = route[-1]
        route[-1] = 'JP'

journey = JOURNEY(route)

In [134]:
journey.get_total_distance(distance_matrix)

293.1369651628344

In [135]:
optimized_route = optimize_journey(journey.route)

100 283.559207379444
99 275.900660372042
98 269.5186193658177
97 263.27372920444174
96 257.6555579873895
95 252.07638896820214
94 246.99333366026778
93 242.0443040569921
92 237.12512337683586
91 232.2334506299482
90 227.49474231982128
89 223.09799251860701
88 218.94226630275824
87 214.8618468734667
86 211.1032232221833
85 207.34459962860973
84 203.88797795512275
83 200.48932313849906
82 197.10541417277838
81 193.78549637671424
80 190.4716988604774
79 187.40293728473037
78 184.61661778235595
77 181.8859243332998
76 179.17915633450212
75 176.4963490787848
74 174.0765988547388
73 171.80207592889923
72 169.58040004671201
71 167.39605273740807
70 165.38806498178613
69 163.3872052714781
68 161.4187668849287
67 159.47150651723095
66 157.5547931929395
65 156.06917959108782
64 154.6921784388006
63 153.33930177083099
62 152.05280553499514
61 150.7800854898196
60 149.60370787993742
59 148.06608635507672
58 146.89265907839402
57 145.78813424149533
56 144.514110369034
55 143.41146601894525
54 142.3

In [136]:
with codecs.open("data/countries.csv", "r", "utf-8", "ignore") as file:
    df2 = pd.read_table(file,encoding = "utf-8", delimiter=",", index_col = 0)

In [137]:
optimized_df = df2.reindex(optimized_route)
optimized_df.to_csv("output/output.csv", sep = ',')