In [1]:
import pandas as pd


def read_data_sites_to_visit() -> pd.DataFrame:
    """ Downloads the coordinates of the hotel and of the sites to visit 
    and stacks them into a single dataframe indexed by 'site', with the 
    hotel (the starting location) as the first row """
    base_url = ("https://raw.githubusercontent.com/carlosjuribe/"
                "traveling-tourist-problem/main/data")
    # order matters: the hotel file goes first so the hotel ends up on top
    csv_names = ("location_hotel.csv", "sites_coordinates.csv")

    frames = [
        pd.read_csv(f"{base_url}/{csv_name}", index_col='site')
        for csv_name in csv_names
    ]
    return pd.concat(frames)

# Load the hotel + sites coordinates once; `df_sites` is reused by later cells
df_sites = read_data_sites_to_visit()

# bare last expression so the notebook renders the dataframe
df_sites

Unnamed: 0_level_0,latitude,longitude
site,Unnamed: 1_level_1,Unnamed: 2_level_1
hotel,48.852753,2.354209
Sacre Coeur,48.886782,2.34311
Louvre,48.860719,2.337638
Montmartre,48.887208,2.338884
Port de Suffren,48.85774,2.290202
Arc de Triomphe,48.873968,2.295092
Av. Champs Élysées,48.871056,2.3037
Notre Dame,48.853175,2.349822
Tour Eiffel,48.858503,2.294525


In [2]:
from geopy.distance import geodesic

def ellipsoidal_distance(p1, p2) -> float:
    """ Geodesic distance between p1 and p2, expressed in meters. 
    Each point is given as a (lat, lon) pair """
    separation = geodesic(p1, p2)
    return separation.meters

In [3]:
# Sanity check of the metric on two known sites. Each row of `df_sites` 
# holds (latitude, longitude), so a row is passed directly as a point.
p1 = df_sites.loc['Tour Eiffel']
p2 = df_sites.loc['Louvre']

# result is in meters
ellipsoidal_distance(p1, p2)

3173.119635531859

In [4]:
# Second sanity check, this time on a pair of sites that are close together
ellipsoidal_distance(
    df_sites.loc['Tour Eiffel'],
    df_sites.loc['Port de Suffren']
)

328.3147101635456

In [5]:
def compute_distance_matrix(df_sites, dist_metric=ellipsoidal_distance):
    """ Creates an N x N distance matrix from a dataframe of N locations 
    with a latitude column and a longitude column.

    Parameters
    ----------
    df_sites : pd.DataFrame
        One row per location; each row is handed as-is to `dist_metric`.
    dist_metric : callable
        Function taking (origin_row, destination_row) and returning their 
        distance. Defaults to `ellipsoidal_distance`.

    Returns
    -------
    pd.DataFrame
        Rows are origins and columns are destinations, both labelled with 
        the index of `df_sites`.
    """
    # materialize the rows once instead of re-running iterrows per origin
    locations = [loc for _, loc in df_sites.iterrows()]

    # Build the values first and create the dataframe in one go: pandas then 
    # infers a numeric dtype for numeric distances (filling an empty frame 
    # cell by cell leaves every column as `object`), and cells are placed by 
    # position, so repeated index labels cannot overwrite each other.
    distances = [
        [dist_metric(orig_loc, dest_loc) for dest_loc in locations]  # for each destination
        for orig_loc in locations  # for each origin
    ]
    return pd.DataFrame(distances, index=df_sites.index, 
                        columns=df_sites.index)


# Pairwise distances between all sites, using the default metric
df_distances = compute_distance_matrix(df_sites)

display(df_distances)

site,hotel,Sacre Coeur,Louvre,Montmartre,Port de Suffren,Arc de Triomphe,Av. Champs Élysées,Notre Dame,Tour Eiffel
site,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
hotel,0.0,3870.928331,1504.504794,3993.198652,4729.682743,4937.665959,4228.218702,325.365938,4426.253969
Sacre Coeur,3870.928331,0.0,2926.054506,313.510869,5049.211134,3799.323371,3378.574927,3769.70264,4753.209183
Louvre,1504.504794,2926.054506,0.0,2947.153228,3496.45354,3451.702471,2742.509881,1226.085843,3173.119636
Montmartre,3993.198652,313.510869,2947.153228,0.0,4846.87136,3533.369706,3144.296441,3868.875917,4558.425822
Port de Suffren,4729.682743,5049.211134,3496.45354,4846.87136,0.0,1839.956729,1781.375877,4404.441812,328.31471
Arc de Triomphe,4937.665959,3799.323371,3451.702471,3533.369706,1839.956729,0.0,709.683664,4633.828888,1720.355776
Av. Champs Élysées,4228.218702,3378.574927,2742.509881,3144.296441,1781.375877,709.683664,0.0,3925.119113,1549.780465
Notre Dame,325.365938,3769.70264,1226.085843,3868.875917,4404.441812,4633.828888,3925.119113,0.0,4100.888106
Tour Eiffel,4426.253969,4753.209183,3173.119636,4558.425822,328.31471,1720.355776,1549.780465,4100.888106,0.0


In [6]:
from typing import Tuple

import pandas as pd
from geopy.distance import geodesic


class GeoAnalyzer:
    """ Utils for geolocation information and processing.

    Locations are accumulated in an internal dataframe with a 'latitude' 
    and a 'longitude' column (see `add_locations`), from which a pairwise 
    distance matrix can be computed (see `get_distance_matrix`).
    """  
    _GeoPoint = Tuple[float, float]
    # the two columns that define a point, in the order the metric expects
    _COORD_COLUMNS = ['latitude', 'longitude']
    
    def __init__(self):
        """ Use method `add_locations` to store some locations inside 
        and start using the geo-utilities """
        # Float dtype is declared up front so that concatenating the first 
        # real dataframe does not depend on pandas ignoring an empty 
        # object-dtype frame when choosing the result dtype (that behavior 
        # is deprecated since pandas 2.1).
        self._df_locations = pd.DataFrame(columns=self._COORD_COLUMNS, 
                                          dtype=float)
        # Index name of the last dataframe added. Defined here so that 
        # `get_distance_matrix` also works before any location is added.
        self._name_index = None
        
    #####################   distances   #####################
    @staticmethod
    def ellipsoidal_distance(point1: _GeoPoint, point2: _GeoPoint) -> float:
        """ Calculate ellipsoidal distance (in meters) between point1 
        and point2 where each point is represented as a tuple (lat, lon)
        """
        return geodesic(point1, point2).meters
    #########################################################
    
    @property
    def locations(self):
        """ Dataframe with all the locations stored so far """
        return self._df_locations
    
    @property
    def num_locations(self):
        """ Number of locations stored so far """
        return len(self._df_locations)
        
    def add_locations(self, df_locations: pd.DataFrame):
        """ Geo-location data needed for analysis.

        Rows whose coordinates repeat an already stored row are dropped. 
        Note that the comparison looks at the column values only, not at 
        the index labels.

        Parameters
        ----------
        df_locations : pd.DataFrame
            Dataframe of geographical coordinates with the first column 
            named 'latitude' and the second column named 'longitude'
        """
        # remembered to label the axes of the distance matrix
        self._name_index = df_locations.index.name
        df_updated = pd.concat([self._df_locations, df_locations])
        # drop duplicates just in case the user adds repeated locations
        self._df_locations = df_updated.drop_duplicates()
        
    def get_distance_matrix(self, precision: int = 4) -> pd.DataFrame:
        """ Computes the distance matrix as a dataframe based on the 
        provided location data.

        Parameters
        ----------
        precision : int
            Number of decimals each distance is rounded to.

        Returns
        -------
        pd.DataFrame
            Float matrix with origins as rows and destinations as columns, 
            both labelled with the location names. The name of the metric 
            is available as `.distance_metric` and `.attrs['distance_metric']`.
            With no locations stored, an empty dataframe is returned.
        """
        dist_metric = self.ellipsoidal_distance  # only distance available

        # only the coordinates are handed to the metric, whatever other 
        # columns the user-provided dataframes may have carried
        df_coords = self._df_locations[self._COORD_COLUMNS]
        points = [loc for _, loc in df_coords.iterrows()]

        # for each origin and destination pair, compute distance
        distances = [
            [round(dist_metric(orig_loc, dest_loc), precision) 
             for dest_loc in points]
            for orig_loc in points
        ]

        # `rename` returns a new Index, so naming the axes of the matrix 
        # never touches the index of the stored locations
        labels = self._df_locations.index.rename(self._name_index)
        # built in one go and positionally: float dtype instead of object, 
        # and no label-based assignment that repeated labels could confuse
        df_dist_matrix = pd.DataFrame(distances, index=labels, 
                                      columns=labels, dtype=float)
        
        # a bit of metadata doesn't hurt
        df_dist_matrix.distance_metric = dist_metric.__name__
        # same information in pandas' own metadata container
        df_dist_matrix.attrs['distance_metric'] = dist_metric.__name__
        return df_dist_matrix    
    
    def __repr__(self):
        """ Display number of currently considered locations """
        return f"{self.__class__.__name__}(n_locs={self.num_locations})"

In [7]:
# Same sites as before, now managed by the GeoAnalyzer helper
geo_analyzer = GeoAnalyzer()
geo_analyzer.add_locations(df_sites)

In [8]:
# the repr reports how many locations are stored
display(geo_analyzer)
# the stored coordinates themselves
display(geo_analyzer.locations)

GeoAnalyzer(n_locs=9)

Unnamed: 0,latitude,longitude
hotel,48.852753,2.354209
Sacre Coeur,48.886782,2.34311
Louvre,48.860719,2.337638
Montmartre,48.887208,2.338884
Port de Suffren,48.85774,2.290202
Arc de Triomphe,48.873968,2.295092
Av. Champs Élysées,48.871056,2.3037
Notre Dame,48.853175,2.349822
Tour Eiffel,48.858503,2.294525


In [9]:
# Rebinds `df_distances`, which until now held the unrounded matrix from 
# `compute_distance_matrix`; distances are now rounded (4 decimals by default)
df_distances = geo_analyzer.get_distance_matrix()

display(df_distances)

site,hotel,Sacre Coeur,Louvre,Montmartre,Port de Suffren,Arc de Triomphe,Av. Champs Élysées,Notre Dame,Tour Eiffel
site,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
hotel,0.0,3870.9283,1504.5048,3993.1987,4729.6827,4937.666,4228.2187,325.3659,4426.254
Sacre Coeur,3870.9283,0.0,2926.0545,313.5109,5049.2111,3799.3234,3378.5749,3769.7026,4753.2092
Louvre,1504.5048,2926.0545,0.0,2947.1532,3496.4535,3451.7025,2742.5099,1226.0858,3173.1196
Montmartre,3993.1987,313.5109,2947.1532,0.0,4846.8714,3533.3697,3144.2964,3868.8759,4558.4258
Port de Suffren,4729.6827,5049.2111,3496.4535,4846.8714,0.0,1839.9567,1781.3759,4404.4418,328.3147
Arc de Triomphe,4937.666,3799.3234,3451.7025,3533.3697,1839.9567,0.0,709.6837,4633.8289,1720.3558
Av. Champs Élysées,4228.2187,3378.5749,2742.5099,3144.2964,1781.3759,709.6837,0.0,3925.1191,1549.7805
Notre Dame,325.3659,3769.7026,1226.0858,3868.8759,4404.4418,4633.8289,3925.1191,0.0,4100.8881
Tour Eiffel,4426.254,4753.2092,3173.1196,4558.4258,328.3147,1720.3558,1549.7805,4100.8881,0.0


In [10]:
# `distance_metric` is the metadata attribute attached by `get_distance_matrix`
print(f"Distance metric used: {df_distances.distance_metric}")

Distance metric used: ellipsoidal_distance
