In [1]:
import itertools

import googlemaps
import pandas as pd

# The ten cities whose pairwise distances are compared in this notebook.
CITIES = [
    'Paris',
    'Marseille',
    'Lyon',
    'Toulouse',
    'Nice',
    'Nantes',
    'Montpellier',
    'Strasbourg',
    'Bordeaux',
    'Lille',
]

In [2]:
def get_cities_combination(cities):
    """Build a table of every ordered (origin, dest) pair of cities.

    Each city is paired with every city, itself included, so the result
    has ``len(cities) ** 2`` rows in the order produced by
    ``itertools.product``.

    Args:
        cities: iterable of city names.

    Returns:
        A DataFrame with the two columns 'origin' and 'dest'.
    """
    pairs = itertools.product(cities, repeat=2)
    return pd.DataFrame(data=list(pairs), columns=['origin', 'dest'])

In [3]:
def get_distances(cities, key_path='googlemaps-api-key.txt'):
    """Query the Google Maps distance matrix for every pair of cities.

    The same list is sent as both origins and destinations, and the
    response rows are flattened in order into a single list.

    Args:
        cities: list of city names, used as origins and as destinations.
        key_path: path of the text file holding the Google Maps API key.
            Defaults to the file the notebook has always used.

    Returns:
        A flat list with one ``distance.value`` per (origin, destination)
        element, row after row. Presumably metres, per the Distance Matrix
        API documentation.

    Raises:
        KeyError: if a response element has no 'distance' entry.
            NOTE(review): the API documents this for elements whose status
            is not OK (e.g. no route found) -- confirm whether that should
            be handled rather than raised.
    """
    # Close the key file deterministically and drop the trailing newline most
    # editors append, so it is not sent as part of the key.
    with open(key_path) as key_file:
        api_key = key_file.read().strip()

    gmaps = googlemaps.Client(key=api_key)
    matrix = gmaps.distance_matrix(cities, cities)

    return [
        element['distance']['value']
        for row in matrix['rows']
        for element in row['elements']
    ]

In [4]:
# Build the table of every ordered (origin, dest) pair for the cities in CITIES.
df = get_cities_combination(CITIES)

In [5]:
# Display the origin/dest pairs built in the previous cell.
df

Unnamed: 0,origin,dest
0,Paris,Paris
1,Paris,Marseille
2,Paris,Lyon
3,Paris,Toulouse
4,Paris,Nice
...,...,...
95,Lille,Nantes
96,Lille,Montpellier
97,Lille,Strasbourg
98,Lille,Bordeaux


In [6]:
# Fetch the flat list of distances and attach it to df as a new column.
# get_distances flattens the response row by row, which lines up with the
# itertools.product order of df -- assumes the API returns rows and elements
# in the order the cities were sent (TODO confirm).
distances = get_distances(CITIES)
df['distance'] = distances

In [7]:
# Show the first rows after sorting by ascending distance.
df.sort_values('distance').head()

Unnamed: 0,origin,dest,distance
0,Paris,Paris,0
88,Bordeaux,Bordeaux,0
77,Strasbourg,Strasbourg,0
66,Montpellier,Montpellier,0
55,Nantes,Nantes,0


Problème : on obtient des distances nulles, puisqu'on compare aussi chaque ville à elle-même. Solution : écarter les lignes dont la distance vaut 0.

In [8]:
# Ignore the rows with a zero distance, then show the shortest remaining ones.
(
    df.loc[df['distance'] > 0]
      .sort_values('distance')
      .head()
)

Unnamed: 0,origin,dest,distance
16,Marseille,Montpellier,168615
61,Montpellier,Marseille,168920
14,Marseille,Nice,198833
41,Nice,Marseille,199335
90,Lille,Paris,225202


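Second problème : on aimerait enlever les lignes « équivalentes » (Marseille/Montpellier et Montpellier/Marseille), dont les distances sont presque identiques (168615 contre 168920).
Pour ça, on place (origin, dest) en index, puis on filtre cet index avec .loc[] pour ne garder qu'un seul sens par paire de villes.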

In [9]:
# Index the rows by (origin, dest) so a pair can be looked up with .loc.
df_with_index = df.set_index(['origin', 'dest'])

In [10]:
# Preview the frame now indexed by (origin, dest).
df_with_index.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,distance
origin,dest,Unnamed: 2_level_1
Paris,Paris,0
Paris,Marseille,774919
Paris,Lyon,465611
Paris,Toulouse,678310
Paris,Nice,931889


In [11]:
# Every unordered pair of distinct cities: each pair appears once, in CITIES
# order, and no city is paired with itself.
combinations = list(itertools.combinations(CITIES, 2))

In [12]:
# Display the list of city pairs.
combinations

[('Paris', 'Marseille'),
 ('Paris', 'Lyon'),
 ('Paris', 'Toulouse'),
 ('Paris', 'Nice'),
 ('Paris', 'Nantes'),
 ('Paris', 'Montpellier'),
 ('Paris', 'Strasbourg'),
 ('Paris', 'Bordeaux'),
 ('Paris', 'Lille'),
 ('Marseille', 'Lyon'),
 ('Marseille', 'Toulouse'),
 ('Marseille', 'Nice'),
 ('Marseille', 'Nantes'),
 ('Marseille', 'Montpellier'),
 ('Marseille', 'Strasbourg'),
 ('Marseille', 'Bordeaux'),
 ('Marseille', 'Lille'),
 ('Lyon', 'Toulouse'),
 ('Lyon', 'Nice'),
 ('Lyon', 'Nantes'),
 ('Lyon', 'Montpellier'),
 ('Lyon', 'Strasbourg'),
 ('Lyon', 'Bordeaux'),
 ('Lyon', 'Lille'),
 ('Toulouse', 'Nice'),
 ('Toulouse', 'Nantes'),
 ('Toulouse', 'Montpellier'),
 ('Toulouse', 'Strasbourg'),
 ('Toulouse', 'Bordeaux'),
 ('Toulouse', 'Lille'),
 ('Nice', 'Nantes'),
 ('Nice', 'Montpellier'),
 ('Nice', 'Strasbourg'),
 ('Nice', 'Bordeaux'),
 ('Nice', 'Lille'),
 ('Nantes', 'Montpellier'),
 ('Nantes', 'Strasbourg'),
 ('Nantes', 'Bordeaux'),
 ('Nantes', 'Lille'),
 ('Montpellier', 'Strasbourg'),
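 ('Montpellier', 'Bordeaux'),
 ('Montpellier', 'Lille'),
 ('Strasbourg', 'Bordeaux'),
 ('Strasbourg', 'Lille'),
 ('Bordeaux', 'Lille')]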

In [13]:
# Select only the rows whose (origin, dest) index is one of the pairs in
# `combinations`, then show the first rows sorted by ascending distance.
df_with_index.loc[combinations].sort_values('distance').head()

Unnamed: 0_level_0,Unnamed: 1_level_0,distance
origin,dest,Unnamed: 2_level_1
Marseille,Montpellier,168615
Marseille,Nice,198833
Paris,Lille,225424
Toulouse,Montpellier,243358
Toulouse,Bordeaux,244896


In [14]:
# Tout en un
df.set_index(['origin', 'dest'])\
  .loc[combinations]\
  .reset_index()\
  .sort_values('distance')[:10]

Unnamed: 0,origin,dest,distance
13,Marseille,Montpellier,168615
11,Marseille,Nice,198833
8,Paris,Lille,225424
26,Toulouse,Montpellier,243358
28,Toulouse,Bordeaux,244896
20,Lyon,Montpellier,303813
9,Marseille,Lyon,313682
31,Nice,Montpellier,325779
37,Nantes,Bordeaux,349326
4,Paris,Nantes,384943
