In [1]:
# dependencies
from pathlib import Path

import numpy as np
import pandas as pd
import urllib3
from bs4 import BeautifulSoup
from geopy.geocoders import IGNFrance

In [2]:
# download the web page that lists the audika shops
page = urllib3.PoolManager().request('GET', 'https://www.audika.fr/centres/annuaire-audioprothesistes')

# stop on an HTTP error: otherwise the error body would be handed to the parser
# as if it were the shop directory, and the failure would only show up later
if page.status != 200:
    raise RuntimeError(f'audika directory request failed with HTTP status {page.status}')

# parse the raw response body with the standard-library HTML parser backend
soup = BeautifulSoup(page.data, features="html.parser")

In [3]:
# collect the text of every shop card and of every address element on the page
card_nodes = soup.find_all('div', class_ = 'm-clinic-tile-card')
address_nodes = soup.find_all('div', class_ = 'm-clinic-tile-card__address')

# shop names: card text with surrounding newlines removed
shop_name = []
for card in card_nodes:
    shop_name.append(card.get_text().strip('\n'))

# addresses: surrounding whitespace removed, then any literal '< /br>' dropped
address = []
for node in address_nodes:
    cleaned = node.get_text().strip()
    address.append(cleaned.replace('< /br>', ''))

# one row per shop; both lists must have the same length for this to succeed
df = pd.DataFrame({'shop_name': shop_name, 'address': address})

In [4]:
# geolocate shops: look up every address with the IGN France geocoder and store
# the coordinates, city and postcode next to it
geolocator = IGNFrance()

# coordinates are numeric, so float NaN marks "not geocoded"
df['latitude'] = np.nan
df['longitude'] = np.nan
# city and postcode receive strings: create them as object columns (still filled
# with NaN) so that writing text into them is not a dtype-incompatible assignment
df['city'] = pd.Series(np.nan, index=df.index, dtype=object)
df['postcode'] = pd.Series(np.nan, index=df.index, dtype=object)

# (row label, reason) for every shop that could not be fully geocoded
geocoding_failures = []

for i in df.index:

    # progress message every 50 rows
    if ((i>0) and (i%50==0)):
        print(f'{i}/{len(df)}')

    try:
        # timeout=None is forwarded to geopy as-is
        # NOTE(review): presumably this disables the request timeout - confirm
        location = geolocator.geocode(df.at[i, 'address'], timeout=None)

        # geocode gives back None when the service finds no match
        if location is None:
            geocoding_failures.append((i, 'no match'))
            continue

        # store the result in the row; coordinates first, so they are kept even
        # if the raw payload lacks one of the keys read below
        df.at[i, 'latitude'] = float(location.latitude)
        df.at[i, 'longitude'] = float(location.longitude)
        df.at[i, 'city'] = location.raw['commune']
        df.at[i, 'postcode'] = location.raw['postal_code']

    except Exception as err:
        # best effort: remember the failure and move on to the next shop;
        # unlike a bare except, this lets KeyboardInterrupt stop the loop
        geocoding_failures.append((i, repr(err)))
        continue

# make skipped rows visible instead of silently leaving them empty
if geocoding_failures:
    print(f'{len(geocoding_failures)}/{len(df)} shops could not be fully geocoded, see geocoding_failures')

50/588
100/588
150/588
200/588
250/588
300/588
350/588
400/588
450/588
500/588
550/588


In [5]:
# check: preview the first five rows of the geocoded table
df.head()

Unnamed: 0,shop_name,address,latitude,longitude,city,postcode
0,Audioprothésiste Audika Bourg-en-Bresse,16 Boulevard de Brou - 01000 Bourg-en-Bresse (...,46.204853,5.229367,Bourg-en-Bresse,1000
1,Audioprothésiste Audika Chatillon-sur-Chalaronne,100 Rue Pasteur - 01400 Châtillon-sur-Chalaron...,46.119722,4.955721,Châtillon-sur-Chalaronne,1400
2,Audioprothésiste Audika Château-Thierry,3 Place de l'Hôtel de Ville - 02400 Château-Th...,49.045511,3.402932,Château-Thierry,2400
3,Audioprothésiste Audika Saint-Quentin,13 Rue Emile Zola - 02100 Saint-Quentin (Aisne),49.846515,3.286195,Saint-Quentin,2100
4,Audioprothésiste Audika Soissons,16 Rue du Collège - 02200 Soissons (Aisne),49.38227,3.325836,Soissons,2200


In [6]:
# check: list each column's dtype and non-null count to spot rows left without a geocoding result
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 588 entries, 0 to 587
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   shop_name  588 non-null    object 
 1   address    588 non-null    object 
 2   latitude   588 non-null    float64
 3   longitude  588 non-null    float64
 4   city       588 non-null    object 
 5   postcode   588 non-null    object 
dtypes: float64(2), object(4)
memory usage: 27.7+ KB


In [7]:
# write the geocoded table to disk without the index column; the target folder
# is created first because to_csv fails when the directory does not exist
output_path = Path('../output/audika_shops_geocoded.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)