Example found [here](https://gist.github.com/ThomasG77/e488251696a00de73631) (gist.github).

Tobacco shop adresses in France from [data.gouv](https://www.data.gouv.fr/fr/datasets/adresses-des-debits-de-tabac/).

In [1]:
import requests
import pandas as pd
import time

In [2]:
df = pd.read_csv('data/annuaire-des-debits-de-tabac-2018.csv', sep=';', encoding="ISO-8859-1", index_col=0)

In [3]:
df.head(2)

Unnamed: 0_level_0,ENSEIGNE,ADRESSE,COMPLEMENT,CODE POSTAL,COMMUNE,NATURE DU DEBIT
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,,32 route des grands champs,TABAC,1400,L-ABERGEMENT-CLEMENCIAT,Ordinaire permanent
2,,42 RUE ROGER VAILLANT,CC DAME LOUISE-TABAC-,1500,AMBERIEU-EN-BUGEY,Ordinaire permanent


In [4]:
df.rename(columns={'CODE POSTAL': 'CODE_POSTAL'}, inplace=True)
df.drop(['ENSEIGNE', 'COMPLEMENT', 'NATURE DU DEBIT'], axis=1, inplace=True)

In [5]:
df['CODE_POSTAL'] = df['CODE_POSTAL'].astype(str).map(lambda x: x.zfill(5))

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 24434 entries, 1 to 24434
Data columns (total 3 columns):
ADRESSE        24370 non-null object
CODE_POSTAL    24434 non-null object
COMMUNE        24434 non-null object
dtypes: object(3)
memory usage: 763.6+ KB


In [7]:
test_address = df.ADRESSE.iloc[0] + ' ' + df.CODE_POSTAL.iloc[0] + ' ' + df['COMMUNE'].iloc[0]
test_address

'32 route des grands champs 01400 L-ABERGEMENT-CLEMENCIAT'

In [8]:
URL = 'http://api-adresse.data.gouv.fr/search/'

In [9]:
response = requests.get(URL, params={'q': test_address, 'limit': 1})
response.json()

{'attribution': 'BAN',
 'version': 'draft',
 'features': [{'properties': {'street': 'Route des Grands Champs',
    'postcode': '01400',
    'label': "32 Route des Grands Champs 01400 L'Abergement-Clémenciat",
    'name': '32 Route des Grands Champs',
    'y': 6563089.5,
    'score': 0.9101818181818182,
    'context': '01, Ain, Auvergne-Rhône-Alpes (Rhône-Alpes)',
    'citycode': '01001',
    'housenumber': '32',
    'city': "L'Abergement-Clémenciat",
    'id': 'ADRNIVX_0000002006758334',
    'importance': 0.012,
    'type': 'housenumber',
    'x': 848126.2},
   'type': 'Feature',
   'geometry': {'coordinates': [4.919419, 46.151419], 'type': 'Point'}}],
 'limit': 1,
 'licence': 'ODbL 1.0',
 'type': 'FeatureCollection',
 'query': '32 route des grands champs 01400 L-ABERGEMENT-CLEMENCIAT'}

In [10]:
features = response.json()['features'][0]
lat, lon = None, None
coords = features['geometry']['coordinates']
if features['geometry']['type'] == 'Point':
    lon = coords[0]
    lat = coords[1]
print('lat.:', lat, 
      ', lon.:', lon,
      ', score:', features['properties']['score'])

lat.: 46.151419 , lon.: 4.919419 , score: 0.9101818181818182


In [13]:
def get_coords(address):
    response = requests.get(URL, params={'q': address, 'limit': 1})
    geom_type, lat, lon, score = None, None, None, None
    try:
        features = response.json()['features'][0]
        score = features['properties']['score']
        geom_type = features['geometry']['type']
        coords = features['geometry']['coordinates']
        if geom_type == 'Point':
            lon = coords[0]
            lat = coords[1]
    except:
        print("No response...")
    return geom_type, lat, lon, score

In [14]:
i = 0
geom_types = []
scores = []
lats = []
lons = []
for row in df.itertuples():
    
    address = row.ADRESSE + ' ' + row.CODE_POSTAL + ' ' + row.COMMUNE
    geom_type, lat, lon, score = get_coords(address)
#     print('geom. type:', geom_type, 'lat.:', lat, ', lon.:', lon, ', score:', score)
    geom_types.append(geom_type)
    scores.append(score)
    lats.append(lat)
    lons.append(lon)
    i += 1
    if i == 1000:
        break
    if i % 100 == 0:
        print("processed", i, "addresses over", len(df))
    time.sleep(0.1)

processed 100 addresses over 24434
processed 200 addresses over 24434
processed 300 addresses over 24434
processed 400 addresses over 24434
No response...
processed 500 addresses over 24434


TypeError: unsupported operand type(s) for +: 'float' and 'str'

In [None]:
if len(geom_types) == len(df):
    df['geom_type'] = pd.Series(geom_types).values
if len(scores) == len(df):
    df['score'] = pd.Series(scores).values
if len(lats) == len(df):
    df['lat'] = pd.Series(lats).values
if len(lons) == len(df):
    df['lon'] = pd.Series(lons).values

In [None]:
# # Utillisez http://localhost:7878 si vous utilisez une instance locale.
# ADDOK_URL = 'http://api-adresse.data.gouv.fr/search/csv/'


# def geocode(filepath_in):
#     with open(filepath_in, 'rb') as f:
#         filename, response = post_to_addok(filepath_in, f.read())
#         write_response_to_disk(filename, response)


# def geocode_chunked(filepath_in, filename_pattern, chunk_by):
#     with open(filepath_in, 'r') as bigfile:
#         headers = bigfile.readline()
#         current_lines = bigfile.readlines(chunk_by)
#         i = 1
#         while current_lines:
#             current_filename = filename_pattern.format(i)
#             current_csv = ''.join([headers] + current_lines)
#             filename, response = post_to_addok(current_filename, current_csv)
#             write_response_to_disk(filename, response)
#             current_lines = bigfile.readlines(chunk_by)
#             i += 1


# def write_response_to_disk(filename, response, chunk_size=1024):
#     with open(filename, 'wb') as fd:
#         for chunk in response.iter_content(chunk_size=chunk_size):
#             fd.write(chunk)


# def post_to_addok(filename, filelike_object):
#     files = {'data': (filename, filelike_object)}
#     response = requests.post(ADDOK_URL, files=files)
#     # You might want to use https://github.com/g2p/rfc6266
#     content_disposition = response.headers['content-disposition']
#     filename = content_disposition[len('attachment; filename="'):-len('"')]
#     return filename, response


# # Geocoder votre fichier en une fois s'il est petit.
# geocode('data.csv')
# # => data.geocoded.csv

# # Sinon, geocoder-le par morceaux quand il est gros.
# chunk_by = 50 * 2  # approximative number of lines.
# geocode_chunked('data.csv', 'result-{}.csv', chunk_by)
# # => result-1.geocoded.csv, result-2.geocoded.csv, etc

In [None]:
# requests_session = requests.Session()

# kwargs = {
#     'data': OrderedDict([
#         ('columns', ['numero_libelle_de_voie', 'commune']),
#         ('postcode', 'code_postal')
#     ]),
#     'method': 'post',
#     'files': OrderedDict([
#         ('data', ('annuaire-des-debits-de-tabac-part.csv',
#          io.BytesIO(
#              open('annuaire-des-debits-de-tabac-part.csv', 'rb').read()
#          )))
#     ]),
#     'stream': True,
#     'url': 'http://api-adresse.data.gouv.fr/search/csv/'
# }

# response = requests_session.request(**kwargs)

# with codecs.open('/tmp/out.csv', 'wb', 'utf-8') as f:
# f.write(response.text)