# Consolidation IRVE

In [92]:
import requests
import agate
import agateexcel

from pathlib import Path
import cchardet as chardet

In [93]:
# get the list of datasets tagged irve on data.gouv.fr

url = 'https://www.data.gouv.fr/api/1/datasets/?tag=irve&page_size=1000'
r = requests.get(url)
datasets = r.json()['data']

len(datasets)

105

In [94]:
# Download all tabular files in data/ directory, as best as we can

downloaded = []
for d in datasets:
    orga = d['organization']['slug'] if d['organization'] else d['owner']['slug']
    slug = d['slug']
    for r in d['resources']:
        rurl = r['url']
        rid = r['id']
        # ODS style NB: won't work more than once for CKAN
        if 'format=csv' in rurl:
            filename = rurl.split('/')[-3] + '.csv'
        else:
            filename = rurl.split('/')[-1]
        if filename in downloaded:
            print('x existing file %s' % rurl)
            continue
        ext = filename.split('.')[-1]
        if ext not in ['csv', 'xls', 'xlsx']:
            print('x ignored file %s' % rurl)
            continue
        r = requests.get(rurl, allow_redirects=True)
        p = Path('data/%s' % slug)
        p.mkdir(exist_ok=True)
        written_filename = '%s.%s' % (rid, ext) 
        with open('%s/%s' % (p, written_filename), 'wb') as f:
            f.write(r.content)
            downloaded.append(filename)
            print('- downloaded file [%s] %s' % (filename, rurl))
print('Done')

- downloaded file [IRVE_Data_Gouv_Rambouilllet_Territoires.xlsx] https://www.data.gouv.fr/s/resources/bornes-de-recharge-de-rambouillet-territoires/20180511-093502/IRVE_Data_Gouv_Rambouilllet_Territoires.xlsx
- downloaded file [BDD_IRVE-RT78.csv] https://www.data.gouv.fr/s/resources/irve-de-rambouillet-territoires/20161025-134007/BDD_IRVE-RT78.csv
- downloaded file [Borne_de_recharge_Syndicat_Departemental_dEnergie_du_Gers.xls] https://www.data.gouv.fr/s/resources/borne-de-recharge-syndicat-departemental-denergie-du-gers-2/20160411-095030/Borne_de_recharge_Syndicat_Departemental_dEnergie_du_Gers.xls
- downloaded file [IRVE_VILLEDEBEAUVAIS_20170320.csv] https://www.data.gouv.fr/s/resources/bornes-de-recharge-de-la-ville-de-beauvais/20170320-135208/IRVE_VILLEDEBEAUVAIS_20170320.csv
- downloaded file [IRVE_Se61_20180615.csv] https://www.data.gouv.fr/s/resources/irve-se61-20170306/20180724-160703/IRVE_Se61_20180615.csv
- downloaded file [IRVE_Se61_20170306.csv] https://www.data.gouv.fr/s/r

- downloaded file [station_de_recharge_C-CAR.xlsx] https://www.data.gouv.fr/s/resources/jeu-de-donnees-c-car/20170529-175757/station_de_recharge_C-CAR.xlsx
- downloaded file [IRVE_SDESM77_20170228.xlsx] https://www.data.gouv.fr/s/resources/irve-sdesm77-20170228/20170228-141624/IRVE_SDESM77_20170228.xlsx
- downloaded file [Fichier_DataGouv_17-01-17.xlsx] https://www.data.gouv.fr/s/resources/bornes-de-recharge-sde03/20170117-084058/Fichier_DataGouv_17-01-17.xlsx
x existing file https://www.data.gouv.fr/s/resources/bornes-de-recharge-sde03/20170117-084031/Fichier_DataGouv_17-01-17.xlsx
- downloaded file [FFichier_DataGouv_SDE03_160609.xlsx] https://www.data.gouv.fr/s/resources/bornes-de-recharge-sde03/20160614-103821/FFichier_DataGouv_SDE03_160609.xlsx
- downloaded file [irve-saint-louis-20180814.csv] https://static.data.gouv.fr/resources/saint-louis-agglomeration-bornes-de-recharge-pour-vehicule-electrique/20180829-151948/irve-saint-louis-20180814.csv
- downloaded file [reseau-brevcar-01



- downloaded file [irve_wiiizcapg_20180516.csv] https://opendata.ozwillo.com/dataset/5b3f7e3c-df70-4552-8b48-5e96c2009edc/resource/931f6677-cafa-4245-a43f-907d966d3f17/download/irve_wiiizcapg_20180516.csv
- downloaded file [bornes-irve.csv] https://static.data.gouv.fr/resources/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques/20181211-121636/bornes-irve.csv
x existing file https://static.data.gouv.fr/resources/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques/20181211-121636/bornes-irve.csv
- downloaded file [bornes-irve-20181022.csv] https://static.data.gouv.fr/resources/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques/20181022-100255/bornes-irve-20181022.csv
- downloaded file [20180928-bornes-irve.csv] https://static.data.gouv.fr/resources/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques-irve/20180928-171452/20180928-bornes-irve.csv
- downloaded file [IRVE-201605.csv] https://www.data.gouv.fr/s/resources/fichi

- downloaded file [irve-syded-20182012.csv] https://static.data.gouv.fr/resources/irve-syded-recensement-et-information/20181220-101549/irve-syded-20182012.csv
- downloaded file [irve-wiiizcasa-20181005.csv] https://static.data.gouv.fr/resources/infrastructures-de-recharge-pour-vehicules-electriques-irve/20181015-145030/irve-wiiizcasa-20181005.csv
x ignored file https://static.data.gouv.fr/resources/infrastructures-de-recharge-pour-vehicules-electriques-irve/20181015-142721/arrete-irve-joe-20170113-0011-0013.pdf
- downloaded file [irve-sedi-20190114.csv] https://static.data.gouv.fr/resources/sedi-bornes-de-recharge-pour-vehicule-electrique/20190114-124050/irve-sedi-20190114.csv
- downloaded file [irve-paris-recharge-20181207.csv] https://static.data.gouv.fr/resources/paris-recharge/20181207-071323/irve-paris-recharge-20181207.csv
- downloaded file [irve-tem53-20190124.csv] https://static.data.gouv.fr/resources/infrastructures-de-recharge-pour-vehicules-electriques-te53/20190124-102432/

- downloaded file [IRVE_PlusDeBornes_20170314.csv] https://www.data.gouv.fr/s/resources/caracteristiques-des-points-de-charge-pour-vehicules-electriques-plus-de-bornes-ouverts-au-public/20170314-145939/IRVE_PlusDeBornes_20170314.csv
- downloaded file [opendataPlusDeBornes210316.csv] https://www.data.gouv.fr/s/resources/caracteristiques-des-points-de-charge-pour-vehicules-electriques-plus-de-bornes-ouverts-au-public/20160321-220522/opendataPlusDeBornes210316.csv
- downloaded file [opendataPlusDeBornes210316.xlsx] https://www.data.gouv.fr/s/resources/caracteristiques-des-points-de-charge-pour-vehicules-electriques-plus-de-bornes-ouverts-au-public/20160321-220457/opendataPlusDeBornes210316.xlsx
- downloaded file [opendataPlusDeBornes.csv] https://www.data.gouv.fr/s/resources/caracteristiques-des-points-de-charge-pour-vehicules-electriques-plus-de-bornes-ouverts-au-public/community/20150415-164327/opendataPlusDeBornes.csv
- downloaded file [opendataPlusDeBornes.xls] https://www.data.gouv.f



In [95]:
def parse_csv(file_path):
    # deactivate type testing, this puts too much constraint on parsing
    # especially for lat/lon columns with commas
    tester = agate.TypeTester(types=(agate.Text, ), limit=0)
    with file_path.open('rb') as f:
        encoding = chardet.detect(f.read()).get('encoding')
    try:
        table = agate.Table.from_csv(file_path, encoding=encoding, sniff_limit=None, column_types=tester)
    except Exception as e:
        print('[x] CSV parse error for %s (%s)' % (file_path, e))
    else:
        return table

In [96]:
columns_mapping = [
    ('n_amenageur', 'nom_amenageur', 'n_amenageu'), 
    ('n_operateur', 'n_operateu'), 
    ('n_enseigne', ), 
    ('id_station', ), 
    ('n_station', 'nom_station'), 
    ('ad_station', 'adresse_station'), 
    ('code_insee', ), 
    ('Xlongitude', 'longitude_wsg84'), 
    ('Ylatitude', 'latitude_wsg84'), 
    ('nbre_pdc', 'nbre_borne'), 
    ('id_pdc', 'n° borne'), 
    ('puiss_max', ), 
    ('type_prise', 'type_connecteur', 'typ_charge'), 
    ('acces_recharge', 'modalité d\'accès à la borne', 'acces_rech'), 
    ('accessibilité', 'accessibilitã©', 'accessibilite', 'accessibilit�', 'accessibilit‚'), 
    ('observations', ), 
    ('date_maj', ),
]
columns = ['n_amenageur', 'n_operateur', 'n_enseigne', 'id_station', 'n_station', 'ad_station', 'code_insee', 'Xlongitude', 'Ylatitude', 'nbre_pdc', 'id_pdc', 'puiss_max', 'type_prise', 'acces_recharge', 'accessibilité', 'observations', 'date_maj']
columns_low = [x.lower() for x in columns]

In [18]:
# parse CSV files: analyze difference between standard schema and real data
# infer a mapping of column names variation to match a maximum of data later

# /!\ this is only an analysis step that helped build `columns_mapping` above
# you do not need to run this when doing a consolidation

p = Path('data')
for child in [x for x in p.iterdir() if x.is_dir()]:
    csvs = list(child.glob('*.csv'))
    for csv in csvs:
        table = parse_csv(csv)
        if table:
            cols = [x.lower() for x in table.column_names]
            missing_pivot = []
            for pivot in ['id_station', 'id_pdc', 'date_maj']:
                if pivot not in cols:
                    missing_pivot.append(pivot)
            if missing_pivot:
                print('Skipping %s for missing pivot %s, cols were %s' % (csv, missing_pivot, cols))
                break
            diff = list(set(cols) - set(columns_low))
            if diff:
                print('DIFF for %s: %s' % (csv, diff))
print('Done.')



DIFF for data/region-provence-alpes-cote-d-azur/bornes-recharge-agregation-region-reference.csv: ['etat', 'n° borne', 'type', 'financement région', 'commune  de a à z', 'date mise en service', 'a', 'accessibilite', 'state', 'nbre_borne']
DIFF for data/aurelien-ouellette/data_gouv_18.csv: ['accessibilitã©']
DIFF for data/aurelien-ouellette/Reseau_Brevcar_09042018.csv: ['accessibilitã©']
DIFF for data/aurelien-ouellette/IRVE_SDE76_20180411.csv: ['accessibilitã©']
DIFF for data/aurelien-ouellette/20180711Opendata_ADP.csv: ['accessibilitã©']
DIFF for data/gabrielle-caro/irve-sedi-20180905.csv: ['accessibilite']
DIFF for data/gabrielle-caro/irve-sieeen-20180905.csv: ['accessibilite']
DIFF for data/gabrielle-caro/irve-syme05-20180905.csv: ['accessibilite']
DIFF for data/gabrielle-caro/irve-sded-20180905.csv: ['accessibilite']
DIFF for data/gabrielle-caro/irve-syane-20180905.csv: ['accessibilite']
DIFF for data/gabrielle-caro/irve-istres-20180906.csv: ['accessibilite']
DIFF for data/gabrielle



Skipping data/communaute-dagglomeration-du-douaisis/LocalisationBornesElectCAD.csv for missing pivot ['id_station', 'id_pdc', 'date_maj'], cols were ['numéro de la borne ', 'commune  ', 'nom de la borne ', "adresse d'implantation", 'code postal', 'puissance électrique ', 'y_coord_cc50', 'x_coord_cc50', 'point_x_wgs84', 'point_y_wgs84']
DIFF for data/soregies/irve-soregies-20180831.csv: ['r', 'w', 'v', 'u', 's', 'accessibilite', 'x', 't']




Skipping data/parking-effia-effia-stationnement/Stations-recharge-VE-EFFIA.csv for missing pivot ['id_pdc'], cols were ['id_station', 'nom_station', 'adresse_station', 'latitude_wsg84', 'longitude_wsg84', 'nom_porteur', 'type_charge', 'nbre_pdc', 'type_connecteur', 'date_maj', 'observations', 'l', 'm', 'n', 'o']
DIFF for data/service-sig-sdef/IRVE_SDE29_20170822.csv: ['accessibilite']




DIFF for data/service-sig-sdef/IRVE_SDE29_20180404.csv: ['rrrr', 'u', 'dddddd', 'mm', 'oo', 'aaaa', 'rrrrrr', 'xxxxx', 'aa', 'mmmmm', 'eee', 'uuuuu', 'nnnnnn', 'mmm', 'cc', 'zz', 'www', 'w', 'ooooo', 'vvvvvv', 'ee', 'dd', 'uuuu', 'kkk', 'ddd', 'oooooo', 'mmmm', 'aaaaaa', 'ttt', 'ppppp', 's', 'ggg', 'll', 'lllll', 'ccccc', 'oooo', 'hhhhhh', 'wwwww', 'aaa', 'nn', 'rrr', 'jj', 'ff', 'qqqq', 'r', 'z', 'tttttt', 'xxxx', 'bbbb', 'hhhh', 'hhhhh', 'jjjjjj', 'cccccc', 'x', 'rr', 'wwwwww', 'jjjj', 'pppppp', 'ii', 'ttttt', 'pppp', 'vvv', 'bbb', 'nnn', 'eeeee', 'mmmmmm', 'jjjjj', 'zzzzz', 'vvvv', 'v', 'xxx', 'llllll', 'pp', 'jjj', 'sssss', 'gg', 'ssss', 'gggg', 'xx', 'qq', 'qqq', 'llll', 't', 'uu', 'hh', 'ww', 'iiii', 'nnnn', 'iiiiii', 'ddddd', 'zzz', 'iiiii', 'bb', 'kkkkkk', 'ss', 'kkkkk', 'ffffff', 'bbbbb', 'hhh', 'vv', 'y', 'yy', 'yyyy', 'wwww', 'yyyyy', 'eeee', 'ooo', 'qqqqqq', 'ffff', 'ssssss', 'accessibilite', 'sss', 'eeeeee', 'uuu', 'dddd', 'nnnnn', 'iii', 'ppp', 'xxxxxx', 'vvvvv', 'kk', 't



DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20170120.csv: ['r', 'longitude_wsg84', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']
DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20170316.csv: ['r', 'longitude_wsg84', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']




DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20170623.csv: ['r', 'longitude_wsg84', 'accessibilit�', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']
DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20170207.csv: ['longitude_wsg84', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']
DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20180125.csv: ['longitude_wsg84', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']
DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20180806.csv: ['longitude_wsg84', 'accessibilit�', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']
Skipping data/etalab/IRVE-201510.csv for missing pivot ['id_pdc'], cols were ['id_station', 'nom_station', 'adresse_station', 'latitude', 'longitude', 'nom_porteur', 'type_charge', 'nbre_pdc', 'type_connecteur



DIFF for data/cnr/IRVE_CNR_20170731.csv: ['r', 'accessibilite']
DIFF for data/cnr/50_IRVE_CNR_20170210.csv: ['accessibilite']
DIFF for data/cnr/IRVE_CNR_20170330.csv: ['accessibilite']
DIFF for data/syndicat-departemental-denergie-de-laube-1/IRVE_SDEA_20180503.csv: ['accessibilită©']
DIFF for data/helene-singez/IRVE_PASS_PASS_ELECTRIQUE_Valenciennes_Metropole.csv: ['accessibilitã©']
DIFF for data/syndicat-departemental-denergie-du-cher/IRVE_SDE18_20171120.csv: ['accessibilite']
Skipping data/communaute-dagglomeration-de-nevers/BORNE_RECHARGE_ELECTRIQUE_WGS84.csv for missing pivot ['id_station', 'id_pdc', 'date_maj'], cols were ['id', 'adresse', 'commune', 'proprietai', 'deploiemen', 'numero', 'puissance', 'mode_de_ch', 'coord_x', 'coord_y']
	 CSV parse error: Row 0 has 13 values, but Table only has 11 columns.
	 CSV parse error: Row 0 has 18 values, but Table only has 16 columns.
DIFF for data/syndicat-departemental-denergies-de-lardeche/irve-sde07-20180905.csv: ['accessibilite']
Skipp



DIFF for data/mairie-de-sainte-adresse/IRVE_SAINTEADRESSE_01062018.csv: ['r', 'z', 'b', 'w', 'v', 'u', 's', 'y', 'a', 'accessibilite', 'x', 't']
Skipping data/saemes/points-recharges-electriques-parkings.csv for missing pivot ['id_station', 'id_pdc', 'date_maj'], cols were ['source data', 'n° point recharge electrique', 'code parking concerné', 'nom parking', 'type véhicule', 'puissance en kw', 'recharge accessible au public ou réservé aux abonnés', 'niveau du parking de la borne', 'coût de la recharge (€/heure)', 'paiement recharge  compte kiwhi', 'paiement recharge  compte freshmile', 'geo', 'code postal']
DIFF for data/patrice-lozere/IRVE_SDEY_YONNE_20170519.csv: ['commune', 'n_amenageu', 'n_operateu', 'acces_rech', 'accessibilite', 'typ_charge']
Skipping data/ville-d-issy-les-moulineaux/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques-irve.csv for missing pivot ['id_station', 'id_pdc', 'date_maj'], cols were ['{']
DIFF for data/syndicat-intercommunal-de-distribut



DIFF for data/symielecvar-symielec/IRVE_MOUVELECVAR_20180629.csv: ['accessibilite']
DIFF for data/mouvoise/IRVE_SE60_20170630.csv: ['accessibilite']




DIFF for data/mouvoise/IRVE_SE60_20180517.csv: ['aa', 'z', 'modele de borne', 'w', 'v', 'u', 'y', 'fabricant borne', 'téléphone', 'dd', 'x', 'bb', 'accessibilite', "modalité d'accès à la borne", 'cc']




DIFF for data/mouvoise/IRVE_SE60_20180716.csv: ['aa', 'z', 'modele de borne', 'w', 'v', 'u', 'y', 'fabricant borne', 'téléphone', 'dd', 'x', 'bb', 'accessibilite', "modalité d'accès à la borne", 'cc']




DIFF for data/mouvoise/IRVE_SE60_20180420.csv: ['aa', 'z', 'modele de borne', 'w', 'v', 'u', 'y', 'fabricant borne', 'téléphone', 'dd', 'x', 'bb', 'accessibilite', "modalité d'accès à la borne", 'cc']
	 CSV parse error: Row 0 has 2 values, but Table only has 1 columns.




DIFF for data/mouvoise/IRVE_SE60_20180816.csv: ['aa', 'z', 'modele de borne', 'w', 'v', 'u', 'y', 'fabricant borne', 'téléphone', 'dd', 'x', 'bb', 'accessibilite', "modalité d'accès à la borne", 'cc']
DIFF for data/syndicat-intercommunal-denergie-dindre-et-loire-1/IRVE_SIEIL37_20170321.csv: ['accessibilite']
DIFF for data/syndicat-departemental-denergie-de-saone-et-loire/IRVE_SYDESL_20180104.csv: ['accessibilitã©']
Skipping data/data-gouv-fr/Stations-recharge-VE-ville-exemple.csv for missing pivot ['id_pdc'], cols were ['id_station', 'nom_station', 'adresse_station', 'latitude_wsg84', 'longitude_wsg84', 'nom_porteur', 'type_charge', 'nbre_pdc', 'type_connecteur', 'date_maj', 'observations', 'l', 'm', 'n', 'o']




DIFF for data/leonard-du-mas-de-paysac/IRVE_SIDELC_20180209.csv: ['accessibilite']
DIFF for data/leonard-du-mas-de-paysac/IRVE_SYDEGO_20180226.csv: ['accessibilite']
DIFF for data/leonard-du-mas-de-paysac/IRVE_Se61_20170306.csv: ['accessibilite']
	 CSV parse error: Row 0 has 12 values, but Table only has 1 columns.
Skipping data/leonard-du-mas-de-paysac/IRVE_Se61_20180615.csv for missing pivot ['id_station'], cols were ['n_amenageur', 'n_operateur', 'n_enseigne', 'id_emi3_zone', 'libelle_affichable', 'ad_station', 'code_insee', 'xlongitude', 'ylatitude', 'nbre_pdc', 'id_pdc', 'puissance_max', 'type_prise', 'acces_recharge', 'accessibilite', 'observations', 'date_maj']
Skipping data/ville-de-beauvais/IRVE_VILLEDEBEAUVAIS_20170320.csv for missing pivot ['id_station', 'id_pdc', 'date_maj'], cols were ['n_amenageur', '\tn_operateur', '\tn_enseigne', '\tid_station', '\tn_station', '\tad_station', '\tcode_insee', '\txlongitude', '\tylatitude', '\tnbre_pdc', '\tid_pdc', '\tpuiss_max', '\ttype

In [97]:
# use columns_mapping to build a database of unique stations based on (id_station, id_pdc, max(date_maj))

bornes = []

def find_by_pivot(row, lines):
    pass

def ifind_in_row_by_col(col, row):
    for key in row.keys():
        if col.lower() == key.lower():
            return row[key]

p = Path('data')

for child in [x for x in p.iterdir() if x.is_dir()]:
    csvs = list(child.glob('*.csv'))
    for csv in csvs:
        table = parse_csv(csv)
        if table:
            table_cols = [x.lower() for x in table.column_names]
            missing_pivot = []
            for pivot in ['id_station', 'id_pdc', 'date_maj']:
                if pivot not in table_cols:
                    missing_pivot.append(pivot)
            if missing_pivot:
                break
            for row in table.rows:
                borne = {}
                for col in columns_mapping:
                    for c in col:
                        if c.lower() in table_cols:
                            # use the standard name for column
                            borne[col[0]] = ifind_in_row_by_col(c, row)
                            break
                upatt = 'https://www.data.gouv.fr/fr/datasets/%s/#resource-%s'
                borne['source'] = upatt % (csv.parents[0].stem, csv.stem)
                bornes.append(borne)
                
len(bornes)



[x] CSV parse error for data/caracteristiques-et-localisation-des-stations-de-recharge-supercharger-tesla-1/d1651707-d3ed-4206-94e3-43c13610abb1.csv (Row 0 has 13 values, but Table only has 11 columns.)












[x] CSV parse error for data/borne-de-recharge-recharge-a-destination/211f32db-0183-41a9-9d6d-1935f4382608.csv (Row 0 has 18 values, but Table only has 16 columns.)




28839

In [98]:
len(bornes)

28839

In [100]:
# deduplicate based on date_maj and id_pdc
from dateutil.parser import parse

unique_bornes = []

def parse_date(date):
    date = str(date)
    try:
        return parse(date)
    except ValueError:
        return parse('1970-1-1')

# filter out id_pdc==None
# TODO maybe use (id_station, id_pdc) instead of only id_pdc
ids = set([b['id_pdc'] for b in bornes if (b['id_pdc'] and str(b['id_pdc']).strip())])
for _id in ids:
    bs = [b for b in bornes if b['id_pdc'] == _id]
    if len(bs) > 1:
        max_date = max([parse_date(b['date_maj']) for b in bs])
        unique = [b for b in bs if parse_date(b['date_maj']) == max_date][0]
    else:
        unique = bs[0]
    ## Data hacks
    # replace `date_maj` original value with parsed date
    unique['date_maj'] = parse_date(unique['date_maj']).strftime('%Y/%m/%d')
    # replace `Xlongitude` and `Xlatitude` commas with points
    unique['Xlongitude'] = unique['Xlongitude'].replace(',', '.') if unique['Xlongitude'] else ''
    unique['Ylatitude'] = unique['Ylatitude'].replace(',', '.') if unique['Ylatitude'] else ''
    unique_bornes.append(unique)
    
unique_bornes

[{'n_amenageur': 'FDEL 46',
  'n_operateur': 'BOUYGUES ENERGIES ET SERVICES',
  'n_enseigne': 'Révéo',
  'id_station': 'FR*S46*P46087*001',
  'n_station': 'DEGAGNAC - Garage',
  'ad_station': 'Garage 46340 DEGAGNAC',
  'code_insee': '46087',
  'Xlongitude': '1.312516',
  'Ylatitude': '44.664793',
  'nbre_pdc': '4',
  'id_pdc': 'FR*S46*E46087*001*1',
  'puiss_max': '36',
  'type_prise': 'T2',
  'acces_recharge': 'Payant',
  'accessibilité': '24h/24 et 7j/7',
  'observations': 'Recharge par badge et avec une application smartphone',
  'date_maj': '2019/01/24',
  'source': 'https://www.data.gouv.fr/fr/datasets/infrastructures-de-recharge-pour-vehicules-electriques-reveo-1/#resource-42bd8111-0eb4-4624-baeb-d7d9f7af6ea7'},
 {'n_amenageur': 'SEDI',
  'n_operateur': 'SEDI',
  'n_enseigne': 'E Born',
  'id_station': 'FR*S38*P38433*A',
  'n_station': 'ST NIZIER DU MOUCHEROTTE_Place du 14 Avril 1929',
  'ad_station': 'Place du 14 Avril 1929 38250 ST NIZIER DU MOUCHEROTTE',
  'code_insee': '38433

In [101]:
len(bornes), len([b for b in bornes if b['id_pdc']]), len(unique_bornes)

(28839, 24417, 11647)

In [102]:
# enjoy the results!

import csv

# sort by n_amenageur
unique_bornes.sort(key=lambda k: k['n_amenageur'])

with open('bornes-IRVE.csv', 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=columns + ['source'], delimiter=';')
    writer.writeheader()
    for b in unique_bornes:
        writer.writerow(b)

## Debug / test

In [3]:
# encoding

source = 'data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20180806.csv'

table = agate.Table.from_csv(source, sniff_limit=4096)

for row in table.rows[:3]:
    print(row)
    
# conclusion: this file is UTF-8 but data inside are encoded in something else... Nothing we can do automatically

<agate.Row: ('USEDA', 'CITEOS-FRESHMILE', 'DIRVE 02', 'FR*S02*E0255529A', '0029 - Place du March� ', ...)>
<agate.Row: ('USEDA', 'CITEOS-FRESHMILE', 'DIRVE 02', 'FR*S02*E0255529A', '0029 - Place du March� ', ...)>
<agate.Row: ('USEDA', 'CITEOS-FRESHMILE', 'DIRVE 02', 'FR*S02*E026913A', '0003 - GARE1 ', ...)>


In [19]:
parse_date('2018-09-05 00:00:00').strftime('%Y/%m/%d')

'2018/09/05'

In [32]:
# lat/long parsing

table = agate.Table.from_csv('data/alexandre-court-2/IRVE_Reveo_20180820.csv', encoding='latin-1', sniff_limit=4096)
for row in table.rows:
    if 'DEGAGNAC' in row['n_station']:
        print(row['Xlongitude'])

1312516
1312516
1312516
1312516
