# Consolidation IRVE

In [9]:
import requests
import agate
import agateexcel

from pathlib import Path
import cchardet as chardet

In [39]:
# get the list of datasets tagged irve on data.gouv.fr

url = 'https://www.data.gouv.fr/api/1/datasets/?tag=irve&page_size=1000'
r = requests.get(url)
datasets = r.json()['data']

In [42]:
# Download all tabular files in data/ directory, as best as we can

downloaded = []
for d in datasets:
    orga = d['organization']['slug'] if d['organization'] else d['owner']['slug']
    for r in d['resources']:
        rurl = r['url']
        # ODS style NB: won't work more than once for CKAN
        if 'format=csv' in rurl:
            filename = rurl.split('/')[-3] + '.csv'
        else:
            filename = rurl.split('/')[-1]
        if filename in downloaded:
            print('x existing file %s' % rurl)
            continue
        ext = filename.split('.')[-1]
        if ext not in ['csv', 'xls', 'xlsx']:
            print('x ignored file %s' % rurl)
            continue
        r = requests.get(rurl, allow_redirects=True)
        p = Path('data/%s' % orga)
        p.mkdir(exist_ok=True)
        with open('%s/%s' % (p, filename), 'wb') as f:
            f.write(r.content)
            downloaded.append(filename)
            print('- downloaded file [%s] %s' % (filename, rurl))
print('Done')

- downloaded file [IRVE_Data_Gouv_Rambouilllet_Territoires.xlsx] https://www.data.gouv.fr/s/resources/bornes-de-recharge-de-rambouillet-territoires/20180511-093502/IRVE_Data_Gouv_Rambouilllet_Territoires.xlsx
- downloaded file [BDD_IRVE-RT78.csv] https://www.data.gouv.fr/s/resources/irve-de-rambouillet-territoires/20161025-134007/BDD_IRVE-RT78.csv
- downloaded file [liste_bornes_CAMVS_2017.xlsx] https://www.data.gouv.fr/s/resources/reseau-de-bornes-de-recharge-pour-vehicules-electrique-sur-la-territoire-de-lagglomeration-maubeuge-val-de-sambre/20170905-171948/liste_bornes_CAMVS_2017.xlsx
- downloaded file [IRVE_SDEA_20180503.csv] https://www.data.gouv.fr/s/resources/bornes-de-recharge-de-laube-irve/20180503-181544/IRVE_SDEA_20180503.csv
- downloaded file [IRVE_SYDEV_20180502.csv] https://www.data.gouv.fr/s/resources/stations-de-recharge-pour-vehicules-electriques-sur-le-territoire-vendeen/20180502-104237/IRVE_SYDEV_20180502.csv
- downloaded file [IRVE_VILLEDEBEAUVAIS_20170320.csv] http

- downloaded file [IRVE_SDE_22.xls] https://www.data.gouv.fr/s/resources/reseau-brevcar-bornes-de-recharge-pour-vehicules-electrique-du-syndicat-departemental-delectricite-des-cotes-darmor-sde22/20170911-132756/IRVE_SDE_22.xls
- downloaded file [IRVE_SYDEGO_20180226.csv] https://www.data.gouv.fr/s/resources/irve-sydego-20180226-1/20180301-183216/IRVE_SYDEGO_20180226.csv
- downloaded file [IRVE_SDE28_20180205.csv.csv] https://www.data.gouv.fr/s/resources/infrastructures-de-recharge-pour-vehicules-electriques-sde28/20180205-185312/IRVE_SDE28_20180205.csv.csv
- downloaded file [IRVE_MOBISDEC_20180209.csv] https://www.data.gouv.fr/s/resources/bornes-de-recharge-mobisdec-syndicat-departemental-denergies-du-calvados/20180209-115402/IRVE_MOBISDEC_20180209.csv
- downloaded file [Stations-recharge-VE-CALVADOS_SDEC_ENERGIE_20170620.xlsx] https://www.data.gouv.fr/s/resources/bornes-de-recharge-mobisdec-syndicat-departemental-denergies-du-calvados/20170620-081755/Stations-recharge-VE-CALVADOS_SDEC



- downloaded file [irve_wiiizcapg_20180516.csv] https://opendata.ozwillo.com/dataset/5b3f7e3c-df70-4552-8b48-5e96c2009edc/resource/931f6677-cafa-4245-a43f-907d966d3f17/download/irve_wiiizcapg_20180516.csv
- downloaded file [IRVE-201605.csv] https://www.data.gouv.fr/s/resources/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques-irve/20160526-112003/IRVE-201605.csv
- downloaded file [IRVE-201510.csv] https://www.data.gouv.fr/s/resources/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques-irve/20151008-182813/IRVE-201510.csv
- downloaded file [IRVE.csv] https://www.data.gouv.fr/s/resources/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques-irve/community/20141212-175434/IRVE.csv
x existing file https://www.data.gouv.fr/s/resources/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques-irve/20141024-183917/IRVE.csv
- downloaded file [IRVE_Reveo_20180820.csv] https://static.data.gouv.fr/resources/infrastructures-de-recharge-pour

- downloaded file [IRVE_USEDADIRVE02_20180125.csv] https://www.data.gouv.fr/s/resources/useda-dirve02/20180125-160036/IRVE_USEDADIRVE02_20180125.csv
- downloaded file [IRVE_USEDADIRVE02_20170623.csv] https://www.data.gouv.fr/s/resources/useda-dirve02/20170623-143528/IRVE_USEDADIRVE02_20170623.csv
- downloaded file [IRVE_USEDADIRVE02_20170316.csv] https://www.data.gouv.fr/s/resources/useda-dirve02/20170316-111520/IRVE_USEDADIRVE02_20170316.csv
- downloaded file [IRVE_USEDADIRVE02_20170207.csv] https://www.data.gouv.fr/s/resources/useda-dirve02/20170207-144515/IRVE_USEDADIRVE02_20170207.csv
- downloaded file [IRVE_USEDADIRVE02_20170120.csv] https://www.data.gouv.fr/s/resources/useda-dirve02/20170123-081540/IRVE_USEDADIRVE02_20170120.csv
- downloaded file [fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques-irve.csv] https://data.issy.com//explore/dataset/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques-irve/download?format=csv&timezone=Europe/Berlin&use

- downloaded file [opendataPlusDeBornes.csv] https://www.data.gouv.fr/s/resources/caracteristiques-des-points-de-charge-pour-vehicules-electriques-plus-de-bornes-ouverts-au-public/community/20150415-164327/opendataPlusDeBornes.csv
- downloaded file [opendataPlusDeBornes.xls] https://www.data.gouv.fr/s/resources/caracteristiques-des-points-de-charge-pour-vehicules-electriques-plus-de-bornes-ouverts-au-public/community/20150415-164348/opendataPlusDeBornes.xls
- downloaded file [Stations-recharge-VE-data.gouv.xlsx] https://www.data.gouv.fr/s/resources/infrastructures-de-recharge-pour-vehicules-electriques-vincipark/20160523-172517/Stations-recharge-VE-data.gouv.xlsx
- downloaded file [Stations-recharge-VE-indigo_3.xlsx] https://www.data.gouv.fr/s/resources/infrastructures-de-recharge-pour-vehicules-electriques-vincipark/20160527-102724/Stations-recharge-VE-indigo_3.xlsx
- downloaded file [Stations-recharge-VE-ville-exemple.csv] https://www.data.gouv.fr/_uploads/resources/Stations-recharge

In [3]:
def parse_csv(file_path):
    with file_path.open('rb') as f:
        encoding = chardet.detect(f.read()).get('encoding')
    try:
        table = agate.Table.from_csv(file_path, encoding=encoding, sniff_limit=4096)
    except Exception as e:
        print('\t CSV parse error: %s' % e)
    else:
        return table

In [18]:
# parse CSV files: analyze difference between standard schema and real data
# infer a mapping of column names variation to match a maximum of data later

columns_mapping = [
    ('n_amenageur', 'nom_amenageur', 'n_amenageu'), 
    ('n_operateur', 'n_operateu'), 
    ('n_enseigne', ), 
    ('id_station', ), 
    ('n_station', 'nom_station'), 
    ('ad_station', 'adresse_station'), 
    ('code_insee', ), 
    ('Xlongitude', 'longitude_wsg84'), 
    ('Ylatitude', 'latitude_wsg84'), 
    ('nbre_pdc', 'nbre_borne'), 
    ('id_pdc', 'n° borne'), 
    ('puiss_max', ), 
    ('type_prise', 'type_connecteur', 'typ_charge'), 
    ('acces_recharge', 'modalité d\'accès à la borne', 'acces_rech'), 
    ('accessibilité', 'accessibilitã©', 'accessibilite', 'accessibilit�', 'accessibilit‚'), 
    ('observations', ), 
    ('date_maj', ),
]
columns = ['n_amenageur', 'n_operateur', 'n_enseigne', 'id_station', 'n_station', 'ad_station', 'code_insee', 'Xlongitude', 'Ylatitude', 'nbre_pdc', 'id_pdc', 'puiss_max', 'type_prise', 'acces_recharge', 'accessibilité', 'observations', 'date_maj']
columns_low = [x.lower() for x in columns]

p = Path('data')
for child in [x for x in p.iterdir() if x.is_dir()]:
    csvs = list(child.glob('*.csv'))
    for csv in csvs:
        table = parse_csv(csv)
        if table:
            cols = [x.lower() for x in table.column_names]
            missing_pivot = []
            for pivot in ['id_station', 'id_pdc', 'date_maj']:
                if pivot not in cols:
                    missing_pivot.append(pivot)
            if missing_pivot:
                print('Skipping %s for missing pivot %s, cols were %s' % (csv, missing_pivot, cols))
                break
            diff = list(set(cols) - set(columns_low))
            if diff:
                print('DIFF for %s: %s' % (csv, diff))
print('Done.')



DIFF for data/region-provence-alpes-cote-d-azur/bornes-recharge-agregation-region-reference.csv: ['etat', 'n° borne', 'type', 'financement région', 'commune  de a à z', 'date mise en service', 'a', 'accessibilite', 'state', 'nbre_borne']
DIFF for data/aurelien-ouellette/data_gouv_18.csv: ['accessibilitã©']
DIFF for data/aurelien-ouellette/Reseau_Brevcar_09042018.csv: ['accessibilitã©']
DIFF for data/aurelien-ouellette/IRVE_SDE76_20180411.csv: ['accessibilitã©']
DIFF for data/aurelien-ouellette/20180711Opendata_ADP.csv: ['accessibilitã©']
DIFF for data/gabrielle-caro/irve-sedi-20180905.csv: ['accessibilite']
DIFF for data/gabrielle-caro/irve-sieeen-20180905.csv: ['accessibilite']
DIFF for data/gabrielle-caro/irve-syme05-20180905.csv: ['accessibilite']
DIFF for data/gabrielle-caro/irve-sded-20180905.csv: ['accessibilite']
DIFF for data/gabrielle-caro/irve-syane-20180905.csv: ['accessibilite']
DIFF for data/gabrielle-caro/irve-istres-20180906.csv: ['accessibilite']
DIFF for data/gabrielle



Skipping data/communaute-dagglomeration-du-douaisis/LocalisationBornesElectCAD.csv for missing pivot ['id_station', 'id_pdc', 'date_maj'], cols were ['numéro de la borne ', 'commune  ', 'nom de la borne ', "adresse d'implantation", 'code postal', 'puissance électrique ', 'y_coord_cc50', 'x_coord_cc50', 'point_x_wgs84', 'point_y_wgs84']
DIFF for data/soregies/irve-soregies-20180831.csv: ['r', 'w', 'v', 'u', 's', 'accessibilite', 'x', 't']




Skipping data/parking-effia-effia-stationnement/Stations-recharge-VE-EFFIA.csv for missing pivot ['id_pdc'], cols were ['id_station', 'nom_station', 'adresse_station', 'latitude_wsg84', 'longitude_wsg84', 'nom_porteur', 'type_charge', 'nbre_pdc', 'type_connecteur', 'date_maj', 'observations', 'l', 'm', 'n', 'o']
DIFF for data/service-sig-sdef/IRVE_SDE29_20170822.csv: ['accessibilite']




DIFF for data/service-sig-sdef/IRVE_SDE29_20180404.csv: ['rrrr', 'u', 'dddddd', 'mm', 'oo', 'aaaa', 'rrrrrr', 'xxxxx', 'aa', 'mmmmm', 'eee', 'uuuuu', 'nnnnnn', 'mmm', 'cc', 'zz', 'www', 'w', 'ooooo', 'vvvvvv', 'ee', 'dd', 'uuuu', 'kkk', 'ddd', 'oooooo', 'mmmm', 'aaaaaa', 'ttt', 'ppppp', 's', 'ggg', 'll', 'lllll', 'ccccc', 'oooo', 'hhhhhh', 'wwwww', 'aaa', 'nn', 'rrr', 'jj', 'ff', 'qqqq', 'r', 'z', 'tttttt', 'xxxx', 'bbbb', 'hhhh', 'hhhhh', 'jjjjjj', 'cccccc', 'x', 'rr', 'wwwwww', 'jjjj', 'pppppp', 'ii', 'ttttt', 'pppp', 'vvv', 'bbb', 'nnn', 'eeeee', 'mmmmmm', 'jjjjj', 'zzzzz', 'vvvv', 'v', 'xxx', 'llllll', 'pp', 'jjj', 'sssss', 'gg', 'ssss', 'gggg', 'xx', 'qq', 'qqq', 'llll', 't', 'uu', 'hh', 'ww', 'iiii', 'nnnn', 'iiiiii', 'ddddd', 'zzz', 'iiiii', 'bb', 'kkkkkk', 'ss', 'kkkkk', 'ffffff', 'bbbbb', 'hhh', 'vv', 'y', 'yy', 'yyyy', 'wwww', 'yyyyy', 'eeee', 'ooo', 'qqqqqq', 'ffff', 'ssssss', 'accessibilite', 'sss', 'eeeeee', 'uuu', 'dddd', 'nnnnn', 'iii', 'ppp', 'xxxxxx', 'vvvvv', 'kk', 't



DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20170120.csv: ['r', 'longitude_wsg84', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']
DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20170316.csv: ['r', 'longitude_wsg84', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']




DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20170623.csv: ['r', 'longitude_wsg84', 'accessibilit�', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']
DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20170207.csv: ['longitude_wsg84', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']
DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20180125.csv: ['longitude_wsg84', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']
DIFF for data/union-des-secteurs-denergie-du-departement-de-laisne/IRVE_USEDADIRVE02_20180806.csv: ['longitude_wsg84', 'accessibilit�', 'adresse_station', 'nom_amenageur', 'latitude_wsg84', 'nom_station']
Skipping data/etalab/IRVE-201510.csv for missing pivot ['id_pdc'], cols were ['id_station', 'nom_station', 'adresse_station', 'latitude', 'longitude', 'nom_porteur', 'type_charge', 'nbre_pdc', 'type_connecteur



DIFF for data/cnr/IRVE_CNR_20170731.csv: ['r', 'accessibilite']
DIFF for data/cnr/50_IRVE_CNR_20170210.csv: ['accessibilite']
DIFF for data/cnr/IRVE_CNR_20170330.csv: ['accessibilite']
DIFF for data/syndicat-departemental-denergie-de-laube-1/IRVE_SDEA_20180503.csv: ['accessibilită©']
DIFF for data/helene-singez/IRVE_PASS_PASS_ELECTRIQUE_Valenciennes_Metropole.csv: ['accessibilitã©']
DIFF for data/syndicat-departemental-denergie-du-cher/IRVE_SDE18_20171120.csv: ['accessibilite']
Skipping data/communaute-dagglomeration-de-nevers/BORNE_RECHARGE_ELECTRIQUE_WGS84.csv for missing pivot ['id_station', 'id_pdc', 'date_maj'], cols were ['id', 'adresse', 'commune', 'proprietai', 'deploiemen', 'numero', 'puissance', 'mode_de_ch', 'coord_x', 'coord_y']
	 CSV parse error: Row 0 has 13 values, but Table only has 11 columns.
	 CSV parse error: Row 0 has 18 values, but Table only has 16 columns.
DIFF for data/syndicat-departemental-denergies-de-lardeche/irve-sde07-20180905.csv: ['accessibilite']
Skipp



DIFF for data/mairie-de-sainte-adresse/IRVE_SAINTEADRESSE_01062018.csv: ['r', 'z', 'b', 'w', 'v', 'u', 's', 'y', 'a', 'accessibilite', 'x', 't']
Skipping data/saemes/points-recharges-electriques-parkings.csv for missing pivot ['id_station', 'id_pdc', 'date_maj'], cols were ['source data', 'n° point recharge electrique', 'code parking concerné', 'nom parking', 'type véhicule', 'puissance en kw', 'recharge accessible au public ou réservé aux abonnés', 'niveau du parking de la borne', 'coût de la recharge (€/heure)', 'paiement recharge  compte kiwhi', 'paiement recharge  compte freshmile', 'geo', 'code postal']
DIFF for data/patrice-lozere/IRVE_SDEY_YONNE_20170519.csv: ['commune', 'n_amenageu', 'n_operateu', 'acces_rech', 'accessibilite', 'typ_charge']
Skipping data/ville-d-issy-les-moulineaux/fichier-consolide-des-bornes-de-recharge-pour-vehicules-electriques-irve.csv for missing pivot ['id_station', 'id_pdc', 'date_maj'], cols were ['{']
DIFF for data/syndicat-intercommunal-de-distribut



DIFF for data/symielecvar-symielec/IRVE_MOUVELECVAR_20180629.csv: ['accessibilite']
DIFF for data/mouvoise/IRVE_SE60_20170630.csv: ['accessibilite']




DIFF for data/mouvoise/IRVE_SE60_20180517.csv: ['aa', 'z', 'modele de borne', 'w', 'v', 'u', 'y', 'fabricant borne', 'téléphone', 'dd', 'x', 'bb', 'accessibilite', "modalité d'accès à la borne", 'cc']




DIFF for data/mouvoise/IRVE_SE60_20180716.csv: ['aa', 'z', 'modele de borne', 'w', 'v', 'u', 'y', 'fabricant borne', 'téléphone', 'dd', 'x', 'bb', 'accessibilite', "modalité d'accès à la borne", 'cc']




DIFF for data/mouvoise/IRVE_SE60_20180420.csv: ['aa', 'z', 'modele de borne', 'w', 'v', 'u', 'y', 'fabricant borne', 'téléphone', 'dd', 'x', 'bb', 'accessibilite', "modalité d'accès à la borne", 'cc']
	 CSV parse error: Row 0 has 2 values, but Table only has 1 columns.




DIFF for data/mouvoise/IRVE_SE60_20180816.csv: ['aa', 'z', 'modele de borne', 'w', 'v', 'u', 'y', 'fabricant borne', 'téléphone', 'dd', 'x', 'bb', 'accessibilite', "modalité d'accès à la borne", 'cc']
DIFF for data/syndicat-intercommunal-denergie-dindre-et-loire-1/IRVE_SIEIL37_20170321.csv: ['accessibilite']
DIFF for data/syndicat-departemental-denergie-de-saone-et-loire/IRVE_SYDESL_20180104.csv: ['accessibilitã©']
Skipping data/data-gouv-fr/Stations-recharge-VE-ville-exemple.csv for missing pivot ['id_pdc'], cols were ['id_station', 'nom_station', 'adresse_station', 'latitude_wsg84', 'longitude_wsg84', 'nom_porteur', 'type_charge', 'nbre_pdc', 'type_connecteur', 'date_maj', 'observations', 'l', 'm', 'n', 'o']




DIFF for data/leonard-du-mas-de-paysac/IRVE_SIDELC_20180209.csv: ['accessibilite']
DIFF for data/leonard-du-mas-de-paysac/IRVE_SYDEGO_20180226.csv: ['accessibilite']
DIFF for data/leonard-du-mas-de-paysac/IRVE_Se61_20170306.csv: ['accessibilite']
	 CSV parse error: Row 0 has 12 values, but Table only has 1 columns.
Skipping data/leonard-du-mas-de-paysac/IRVE_Se61_20180615.csv for missing pivot ['id_station'], cols were ['n_amenageur', 'n_operateur', 'n_enseigne', 'id_emi3_zone', 'libelle_affichable', 'ad_station', 'code_insee', 'xlongitude', 'ylatitude', 'nbre_pdc', 'id_pdc', 'puissance_max', 'type_prise', 'acces_recharge', 'accessibilite', 'observations', 'date_maj']
Skipping data/ville-de-beauvais/IRVE_VILLEDEBEAUVAIS_20170320.csv for missing pivot ['id_station', 'id_pdc', 'date_maj'], cols were ['n_amenageur', '\tn_operateur', '\tn_enseigne', '\tid_station', '\tn_station', '\tad_station', '\tcode_insee', '\txlongitude', '\tylatitude', '\tnbre_pdc', '\tid_pdc', '\tpuiss_max', '\ttype

In [35]:
# use columns_mapping to build a database of unique stations based on (id_station, id_pdc, max(date_maj))

bornes = []

def find_by_pivot(row, lines):
    pass

def ifind_in_row_by_col(col, row):
    for key in row.keys():
        if col.lower() == key.lower():
            return row[key]

for child in [x for x in p.iterdir() if x.is_dir()]:
    csvs = list(child.glob('*.csv'))
    for csv in csvs:
        table = parse_csv(csv)
        if table:
            table_cols = [x.lower() for x in table.column_names]
            missing_pivot = []
            for pivot in ['id_station', 'id_pdc', 'date_maj']:
                if pivot not in table_cols:
                    missing_pivot.append(pivot)
            if missing_pivot:
                break
            for row in table.rows:
                borne = {}
                for col in columns_mapping:
                    for c in col:
                        if c.lower() in table_cols:
                            # use the standard name for column
                            borne[col[0]] = ifind_in_row_by_col(c, row)
                            break
                borne['source'] = str(csv)
                bornes.append(borne)
                
bornes





	 CSV parse error: Row 0 has 13 values, but Table only has 11 columns.
	 CSV parse error: Row 0 has 18 values, but Table only has 16 columns.




	 CSV parse error: Row 0 has 2 values, but Table only has 1 columns.




	 CSV parse error: Row 0 has 12 values, but Table only has 1 columns.


[{'Xlongitude': Decimal('0.007727'),
  'Ylatitude': Decimal('43.187355'),
  'acces_recharge': 'gratuit',
  'accessibilité': '24/24 7/7',
  'ad_station': 'Téléport 2 - D516 - 65290 Juillan',
  'code_insee': Decimal('65235'),
  'date_maj': '01/03/17',
  'id_pdc': 'FR*S65*E65290*TELEP*01',
  'id_station': 'FR*S65*P65290*TELEP',
  'n_amenageur': 'SDE65',
  'n_enseigne': 'Watt else?',
  'n_operateur': 'SDE65',
  'n_station': 'Téléport 2',
  'nbre_pdc': Decimal('2'),
  'observations': 'Badges du SDE65 à commander sur le site : www.sde-65.com/bienvenue/bornes-de-recharge-vehicules-electriques-ve/',
  'puiss_max': Decimal('18'),
  'source': 'data/serge-cieutat/IRVE_SDE65_20171020.csv',
  'type_prise': 'E/F-T2'},
 {'Xlongitude': Decimal('0.007727'),
  'Ylatitude': Decimal('43.187355'),
  'acces_recharge': 'gratuit',
  'accessibilité': '24/24 7/7',
  'ad_station': 'Téléport 2 - D516 - 65290 Juillan',
  'code_insee': Decimal('65235'),
  'date_maj': '01/03/17',
  'id_pdc': 'FR*S65*E65290*TELEP*02'

In [49]:
# deduplicate based on date_maj and id_pdc
from dateutil.parser import parse

unique_bornes = []

def parse_date(date):
    date = str(date)
    try:
        return parse(date)
    except ValueError:
        return parse('1970-1-1')

# filter out id_pdc==None
# TODO maybe use (id_station, id_pdc) instead of only id_pdc
ids = set([b['id_pdc'] for b in bornes if b['id_pdc']])
for _id in ids:
    bs = [b for b in bornes if b['id_pdc'] == _id]
    if len(bs) > 1:
        max_date = max([parse_date(b['date_maj']) for b in bs])
        unique = [b for b in bs if parse_date(b['date_maj']) == max_date][0]
    else:
        unique = bs[0]
    unique_bornes.append(unique)
    
unique_bornes

[{'Xlongitude': Decimal('6.64907300'),
  'Ylatitude': Decimal('44.66132400'),
  'acces_recharge': 'Payant',
  'accessibilité': '24/24 7/7 jours',
  'ad_station': 'Champ de Foire 05600 GUILLESTRE',
  'code_insee': Decimal('5065'),
  'date_maj': Decimal('20180905'),
  'id_pdc': 'FR*S05*E05065*A*B1*D',
  'id_station': 'FR*S05*P05065*A',
  'n_amenageur': 'SYME05',
  'n_enseigne': 'E Born',
  'n_operateur': 'SYME05',
  'n_station': 'GUILLESTRE_Champ de Foire',
  'nbre_pdc': None,
  'observations': None,
  'puiss_max': Decimal('22.00'),
  'source': 'data/gabrielle-caro/irve-syme05-20180905.csv',
  'type_prise': 'E/F + T2'},
 {'Xlongitude': Decimal('7165'),
  'Ylatitude': Decimal('43225879'),
  'acces_recharge': '2|3',
  'accessibilité': 'Informations tarifaires disponibles sur le site de RÉVÉO : https://www.reveocharge.com/fr/comment-ca-marche/tarifs-de-recharge/',
  'ad_station': 'Parking du SDE 65000 TARBES',
  'code_insee': Decimal('65440'),
  'date_maj': '20/08/2018',
  'id_pdc': 'FR*S65

In [50]:
len(bornes), len([b for b in bornes if b['id_pdc']]), len(unique_bornes)

(14173, 12657, 9819)

In [55]:
# enjoy the results!

import csv

with open('bornes-IRVE.csv', 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=columns + ['source'], delimiter=';')
    writer.writeheader()
    for b in unique_bornes:
        writer.writerow(b)