In [2]:
import pandas as pd
import json

In [23]:
# Export the MÉLENCHON / LE PEN comparison for the rows selected by `le_pen`
# (defined elsewhere): the two label columns from `totaux`, then both
# candidates' columns from `votes` and `exprimes`, each prefixed with its
# source table so the column names do not collide.
(
    pd.concat(
        [
            totaux[['departement_libelle', 'circo_libelle']],
            votes[['MÉLENCHON', 'LE PEN']].add_prefix('votes '),
            exprimes[['MÉLENCHON', 'LE PEN']].add_prefix('exprimés '),
        ],
        axis='columns',
    )
    .loc[le_pen]
    .to_csv('contre_lepen.csv', sep=';', encoding='cp1252', decimal=',')
)

In [24]:
# Export the MÉLENCHON / MACRON comparison for the rows selected by `macron`
# (defined elsewhere): the two label columns from `totaux`, then both
# candidates' columns from `votes` and `exprimes`, each prefixed with its
# source table so the column names do not collide.
(
    pd.concat(
        [
            totaux[['departement_libelle', 'circo_libelle']],
            votes[['MÉLENCHON', 'MACRON']].add_prefix('votes '),
            exprimes[['MÉLENCHON', 'MACRON']].add_prefix('exprimés '),
        ],
        axis='columns',
    )
    .loc[macron]
    .to_csv('contre_macron.csv', sep=';', encoding='cp1252', decimal=',')
)

In [12]:
# True where MÉLENCHON's votes amount to at least 12.5% of `inscrits`.
qualifie = votes['MÉLENCHON'].div(totaux['inscrits']).ge(0.125)

In [20]:
# Bucket MÉLENCHON's value in `exprimes` into four labelled bands.
# With pd.cut's defaults each band includes its upper edge but not its lower
# one, so a value of exactly 0 falls outside every band (NaN).
force = pd.cut(
    exprimes['MÉLENCHON'],
    bins=[.0, .1, .2, .3, 1],
    labels=['-10%', '10-20%', '20-30%', '+30%'],
)

In [35]:
# Export MÉLENCHON's column from `exprimes` next to every `totaux` column,
# sorted by ascending MÉLENCHON value.
(
    pd.concat([exprimes[['MÉLENCHON']], totaux], axis='columns')
    .sort_values(by='MÉLENCHON')
    .to_csv('votes_melenchon.csv', sep=';', encoding='cp1252', decimal=',')
)

In [34]:
# Export every `exprimes` column preceded by the two label columns from
# `totaux`, sorted by ascending MÉLENCHON value.
(
    pd.concat(
        [totaux[['departement_libelle', 'circo_libelle']], exprimes],
        axis='columns',
    )
    .sort_values(by='MÉLENCHON')
    .to_csv('exprimes.csv', sep=';', encoding='cp1252', decimal=',')
)

In [37]:
# Export `totaux` and `votes` side by side, ordered by index.
(
    pd.concat([totaux, votes], axis='columns')
    .sort_index()
    .to_csv('votes_absolus.csv', sep=';', encoding='cp1252', decimal=',')
)

In [23]:
# Load the topology JSON document. The context manager closes the file
# handle as soon as parsing is done instead of leaving it to the garbage
# collector.
with open('data/circos_topo.json') as topo_file:
    topology = json.load(topo_file)

In [24]:
# Attach results to each geometry's properties, looking every value up by the
# geometry's 'ID' in tables indexed by that ID.
for geometry in topology['objects']['circos']['geometries']:
    proprietes = geometry['properties']
    circo = proprietes['ID']
    # Values are converted to plain dict / int / bool / str before storing.
    proprietes['votes'] = exprimes.loc[circo].to_dict()
    proprietes['rang'] = int(rang.loc[circo])
    proprietes['qualifie'] = bool(qualifie.loc[circo])
    proprietes['force'] = str(force.loc[circo])

In [25]:
# Write the enriched topology. The context manager guarantees the output is
# flushed and closed when the dump finishes, rather than whenever the
# anonymous file object happens to be collected.
with open('data/circos_exprimes.json', 'w') as sortie:
    json.dump(topology, sortie)

In [26]:
import csv

In [31]:
# Open the raw results export as latin1 text for the exploratory cells below.
# NOTE(review): this handle is never closed in the visible cells.
f = open('data/PR17_BVot_T1_FE.txt', 'r', encoding='latin1')

In [32]:
# Parse the open file as semicolon-delimited rows.
r = csv.reader(f, delimiter=';')

In [33]:
# Consume the first row of the reader; it is treated as the header row.
headers = next(r)

In [51]:
# Split the header row: everything before the last 7 names, and those last 7.
champs_globaux, champs_candidats = headers[:-7], headers[-7:]

In [44]:
# Pull the next row from the reader to inspect its layout.
ligne = next(r)

In [54]:
# Everything in the row that comes after the global columns.
valeurs_candidats = ligne[len(champs_globaux):]

In [58]:
# Starting 2 cells into the candidate part and stepping by 7 picks one value
# per candidate; the output below shows these are the candidates' names.
ligne[len(champs_globaux)+2::7]

['DUPONT-AIGNAN',
 'LE PEN',
 'MACRON',
 'HAMON',
 'ARTHAUD',
 'POUTOU',
 'CHEMINADE',
 'LASSALLE',
 'MÉLENCHON',
 'ASSELINEAU',
 'FILLON']

In [88]:
# Maps each raw global header to its cleaned column name, in file order.
# A value of None marks a column that is not carried over to the cleaned file.
headers_correct = dict([
    # Identification of the polling station.
    ('Code du département', 'departement'),
    ('Libellé du département', 'departement_libelle'),
    ('Code de la circonscription', 'circo'),
    ('Libellé de la circonscription', 'circo_libelle'),
    ('Code de la commune', 'commune'),
    ('Libellé de la commune', 'commune_libelle'),
    ('Code du b.vote', 'bureau'),
    # Counts are kept; the percentage columns are mapped to None.
    ('Inscrits', 'inscrits'),
    ('Abstentions', 'abstentions'),
    ('% Abs/Ins', None),
    ('Votants', 'votants'),
    ('% Vot/Ins', None),
    ('Blancs', 'blancs'),
    ('% Blancs/Ins', None),
    ('% Blancs/Vot', None),
    ('Nuls', 'nuls'),
    ('% Nuls/Ins', None),
    ('% Nuls/Vot', None),
    ('Exprimés', 'exprimes'),
    ('% Exp/Ins', None),
    ('% Exp/Vot', None),
])

In [123]:
# Rewrite the raw per-bureau export as a tidy CSV: the kept global columns
# (renamed through `headers_correct`) followed by one column per candidate.
# Both files are opened with newline='' as the csv module requires, and the
# output is written as UTF-8 explicitly so that it does not depend on the
# platform default and matches what pd.read_csv expects by default.
with open('data/PR17_BVot_T1_FE.txt', 'r', encoding='latin1', newline='') as brut, \
        open('output/2017_cleaned.csv', 'w', encoding='utf-8', newline='') as cleaned:
    r = csv.reader(brut, delimiter=';')

    headers = next(r)

    # Everything before the last 7 header names is treated as global columns;
    # the rest of each row is read in strides of 7 cells per candidate.
    global_fields = headers[:-7]

    # The first data row fixes the order of the candidate columns.
    line = next(r)
    candidats = line[len(global_fields)+2::7]

    # Global columns mapped to None in `headers_correct` are dropped.
    global_indices = [i for i, field in enumerate(global_fields) if headers_correct[field] is not None]
    fields = [headers_correct[global_fields[i]] for i in global_indices] + candidats

    # Rewind the file actually being read, then skip the header again, so the
    # data row consumed above is exported too. (The previous `f.seek(0)` acted
    # on an unrelated handle from an earlier cell, so the reader was never
    # rewound and the first two data rows were lost.)
    brut.seek(0)
    next(r)

    w = csv.writer(cleaned)

    w.writerow(fields)

    for line in r:
        noms = line[len(global_fields)+2::7]
        # Not named `scores`, to avoid clobbering the `scores` DataFrame used
        # by other cells of this notebook.
        scores_ligne = line[len(global_fields)+4::7]

        # Pair values by candidate name so each one lands in the right column
        # even if a row lists the candidates in a different order.
        paired_scores = dict(zip(noms, scores_ligne))

        w.writerow([line[i] for i in global_indices] + [paired_scores[candidat] for candidat in candidats])

In [4]:
# Load the cleaned per-bureau results, forcing the department, bureau and
# commune codes to strings (the output below shows zero-padded department
# codes such as '01').
# NOTE(review): the cleaning cell writes 'output/2017_cleaned.csv' while this
# reads 'data/2017_cleaned.csv' — confirm the file is moved/copied in between.
scores = pd.read_csv('data/2017_cleaned.csv', dtype={'departement': str, 'bureau': str, 'commune': str})

In [5]:
# One row per distinct (code, label) department pair.
scores.loc[:, ['departement', 'departement_libelle']].drop_duplicates()

Unnamed: 0,departement,departement_libelle
0,01,Ain
587,02,Aisne
1566,03,Allier
2020,04,Alpes-de-Haute-Provence
2281,05,Hautes-Alpes
2492,06,Alpes-Maritimes
3464,07,Ardèche
3900,08,Ardennes
4462,09,Ariège
4845,10,Aube


In [125]:
# Build the grouping key: department code followed by the circo value
# rendered as text and left-padded with zeros to 3 characters.
circo_sur_3 = scores['circo'].map(str).str.pad(3, fillchar='0')
scores['code'] = scores['departement'] + circo_sur_3

In [126]:
# Aggregation applied to each column when rows are grouped: identifying
# columns keep their first value, every count column is summed.
# Key order is preserved because it drives the order of the output columns.
aggregations = {
    **{
        colonne: 'first'
        for colonne in (
            'departement',
            'departement_libelle',
            'circo',
            'circo_libelle',
        )
    },
    **{
        colonne: 'sum'
        for colonne in (
            'inscrits',
            'abstentions',
            'votants',
            'blancs',
            'nuls',
            'exprimes',
            'DUPONT-AIGNAN',
            'LE PEN',
            'MACRON',
            'HAMON',
            'ARTHAUD',
            'POUTOU',
            'CHEMINADE',
            'LASSALLE',
            'MÉLENCHON',
            'ASSELINEAU',
            'FILLON',
        )
    },
}

In [127]:
# Collapse the rows sharing the same `code` using `aggregations`, then export.
par_circo = scores.groupby(['code']).agg(aggregations)
par_circo.to_csv('output/2017_par_circo.csv')

In [6]:
# Load the circonscription geometry file. The context manager closes the
# file handle as soon as parsing is done instead of leaving it to the
# garbage collector.
with open('raw/circos.json') as geo_file:
    geo = json.load(geo_file)

In [9]:
# Distinct 'code_dpt' values found across the features of `geo`.
departements = {feature['properties']['code_dpt'] for feature in geo['features']}

In [10]:
# Display the distinct department codes collected from the geometry file.
departements

{'01',
 '02',
 '03',
 '04',
 '05',
 '06',
 '07',
 '08',
 '09',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '2A',
 '2B',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '40',
 '41',
 '42',
 '43',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '50',
 '51',
 '52',
 '53',
 '54',
 '55',
 '56',
 '57',
 '58',
 '59',
 '60',
 '61',
 '62',
 '63',
 '64',
 '65',
 '66',
 '67',
 '68',
 '69',
 '70',
 '71',
 '72',
 '73',
 '74',
 '75',
 '76',
 '77',
 '78',
 '79',
 '80',
 '81',
 '82',
 '83',
 '84',
 '85',
 '86',
 '87',
 '88',
 '89',
 '90',
 '91',
 '92',
 '93',
 '94',
 '95',
 'ZA',
 'ZB',
 'ZC',
 'ZD',
 'ZM'}