# Exploring presidential candidate endorsements

In [18]:
import json
import csv
import gzip

## Reading the JSON file

In [2]:
# Load every endorsement record from the JSON export into memory.
with open('../data/parrainages.json', encoding='utf-8') as json_file:
    ENDORSEMENTS = json.loads(json_file.read())

In [3]:
# Peek at the first record to see which fields an endorsement carries.
ENDORSEMENTS[0]

{'Civilite': 'M.',
 'Nom': 'CORDIVAL',
 'Prenom': 'Gilles',
 'Mandat': 'Maire',
 'Circonscription': 'Mont-Saint-Père',
 'Departement': 'Aisne',
 'Candidat': 'ARTHAUD Nathalie',
 'DatePublication': '2022-02-01T00:00:00'}

In [4]:
# Total number of endorsement records loaded.
len(ENDORSEMENTS)

13427

## Counting & finding distinct candidates

In [5]:
# A set comprehension keeps each candidate name once, however many
# endorsements mention it.
candidate_names = {record['Candidat'] for record in ENDORSEMENTS}

len(candidate_names)

64

## Filtering candidates that could not run

In France, candidates can only run if they secure at least 500 endorsements.

In [6]:
# Tally endorsements by hand with a plain dict: `get` supplies 0 the first
# time a candidate is seen.
endorsements_per_candidate = {}

for record in ENDORSEMENTS:
    who = record['Candidat']
    endorsements_per_candidate[who] = endorsements_per_candidate.get(who, 0) + 1

# Show the 20 most endorsed candidates, highest count first.
ranking = sorted(endorsements_per_candidate.items(), key=lambda pair: pair[1], reverse=True)

for candidate_name, count in ranking[:20]:
    print(candidate_name, count)

PÉCRESSE Valérie 2636
MACRON Emmanuel 2098
HIDALGO Anne 1440
MÉLENCHON Jean-Luc 906
ZEMMOUR Éric 741
JADOT Yannick 712
LASSALLE Jean 642
ROUSSEL Fabien 626
LE PEN Marine 622
DUPONT-AIGNAN Nicolas 600
POUTOU Philippe 596
ARTHAUD Nathalie 576
ASSELINEAU François 293
TAUBIRA Christiane 274
KAZIB Anasse 160
THOUY Hélène 139
KOENIG Gaspard 107
KUZMANOVIC Georges 49
MIGUET Nicolas 40
EGGER Clara 36


In [7]:
from collections import Counter

In [8]:
# A Counter gives 0 for a key it has never seen instead of raising KeyError.
Counter()['something']

0

In [9]:
# Same tally as before, this time letting Counter consume the candidate of
# every endorsement directly.
endorsements_per_candidate = Counter(record['Candidat'] for record in ENDORSEMENTS)

endorsements_per_candidate.most_common(10)

[('PÉCRESSE Valérie', 2636),
 ('MACRON Emmanuel', 2098),
 ('HIDALGO Anne', 1440),
 ('MÉLENCHON Jean-Luc', 906),
 ('ZEMMOUR Éric', 741),
 ('JADOT Yannick', 712),
 ('LASSALLE Jean', 642),
 ('ROUSSEL Fabien', 626),
 ('LE PEN Marine', 622),
 ('DUPONT-AIGNAN Nicolas', 600)]

In [10]:
# Keep the candidates whose tally reaches the 500-endorsement threshold.
candidates_able_to_run = {
    candidate
    for candidate, tally in endorsements_per_candidate.items()
    if tally >= 500
}

candidates_able_to_run, len(candidates_able_to_run)

({'ARTHAUD Nathalie',
  'DUPONT-AIGNAN Nicolas',
  'HIDALGO Anne',
  'JADOT Yannick',
  'LASSALLE Jean',
  'LE PEN Marine',
  'MACRON Emmanuel',
  'MÉLENCHON Jean-Luc',
  'POUTOU Philippe',
  'PÉCRESSE Valérie',
  'ROUSSEL Fabien',
  'ZEMMOUR Éric'},
 12)

## Keeping only relevant endorsements

We are only interested in endorsements given by mayors to candidates who were able to run.

In [11]:
# Collect every distinct mandate label found in the data.
mandates = {record['Mandat'] for record in ENDORSEMENTS}

mandates

{'Conseiller de Paris',
 'Conseiller départemental',
 'Conseiller métropolitain de Lyon',
 'Conseiller régional',
 "Conseiller à l'Assemblée des Français de l'étranger",
 'Conseillère de Paris',
 'Conseillère départementale',
 'Conseillère métropolitaine de Lyon',
 'Conseillère régionale',
 "Conseillère à l'Assemblée des Français de l'étranger",
 'Député',
 'Députée',
 'Maire',
 "Maire d'arrondissement",
 "Maire délégué d'une commune associée ou d'une commune déléguée",
 "Maire déléguée d'une commune associée ou d'une commune déléguée",
 "Membre d'une assemblée d'une collectivité territoriale d'outre-mer à statut particulier",
 "Membre de l'Assemblée de Corse",
 "Président d'un EPCI à fiscalité propre",
 'Président de la Polynésie française',
 'Président du Conseil exécutif de Martinique',
 'Président du conseil consulaire',
 'Président du gouvernement de la Nouvelle-Calédonie',
 "Présidente d'un EPCI à fiscalité propre",
 'Présidente du conseil consulaire',
 'Représentant français au 

In [12]:
# Keep endorsements given to a candidate able to run by someone whose mandate
# label starts with 'Maire'. Judging by the distinct mandate labels in the
# data, that prefix also matches "Maire d'arrondissement" and the
# "Maire délégué(e) ..." labels, not just plain "Maire".
RELEVANT_ENDORSEMENTS = [
    record
    for record in ENDORSEMENTS
    if record['Candidat'] in candidates_able_to_run and record['Mandat'].startswith('Maire')
]

In [13]:
# Compare the size of the full dataset with the filtered one.
len(ENDORSEMENTS), len(RELEVANT_ENDORSEMENTS)

(13427, 8117)

## Computing per-candidate gender ratios

In [14]:
# Per candidate, count the endorsing mayors by gender. Only the civility 'M.'
# is counted as a man; any other value is counted as a woman.
genders_per_candidates = {}

for record in RELEVANT_ENDORSEMENTS:
    tallies = genders_per_candidates.setdefault(record['Candidat'], {'man': 0, 'woman': 0})
    tallies['man' if record['Civilite'] == 'M.' else 'woman'] += 1

genders_per_candidates

{'ARTHAUD Nathalie': {'man': 463, 'woman': 112},
 'DUPONT-AIGNAN Nicolas': {'man': 531, 'woman': 60},
 'HIDALGO Anne': {'man': 565, 'woman': 147},
 'JADOT Yannick': {'man': 253, 'woman': 59},
 'LASSALLE Jean': {'man': 566, 'woman': 57},
 'LE PEN Marine': {'man': 263, 'woman': 50},
 'MACRON Emmanuel': {'man': 1110, 'woman': 186},
 'MÉLENCHON Jean-Luc': {'man': 586, 'woman': 112},
 'PÉCRESSE Valérie': {'man': 1161, 'woman': 212},
 'POUTOU Philippe': {'man': 451, 'woman': 94},
 'ROUSSEL Fabien': {'man': 346, 'woman': 62},
 'ZEMMOUR Éric': {'man': 595, 'woman': 76}}

In [15]:
# Share of women among each candidate's endorsing mayors, as a 0-1 fraction.
gender_ratios_per_candidates = {
    candidate: tallies['woman'] / (tallies['woman'] + tallies['man'])
    for candidate, tallies in genders_per_candidates.items()
}

# Print the shares as percentages.
for candidate, share in gender_ratios_per_candidates.items():
    print(candidate, share * 100)

ARTHAUD Nathalie 19.47826086956522
DUPONT-AIGNAN Nicolas 10.152284263959391
HIDALGO Anne 20.646067415730336
JADOT Yannick 18.91025641025641
LASSALLE Jean 9.149277688603531
LE PEN Marine 15.974440894568689
MACRON Emmanuel 14.351851851851851
MÉLENCHON Jean-Luc 16.045845272206304
PÉCRESSE Valérie 15.440640932265111
POUTOU Philippe 17.24770642201835
ROUSSEL Fabien 15.196078431372548
ZEMMOUR Éric 11.326378539493295


## Reading the 2020 municipal election file

In [26]:
# Read the gzipped CSV in text mode; DictReader turns each line into a
# mapping keyed by the column names of the header line.
with gzip.open('../data/municipale2020.csv.gz', 'rt', encoding='utf-8') as csv_file:
    MAYOR_CANDIDATES = list(csv.DictReader(csv_file))

len(MAYOR_CANDIDATES)

902494

In [27]:
# Peek at the first row to see the available columns.
MAYOR_CANDIDATES[0]

OrderedDict([('Code du département', '01'),
             ('Libellé du département', 'Ain'),
             ('Code commune', '01004'),
             ('Libellé commune', 'Ambérieu-en-Bugey'),
             ('N° Panneau Liste', '1'),
             ('Libellé abrégé liste', 'UNIS POUR AMBERIEU'),
             ('Libellé Etendu Liste', 'UNIS POUR AMBERIEU'),
             ('Nuance Liste', 'LDVC'),
             ('N° candidat', '1'),
             ('Sexe candidat', 'M'),
             ('Nom candidat', 'FABRE'),
             ('Prénom candidat', 'Daniel'),
             ('Nationalité', 'Française'),
             ('Candidat au conseil communautaire', 'O')])

In [55]:
# How often each 'Nuance Liste' value occurs, ten most frequent first.
nuance_counts = Counter(candidate_row['Nuance Liste'] for candidate_row in MAYOR_CANDIDATES)
nuance_counts.most_common(10)

[('', 564996),
 ('LDVG', 74339),
 ('LDVD', 72617),
 ('LDIV', 49631),
 ('LDVC', 40296),
 ('LRN', 14408),
 ('LEXG', 13837),
 ('LLR', 10990),
 ('LUG', 10902),
 ('LREM', 9509)]

In [33]:
def mayor_candidate_key(row):
    """Build the lookup key '<first name> <last name> <department>' in lowercase.

    `row` is a row of the municipal election file. Its 'Prénom candidat',
    'Nom candidat' and 'Libellé du département' values are lowercased and
    joined with single spaces.
    """
    parts = (row['Prénom candidat'], row['Nom candidat'], row['Libellé du département'])
    return ' '.join(part.lower() for part in parts)

# Try the key function on the first row of the municipal election file.
mayor_candidate_key(MAYOR_CANDIDATES[0])

'daniel fabre ain'

In [34]:
# Count how many rows share each key: a count above 1 means several rows
# carry the same first name, last name and department.
naming_collisions = Counter(mayor_candidate_key(candidate_row) for candidate_row in MAYOR_CANDIDATES)

for shared_key, occurrences in naming_collisions.most_common(10):
    print(shared_key, occurrences)

daniel muller moselle 5
isabelle petit pas-de-calais 5
pierre weber bas-rhin 5
michel schmitt bas-rhin 5
frédéric meyer bas-rhin 5
philippe lefevre aisne 4
alain marie calvados 4
catherine marie calvados 4
yvon briand côtes-d'armor 4
christophe muller moselle 4


In [36]:
def endorsement_key(row):
    """Build the lookup key '<first name> <last name> <department>' in lowercase.

    `row` is an endorsement record. Its 'Prenom', 'Nom' and 'Departement'
    values are lowercased and joined with single spaces.
    """
    parts = (row['Prenom'], row['Nom'], row['Departement'])
    return ' '.join(part.lower() for part in parts)

# Try the key function on the first relevant endorsement.
endorsement_key(RELEVANT_ENDORSEMENTS[0])

'gilles cordival aisne'

In [40]:
# Index the relevant endorsements by key for direct lookup. When two
# endorsements share a key, the later one replaces the earlier, so the index
# can end up smaller than the list.
RELEVANT_ENDORSEMENTS_INDEX = {
    endorsement_key(record): record
    for record in RELEVANT_ENDORSEMENTS
}

len(RELEVANT_ENDORSEMENTS_INDEX), len(RELEVANT_ENDORSEMENTS)

(8116, 8117)

In [56]:
# Tag each indexed endorsement with the list nuance of the municipal candidate
# sharing its key. The endorsement dicts are modified in place, so the same
# objects held in RELEVANT_ENDORSEMENTS gain a 'Political Leaning' entry.
# When several rows match one endorsement, the last row with a non-empty
# nuance wins.
for candidate_row in MAYOR_CANDIDATES:
    matched = RELEVANT_ENDORSEMENTS_INDEX.get(mayor_candidate_key(candidate_row))

    if matched is None:
        continue

    nuance = candidate_row['Nuance Liste']

    # Rows without a nuance carry no information and must not erase a tag.
    if nuance != '':
        matched['Political Leaning'] = nuance

In [58]:
# Number of relevant endorsements left without a (non-empty) political leaning.
sum(
    'Political Leaning' not in row or row['Political Leaning'] == ''
    for row in RELEVANT_ENDORSEMENTS
)

6653

In [60]:
# Keep only the endorsements that received a non-empty political leaning.
POLITICAL_ENDORSMENTS = [
    record
    for record in RELEVANT_ENDORSEMENTS
    if record.get('Political Leaning')
]

len(POLITICAL_ENDORSMENTS)

1464

In [61]:
# Sanity check: relevant endorsements minus the untagged ones should equal the
# number of endorsements that received a political leaning. The untagged count
# is recomputed here rather than hand-copied from an earlier output, so the
# check stays valid if the input data changes.
unmatched_count = sum(
    1
    for row in RELEVANT_ENDORSEMENTS
    if 'Political Leaning' not in row or row['Political Leaning'] == ''
)

len(RELEVANT_ENDORSEMENTS) - unmatched_count

1464

In [62]:
# Distribution of political leanings among the tagged endorsements,
# most frequent first.
leaning_counts = Counter(record['Political Leaning'] for record in POLITICAL_ENDORSMENTS)
leaning_counts.most_common()

[('LDVD', 386),
 ('LDVG', 349),
 ('LDVC', 147),
 ('LLR', 132),
 ('LDIV', 129),
 ('LUG', 94),
 ('LSOC', 69),
 ('LCOM', 42),
 ('LUD', 35),
 ('LVEC', 16),
 ('LUDI', 16),
 ('LREM', 13),
 ('LUC', 10),
 ('LRN', 9),
 ('LECO', 5),
 ('LEXD', 4),
 ('LREG', 4),
 ('LMDM', 3),
 ('LFI', 1)]

In [71]:
# Export the tagged endorsements as CSV. The columns and their order come
# from the keys of the first record.
with open('political-endorsements.csv', 'w', encoding='utf-8', newline='') as csv_file:
    columns = list(POLITICAL_ENDORSMENTS[0])
    writer = csv.DictWriter(csv_file, columns)
    writer.writeheader()
    writer.writerows(POLITICAL_ENDORSMENTS)

In [79]:
# Coarse left / center / right grouping of list nuance labels. Only the eight
# labels that occur most often among the tagged endorsements are mapped; any
# other label has no entry.
GROUPS = dict(
    LDVD="right",
    LDVG="left",
    LDVC="center",
    LLR="right",
    LDIV="center",
    LUG="left",
    LSOC="left",
    LCOM="left",
)

In [80]:
# Peek at the first tagged endorsement, including its 'Political Leaning' entry.
POLITICAL_ENDORSMENTS[0]

{'Civilite': 'M.',
 'Nom': 'THOMAS',
 'Prenom': 'Jean-Jacques',
 'Mandat': 'Maire',
 'Circonscription': 'Hirson',
 'Departement': 'Aisne',
 'Candidat': 'HIDALGO Anne',
 'DatePublication': '2022-02-01T00:00:00',
 'Political Leaning': 'LSOC'}