# Exploring presidential candidates endorsements

In [1]:
import json

## Reading and parsing the JSON file

In [2]:
with open(r"../data/parrainages.json", encoding="utf-8") as f:
    ENDORSEMENTS = json.load(f)

In [3]:
ENDORSEMENTS[0]

{'Civilite': 'M.',
 'Nom': 'CORDIVAL',
 'Prenom': 'Gilles',
 'Mandat': 'Maire',
 'Circonscription': 'Mont-Saint-Père',
 'Departement': 'Aisne',
 'Candidat': 'ARTHAUD Nathalie',
 'DatePublication': '2022-02-01T00:00:00'}

In [4]:
len(ENDORSEMENTS)

13427

## Find information about candidates

### Distinct number of candidates

In [5]:
distinct_candidates = set()

for endorsement in ENDORSEMENTS:
    candidat = endorsement['Candidat']
    distinct_candidates.add(candidat)

len(distinct_candidates)

64

### Endorsements per candidates (the hard way)

In [6]:
endorsement_per_candidate = {}

for endorsement in ENDORSEMENTS:
    candidat = endorsement['Candidat']

    if candidat not in endorsement_per_candidate:
        endorsement_per_candidate[candidat] = 1
    else:
        endorsement_per_candidate[candidat] += 1

len(endorsement_per_candidate)

64

In [7]:
relevant_candidates = []

for name, count in endorsement_per_candidate.items():
    if count >= 400:
        relevant_candidates.append((name, count))

len(relevant_candidates)

12

In [8]:
sorted(relevant_candidates, key=lambda record: record[1], reverse=True)

[('PÉCRESSE Valérie', 2636),
 ('MACRON Emmanuel', 2098),
 ('HIDALGO Anne', 1440),
 ('MÉLENCHON Jean-Luc', 906),
 ('ZEMMOUR Éric', 741),
 ('JADOT Yannick', 712),
 ('LASSALLE Jean', 642),
 ('ROUSSEL Fabien', 626),
 ('LE PEN Marine', 622),
 ('DUPONT-AIGNAN Nicolas', 600),
 ('POUTOU Philippe', 596),
 ('ARTHAUD Nathalie', 576)]

### Endorsements per candidates (the lazy way)

In [9]:
from collections import Counter

In [10]:
counter = Counter()
counter["i do not exists"]

0

In [11]:
endorsement_per_candidate = Counter()

for endorsement in ENDORSEMENTS:
    endorsement_per_candidate[endorsement["Candidat"]] += 1

len(endorsement_per_candidate)

64

In [12]:
for name, count in endorsement_per_candidate.most_common():
    if count < 500:
        break
    print(name, count)

PÉCRESSE Valérie 2636
MACRON Emmanuel 2098
HIDALGO Anne 1440
MÉLENCHON Jean-Luc 906
ZEMMOUR Éric 741
JADOT Yannick 712
LASSALLE Jean 642
ROUSSEL Fabien 626
LE PEN Marine 622
DUPONT-AIGNAN Nicolas 600
POUTOU Philippe 596
ARTHAUD Nathalie 576


### Finding the candidates who did run

In [13]:
candidates_who_ran = set()

for name, count in endorsement_per_candidate.most_common():
    if count < 500:
        break

    candidates_who_ran.add(name)

candidates_who_ran

{'ARTHAUD Nathalie',
 'DUPONT-AIGNAN Nicolas',
 'HIDALGO Anne',
 'JADOT Yannick',
 'LASSALLE Jean',
 'LE PEN Marine',
 'MACRON Emmanuel',
 'MÉLENCHON Jean-Luc',
 'POUTOU Philippe',
 'PÉCRESSE Valérie',
 'ROUSSEL Fabien',
 'ZEMMOUR Éric'}

### Analyzing mandates

In [14]:
distinct_mandates = set()

for endorsement in ENDORSEMENTS:
    distinct_mandates.add(endorsement['Mandat'])

# Same as the above
distinct_mandates = set(endorsement['Mandat'] for endorsement in ENDORSEMENTS)
distinct_mandates

{'Conseiller de Paris',
 'Conseiller départemental',
 'Conseiller métropolitain de Lyon',
 'Conseiller régional',
 "Conseiller à l'Assemblée des Français de l'étranger",
 'Conseillère de Paris',
 'Conseillère départementale',
 'Conseillère métropolitaine de Lyon',
 'Conseillère régionale',
 "Conseillère à l'Assemblée des Français de l'étranger",
 'Député',
 'Députée',
 'Maire',
 "Maire d'arrondissement",
 "Maire délégué d'une commune associée ou d'une commune déléguée",
 "Maire déléguée d'une commune associée ou d'une commune déléguée",
 "Membre d'une assemblée d'une collectivité territoriale d'outre-mer à statut particulier",
 "Membre de l'Assemblée de Corse",
 "Président d'un EPCI à fiscalité propre",
 'Président de la Polynésie française',
 'Président du Conseil exécutif de Martinique',
 'Président du conseil consulaire',
 'Président du gouvernement de la Nouvelle-Calédonie',
 "Présidente d'un EPCI à fiscalité propre",
 'Présidente du conseil consulaire',
 'Représentant français au 

### Filtering the relevant endorsements

In [15]:
RELEVANT_ENDORSEMENTS = []

for endorsement in ENDORSEMENTS:
    if (
        endorsement["Candidat"] in candidates_who_ran and
        endorsement["Mandat"].startswith("Maire")
    ):
        RELEVANT_ENDORSEMENTS.append(endorsement)

len(RELEVANT_ENDORSEMENTS) / len(ENDORSEMENTS)

0.6045281894689805

In [16]:
len(RELEVANT_ENDORSEMENTS)

8117

### Exploring gender ratios

#### Total ratios

In [17]:
women_count = 0
men_count = 0

for endorsement in RELEVANT_ENDORSEMENTS:
    if endorsement['Civilite'] == 'Mme':
        women_count += 1
        # women_count = women_count + 1
    else:
        men_count += 1

women_ratio = women_count / len(RELEVANT_ENDORSEMENTS)
men_ratio = men_count / len(RELEVANT_ENDORSEMENTS)

women_ratio, men_ratio, women_ratio + men_ratio

(0.1511642232351854, 0.8488357767648146, 1.0)

#### Breakdown by candidate

In [18]:
# Maps candidate names to dictionaries of data: {"women_count": int, "total_count": int}
gender_ratios = {}

for endorsement in RELEVANT_ENDORSEMENTS:
    is_woman = endorsement['Civilite'] == 'Mme'
    candidate = endorsement['Candidat']

    if candidate in gender_ratios:
        if is_woman:
            gender_ratios[candidate]["women_count"] += 1
            
        gender_ratios[candidate]["total_count"] += 1
    else:
        gender_ratios[candidate] = {
            "women_count": 1 if is_woman else 0,
            "total_count": 1
        }

#### Report

In [19]:
def percentage(ratio):
    # NOTE: we multiply the 0-1 ratio by 100, we transform it into a string
    # then we take a slice of the 6 first characters of the string
    return str(ratio * 100)[:5] + '%'

print('Total women ratio is:', percentage(women_ratio))
print('National ratio is: 19.8% (as per: https://www.collectivites-locales.gouv.fr/sites/default/files/Accueil/Etudes%20et%20statistiques/Documents%20de%20synth%C3%A8se/BIS/2020/bis_ndeg145_proportion_de_femmes_dans_les_cm_2020.pdf)')

for name, data in gender_ratios.items():
    print()
    print(name)
    print('-' * len(name))
    print('Number of woman mayor:', data["women_count"])
    print('Total:', data["total_count"])
    print('Ratio:', percentage(data["women_count"] / data["total_count"]))
    print('Delta:', percentage(data["women_count"] / data["total_count"] - women_ratio))

Total women ratio is: 15.11%
National ratio is: 19.8% (as per: https://www.collectivites-locales.gouv.fr/sites/default/files/Accueil/Etudes%20et%20statistiques/Documents%20de%20synth%C3%A8se/BIS/2020/bis_ndeg145_proportion_de_femmes_dans_les_cm_2020.pdf)

ARTHAUD Nathalie
----------------
Number of woman mayor: 112
Total: 575
Ratio: 19.47%
Delta: 4.361%

DUPONT-AIGNAN Nicolas
---------------------
Number of woman mayor: 60
Total: 591
Ratio: 10.15%
Delta: -4.96%

HIDALGO Anne
------------
Number of woman mayor: 147
Total: 712
Ratio: 20.64%
Delta: 5.529%

JADOT Yannick
-------------
Number of woman mayor: 59
Total: 312
Ratio: 18.91%
Delta: 3.793%

LASSALLE Jean
-------------
Number of woman mayor: 57
Total: 623
Ratio: 9.149%
Delta: -5.96%

LE PEN Marine
-------------
Number of woman mayor: 50
Total: 313
Ratio: 15.97%
Delta: 0.858%

MACRON Emmanuel
---------------
Number of woman mayor: 186
Total: 1296
Ratio: 14.35%
Delta: -0.76%

MÉLENCHON Jean-Luc
------------------
Number of woman mayo

## Joining the mayor election file

In [20]:
import gzip
import csv

In [21]:
MAYORS = []

with gzip.open(r'../data/municipale2020.csv.gz', encoding='utf-8', mode='rt') as f:
    reader = csv.DictReader(f)

    for row in reader:
        MAYORS.append(row)

len(MAYORS)

902494

In [22]:
for k, v in MAYORS[0].items():
    print(k, '->', v)

Code du département -> 01
Libellé du département -> Ain
Code commune -> 01004
Libellé commune -> Ambérieu-en-Bugey
N° Panneau Liste -> 1
Libellé abrégé liste -> UNIS POUR AMBERIEU
Libellé Etendu Liste -> UNIS POUR AMBERIEU
Nuance Liste -> LDVC
N° candidat -> 1
Sexe candidat -> M
Nom candidat -> FABRE
Prénom candidat -> Daniel
Nationalité -> Française
Candidat au conseil communautaire -> O


### Finding duplicate names

In [23]:
from collections import Counter

In [24]:
duplicate_names = Counter()

for row in MAYORS:
    key = row['Prénom candidat'] + ' ' + row['Nom candidat'] + ' ' + row['Libellé du département']

    duplicate_names[key] += 1

duplicate_names.most_common(10)

[('Daniel MULLER Moselle', 5),
 ('Isabelle PETIT Pas-de-Calais', 5),
 ('Pierre WEBER Bas-Rhin', 5),
 ('Michel SCHMITT Bas-Rhin', 5),
 ('Frédéric MEYER Bas-Rhin', 5),
 ('Philippe LEFEVRE Aisne', 4),
 ('Alain MARIE Calvados', 4),
 ('Catherine MARIE Calvados', 4),
 ("Yvon BRIAND Côtes-d'Armor", 4),
 ('Christophe MULLER Moselle', 4)]

### Designing our keys for each file

In [25]:
print(ENDORSEMENTS[0])

def endorsement_key(row):
    return row['Prenom'] + ' ' + row['Nom'] + ' ' + row['Departement']

endorsement_key(ENDORSEMENTS[0])

{'Civilite': 'M.', 'Nom': 'CORDIVAL', 'Prenom': 'Gilles', 'Mandat': 'Maire', 'Circonscription': 'Mont-Saint-Père', 'Departement': 'Aisne', 'Candidat': 'ARTHAUD Nathalie', 'DatePublication': '2022-02-01T00:00:00'}


'Gilles CORDIVAL Aisne'

In [26]:
print(MAYORS[0])

def mayor_key(row):
    return row['Prénom candidat'] + ' ' + row['Nom candidat'] + ' ' + row['Libellé du département']

mayor_key(MAYORS[0])

{'Code du département': '01', 'Libellé du département': 'Ain', 'Code commune': '01004', 'Libellé commune': 'Ambérieu-en-Bugey', 'N° Panneau Liste': '1', 'Libellé abrégé liste': 'UNIS POUR AMBERIEU', 'Libellé Etendu Liste': 'UNIS POUR AMBERIEU', 'Nuance Liste': 'LDVC', 'N° candidat': '1', 'Sexe candidat': 'M', 'Nom candidat': 'FABRE', 'Prénom candidat': 'Daniel', 'Nationalité': 'Française', 'Candidat au conseil communautaire': 'O'}


'Daniel FABRE Ain'

### Indexing the endorsements

In [27]:
# NOTE: it will map endorsement keys to endorsements
ENDORSEMENT_INDEX = {}

for endorsement in RELEVANT_ENDORSEMENTS:
    key = endorsement_key(endorsement)
    ENDORSEMENT_INDEX[key] = endorsement

len(ENDORSEMENT_INDEX), len(RELEVANT_ENDORSEMENTS)

(8116, 8117)

In [28]:
ENDORSEMENT_INDEX[endorsement_key(ENDORSEMENTS[9])]

{'Civilite': 'M.',
 'Nom': 'RUIZ',
 'Prenom': 'Albert',
 'Mandat': "Maire délégué d'une commune associée ou d'une commune déléguée",
 'Circonscription': 'Bourmont-entre-Meuse-et-Mouzon',
 'Departement': 'Haute-Marne',
 'Candidat': 'ARTHAUD Nathalie',
 'DatePublication': '2022-02-01T00:00:00'}

In [29]:
distinct_nuance_list = Counter()

for mayor in MAYORS:
    distinct_nuance_list[mayor['Nuance Liste']] += 1

distinct_nuance_list.most_common()

[('', 564996),
 ('LDVG', 74339),
 ('LDVD', 72617),
 ('LDIV', 49631),
 ('LDVC', 40296),
 ('LRN', 14408),
 ('LEXG', 13837),
 ('LLR', 10990),
 ('LUG', 10902),
 ('LREM', 9509),
 ('LVEC', 7299),
 ('LSOC', 6282),
 ('LECO', 6232),
 ('LCOM', 4317),
 ('LFI', 3197),
 ('LUC', 3113),
 ('LUD', 2917),
 ('LREG', 2674),
 ('LUDI', 1991),
 ('LEXD', 1269),
 ('LMDM', 713),
 ('LDLF', 420),
 ('LGJ', 362),
 ('LRDG', 183)]

### Performing the match

In [31]:
matches = 0

for mayor in MAYORS:
    key = mayor_key(mayor)

    match = ENDORSEMENT_INDEX.get(key)

    if match is not None:
        match['political_leaning'] = mayor['Nuance Liste']
        matches += 1

matches / len(RELEVANT_ENDORSEMENTS)

0.9906369348281385

In [33]:
RELEVANT_ENDORSEMENTS[457]

{'Civilite': 'M.',
 'Nom': 'GERAUDIE',
 'Prenom': 'Ludovic',
 'Mandat': 'Maire',
 'Circonscription': 'Le Palais-sur-Vienne',
 'Departement': 'Haute-Vienne',
 'Candidat': 'HIDALGO Anne',
 'DatePublication': '2022-02-03T00:00:00',
 'political_leaning': 'LDVG'}

## Analyzing the political leanings of endorsements per candidate

In [35]:
GROUPS = {
    "LDVD": "right",
    "LDVG": "left",
    "LDVC": "center",
    "LLR": "right",
    "LDIV": "center",
    "LUG": "left",
    "LSOC": "left",
    "LCOM": "left",
    "LUD": "right",
    "LUDI": "right",
    "LREM": "center",
    "LRN": "right"
}

In [44]:
POLITICAL_ENDORSEMENTS = []

for endorsement in RELEVANT_ENDORSEMENTS:
    if 'political_leaning' not in endorsement:
        continue

    leaning = endorsement['political_leaning']

    # Same as: if leaning == '':
    if not leaning:
        continue

    group = GROUPS.get(leaning)

    if group is None:
        continue

    endorsement['group'] = group
    POLITICAL_ENDORSEMENTS.append(endorsement)

len(POLITICAL_ENDORSEMENTS) / len(RELEVANT_ENDORSEMENTS)

0.1709991376124184

In [45]:
POLITICAL_ENDORSEMENTS[0]

{'Civilite': 'M.',
 'Nom': 'THOMAS',
 'Prenom': 'Jean-Jacques',
 'Mandat': 'Maire',
 'Circonscription': 'Hirson',
 'Departement': 'Aisne',
 'Candidat': 'HIDALGO Anne',
 'DatePublication': '2022-02-01T00:00:00',
 'political_leaning': 'LSOC',
 'group': 'left'}

In [47]:
# This dict will map candidate names to {'left': int, 'center': int, 'right': int}
political_ratios = {}

for endorsement in POLITICAL_ENDORSEMENTS:
    candidate = endorsement['Candidat']
    group = endorsement['group']

    if candidate not in political_ratios:
        # we need to create the entry
        political_ratios[candidate] = {"left": 0, "center": 0, "right": 0}

    political_ratios[candidate][group] += 1

political_ratios

{'HIDALGO Anne': {'left': 258, 'center': 5, 'right': 0},
 'MACRON Emmanuel': {'left': 48, 'center': 140, 'right': 75},
 'PÉCRESSE Valérie': {'left': 2, 'center': 50, 'right': 402},
 'ROUSSEL Fabien': {'left': 111, 'center': 3, 'right': 0},
 'ARTHAUD Nathalie': {'left': 11, 'center': 15, 'right': 2},
 'LE PEN Marine': {'left': 0, 'center': 6, 'right': 17},
 'MÉLENCHON Jean-Luc': {'left': 60, 'center': 18, 'right': 13},
 'ZEMMOUR Éric': {'left': 1, 'center': 8, 'right': 30},
 'DUPONT-AIGNAN Nicolas': {'left': 1, 'center': 11, 'right': 9},
 'JADOT Yannick': {'left': 31, 'center': 14, 'right': 4},
 'LASSALLE Jean': {'left': 3, 'center': 2, 'right': 7},
 'POUTOU Philippe': {'left': 11, 'center': 10, 'right': 10}}

In [51]:
# Printing a nice report
for name, data in political_ratios.items():
    total = data['left'] + data['right'] + data['center']
    
    print()
    print(name)
    print('-' * len(name))
    print('Left:', percentage(data['left'] / total))
    print('Center:', percentage(data['center'] / total))
    print('Right:', percentage(data['right'] / total))


HIDALGO Anne
------------
Left: 98.09%
Center: 1.901%
Right: 0.0%

MACRON Emmanuel
---------------
Left: 18.25%
Center: 53.23%
Right: 28.51%

PÉCRESSE Valérie
----------------
Left: 0.440%
Center: 11.01%
Right: 88.54%

ROUSSEL Fabien
--------------
Left: 97.36%
Center: 2.631%
Right: 0.0%

ARTHAUD Nathalie
----------------
Left: 39.28%
Center: 53.57%
Right: 7.142%

LE PEN Marine
-------------
Left: 0.0%
Center: 26.08%
Right: 73.91%

MÉLENCHON Jean-Luc
------------------
Left: 65.93%
Center: 19.78%
Right: 14.28%

ZEMMOUR Éric
------------
Left: 2.564%
Center: 20.51%
Right: 76.92%

DUPONT-AIGNAN Nicolas
---------------------
Left: 4.761%
Center: 52.38%
Right: 42.85%

JADOT Yannick
-------------
Left: 63.26%
Center: 28.57%
Right: 8.163%

LASSALLE Jean
-------------
Left: 25.0%
Center: 16.66%
Right: 58.33%

POUTOU Philippe
---------------
Left: 35.48%
Center: 32.25%
Right: 32.25%


## Writing the results on disk as data

In [60]:
with open("relevant-endorsements.json", "w") as f:
    json.dump(RELEVANT_ENDORSEMENTS, f, indent=2)

In [59]:
with open("relevant-endorsements.json") as f:
    print(json.load(f)[0])

{'Civilite': 'M.', 'Nom': 'CORDIVAL', 'Prenom': 'Gilles', 'Mandat': 'Maire', 'Circonscription': 'Mont-Saint-Père', 'Departement': 'Aisne', 'Candidat': 'ARTHAUD Nathalie', 'DatePublication': '2022-02-01T00:00:00', 'political_leaning': ''}
