# Exploring presidential candidate endorsements

In [1]:
import json
import csv
from collections import Counter

## 1. Loading the endorsement JSON file

In [2]:
# NOTE: if you are using Windows, write paths either with forward slashes or with
# escaped backslashes (e.g. 'C:\\path\\to\\thing'); a bare backslash in a regular
# string literal can start an escape sequence and trigger errors.
# The encoding is passed explicitly because `open` otherwise falls back to a
# platform-dependent default, which is why the extra `encoding` kwarg used to be
# required on Windows only. Passing it everywhere makes the cell portable.
with open('../data/parrainages.json', encoding='utf8') as f:
    ENDORSEMENTS_RAW_DATA = json.load(f)

In [3]:
# Peek at the first raw record to see which fields are available
ENDORSEMENTS_RAW_DATA[0]

{'Civilite': 'M.',
 'Nom': 'CORDIVAL',
 'Prenom': 'Gilles',
 'Mandat': 'Maire',
 'Circonscription': 'Mont-Saint-Père',
 'Departement': 'Aisne',
 'Candidat': 'ARTHAUD Nathalie',
 'DatePublication': '2022-02-01T00:00:00'}

In [4]:
# Number of raw records and the type of the container holding them
len(ENDORSEMENTS_RAW_DATA), type(ENDORSEMENTS_RAW_DATA)

(13427, list)

In [5]:
# Reshaping the data: keep only the fields used below, stored under English keys,
# and derive a gender label from the civility title ('M.' -> 'male', anything
# else -> 'female').
ENDORSEMENTS_DATA = [
    {
        'name': raw['Nom'],
        'surname': raw['Prenom'],
        'mandate': raw['Mandat'],
        'department': raw['Departement'],
        'endorsement_date': raw['DatePublication'],
        'gender': 'male' if raw['Civilite'] == 'M.' else 'female',
        'candidate': raw['Candidat'],
    }
    for raw in ENDORSEMENTS_RAW_DATA
]

In [6]:
# First reshaped record, now exposing the English keys
ENDORSEMENTS_DATA[0]

{'name': 'CORDIVAL',
 'surname': 'Gilles',
 'mandate': 'Maire',
 'department': 'Aisne',
 'endorsement_date': '2022-02-01T00:00:00',
 'gender': 'male',
 'candidate': 'ARTHAUD Nathalie'}

### Some stats about gender and candidates

In [7]:
# Tally the endorsements per gender using a plain dict
gender_stats = {}

for entry in ENDORSEMENTS_DATA:
    label = entry['gender']

    # `dict.get` supplies 0 the first time a given gender is encountered
    gender_stats[label] = gender_stats.get(label, 0) + 1

In [8]:
# Endorsement counts per gender
gender_stats

{'male': 9889, 'female': 3538}

In [9]:
def ratio(d, n):
    """Return a new dict mapping each key of `d` to its value divided by `n`.

    Args:
        d: mapping from keys to numeric values (e.g. counts).
        n: the divisor applied to every value (e.g. the grand total).

    Returns:
        A dict with the same keys as `d`, in the same order, whose values
        are `value / n`.
    """
    return {key: count / n for key, count in d.items()}

In [10]:
# Share of each gender among all endorsements
ratio(gender_stats, len(ENDORSEMENTS_DATA))

{'male': 0.7365010799136069, 'female': 0.2634989200863931}

In [11]:
# Count the endorsements received by each candidate
candidates_stats = Counter()
candidates_stats.update(entry['candidate'] for entry in ENDORSEMENTS_DATA)

In [12]:
# Top 15 candidates: sort the (candidate, count) pairs by count, largest first
sorted(candidates_stats.items(), key=lambda x: x[1], reverse=True)[:15]

[('PÉCRESSE Valérie', 2636),
 ('MACRON Emmanuel', 2098),
 ('HIDALGO Anne', 1440),
 ('MÉLENCHON Jean-Luc', 906),
 ('ZEMMOUR Éric', 741),
 ('JADOT Yannick', 712),
 ('LASSALLE Jean', 642),
 ('ROUSSEL Fabien', 626),
 ('LE PEN Marine', 622),
 ('DUPONT-AIGNAN Nicolas', 600),
 ('POUTOU Philippe', 596),
 ('ARTHAUD Nathalie', 576),
 ('ASSELINEAU François', 293),
 ('TAUBIRA Christiane', 274),
 ('KAZIB Anasse', 160)]

In [13]:
# Top 5 candidates, this time through the Counter's own `most_common` method
candidates_stats.most_common(5)

[('PÉCRESSE Valérie', 2636),
 ('MACRON Emmanuel', 2098),
 ('HIDALGO Anne', 1440),
 ('MÉLENCHON Jean-Luc', 906),
 ('ZEMMOUR Éric', 741)]

In [14]:
# Generator expression: the candidates are fed to Counter one by one,
# without building an intermediate list
Counter(record['candidate'] for record in ENDORSEMENTS_DATA).most_common(5)

[('PÉCRESSE Valérie', 2636),
 ('MACRON Emmanuel', 2098),
 ('HIDALGO Anne', 1440),
 ('MÉLENCHON Jean-Luc', 906),
 ('ZEMMOUR Éric', 741)]

In [15]:
# List comprehension: build the list of records labelled 'female', then count them
len([record for record in ENDORSEMENTS_DATA if record['gender'] == 'female'])

3538

In [16]:
# Same result as the list comprehension, written as an explicit loop
female_endorsements = []

for record in ENDORSEMENTS_DATA:
    if record['gender'] == 'female':
        female_endorsements.append(record)

len(female_endorsements)

3538

### Goals, so that we keep only the relevant endorsements:
  1. Find the proportion of endorsements that come from mayors
  2. Find the running candidates (at least 500 endorsements)
  3. Filter out irrelevant endorsements:
      - Endorsements for non-running candidates
      - Endorsements from non-mayors

In [17]:
# The mandate value is messy: testing for the exact string "Maire" won't cut it.
# Listing the 10 most frequent mandate labels shows the variants to deal with.
Counter(record['mandate'] for record in ENDORSEMENTS_DATA).most_common(10)

[('Maire', 8712),
 ('Conseillère départementale', 854),
 ('Conseiller départemental', 740),
 ('Conseillère régionale', 581),
 ('Conseiller régional', 511),
 ("Maire délégué d'une commune associée ou d'une commune déléguée", 345),
 ('Député', 309),
 ('Députée', 221),
 ('Sénateur', 191),
 ("Membre d'une assemblée d'une collectivité territoriale d'outre-mer à statut particulier",
  150)]

In [18]:
# Some reminders about substring tests in python:
# `needle in haystack` is True when `needle` occurs anywhere inside `haystack`
string = 'Hello my friend'
'friend' in string

True

In [19]:
# `lower` returns a lowercased copy of the string
string.lower()

'hello my friend'

In [20]:
# Combining both: case-insensitive search for 'maire' (absent from this string)
'maire' in string.lower()

False

In [21]:
# Defining a function to have a fuzzy condition of mayor
def is_endorsement_mayor(s):
    """Tell whether the mandate label `s` contains 'maire', ignoring case.

    Args:
        s: the mandate label, as a string.

    Returns:
        True if the lowercased label contains the substring 'maire',
        False otherwise.
    """
    lowered = s.lower()
    return lowered.find('maire') != -1

In [22]:
# Common naming conventions:
# snake_case
# camelCase
# kebab-case
# PascalCase (a.k.a. CapitalizedCase)
# CONSTANT_CASE

In [23]:
# Mayor ratio: share of all endorsements whose mandate matches the fuzzy mayor test
mayor_endorsement_count = sum(
    1
    for entry in ENDORSEMENTS_DATA
    if is_endorsement_mayor(entry['mandate'])
)

# Displayed as a whole percentage, truncated (not rounded) by `int`
f'{int((mayor_endorsement_count / len(ENDORSEMENTS_DATA)) * 100)}%'

'68%'

In [24]:
# A candidate is considered as running once they reach 500 endorsements
running_candidates = [
    candidate
    for candidate, total in candidates_stats.items()
    if total >= 500
]

print('The running candidates are:')
for candidate in running_candidates:
    print(f'  - {candidate}')
print()
print(f'We have {len(running_candidates)} of them')

The running candidates are:
  - ARTHAUD Nathalie
  - DUPONT-AIGNAN Nicolas
  - HIDALGO Anne
  - JADOT Yannick
  - LASSALLE Jean
  - LE PEN Marine
  - MACRON Emmanuel
  - MÉLENCHON Jean-Luc
  - PÉCRESSE Valérie
  - POUTOU Philippe
  - ROUSSEL Fabien
  - ZEMMOUR Éric

We have 12 of them


In [25]:
# Test that something exists in a list: `in` compares the value against each item
'MACRON Emmanuel' in running_candidates, 'PLIQUE Guillaume' in running_candidates

(True, False)

In [26]:
# Keep only the endorsements given by mayors to running candidates
RELEVANT_ENDORSEMENTS_DATA = [
    entry
    for entry in ENDORSEMENTS_DATA
    if is_endorsement_mayor(entry['mandate'])
    and entry['candidate'] in running_candidates
]

# Fraction of all endorsements that survive the filter
len(RELEVANT_ENDORSEMENTS_DATA) / len(ENDORSEMENTS_DATA)

0.6045281894689805

In [27]:
# Updated mayor ratio: mayor endorsements counted among the relevant records only,
# but still expressed as a share of ALL endorsements (the denominator below is
# ENDORSEMENTS_DATA, not RELEVANT_ENDORSEMENTS_DATA)
mayor_endorsement_count = len([
    entry
    for entry in RELEVANT_ENDORSEMENTS_DATA
    if is_endorsement_mayor(entry['mandate'])
])

f'{int((mayor_endorsement_count / len(ENDORSEMENTS_DATA)) * 100)}%'

'60%'

## 2. Loading the RNE CSV file

In [28]:
# `iso8859` and `latin1` are the same encoding; the former spelling is kept because
# it was reported to work on most Windows setups where the latter did not.
# `newline=''` is what the csv module documentation asks for when opening a file
# for a reader, so that line endings embedded in quoted fields are handled by the
# csv parser itself instead of being translated by the file object.
with open('../data/rne-maires.csv', encoding='iso8859', newline='') as f:
    # The file uses semicolons as field separators; each row becomes a dict
    # keyed by the header line.
    reader = csv.DictReader(f, delimiter=';')
    # Materialize the rows while the file is still open
    RNE = list(reader)

In [29]:
# Number of rows read from the RNE file
len(RNE)

34921

In [30]:
# First RNE row, to discover the available columns
RNE[0]

{'Code du département': '01',
 'Libellé du département': 'Ain',
 'Code de la collectivité à statut particulier': '',
 'Libellé de la collectivité à statut particulier': '',
 'Code de la commune': '01001',
 'Libellé de la commune': "L'Abergement-Clémenciat",
 "Nom de l'élu": 'BOULON',
 "Prénom de l'élu": 'Daniel',
 'Code sexe': 'M',
 'Date de naissance': '04/03/1951',
 'Code de la catégorie socio-professionnelle': '74',
 'Libellé de la catégorie socio-professionnelle': 'Ancien cadre',
 'Date de début du mandat': '18/05/2020',
 'Date de début de la fonction': '26/05/2020'}

In [31]:
def endorsement_key(endorsement):
    """Build a case-insensitive lookup key for an endorsement record.

    Args:
        endorsement: a dict holding string values under the 'name',
            'surname' and 'department' keys.

    Returns:
        A 3-tuple with the lowercased name, surname and department,
        in that order.
    """
    fields = ('name', 'surname', 'department')
    return tuple(endorsement[field].lower() for field in fields)

In [32]:
# Show a record, the key derived from it, and the type of that key (one per line)
print(
    RELEVANT_ENDORSEMENTS_DATA[0],
    endorsement_key(RELEVANT_ENDORSEMENTS_DATA[0]),
    type(endorsement_key(RELEVANT_ENDORSEMENTS_DATA[0])),
    sep='\n',
)

{'name': 'CORDIVAL', 'surname': 'Gilles', 'mandate': 'Maire', 'department': 'Aisne', 'endorsement_date': '2022-02-01T00:00:00', 'gender': 'male', 'candidate': 'ARTHAUD Nathalie'}
('cordival', 'gilles', 'aisne')
<class 'tuple'>


In [35]:
# Index the relevant endorsements by their (name, surname, department) key
ENDORSEMENTS_INDEX = {}

for record in RELEVANT_ENDORSEMENTS_DATA:
    key = endorsement_key(record)

    if key not in ENDORSEMENTS_INDEX:
        ENDORSEMENTS_INDEX[key] = record
    else:
        # Key already taken: report both records; the first one stays in the index
        print('Regional homonymy')
        print(record)
        print(ENDORSEMENTS_INDEX[key])

# Display the first (key, record) pair stored in the index
next(iter(ENDORSEMENTS_INDEX.items()))

Regional homonymy
{'name': 'MARTIN', 'surname': 'Gérard', 'mandate': 'Maire', 'department': 'Dordogne', 'endorsement_date': '2022-02-15T00:00:00', 'gender': 'male', 'candidate': 'POUTOU Philippe'}
{'name': 'MARTIN', 'surname': 'Gérard', 'mandate': 'Maire', 'department': 'Dordogne', 'endorsement_date': '2022-02-08T00:00:00', 'gender': 'male', 'candidate': 'MÉLENCHON Jean-Luc'}


(('cordival', 'gilles', 'aisne'),
 {'name': 'CORDIVAL',
  'surname': 'Gilles',
  'mandate': 'Maire',
  'department': 'Aisne',
  'endorsement_date': '2022-02-01T00:00:00',
  'gender': 'male',
  'candidate': 'ARTHAUD Nathalie'})