# Exploring presidential candidate endorsements

In [1]:
import json
from collections import Counter

## Loading the endorsement JSON file

In [2]:
# Load the raw endorsement records from the JSON file.
# The encoding is passed explicitly: the data contains accented characters,
# and without it `open` falls back to the platform's default encoding
# (often not UTF-8 on Windows), which would garble or fail to decode them.
# NOTE: if you are using Windows, backslashes in a path string must be
# escaped, like so: 'C:\\path\\to\\thing', else they can trigger syntax errors.
with open('../data/parrainages.json', encoding='utf-8') as f:
    ENDORSEMENTS_RAW_DATA = json.load(f)

In [3]:
# Peek at the first raw record to see which fields are available
ENDORSEMENTS_RAW_DATA[0]

{'Civilite': 'M.',
 'Nom': 'CORDIVAL',
 'Prenom': 'Gilles',
 'Mandat': 'Maire',
 'Circonscription': 'Mont-Saint-Père',
 'Departement': 'Aisne',
 'Candidat': 'ARTHAUD Nathalie',
 'DatePublication': '2022-02-01T00:00:00'}

In [4]:
# Number of raw records, and the type of the container returned by `json.load`
len(ENDORSEMENTS_RAW_DATA), type(ENDORSEMENTS_RAW_DATA)

(13427, list)

In [5]:
# Reshaping the data: one flat dict per raw record, with English keys.
# Any 'Civilite' value other than 'M.' is labelled 'female'.
# NOTE(review): 'Nom' is stored under 'name' and 'Prenom' under 'surname';
# confirm this is the intended labelling.
ENDORSEMENTS_DATA = [
    {
        'name': raw['Nom'],
        'surname': raw['Prenom'],
        'mandate': raw['Mandat'],
        'department': raw['Departement'],
        'endorsement_date': raw['DatePublication'],
        'gender': 'male' if raw['Civilite'] == 'M.' else 'female',
        'candidate': raw['Candidat']
    }
    for raw in ENDORSEMENTS_RAW_DATA
]

In [6]:
# Check what the first reshaped record looks like
ENDORSEMENTS_DATA[0]

{'name': 'CORDIVAL',
 'surname': 'Gilles',
 'mandate': 'Maire',
 'department': 'Aisne',
 'endorsement_date': '2022-02-01T00:00:00',
 'gender': 'male',
 'candidate': 'ARTHAUD Nathalie'}

## Some stats about gender and candidates

In [7]:
# Tally the number of endorsements per gender in a plain dict
gender_stats = {}

for record in ENDORSEMENTS_DATA:
    gender = record['gender']
    # `.get` falls back to 0 the first time a given gender is encountered
    gender_stats[gender] = gender_stats.get(gender, 0) + 1

In [8]:
# Display the endorsement counts per gender
gender_stats

{'male': 9889, 'female': 3538}

In [9]:
def ratio(d, n):
    """Return a new dict mapping each key of `d` to its value divided by `n`.

    Args:
        d: mapping whose values are numbers (e.g. counts per category).
        n: the divisor applied to every value (e.g. the total count).

    Returns:
        A dict with the same keys as `d`, in the same order. `d` itself is
        left untouched.

    Raises:
        ZeroDivisionError: if `n` is 0 and `d` is not empty.
    """
    return {key: count / n for key, count in d.items()}

In [10]:
# Share of endorsements per gender, as a fraction of the total number of records
ratio(gender_stats, len(ENDORSEMENTS_DATA))

{'male': 0.7365010799136069, 'female': 0.2634989200863931}

In [11]:
# Count endorsements per candidate: a Counter built from an iterable
# tallies how many times each element occurs
candidates_stats = Counter(
    endorsement['candidate'] for endorsement in ENDORSEMENTS_DATA
)

In [12]:
# Manual ranking: sort the (candidate, count) pairs by count, highest first,
# and keep the first 15
sorted(
    candidates_stats.items(),
    key=lambda pair: pair[1],
    reverse=True,
)[:15]

[('PÉCRESSE Valérie', 2636),
 ('MACRON Emmanuel', 2098),
 ('HIDALGO Anne', 1440),
 ('MÉLENCHON Jean-Luc', 906),
 ('ZEMMOUR Éric', 741),
 ('JADOT Yannick', 712),
 ('LASSALLE Jean', 642),
 ('ROUSSEL Fabien', 626),
 ('LE PEN Marine', 622),
 ('DUPONT-AIGNAN Nicolas', 600),
 ('POUTOU Philippe', 596),
 ('ARTHAUD Nathalie', 576),
 ('ASSELINEAU François', 293),
 ('TAUBIRA Christiane', 274),
 ('KAZIB Anasse', 160)]

In [13]:
# `Counter.most_common(n)` returns the n most frequent (item, count) pairs,
# ordered from the most common to the least
candidates_stats.most_common(5)

[('PÉCRESSE Valérie', 2636),
 ('MACRON Emmanuel', 2098),
 ('HIDALGO Anne', 1440),
 ('MÉLENCHON Jean-Luc', 906),
 ('ZEMMOUR Éric', 741)]