In [1]:
import numpy as np
import pandas as pd

import seaborn
import matplotlib.pyplot as plt

from math import ceil
# Global plot styling: seaborn's "talk" preset with enlarged fonts and
# thicker lines, applied to every figure drawn after this point.
seaborn.set_context(
    "talk",
    font_scale=1.5,
    rc={"lines.linewidth": 3.5},
)

**Read election data, and replace NaNs with 0**

In [17]:
# Load the raw results file (read as ISO-8859-1 rather than UTF-8).
data = pd.read_csv("historical_election_results.csv", encoding='ISO-8859-1')

# Fudge because there were 2 elections in 1974: the file labels them "1974F"
# and "1974O", so we treat the 1st as 1974 and the 2nd as 1975. The relabelling
# is assigned back to the column (rather than using `inplace=True` on the
# `data.election` accessor) so that it is guaranteed to update `data` itself,
# including under pandas Copy-on-Write.
relabelled_elections = data['election'].replace({"1974F": "1974", "1974O": "1975"})

# Convert the election year into an integer, so I can select on it
election_years = pd.to_numeric(relabelled_elections)
data['election'] = election_years

# Just want to look at (relatively) recent elections. Take an explicit copy so
# the clean-up steps below operate on an independent frame, not on a view of
# the unfiltered data.
data = data[data['election'] > 1975].copy()

# Get rid of NaNs
data = data.fillna(0)

# The source header for this column carries a trailing space; normalise it so
# the column can be reached by attribute / plain name later on.
data = data.rename(columns={"lib_votes ": "lib_votes"})

In [18]:
# Quick visual check of the first rows of `data` after loading and cleaning.
data.head()

Unnamed: 0,constituency_id,seats,constituency,country/region,electorate,con_votes,con_share,lib_votes,lib_share,lab_votes,lab_share,natSW_votes,natSW_share,oth_votes,oth_share,total_votes,turnout,election,boundary_set
10548,1,1,ABERAVON,Wales,64864,12692.0,0.247,4624,0.09,31665,0.617,1954.0,0.038,406.0,0.008,51341,0.792,1979,1974-79
10549,2,1,ABERDARE,Wales,47500,6453.0,0.173,0,0.0,26716,0.715,3652.0,0.098,518.0,0.014,37339,0.786,1979,1974-79
10550,3,1,ABERDEEN NORTH,Scotland,64747,7657.0,0.17,4887,0.108,26771,0.593,5796.0,0.128,0.0,0.0,45111,0.697,1979,1974-79
10551,4,1,ABERDEEN SOUTH,Scotland,65090,20820.0,0.407,5901,0.115,20048,0.392,4361.0,0.085,0.0,0.0,51130,0.786,1979,1974-79
10552,7,1,ABERTILLERY,Wales,35602,4613.0,0.162,0,0.0,21698,0.76,2248.0,0.079,0.0,0.0,28559,0.802,1979,1974-79


In [28]:
# Per-party vote columns: every "*votes*" column except the overall total.
vote_columns = [
    name
    for name in data.columns
    if "votes" in name and "total" not in name
]

# Coerce each vote column to numbers; anything unparseable counts as 0 votes.
new_votes = pd.DataFrame(
    {
        name: pd.to_numeric(data[name], errors="coerce").fillna(0.0)
        for name in vote_columns
    }
)

data[vote_columns] = new_votes

# The winner of a seat is the party whose vote column holds the row maximum;
# strip the "_votes" suffix (and any stray spaces) to leave the party code.
top_vote_column = new_votes.idxmax(axis="columns")
data["won_by"] = top_vote_column.str.replace("_votes", "").str.replace(" ", "")
data.head()

Unnamed: 0,constituency_id,seats,constituency,country/region,electorate,con_votes,con_share,lib_votes,lib_share,lab_votes,lab_share,natSW_votes,natSW_share,oth_votes,oth_share,total_votes,turnout,election,boundary_set,won_by
10548,1,1,ABERAVON,Wales,64864,12692.0,0.247,4624,0.09,31665,0.617,1954.0,0.038,406.0,0.008,51341,0.792,1979,1974-79,lab
10549,2,1,ABERDARE,Wales,47500,6453.0,0.173,0,0.0,26716,0.715,3652.0,0.098,518.0,0.014,37339,0.786,1979,1974-79,lab
10550,3,1,ABERDEEN NORTH,Scotland,64747,7657.0,0.17,4887,0.108,26771,0.593,5796.0,0.128,0.0,0.0,45111,0.697,1979,1974-79,lab
10551,4,1,ABERDEEN SOUTH,Scotland,65090,20820.0,0.407,5901,0.115,20048,0.392,4361.0,0.085,0.0,0.0,51130,0.786,1979,1974-79,con
10552,7,1,ABERTILLERY,Wales,35602,4613.0,0.162,0,0.0,21698,0.76,2248.0,0.079,0.0,0.0,28559,0.802,1979,1974-79,lab


In [35]:
# Per-election seat tallies: {election year: {party code: seats won}}.
election_data = {}
seat_margins = []
# Per-election lookups filled in by the second pass below.
winning_parties = {}   # election year -> party with the most seats
second_parties = {}    # election year -> party with the second-most seats
winning_seats = {}     # election year -> seat count of the winning party
second_seats = {}      # election year -> seat count of the second party
num_vote_changes_for_gains_by_second_party = {}


# First pass -- count the seats each party won in each election
for election, party in zip(data["election"], data["won_by"]):
    if election not in election_data:
        election_data[election] = {
            "lab": 0,
            "con": 0,
            "lib": 0,
            "natSW": 0,
            "oth": 0,
        }

    election_data[election][party] += 1


# Second pass -- determine overall winner and 2nd place parties in each election.
# The loop-local names are deliberately distinct from the result dicts above:
# reusing `winning_seats` / `second_seats` as loop variables would rebind the
# dicts to integers and make the assignments below fail.
for election, seats_by_party in election_data.items():
    ranked = sorted(seats_by_party.items(), key=lambda item: item[1])
    (runner_up_party, runner_up_seat_count), (top_party, top_seat_count) = ranked[-2:]
    print(f"Election in year {election} was won by {top_party} with {top_seat_count} seats")
    winning_parties[election] = top_party
    winning_seats[election] = top_seat_count
    second_parties[election] = runner_up_party
    second_seats[election] = runner_up_seat_count


# Third pass -- per-constituency vote ordering
for index, row in enumerate(data.itertuples()):
    votes = [
        ("lab", row.lab_votes),
        ("con", row.con_votes),
        ("lib", row.lib_votes),
        ("natSW", row.natSW_votes),
        ("oth", row.oth_votes),
    ]

    # Ascending by vote count: the winner is last, the runner-up second to last.
    sorted_votes = sorted(votes, key=lambda x: x[1])

    # Show the first few rows as a sanity check.
    if index < 5:
        print(sorted_votes)

    # Half the winner/runner-up gap, rounded up.
    # NOTE(review): this value is overwritten on every row and never stored
    # (`seat_margins` and `num_vote_changes_for_gains_by_second_party` stay
    # empty) -- confirm where it was meant to be recorded. Also, for an even
    # gap this many switched votes produces a tie rather than a win.
    num_vote_changes_needed_for_2nd_party_to_win_seat = ceil( (sorted_votes[-1][1] - sorted_votes[-2][1]) / 2.0 )


# (year, majority) pairs, in the order the elections were first encountered.
majorities = []

for year, seats in election_data.items():
    winning_party_seats = max(seats.values())
    second_party_seats = sorted(seats.values())[-2]
    total_seats = sum(seats.values())
    seats_won_by_other_parties = total_seats - winning_party_seats
    # Majority = winner's seats minus everyone else's combined; it is negative
    # when the largest party holds fewer than half of the seats.
    majority = winning_party_seats - seats_won_by_other_parties
    majorities.append((year, majority))
    # Each seat lost by the winner to another party shrinks the majority by 2.
    num_seat_gains_needed_to_remove_majority = ceil(majority / 2)
    print(f"year = {year}, seats = {seats}, 2nd #seats = {second_party_seats} total_seats = {total_seats}, majority = {majority}, num seat changes = {num_seat_gains_needed_to_remove_majority}")


Election in year 1979 was won by con with 339 seats
Election in year 1983 was won by con with 397 seats
Election in year 1987 was won by con with 376 seats
Election in year 1992 was won by con with 336 seats
Election in year 1997 was won by lab with 418 seats
Election in year 2001 was won by lab with 412 seats
Election in year 2005 was won by lab with 355 seats
Election in year 2010 was won by con with 306 seats
Election in year 2015 was won by con with 330 seats
Election in year 2017 was won by con with 317 seats
Election in year 2019 was won by con with 365 seats
[('oth', 406.0), ('natSW', 1954.0), ('lib', 4624), ('con', 12692.0), ('lab', 31665)]
[('lib', 0), ('oth', 518.0), ('natSW', 3652.0), ('con', 6453.0), ('lab', 26716)]
[('oth', 0.0), ('lib', 4887), ('natSW', 5796.0), ('con', 7657.0), ('lab', 26771)]
[('oth', 0.0), ('natSW', 4361.0), ('lib', 5901), ('lab', 20048), ('con', 20820.0)]
[('lib', 0), ('oth', 0.0), ('natSW', 2248.0), ('con', 4613.0), ('lab', 21698)]
year = 1979, seats

In [None]:
# Build a dedicated frame for the chart. It is deliberately NOT called `data`:
# rebinding `data` here would replace the election results frame and make the
# earlier cells fail when re-run.
majorities_df = pd.DataFrame(majorities, columns=["Year", "Majority"])

plt.rcParams['figure.figsize'] = [13, 5]
chart = seaborn.lineplot(x="Year", y="Majority", data=majorities_df, marker='o')

# Style the existing tick labels in place instead of re-setting their text via
# set_xticklabels(get_xticklabels()), which freezes the label strings without
# fixing the tick positions. rotation=0 is the visual equivalent of the
# previous rotation=360.
plt.setp(
    chart.get_xticklabels(),
    rotation=0,
    horizontalalignment='right',
    fontweight='light',
    fontsize='x-large',
)

# Derive the starting year from the plotted data so the title cannot drift
# from the election filter applied when the data was loaded.
plt.title(f"Parliamentary majorities since {majorities_df['Year'].min()}");