# Breaking Down the Data - Why First Count Position Matters More than Anything in Irish Elections

In [5]:
import pandas as pd
import json
import pickle

In [6]:
with open('data.pkl', 'rb') as f: data = pickle.load(f)

## Nomenclature

We need to coin some nomemclature, so that we're always sure what we're talking about. There are two terms I'd like to use throughout - `inside` and `outside`.

### Inside
If a candidate is within the first $n$ positions after the first count in a consituency with $n$ seats, she shall be designated `inside`. That is, all candidates in the first three positions in a three-seat consituency on the first count, the first four positions in a four-seat constituency on the first count, or the first five positions in a five-seat constituency on the first count are designated `inside`.

### Outside
All candidates who are not `inside` are `outside`.

In [1]:
def insideOutside(someDf):
    holder = []
    grouper = someDf.groupby(['Result', 'seats', 'placement'])
    for a, b in grouper:
        holder.append({"result": a[0],
                       "seats": a[1],
                       "placement": a[2],
                       "candidates": b.shape[0]})
    df = pd.DataFrame(holder)
    
    df2 = pd.pivot_table(df, index=['seats', 'result'], columns='placement', values='candidates')
    df2.fillna(0, inplace=True)
    
    return df2.astype(int)

In [2]:
def insideOutsideSimple(someDf):
    holder = []
    grouper = someDf.groupby(['Result', 'placement'])
    for a, b in grouper:
        holder.append({"result": a[0],
                       "placement": a[1],
                       "candidates": b.shape[0]})
    df = pd.DataFrame(holder)
    
    df2 = pd.pivot_table(df, index='result', columns='placement', values='candidates')
    df2.fillna(0, inplace=True)
    
    return df2.astype(int)

## All Candidates

### Inside vs Outside, All Candidates

This is the breakdown of who got elected according to whether or not they were inside or outside in either of the elections.

In [7]:
all_candidates =pd.concat([data[2016]['candidates'], data[2020]['candidates']])

In [8]:
ac = insideOutsideSimple(all_candidates)
# ac.style.bar(color='darkgoldenrod', align='zero')
ac.style.background_gradient(cmap='RdYlGn')

placement,inside,outside
result,Unnamed: 1_level_1,Unnamed: 2_level_1
Elected,282,34
Not Elected,34,732


Across the two general elections for which data is available on https://data.gov.ie/, inside candidates won 282 out of 316 seats. Nine out of every ten candidates who finished in the top three places in three-seat constituencies, the top four in four-seat constituencies or the top five in five-seat constituencies were elected.

### Inside versus Outside in 2016

In [9]:
io2016 = insideOutsideSimple(data[2016]['candidates'])
io2016.style.background_gradient(cmap='RdYlGn')

placement,inside,outside
result,Unnamed: 1_level_1,Unnamed: 2_level_1
Elected,144,13
Not Elected,13,381


Finishing inside on the first count lead to a 92% likelihood of getting elected.

### Inside versus Outside in 2020

In [8]:
io2020 = insideOutsideSimple(data[2020]['candidates'])
io2020.style.background_gradient(cmap='RdYlGn')

placement,inside,outside
result,Unnamed: 1_level_1,Unnamed: 2_level_1
Elected,138,21
Not Elected,21,351


Finishing inside had an 87% likelihood of getting elected in 2020.

### Inside versus Outside in Seats, All

In [10]:
ac_seats = insideOutside(all_candidates)
ac_seats.style.background_gradient(cmap='RdYlGn')

Unnamed: 0_level_0,placement,inside,outside
seats,result,Unnamed: 2_level_1,Unnamed: 3_level_1
3,Elected,63,9
3,Not Elected,9,177
4,Elected,106,18
4,Not Elected,18,300
5,Elected,113,7
5,Not Elected,7,255


Finishing inside lead to an
- 88% likelihood of getting elected in a three-seater,
- an 85% likelihood of getting elected in a four-seater and
- a 94% likelihood of getting elected in a five-seater.

Isn't it interesting that finishing in the first five in a five-seater in the general elections of 2016 and 2020 lead to a 94% chance of getting elected? We think of five seaters as being the most likely constituencies to elected underdogs. Perhaps not.

### Inside versus Outside in Seats, 2016

In [11]:
io2016_seats = insideOutside(data[2016]['candidates'])
io2016_seats.style.background_gradient(cmap='RdYlGn')

Unnamed: 0_level_0,placement,inside,outside
seats,result,Unnamed: 2_level_1,Unnamed: 3_level_1
3,Elected,38,4
3,Not Elected,4,103
4,Elected,55,5
4,Not Elected,5,152
5,Elected,51,4
5,Not Elected,4,126


### Inside versus Outside in Seats, 2020

In [12]:
ac_seats = insideOutside(all_candidates)
ac_seats.style.background_gradient(cmap='RdYlGn')

Unnamed: 0_level_0,placement,inside,outside
seats,result,Unnamed: 2_level_1,Unnamed: 3_level_1
3,Elected,63,9
3,Not Elected,9,177
4,Elected,106,18
4,Not Elected,18,300
5,Elected,113,7
5,Not Elected,7,255


In [13]:
elected_columns =[( 'Elected', 3),( 'Elected', 4), ( 'Elected', 5)]

In [14]:
def fcp(someDf):
    holder = []
    grouper = someDf.groupby(['Result', 'seats', 'first_count_position'])
    for a, b in grouper:
        holder.append({"result": a[0],
                       "seats": a[1],
                       "first count position": a[2],
                       "election": b['election'].values[0] or None,
                       "candidates": b.shape[0]})
    df = pd.DataFrame(holder)

    value_of_first_count_position_all = pd.pivot_table(df, index='first count position', columns=['result', 'seats'], values='candidates')
    value_of_first_count_position_all.fillna(0, inplace=True)
    return value_of_first_count_position_all.astype(int)

## All Candidates by First Count Position

Here's a breakdown of first-count-position versus seats versus elected or not elected across the two elections. Everyone who topped the poll was elected. Just one candidate who was second in a three-seater failed to get elected.

Everyone who finished first, second or third in a five-seater was elected. 19 of the 24 who finished fourth were elected. 22 of the 24 who finished fifth were elected. Leapfrogging did not occur in five-seat constituencies.

In [16]:
x = fcp(all_candidates)
x.style.background_gradient(cmap="RdYlGn", subset=elected_columns)

result,Elected,Elected,Elected,Not Elected,Not Elected,Not Elected
seats,3,4,5,3,4,5
first count position,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1,24,31,24,0,0,0
2,23,28,24,1,3,0
3,16,27,24,8,4,0
4,4,20,19,20,11,5
5,4,14,22,20,17,2
6,0,3,4,24,28,20
7,1,1,3,22,30,21
8,0,0,0,23,31,24
9,0,0,0,23,31,24
10,0,0,0,18,31,24


## 2016 Candidates by First Count Position

In [17]:
x = fcp(data[2016]['candidates'])
x.style.background_gradient(cmap="RdYlGn", subset=elected_columns)

result,Elected,Elected,Elected,Not Elected,Not Elected,Not Elected
seats,3,4,5,3,4,5
first count position,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1,14,15,11,0,0,0
2,14,14,11,0,1,0
3,10,14,11,4,1,0
4,2,12,8,12,3,3
5,1,5,10,13,10,1
6,0,0,3,14,15,8
7,1,0,1,12,15,10
8,0,0,0,13,15,11
9,0,0,0,13,15,11
10,0,0,0,10,15,11


## 2020 Candidates by First Count Position

In [18]:
x = fcp(data[2020]['candidates'])
x.style.background_gradient(cmap="RdYlGn", subset=elected_columns)

result,Elected,Elected,Elected,Not Elected,Not Elected,Not Elected
seats,3,4,5,3,4,5
first count position,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1,10,16,13,0,0,0
2,9,14,13,1,2,0
3,6,13,13,4,3,0
4,2,8,11,8,8,2
5,3,9,12,7,7,1
6,0,3,1,10,13,12
7,0,1,2,10,15,11
8,0,0,0,10,16,13
9,0,0,0,10,16,13
10,0,0,0,8,16,13
