# UK Elections

This dataset comprises 5 files:
* `HoC-GE2019-results-by-constituency.csv` - The results of the 2019 election, by constituency.
* `HoC-GE2019-results-by-candidate.csv` - The results of the 2019 election, by candidate.
* `HoC-GE2017-constituency-results.csv` - The results of the 2017 election, by constituency.
* `HoC-GE2017-results-by-candidate.csv` - The results of the 2017 election, by candidate.
* `EU-referendum-result-data.csv` - More detailed data about the EU Referendum vote, by area (see the `Area` and `Area_Code` columns).

The metadata (column descriptions) for the various files can be found in the included `metadata.txt` file.

### Import required dependencies

In [1]:
# Import necessary libraries
import pandas as pd

### Read in the datafiles

In [2]:
# Load every CSV from the local `data/` directory into its own DataFrame.

# General election results, one row per constituency.
con2017 = pd.read_csv("data/HoC-GE2017-constituency-results.csv")
con2019 = pd.read_csv("data/HoC-GE2019-results-by-constituency.csv")

# General election results, one row per candidate.
ca2017 = pd.read_csv("data/HoC-GE2017-results-by-candidate.csv")
ca2019 = pd.read_csv("data/HoC-GE2019-results-by-candidate.csv")

# EU referendum results.
referendum = pd.read_csv("data/EU-referendum-result-data.csv")

---

In [28]:
# Show the column labels of the 2017 constituency-level results frame.
con2017.columns

Index(['ons_id', 'ons_region_id', 'constituency_name', 'county_name',
       'region_name', 'country_name', 'constituency_type', 'declaration_time',
       'result', 'first_party', 'second_party', 'electorate', 'valid_votes',
       'invalid_votes', 'majority', 'con', 'lab', 'ld', 'ukip', 'green', 'snp',
       'pc', 'dup', 'sf', 'sdlp', 'uup', 'alliance', 'other', 'other_winner',
       'year'],
      dtype='object')

In [8]:
# Print each referendum column label followed by a colon, one label per line.
label_template = "{}:"
for column_name in referendum.columns:
    print(label_template.format(column_name))

id:
Region_Code:
Region:
Area_Code:
Area:
Electorate:
ExpectedBallots:
VerifiedBallotPapers:
Pct_Turnout:
Votes_Cast:
Valid_Votes:
Remain:
Leave:
Rejected_Ballots:
No_official_mark:
Voting_for_both_answers:
Writing_or_mark:
Unmarked_or_void:
Pct_Remain:
Pct_Leave:
Pct_Rejected:


In [33]:
def _print_columns_missing_from(source, other):
    """Print each column label of `source` that `other` lacks, in `source` column order."""
    for label in source.columns:
        if label not in other.columns:
            print(label)


# Columns present in the 2019 constituency results but absent from 2017.
print("2019 not in 2017:")
_print_columns_missing_from(con2019, con2017)

# Columns present in the 2017 constituency results but absent from 2019.
print("\n2017 not in 2019:")
_print_columns_missing_from(con2017, con2019)

2019 not in 2017:
mp_firstname
mp_surname
mp_gender
brexit

2017 not in 2019:
ukip


In [3]:
# Preview the first five rows of the referendum data (5 is also the default).
referendum.head(n=5)

Unnamed: 0,id,Region_Code,Region,Area_Code,Area,Electorate,ExpectedBallots,VerifiedBallotPapers,Pct_Turnout,Votes_Cast,...,Remain,Leave,Rejected_Ballots,No_official_mark,Voting_for_both_answers,Writing_or_mark,Unmarked_or_void,Pct_Remain,Pct_Leave,Pct_Rejected
0,108,E12000006,East,E06000031,Peterborough,120892,87474,87469,72.35,87469,...,34176,53216,77,0,32,7,38,39.11,60.89,0.09
1,109,E12000006,East,E06000032,Luton,127612,84633,84636,66.31,84616,...,36708,47773,135,0,85,0,50,43.45,56.55,0.16
2,112,E12000006,East,E06000033,Southend-on-Sea,128856,93948,93939,72.9,93939,...,39348,54522,69,0,21,0,48,41.92,58.08,0.07
3,113,E12000006,East,E06000034,Thurrock,109897,79969,79954,72.75,79950,...,22151,57765,34,0,8,3,23,27.72,72.28,0.04
4,110,E12000006,East,E06000055,Bedford,119530,86136,86136,72.06,86135,...,41497,44569,69,0,26,1,42,48.22,51.78,0.08


In [7]:
# For every referendum column: print its label, then its first five values,
# then a blank line to separate it from the next column.
for column_name, column_values in referendum.items():
    print(column_name)
    print(column_values.iloc[:5])
    print()

id
0    108
1    109
2    112
3    113
4    110
Name: id, dtype: int64

Region_Code
0    E12000006
1    E12000006
2    E12000006
3    E12000006
4    E12000006
Name: Region_Code, dtype: object

Region
0    East
1    East
2    East
3    East
4    East
Name: Region, dtype: object

Area_Code
0    E06000031
1    E06000032
2    E06000033
3    E06000034
4    E06000055
Name: Area_Code, dtype: object

Area
0       Peterborough
1              Luton
2    Southend-on-Sea
3           Thurrock
4            Bedford
Name: Area, dtype: object

Electorate
0    120892
1    127612
2    128856
3    109897
4    119530
Name: Electorate, dtype: int64

ExpectedBallots
0    87474
1    84633
2    93948
3    79969
4    86136
Name: ExpectedBallots, dtype: int64

VerifiedBallotPapers
0    87469
1    84636
2    93939
3    79954
4    86136
Name: VerifiedBallotPapers, dtype: int64

Pct_Turnout
0    72.35
1    66.31
2    72.90
3    72.75
4    72.06
Name: Pct_Turnout, dtype: float64

Votes_Cast
0    87469
1    84616
2  

In [11]:
# `ref` is a short alias bound to the same DataFrame object as `referendum` (not a copy).
ref = referendum

# Distinct values of the Region column, displayed in sorted order.
region_names = ref["Region"].unique()
sorted(region_names)

['East',
 'East Midlands',
 'London',
 'North East',
 'North West',
 'Northern Ireland',
 'Scotland',
 'South East',
 'South West',
 'Wales',
 'West Midlands',
 'Yorkshire and The Humber']