This Python Jupyter Notebook creates the initial information for use within the UK General Elections model

In [1]:
#Import required packages
import urllib
import urllib.parse

import pandas as pd
import pyodbc
import sqlalchemy
from sqlalchemy import create_engine

In [2]:
#Connect to database 'UK_General_Election' using SQLAlchemy
#The ODBC connection string is URL-encoded and passed through via the 'odbc_connect' query parameter
connection_str = "DRIVER={SQL SERVER};SERVER=DANZPOOTA;DATABASE=UK_General_Election;TRUSTED_CONNECTION=YES"
#urllib.parse is a submodule and needs its own import; 'import urllib' alone does not guarantee it is loaded
params = urllib.parse.quote_plus(connection_str)
engine = create_engine(f'mssql+pyodbc:///?odbc_connect={params}')
#Connection reused by the cells below; it is closed in the final cell
conn = engine.connect()

In [3]:
#Build the SQL batch that empties every table this notebook (re)populates
#This is the only initial information required so anything that currently exists is not needed
#NOTE(review): the order presumably clears dependent tables before the tables they reference - confirm against the schema
_TABLES_TO_CLEAR = (
    (
        "ElectionPredictionPollsUsed",
        "ElectionPredictionCandidates",
        "ElectionPredictionConstituencies",
        "ElectionPredictionOverall",
        "ElectionPredictionMeta",
    ),
    (
        "PollAnalysisConstituencies",
        "PollAnalysisRegions",
        "PollAnalysisMeta",
    ),
    (
        "PollDetails",
        "PollMeta",
    ),
    (
        "Candidates",
        "RegionConstituencies",
        "Constituencies",
        "Pollsters",
        "Parties",
        "RegionRegionTypes",
        "Regions",
        "RegionTypes",
    ),
)

#One 'DELETE <table>' per line, groups separated by a blank line, with a leading newline
DeleteTableData = "\n" + "\n\n".join(
    "\n".join("DELETE " + table_name for table_name in table_group)
    for table_group in _TABLES_TO_CLEAR
)

In [4]:
#Run the clean-up batch in an explicit transaction.
#Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0; engine.begin() is available in both,
#committing when the block exits normally and rolling back if an exception is raised.
with engine.begin() as cleanup_conn:
    cleanup_conn.execute(sqlalchemy.text(DeleteTableData))

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x2121b706100>

In [5]:
#Full path of the Excel workbook holding the initial data (one worksheet is read per table below)
filename = r"C:\Users\danmu\Documents\Elections\2024_Python\Initial_Data.xlsx"

In [6]:
#Read the 'RegionTypes' worksheet into a dataframe and preview the first rows
regiontypes_df = pd.read_excel(io=filename, sheet_name="RegionTypes")
regiontypes_df.head()

Unnamed: 0,RegionType,RegionTypeRank
0,Constituency,2
1,GB,8
2,GBFiveLonSouth,6
3,GBFiveMidWales,6
4,GBSix,6


In [7]:
#Append the worksheet rows to the 'RegionTypes' table (the dataframe index is not written),
#then select the table back into a dataframe so the inserted rows can be inspected
regiontypes_df.to_sql(name="RegionTypes", con=conn, if_exists="append", index=False)
db_regiontypes_df = pd.read_sql(sql="SELECT * From RegionTypes", con=conn)
db_regiontypes_df.head()

Unnamed: 0,RegionType,RegionTypeRank
0,Constituency,2
1,GB,8
2,GBFiveLonSouth,6
3,GBFiveMidWales,6
4,GBSix,6


In [8]:
#Read the 'Regions' worksheet into a dataframe and preview the first rows
regions_df = pd.read_excel(io=filename, sheet_name="Regions")
regions_df.head()

Unnamed: 0,RegionName
0,Cardiff and South Central Wales
1,Central London
2,Central Scotland
3,East England
4,East London


In [9]:
#Append the rows to the 'Regions' table, then read the table back to inspect what was stored
regions_df.to_sql(name="Regions", con=conn, if_exists="append", index=False)
db_regions_df = pd.read_sql(sql="SELECT * From Regions", con=conn)
db_regions_df.head()

Unnamed: 0,RegionName
0,Cardiff and South Central Wales
1,Central London
2,Central Scotland
3,East England
4,East London


In [10]:
#Read the 'RegionRegionTypes' worksheet into a dataframe and preview the first rows
regionregiontypes_df = pd.read_excel(io=filename, sheet_name="RegionRegionTypes")
regionregiontypes_df.head()

Unnamed: 0,RegionType,RegionName
0,GB,GB
1,GBFiveLonSouth,Midlands
2,GBFiveLonSouth,North England
3,GBFiveLonSouth,Scotland
4,GBFiveLonSouth,South+London


In [11]:
#Append the rows to the 'RegionRegionTypes' table, then read the table back to inspect what was stored
regionregiontypes_df.to_sql(name="RegionRegionTypes", con=conn, if_exists="append", index=False)
db_regionregiontypes_df = pd.read_sql(sql="SELECT * From RegionRegionTypes", con=conn)
db_regionregiontypes_df.head()

Unnamed: 0,RegionXTypesID,RegionName,RegionType
0,GBFiveLonSouthMidlands,Midlands,GBFiveLonSouth
1,GBFiveLonSouthNorth England,North England,GBFiveLonSouth
2,GBFiveLonSouthScotland,Scotland,GBFiveLonSouth
3,GBFiveLonSouthSouth+London,South+London,GBFiveLonSouth
4,GBFiveLonSouthWales,Wales,GBFiveLonSouth


In [12]:
#Read the 'Parties' worksheet into a dataframe and preview the first rows
parties_df = pd.read_excel(io=filename, sheet_name="Parties")
parties_df.head()

Unnamed: 0,PartyAbbreviation,PartyFullName
0,APNI,Alliance
1,Con,Conservative
2,DUP,Democratic Unionist Party
3,Green,Green
4,Lab,Labour


In [13]:
#Append the rows to the 'Parties' table, then read the table back to inspect what was stored
parties_df.to_sql(name="Parties", con=conn, if_exists="append", index=False)
db_parties_df = pd.read_sql(sql="SELECT * From Parties", con=conn)
db_parties_df.head()

Unnamed: 0,PartyAbbreviation,PartyFullName
0,APNI,Alliance
1,Con,Conservative
2,DUP,Democratic Unionist Party
3,Green,Green
4,Lab,Labour


In [14]:
#Read the 'Pollsters' worksheet into a dataframe and preview the first rows
pollsters_df = pd.read_excel(io=filename, sheet_name="Pollsters")
pollsters_df.head()

Unnamed: 0,PollsterName,DefaultRegionType
0,BMG,ITL1Region-NINA
1,Deltapoll,GBSix
2,FindOutNow,ITL1Region-NINA
3,Focaldata,GB
4,ICM,GBFiveLonSouth


In [15]:
#Append the rows to the 'Pollsters' table, then read the table back to inspect what was stored
pollsters_df.to_sql(name="Pollsters", con=conn, if_exists="append", index=False)
db_pollsters_df = pd.read_sql(sql="SELECT * From Pollsters", con=conn)
db_pollsters_df.head()

Unnamed: 0,PollsterName,DefaultRegionType
0,BMG,ITL1Region-NINA
1,Deltapoll,GBSix
2,Electoral Calculus,MRP632
3,FindOutNow,ITL1Region-NINA
4,Focaldata,GB


In [16]:
#Read the 'Constituencies' worksheet into a dataframe and preview the first rows
constituencies_df = pd.read_excel(io=filename, sheet_name="Constituencies")
constituencies_df.head()

Unnamed: 0,ONSID,PANO,ConstituencyName,ClosestOldConstituency,PAName,ConstituencyType,Nation,ITL1Region,Electorate,TotalVotes,MajorityVotes,MajorityShare,FirstParty,SecondParty
0,W07000081,1,Aberafan Maesteg,Aberavon,Aberafan Maesteg,County,Wales,Wales,69817,44423,13457,0.302929,Lab,Con
1,S14000060,2,Aberdeen North,Aberdeen North,Aberdeen North,Borough,Scotland,Scotland,76895,50127,14210,0.28348,SNP,Con
2,S14000061,3,Aberdeen South,Aberdeen South,Aberdeen South,Borough,Scotland,Scotland,76560,50118,5463,0.109003,SNP,Con
3,S14000062,4,Aberdeenshire North and Moray East,Banff and Buchan,Aberdeenshire North & Moray East,County,Scotland,Scotland,71485,45891,2399,0.052276,Con,SNP
4,S14000058,5,West Aberdeenshire and Kincardine,West Aberdeenshire and Kincardine,Aberdeenshire West & Kincardine,County,Scotland,Scotland,73634,53345,843,0.015803,Con,SNP


In [17]:
#Append the rows to the 'Constituencies' table, then read the table back to inspect what was stored
constituencies_df.to_sql(name="Constituencies", con=conn, if_exists="append", index=False)
db_constituencies_df = pd.read_sql(sql="SELECT * From Constituencies", con=conn)
db_constituencies_df.head()

Unnamed: 0,ONSID,PANO,ConstituencyName,ClosestOldConstituency,PAName,Nation,ConstituencyType,ITL1Region,FirstParty,SecondParty,Electorate,TotalVotes,MajorityVotes,MajorityShare
0,W07000081,1,Aberafan Maesteg,Aberavon,Aberafan Maesteg,Wales,County,Wales,Lab,Con,69817,44423,13457,0.302929
1,S14000060,2,Aberdeen North,Aberdeen North,Aberdeen North,Scotland,Borough,Scotland,SNP,Con,76895,50127,14210,0.28348
2,S14000061,3,Aberdeen South,Aberdeen South,Aberdeen South,Scotland,Borough,Scotland,SNP,Con,76560,50118,5463,0.109003
3,S14000062,4,Aberdeenshire North and Moray East,Banff and Buchan,Aberdeenshire North & Moray East,Scotland,County,Scotland,Con,SNP,71485,45891,2399,0.052276
4,S14000063,6,Airdrie and Shotts,Airdrie and Shotts,Airdrie & Shotts,Scotland,County,Scotland,SNP,Lab,70420,46338,5324,0.114895


In [18]:
#Read the 'RegionConstituencies' worksheet into a dataframe and preview the first rows
RegionConstituencies_df = pd.read_excel(io=filename, sheet_name="RegionConstituencies")
RegionConstituencies_df.head()

Unnamed: 0,RegionName,ConstituencyName
0,GB,Aberafan Maesteg
1,Midlands+Wales,Aberafan Maesteg
2,South West Wales,Aberafan Maesteg
3,UK,Aberafan Maesteg
4,Wales,Aberafan Maesteg


In [19]:
#Append the rows to the 'RegionConstituencies' table, then read the table back to inspect what was stored
RegionConstituencies_df.to_sql(name="RegionConstituencies", con=conn, if_exists="append", index=False)
db_RegionConstituencies_df = pd.read_sql(sql="SELECT * From RegionConstituencies", con=conn)
db_RegionConstituencies_df.head()

Unnamed: 0,RegionConsID,ConstituencyName,RegionName
0,Cardiff and South Central WalesCardiff East,Cardiff East,Cardiff and South Central Wales
1,Cardiff and South Central WalesCardiff North,Cardiff North,Cardiff and South Central Wales
2,Cardiff and South Central WalesCardiff South a...,Cardiff South and Penarth,Cardiff and South Central Wales
3,Cardiff and South Central WalesCardiff West,Cardiff West,Cardiff and South Central Wales
4,Cardiff and South Central WalesMerthyr Tydfil ...,Merthyr Tydfil and Aberdare,Cardiff and South Central Wales


In [20]:
#Read the 'Candidates' worksheet into a dataframe and preview the first rows
Candidates_df = pd.read_excel(io=filename, sheet_name="Candidates")
Candidates_df.head()

Unnamed: 0,Constituency,Party,PreviousVotes,PreviousShare,FullName,FirstName,Surname,Gender,SittingMP,FormerMP,PreviousStanding,CurrentStanding
0,Aberafan Maesteg,Lab,23509,0.529208,Stephen Kinnock,Stephen,Kinnock,,1,,1,1
1,Aberafan Maesteg,Con,10052,0.226279,Abigail Mainon,Abigail,Mainon,,0,,1,1
2,Aberafan Maesteg,PC,3991,0.089841,Colin John Deere,Colin John,Deere,,0,,1,1
3,Aberafan Maesteg,Reform,3794,0.085406,Mark Griffiths,Mark,Griffiths,,0,,1,1
4,Aberafan Maesteg,LD,1645,0.03703,Justin Mark Griffiths,Justin Mark,Griffiths,,0,,1,1


In [21]:
#Append the rows to the 'Candidates' table, then read the table back to inspect what was stored
Candidates_df.to_sql(name="Candidates", con=conn, if_exists="append", index=False)
db_Candidates_df = pd.read_sql(sql="SELECT * From Candidates", con=conn)
db_Candidates_df.head()

Unnamed: 0,CandidateID,Constituency,Party,SittingMP,FormerMP,FullName,FirstName,Surname,Gender,PreviousVotes,PreviousShare,PreviousStanding,CurrentStanding
0,Aberafan MaestegCon,Aberafan Maesteg,Con,False,,Abigail Mainon,Abigail,Mainon,,10052,0.226279,1,1
1,Aberafan MaestegGreen,Aberafan Maesteg,Green,False,,Nigel Hill,Nigel,Hill,,701,0.01578,1,1
2,Aberafan MaestegLab,Aberafan Maesteg,Lab,True,,Stephen Kinnock,Stephen,Kinnock,,23509,0.529208,1,1
3,Aberafan MaestegLD,Aberafan Maesteg,LD,False,,Justin Mark Griffiths,Justin Mark,Griffiths,,1645,0.03703,1,1
4,Aberafan MaestegOther,Aberafan Maesteg,Other,False,,,,,,731,0.016455,1,1


In [22]:
#Release the connection opened at the top of the notebook
conn.close()
#conn.close() only hands the connection back to the engine's pool; dispose() closes the pooled
#DBAPI connections so no session is left open on the server while the kernel stays alive
engine.dispose()