Jupyter notebook that updates the Excel spreadsheet holding the data for the Flourish data visualisation of the model output

In [1]:
# Import required modules
import datetime
import urllib
import urllib.parse

import ipywidgets as widgets
import numpy as np
import pandas as pd

# Modules required for database access
import pyodbc
import sqlalchemy
from sqlalchemy import create_engine

In [2]:
# Connect to database 'UK_General_Election' using SQLAlchemy.
# The ODBC connection string is URL-encoded and passed to the mssql+pyodbc dialect
# through its `odbc_connect` query parameter. The string requests a trusted
# connection, so no username or password appears in the notebook.
# NOTE: `urllib.parse` has to be imported explicitly in the imports cell; a bare
# `import urllib` does not guarantee that the `parse` submodule is loaded, and
# relying on another package having imported it is fragile.
connection_str = "DRIVER={SQL SERVER};SERVER=DANZPOOTA;DATABASE=UK_General_Election;TRUSTED_CONNECTION=YES"
params = urllib.parse.quote_plus(connection_str)
engine = create_engine('mssql+pyodbc:///?odbc_connect=%s' % params)
# Single shared connection used by every query in the notebook; closed in the final cell.
conn = engine.connect()

In [3]:
# Parties lists
# AllPartiesList is read from the database; the other lists are fixed, hand-ordered
# selections of party abbreviations used for column ordering.
#
# Connection.execute(text(...)) is used instead of Engine.execute(), which was
# deprecated in SQLAlchemy 1.4 and removed in 2.0. The ORDER BY makes the list
# order deterministic (SQL gives no ordering guarantee without one), so code that
# pairs this list positionally with results ordered by party abbreviation lines up.
AllPartiesList = [
    row[0]
    for row in conn.execute(
        sqlalchemy.text("SELECT PartyAbbreviation FROM Parties ORDER BY PartyAbbreviation")
    )
]
GBPartiesList = ['Lab','Con','LD','Reform','Green','SNP','PC','Other']
GBPartiesAtoZList = ['Con','Green','Lab','LD','Other','PC','Reform','SNP']
UKPartiesList = ['Lab','Con','LD','Reform','Green','SNP','PC','DUP','SF','SDLP','APNI','UUP','Other']

In [4]:
# Regions lists
# ITL1RegionList is read from the database (every region whose type is 'ITL1Region');
# GBRegionsList is a fixed list of region names.
#
# Connection.execute(text(...)) is used instead of Engine.execute(), which was
# deprecated in SQLAlchemy 1.4 and removed in 2.0.
ITL1RegionList = [
    row[0]
    for row in conn.execute(
        sqlalchemy.text("SELECT RegionName FROM RegionRegionTypes WHERE RegionType = 'ITL1Region'")
    )
]
GBRegionsList = ['East England', 'East Midlands', 'London', 'North East England', 'North West England', 'Scotland', 'South East England', 'South West England', 'Wales', 'West Midlands', 'Yorkshire and The Humber']

In [5]:
# Determine when the latest prediction was (date) and its ID in the database.
# The single most recent row of ElectionPredictionMeta is fetched; the frame itself
# is kept because it is also written to the export workbook.
LatestPredictionDataQuery = "SELECT TOP(1) ElectionPredictionDate, ElectionPredictionID FROM ElectionPredictionMeta ORDER BY ElectionPredictionDate DESC"
LatestPrediction_df = pd.read_sql(LatestPredictionDataQuery,conn)

# Fail with a clear message rather than an opaque KeyError from .loc[0, ...]
# when no prediction has been stored yet.
if LatestPrediction_df.empty:
    raise ValueError("ElectionPredictionMeta contains no predictions; nothing to export")

PredictionDate = LatestPrediction_df.loc[0,'ElectionPredictionDate']
# The ID is substituted into the query templates with str.replace(), which only
# accepts strings, so normalise it here (a no-op when the driver already returns str).
PredictionID = str(LatestPrediction_df.loc[0,'ElectionPredictionID'])

In [6]:
# Query template for the candidate-level rows of one prediction: ONS ID, constituency,
# party, predicted share, change and previous share (shares are multiplied by 100).
# Rows are ordered by constituency name, then by predicted share descending.
# '<PredictionID>' is a placeholder that must be substituted before the query is run.
PredictionCandidatesQuery = """SELECT con.ONSID, can.Constituency, Can.Party, epc.VoteShare*100 AS 'Predicted Share', epc.VoteShare*100-can.PreviousShare*100 As 'Change', can.PreviousShare*100 AS 'Previous Share' FROM ElectionPredictionCandidates AS epc
INNER JOIN Candidates AS can ON can.CandidateID = epc.CandidateID
INNER JOIN Constituencies AS con ON con.ConstituencyName = can.Constituency
WHERE ElectionPredictionID = '<PredictionID>'
ORDER BY can.Constituency ASC, epc.VoteShare DESC"""

In [7]:
# Query template for the constituency-level rows of one prediction: ONS ID, constituency,
# gain/loss columns, winning and second party, winning vote share, majority, previous
# winner and swing (share, majority and swing are multiplied by 100).
# '<PredictionID>' is a placeholder that must be substituted before the query is run.
PredictionConstituenciesQuery = """SELECT con.ONSID, epc.Constituency, epc.GAIN AS 'Gains', epc.LOSS AS 'Losses', epc.WinningParty AS 'Winning Party', epc.SecondParty AS 'Second Party',
epc.VoteShare*100 AS 'Winning Vote Share', epc.Majority*100 As 'Majority', epc.PreviousWinner As '2019 Winner (Nominal)', epc.Swing*100 As 'Swing'
FROM ElectionPredictionConstituencies AS epc
INNER JOIN Constituencies AS con ON con.ConstituencyName = epc.Constituency
WHERE ElectionPredictionID = '<PredictionID>'"""

In [8]:
# Fill the '<PredictionID>' placeholder in both per-prediction query templates
# with the ID of the latest prediction.
PredictionCandidatesQuery, PredictionConstituenciesQuery = (
    template.replace("<PredictionID>", PredictionID)
    for template in (PredictionCandidatesQuery, PredictionConstituenciesQuery)
)

In [9]:
# Query template returning, for one party, the prediction date and the value of
# ElectionPredictionOverall.Constituencies for every stored prediction, newest first.
# '<Party>' is a placeholder used twice: as the output column alias and as the
# party filter, so the result column is named after the party.
PredictionSeatsTimeseriesQuery = """SELECT epm.ElectionPredictionDate, epo.Constituencies AS '<Party>' FROM ElectionPredictionOverall AS epo
INNER JOIN ElectionPredictionMeta AS epm ON epm.ElectionPredictionID = epo.ElectionPredictionID
WHERE epo.Party = '<Party>'
ORDER BY epm.ElectionPredictionDate DESC"""

In [10]:
# Query template returning, for one party, the prediction date and the overall
# vote share (multiplied by 100) for every stored prediction, newest first.
# '<Party>' is a placeholder used twice: as the output column alias and as the
# party filter, so the result column is named after the party.
PredictionShareTimeseriesQuery = """SELECT epm.ElectionPredictionDate, epo.VoteShare*100 AS '<Party>' FROM ElectionPredictionOverall AS epo
INNER JOIN ElectionPredictionMeta AS epm ON epm.ElectionPredictionID = epo.ElectionPredictionID
WHERE epo.Party = '<Party>'
ORDER BY epm.ElectionPredictionDate DESC"""

In [11]:
# Build SeatsTimeseries_df and ShareTimeseries_df: one row per prediction date
# (newest first) and one column per party in GBPartiesList.

# Both tables start from the bare list of prediction dates.
SeatsTimeseries_df = pd.read_sql(
    "SELECT ElectionPredictionDate FROM ElectionPredictionMeta ORDER BY ElectionPredictionDate DESC",
    conn,
)
ShareTimeseries_df = SeatsTimeseries_df.copy()

for CurrentParty in GBPartiesList:
    # Seats: run the party-specific query and attach its column by date.
    PredictionSeatsTimeseriesQueryParty = PredictionSeatsTimeseriesQuery.replace('<Party>', CurrentParty)
    MergeSeatsTimeSeries_df = pd.read_sql(PredictionSeatsTimeseriesQueryParty, conn)
    SeatsTimeseries_df = pd.merge(
        SeatsTimeseries_df,
        MergeSeatsTimeSeries_df,
        how='left',
        on='ElectionPredictionDate',
    )

    # Vote share: same pattern with the share template.
    PredictionShareTimeseriesQueryParty = PredictionShareTimeseriesQuery.replace('<Party>', CurrentParty)
    MergeShareTimeSeries_df = pd.read_sql(PredictionShareTimeseriesQueryParty, conn)
    ShareTimeseries_df = pd.merge(
        ShareTimeseries_df,
        MergeShareTimeSeries_df,
        how='left',
        on='ElectionPredictionDate',
    )

In [12]:
# UK-wide vote share per party for the latest prediction: each party's summed
# candidate share as a percentage of the total over all parties.
# The prediction ID placeholder is filled in directly on the template.
UKShareQuery = """SELECT can.Party,
CAST(SUM(epcan.VoteShare)*100 AS FLOAT) / SUM(SUM(epcan.VoteShare)) OVER() AS 'VoteShare'
FROM ElectionPredictionCandidates AS epcan
INNER JOIN Candidates AS can ON can.CandidateID = epcan.CandidateID
INNER JOIN Constituencies AS con ON con.ConstituencyName = can.Constituency
WHERE ElectionPredictionID  = '<ElectionPredictionID>'
GROUP BY can.Party
ORDER BY can.Party""".replace('<ElectionPredictionID>', PredictionID)

In [13]:
# UK-wide seat count per party for the latest prediction. The LEFT JOIN starts from
# Parties, so every party is returned, with a count of 0 when it wins nothing.
# The prediction ID placeholder is filled in directly on the template.
UKSeatsQuery = """SELECT par.PartyAbbreviation,
Count(epcon.WinningParty) AS 'Seats'
FROM Parties AS par
LEFT JOIN ElectionPredictionConstituencies AS epcon ON par.PartyAbbreviation = epcon.WinningParty
AND epcon.ElectionPredictionID  = '<ElectionPredictionID>'
GROUP BY par.PartyAbbreviation
ORDER BY par.PartyAbbreviation""".replace('<ElectionPredictionID>', PredictionID)

In [14]:
# Vote share per party for the latest prediction, restricted to constituencies
# whose ITL1 region is not 'Northern Ireland' (i.e. Great Britain only).
# The prediction ID placeholder is filled in directly on the template.
GBShareQuery = """SELECT can.Party,
CAST(SUM(epcan.VoteShare)*100 AS FLOAT) / SUM(SUM(epcan.VoteShare)) OVER() AS 'VoteShare'
FROM ElectionPredictionCandidates AS epcan
INNER JOIN Candidates AS can ON can.CandidateID = epcan.CandidateID
INNER JOIN Constituencies AS con ON con.ConstituencyName = can.Constituency
WHERE ElectionPredictionID  = '<ElectionPredictionID>'
AND con.ITL1Region != 'Northern Ireland'
GROUP BY can.Party
ORDER BY can.Party""".replace('<ElectionPredictionID>', PredictionID)

In [15]:
# Seat count per winning party for the latest prediction, restricted to
# constituencies whose ITL1 region is not 'Northern Ireland'. Because of the
# INNER JOINs, only parties that win at least one seat appear in the result.
# The prediction ID placeholder is filled in directly on the template.
GBSeatsQuery = """SELECT par.PartyAbbreviation AS Party,
Count(epcon.WinningParty) AS 'Seats'
FROM ElectionPredictionConstituencies AS epcon
INNER JOIN Parties AS par ON par.PartyAbbreviation = epcon.WinningParty
INNER JOIN Constituencies AS con ON con.ConstituencyName = epcon.Constituency
WHERE con.ITL1Region != 'Northern Ireland' AND epcon.ElectionPredictionID  = '<ElectionPredictionID>'
GROUP BY par.PartyAbbreviation
ORDER BY par.PartyAbbreviation""".replace('<ElectionPredictionID>', PredictionID)

In [16]:
# Vote share per party within a single ITL1 region for the latest prediction.
# Only the prediction ID is substituted here; the '<Region>' placeholder is left
# in the template and filled in once per region later on.
RegionalShareQuery = """SELECT can.Party,
CAST(SUM(epcan.VoteShare)*100 AS FLOAT) / SUM(SUM(epcan.VoteShare)) OVER() AS 'VoteShare'
FROM ElectionPredictionCandidates AS epcan
INNER JOIN Candidates AS can ON can.CandidateID = epcan.CandidateID
INNER JOIN Constituencies AS con ON con.ConstituencyName = can.Constituency
WHERE con.ITL1Region = '<Region>' AND ElectionPredictionID  = '<ElectionPredictionID>'
GROUP BY con.ITL1Region, can.Party
ORDER BY can.Party""".replace('<ElectionPredictionID>', PredictionID)

In [17]:
# Seat count per winning party within a single ITL1 region for the latest prediction.
# Only the prediction ID is substituted here; the '<Region>' placeholder is left
# in the template and filled in once per region later on.
RegionalSeatsQuery = """SELECT par.PartyAbbreviation AS Party,
Count(epcon.WinningParty) AS 'Seats'
FROM ElectionPredictionConstituencies AS epcon
INNER JOIN Parties AS par ON par.PartyAbbreviation = epcon.WinningParty
INNER JOIN Constituencies AS con ON con.ConstituencyName = epcon.Constituency
WHERE con.ITL1Region = '<Region>' AND epcon.ElectionPredictionID  = '<ElectionPredictionID>'
GROUP BY par.PartyAbbreviation
ORDER BY par.PartyAbbreviation""".replace('<ElectionPredictionID>', PredictionID)

In [18]:
# Create UK share data: a single-row frame with one column per party plus 'Region'.
UKSharePreT_df = pd.read_sql(UKShareQuery,conn)
UKShareList = UKSharePreT_df['VoteShare'].tolist()
# Label the values with the parties returned by this very query. Using a separately
# fetched party list would raise if the lengths differ and would silently attach
# shares to the wrong parties if the two orders differ.
UKQueryPartiesList = UKSharePreT_df['Party'].tolist()

UKSharePre_df = pd.DataFrame([UKShareList], columns=UKQueryPartiesList)
# Concatenating onto an empty frame puts the UKPartiesList columns first, in that
# order; any other party returned by the query follows.
UKShare_df = pd.DataFrame(columns=UKPartiesList)

UKShare_df = pd.concat([UKShare_df,UKSharePre_df],axis=0)
UKShare_df['Region'] = 'UK'

In [19]:
# Create UK seats data: a single-row frame with one column per party plus 'Region'.
UKSeatsPreT_df = pd.read_sql(UKSeatsQuery,conn)
UKSeatsList = UKSeatsPreT_df['Seats'].tolist()
# Label the counts with the party abbreviations returned by this very query, so
# values and labels cannot drift apart in length or order.
UKSeatsQueryPartiesList = UKSeatsPreT_df['PartyAbbreviation'].tolist()

UKSeatsPre_df = pd.DataFrame([UKSeatsList], columns=UKSeatsQueryPartiesList)
# Concatenating onto an empty frame puts the UKPartiesList columns first, in that
# order; any other party returned by the query follows.
UKSeats_df = pd.DataFrame(columns=UKPartiesList)

UKSeats_df = pd.concat([UKSeats_df,UKSeatsPre_df],axis=0)
UKSeats_df['Region'] = 'UK'

In [20]:
# Create GB share data: a single-row frame with one column per party plus 'Region'.
GBSharePreT_df = pd.read_sql(GBShareQuery, conn)
GBShareList = GBSharePreT_df['VoteShare'].to_list()
GBQueryPartiesList = GBSharePreT_df['Party'].to_list()

# One row of shares, labelled with the parties the query returned.
GBSharePre_df = pd.DataFrame(data=[GBShareList], columns=GBQueryPartiesList)

# Concatenating onto an empty frame puts the GBPartiesList columns first, in that
# order; the 'Region' label is appended as the last column.
GBShare_df = pd.concat(
    [pd.DataFrame(columns=GBPartiesList), GBSharePre_df],
    axis=0,
).assign(Region='GB')

In [21]:
# Create GB seats data: a single-row frame with one column per party plus 'Region'.
GBSeatsPreT_df = pd.read_sql(GBSeatsQuery, conn)
GBSeatsList = GBSeatsPreT_df['Seats'].to_list()
GBQueryPartiesList = GBSeatsPreT_df['Party'].to_list()

# One row of seat counts, labelled with the parties the query returned.
GBSeatsPre_df = pd.DataFrame(data=[GBSeatsList], columns=GBQueryPartiesList)

# Concatenating onto an empty frame puts the GBPartiesList columns first, in that
# order; the 'Region' label is appended as the last column.
GBSeats_df = pd.concat(
    [pd.DataFrame(columns=GBPartiesList), GBSeatsPre_df],
    axis=0,
).assign(Region='GB')

In [22]:
# Build the combined share and seats tables: the UK row, the GB row, then one row
# per ITL1 region, with missing party/region combinations filled with 0.
#
# The single-row frames are collected in lists and concatenated once at the end.
# UKShare_df and UKSeats_df are deliberately NOT reassigned here: overwriting them
# with "UK + GB" made this cell non-idempotent (every re-run appended another GB
# row), and concatenating inside the loop re-copied the growing table each time.
ShareFrames = [UKShare_df, GBShare_df]
SeatsFrames = [UKSeats_df, GBSeats_df]

for CurrentRegion in ITL1RegionList:
    # Double any apostrophe so the region name stays a valid T-SQL string literal
    # when spliced into the query templates.
    RegionSqlLiteral = CurrentRegion.replace("'", "''")

    # Vote shares for this region: one row, one column per party returned.
    RegionalShareQueryReplaced = RegionalShareQuery.replace('<Region>',RegionSqlLiteral)
    RegionalSharePreT_df = pd.read_sql(RegionalShareQueryReplaced,conn)
    RegionalSharePartiesList = RegionalSharePreT_df['Party'].tolist()
    RegionalShareSharesList = RegionalSharePreT_df['VoteShare'].tolist()
    RegionalShare_df = pd.DataFrame([RegionalShareSharesList],columns=RegionalSharePartiesList)
    RegionalShare_df['Region'] = CurrentRegion
    ShareFrames.append(RegionalShare_df)

    # Seats for this region: one row, one column per winning party returned.
    RegionalSeatsQueryReplaced = RegionalSeatsQuery.replace('<Region>',RegionSqlLiteral)
    RegionalSeatsPreT_df = pd.read_sql(RegionalSeatsQueryReplaced,conn)
    RegionalSeatsPartiesList = RegionalSeatsPreT_df['Party'].tolist()
    RegionalSeatsSeatssList = RegionalSeatsPreT_df['Seats'].tolist()
    RegionalSeats_df = pd.DataFrame([RegionalSeatsSeatssList],columns=RegionalSeatsPartiesList)
    RegionalSeats_df['Region'] = CurrentRegion
    SeatsFrames.append(RegionalSeats_df)

# ignore_index=True gives a fresh 0..n-1 index; parties absent from a row become 0.
AllRegionShare_df = pd.concat(ShareFrames, axis=0, ignore_index=True).fillna(0)
AllRegionSeats_df = pd.concat(SeatsFrames, axis=0, ignore_index=True).fillna(0)

In [23]:
# Determine the swings between different parties by comparing the predicted
# regional shares with the historical shares stored in the 'Share' sheet.
filename = "C:\\Users\\danmu\\Documents\\Elections\\2024_Python\\Flourish_Historical.xlsx"
HistoricalRegionShare_df = pd.read_excel(filename, sheet_name='Share')

# After the merge, '<party>_x' is the predicted share and '<party>_y' the historical one.
AllSwings_df = pd.merge(AllRegionShare_df, HistoricalRegionShare_df, how='left', on='Region')


def _swing(shares, losing_party, gaining_party):
    """Return the swing from losing_party to gaining_party for every region.

    The swing is half the difference between the gaining party's change in share
    and the losing party's change in share.
    """
    gaining_change = shares[gaining_party + '_x'] - shares[gaining_party + '_y']
    losing_change = shares[losing_party + '_x'] - shares[losing_party + '_y']
    return (gaining_change - losing_change) / 2


def _swing_by_region(swing_column):
    """Return the named swing column indexed by region (one row per region)."""
    return AllSwings_df[['Region', swing_column]].set_index('Region')


AllSwings_df['Con To Lab Swing'] = _swing(AllSwings_df, 'Con', 'Lab')
AllSwings_df['Con To Reform Swing'] = _swing(AllSwings_df, 'Con', 'Reform')
AllSwings_df['Con To LD Swing'] = _swing(AllSwings_df, 'Con', 'LD')
AllSwings_df['SNP To Lab Swing'] = _swing(AllSwings_df, 'SNP', 'Lab')

# Each export table is the region-indexed column transposed: one row, one column per region.
ConToLabSwingPreT_df = _swing_by_region('Con To Lab Swing')
ConToLabSwing_df = ConToLabSwingPreT_df.T

ConToLDSwingPreT_df = _swing_by_region('Con To LD Swing')
ConToLDSwing_df = ConToLDSwingPreT_df.T

ConToReformSwingPreT_df = _swing_by_region('Con To Reform Swing')
ConToReformSwing_df = ConToReformSwingPreT_df.T

In [28]:
# SNP to Lab swing, kept for Scotland only. Selecting the 'Scotland' column from the
# transposed table means SNPToLabSwing_df ends up as a Series, not a DataFrame.
SNPToLabSwingPreT_df = AllSwings_df.loc[:, ['Region', 'SNP To Lab Swing']].set_index('Region')
SNPToLabSwing_df = SNPToLabSwingPreT_df.T['Scotland'].copy()
SNPToLabSwing_df

SNP To Lab Swing    16.74156
Name: Scotland, dtype: float64

In [24]:
# Run the two per-prediction queries (candidates first, then constituencies)
# and keep the results for the export workbook.
Candidates_df, Constituencies_df = (
    pd.read_sql(query, conn)
    for query in (PredictionCandidatesQuery, PredictionConstituenciesQuery)
)

In [30]:
# Write every table to its own sheet of the Flourish export workbook.
# Each entry is (sheet name, table, whether to write the index); the swing tables
# keep their index because it carries the swing name.
ExportSheets = [
    ('Candidates', Candidates_df, False),
    ('Constituencies', Constituencies_df, False),
    ('SeatsTimeseries', SeatsTimeseries_df, False),
    ('ShareTimeseries', ShareTimeseries_df, False),
    ('Share', AllRegionShare_df, False),
    ('Seats', AllRegionSeats_df, False),
    ('LatestPrediction', LatestPrediction_df, False),
    ('ConToLabSwing', ConToLabSwing_df, True),
    ('ConToLDSwing', ConToLDSwing_df, True),
    ('ConToReformSwing', ConToReformSwing_df, True),
    ('SNPtoLabSwing', SNPToLabSwing_df, True),
]

with pd.ExcelWriter(r'C:\Users\danmu\Documents\Elections\2024_Python\Flourish_Export.xlsx') as writer:
    for SheetName, SheetData, WriteIndex in ExportSheets:
        SheetData.to_excel(writer, sheet_name=SheetName, index=WriteIndex)

In [26]:
# Close the database connection opened at the top of the notebook.
# No further queries can be run through `conn` after this cell.
conn.close()