Python Jupyter Notebook that combines two previously separate algorithms for:
1. Taking imported raw polls and converting to vote shares for every candidate in every constituency
2. Analysing election results for a range of specified dates

In [1]:
# Identity of the 'core' algorithm run.
# NOTE: the spelling "Alogrithm" is part of the stored identifier and is kept as-is;
# the resulting tag is written to the database and embedded in ElectionPredictionID.
AlgorithmName = "CoreComboPollElectionAlogrithm"
AlgorithmVersion = "1_0"
AlgorithmDate = "20240703"

# Version history
#   0_1  New combined 'core' and 'MRP' algorithms, taking out the MRPs to be used in a different analysis
#   1_0  Fully tested version of the core and MRP combined algorithm

# Both analysis stages are tagged with the same "<name>_<version>_<date>" string
ElectionAnalysisAlgorithm = PollAnalysisAlgorithm = "_".join(
    (AlgorithmName, AlgorithmVersion, AlgorithmDate)
)

In [2]:
# Import required modules
import datetime
import urllib
import urllib.parse
from datetime import timedelta

import numpy as np
import pandas as pd
import pyodbc
import sqlalchemy
from sqlalchemy import create_engine

In [3]:
# Core settings used by the rest of the notebook

# Date of the election being predicted
ElectionDate = datetime.date(year=2024, month=7, day=4)

# Automatic date selection
#   start: the day after the last date analysed in the database, OR the date of the
#          earliest poll still to be added, whichever is sooner
#   end:   the date of the last poll
AutoStartDateFlag = True
AutoEndDateFlag = True

# Date range of the election analyses when the automatic flags are switched off
ManualStartDate = datetime.date(year=2024, month=1, day=1)
ManualEndDate = datetime.date(year=2024, month=1, day=15)

# The earliest possible start date that the election analysis will use
EarliestStartDate = datetime.date(year=2024, month=1, day=1)

# When true, analyses that already exist for a date are deleted and regenerated
DeleteAnalysesFlag = True

# Polls with a rank at or below this threshold are treated as 'detailed' polls,
# polls with a higher rank as 'national' polls
DetailedRankThreshold = 5

In [4]:
# Connect to database 'UK_General_Election' using SQLAlchemy
# The raw ODBC connection string is URL-encoded and handed to SQLAlchemy through the
# 'odbc_connect' query parameter of the mssql+pyodbc URL.
# NOTE: quote_plus lives in the `urllib.parse` submodule, which must be imported
# explicitly; a bare `import urllib` only works if another package has already loaded it.
connection_str = "DRIVER={SQL SERVER};SERVER=DANZPOOTA;DATABASE=UK_General_Election;TRUSTED_CONNECTION=YES"
params = urllib.parse.quote_plus(connection_str)
engine = create_engine(f'mssql+pyodbc:///?odbc_connect={params}')

# A single connection that is reused by the pandas read_sql / to_sql calls
conn = engine.connect()

In [5]:
# Determine the start and end date for the election analysis loop
if not AutoStartDateFlag:
    # Manual mode: use the configured start date unchanged
    StartDate = ManualStartDate
else:
    # Latest date that already has an election prediction ("" when there is none)
    LastAnalysisDateQuery = "SELECT TOP(1) ElectionPredictionDate FROM ElectionPredictionMeta ORDER BY ElectionPredictionDate DESC"
    LastAnalysisDateList = [i[0] for i in engine.execute(LastAnalysisDateQuery)]
    LastAnalysisDate = LastAnalysisDateList[0] if LastAnalysisDateList else ""

    # Date of the earliest poll that has no PollAnalysisMeta row yet ("" when every poll is analysed)
    EarliestNewPollQuery = "SELECT TOP(1) pm.PollDate from PollMeta AS pm LEFT JOIN PollAnalysisMeta AS pam ON pm.PollID = pam.PollID WHERE pam.PollID IS NULL ORDER BY pm.PollDate ASC"
    EarliestNewPollList = [i[0] for i in engine.execute(EarliestNewPollQuery)]
    EarliestNewPollDate = EarliestNewPollList[0] if EarliestNewPollList else ""

    if LastAnalysisDate == "":
        # Nothing analysed yet: start from the earliest permitted date
        StartDate = pd.to_datetime(EarliestStartDate).date()
    elif EarliestNewPollDate == "" or pd.to_datetime(LastAnalysisDate).date() < pd.to_datetime(EarliestNewPollDate).date():
        # The "no new polls" test must come first: comparing a date with the "" sentinel
        # raises TypeError. Both sides are normalised to datetime.date before comparing so
        # the result does not depend on how the driver returns each column.
        StartDate = pd.to_datetime(LastAnalysisDate).date() + timedelta(days=1)
    elif pd.to_datetime(EarliestNewPollDate).date() > EarliestStartDate:
        # A new poll pre-dates existing analyses: re-run from that poll's date
        StartDate = pd.to_datetime(EarliestNewPollDate).date()
    else:
        # Never start before the earliest permitted date
        StartDate = pd.to_datetime(EarliestStartDate).date()

if not AutoEndDateFlag:
    EndDate = ManualEndDate
else:
    # The analysis runs up to the date of the most recent poll in PollMeta.
    # An empty PollMeta table raises IndexError on the [0] lookup below.
    LastPollDateQuery = """SELECT TOP(1) PollDate from PollMeta Order by PollDate DESC"""
    LastPollDateList = [i[0] for i in engine.execute(LastPollDateQuery)]
    LastPollDate = LastPollDateList[0]
    EndDate = pd.to_datetime(LastPollDate).date()

print(StartDate)
print(EndDate)

2024-07-03
2024-07-03


In [6]:
# Polls present in PollMeta that have no matching PollAnalysisMeta row,
# i.e. the polls the main loop still has to analyse
PollsListQuery = "SELECT A.PollID from PollMeta A LEFT JOIN PollAnalysisMeta B ON A.PollID = B.PollID WHERE B.PollID IS NULL"
PollsList = list(map(lambda record: record[0], engine.execute(PollsListQuery)))
PollsList

['20240703JL PartnersITL1Region-NINAAll']

In [7]:
%%time

# Main algorithm loop to convert poll vote shares into candidate vote shares
def _sql_quote(value):
    """Return ``value`` as text that is safe inside a single-quoted T-SQL literal.

    The queries below are built by substituting names into '<Placeholder>' slots. A name
    containing an apostrophe would end the literal early and break the statement, so
    every apostrophe is doubled (the T-SQL escape for a quote inside a literal).
    """
    return str(value).replace("'", "''")


for PollID in PollsList:

    # Get the Poll Meta data from the database
    PollMetaQuery = "SELECT * From PollMeta WHERE PollID = '<PollID>'"
    PollMetaQuery = PollMetaQuery.replace("<PollID>",_sql_quote(PollID))
    PollMeta_df = pd.read_sql(PollMetaQuery,conn)

    # Get key variables for polls
    PollType = PollMeta_df.at[0,'PollType']
    PollScope = PollMeta_df.at[0,'PollScope']

    # Query for generating list of applicable poll regions
    AllRegionsQuery = "SELECT RegionName FROM RegionRegionTypes WHERE RegionType = '<PollType>'"

    # Get from database all of the region vote shares relating to the PollID
    PollSharesQuery = "SELECT PollDetailsID, RegionName, Constituency, Party, VoteShare AS PollShare FROM PollDetails WHERE PollID = '<PollID>' ORDER BY RegionName, Party"
    PollSharesQuery = PollSharesQuery.replace("<PollID>",_sql_quote(PollID))
    PollShares_df = pd.read_sql(PollSharesQuery,conn)

    # Check to see if it is a constituency only poll as this needs to be handled very differently
    # The algorithm cannot be allowed to look for region shares as it will not find any for a constituency poll
    if PollType == "Constituency":
        ConstituencyPollFlag = True
        MRPPollFlag = False

        # For a constituency poll the poll scope is the constituency name
        PreviousRegionSharesQuery = "SELECT CandidateID, Constituency, Party, PreviousShare FROM Candidates WHERE Constituency = '<Constituency>'"
        PreviousRegionSharesQuery = PreviousRegionSharesQuery.replace("<Constituency>",_sql_quote(PollScope))
        PreviousRegionShares_df = pd.read_sql(PreviousRegionSharesQuery,conn)

        # Not referenced again in the visible part of the notebook; kept in case a later cell reads it
        ConstituencyPollRegion = PollShares_df.at[0,'RegionName']

        # Join the dataframes on 'Party'
        PreviousRegionShares_df = PreviousRegionShares_df.merge(PollShares_df[['Party','PollDetailsID','PollShare']], how='left', on='Party')

        # Determine swing
        PreviousRegionShares_df['Swing'] = PreviousRegionShares_df['PollShare'] - PreviousRegionShares_df['PreviousShare']

        # The constituency acts as its own 'region' for the rest of the calculation
        ConstituencyShares_df = PreviousRegionShares_df.copy()
        ConstituencyShares_df['RegionName'] = ConstituencyShares_df['Constituency']

    elif PollType == "MRP632" or PollType == "MRP631":
        MRPPollFlag = True
        ConstituencyPollFlag = False

        # For MRP polls the poll type doubles as the region name that groups the constituencies covered
        PreviousRegionSharesQuery = """SELECT can.CandidateID, can.Constituency, can.Party, can.PreviousShare FROM Candidates AS can
        INNER JOIN Constituencies AS con ON con.ConstituencyName = can.Constituency
        INNER JOIN RegionConstituencies AS rc ON rc.ConstituencyName = con.ConstituencyName
        WHERE rc.RegionName = '<PollType>'"""
        PreviousRegionSharesQuery = PreviousRegionSharesQuery.replace("<PollType>",_sql_quote(PollType))
        PreviousRegionShares_df = pd.read_sql(PreviousRegionSharesQuery,conn)

        # Build the join key as constituency name followed by party
        # (presumably matching the CandidateID format in Candidates - confirm against the schema)
        PollShares_df['CandidateID'] = PollShares_df['Constituency'] + PollShares_df['Party']

        PreviousRegionShares_df = PreviousRegionShares_df.merge(PollShares_df[['CandidateID','PollDetailsID','PollShare']], how='left', on='CandidateID')

        # Determine swing
        PreviousRegionShares_df['Swing'] = PreviousRegionShares_df['PollShare'] - PreviousRegionShares_df['PreviousShare']

        ConstituencyShares_df = PreviousRegionShares_df.copy()
        ConstituencyShares_df['RegionName'] = ConstituencyShares_df['Constituency']

    else:
        ConstituencyPollFlag = False
        MRPPollFlag = False

        # Generate a list of all of the regions applicable to a poll when it is not a constituency one
        if PollScope == "All":
            AllRegionsQuery = AllRegionsQuery.replace("<PollType>",_sql_quote(PollType))
            RegionsList = [i[0] for i in engine.execute(AllRegionsQuery)]
        else:
            RegionsList = [PollScope]

        # Query for the region vote shares from the previous election
        PreviousRegionSharesQuery = """SELECT r.RegionName, can.Party, SUM(can.PreviousVotes) AS 'TotalVotes',
        CAST(SUM(can.PreviousVotes) AS FLOAT) / SUM(SUM(can.PreviousVotes)) OVER() AS 'RawPreviousShare',
        SUM(can.PreviousStanding) AS 'PreviousCandidates',
        SUM(can.CurrentStanding) AS 'CurrentCandidates'
        FROM Candidates AS can
        INNER JOIN Constituencies as con ON con.ConstituencyName = can.Constituency
        INNER JOIN RegionConstituencies AS rc ON  rc.ConstituencyName = con.ConstituencyName
        INNER JOIN Regions AS r ON r.RegionName = rc.RegionName
        INNER JOIN RegionRegionTypes AS rrt ON rrt.RegionName = r.RegionName
        WHERE r.RegionName = '<RegionName>' AND rrt.RegionType = '<RegionType>'
        GROUP BY r.RegionName, can.Party
        ORDER BY r.RegionName, can.Party"""

        PreviousRegionSharesQuery = PreviousRegionSharesQuery.replace("<RegionType>",_sql_quote(PollType))

        # Get from database all of the vote shares for the poll regions from the previous election.
        # The per-region results are collected in a list and concatenated once; the empty template
        # frame comes first so the column layout is fixed even if no region returns rows.
        RegionShareFrames = [pd.DataFrame(columns=["RegionName","Party","TotalVotes","RawPreviousShare","PreviousCandidates","CurrentCandidates"])]

        # Loop through all regions applicable to this particular poll
        for Region in RegionsList:
            # Run query for the raw previous region shares
            ModRegionSharesQuery = PreviousRegionSharesQuery.replace("<RegionName>",_sql_quote(Region))
            IndRegionShares_df = pd.read_sql(ModRegionSharesQuery,conn)
            RegionShareFrames.append(IndRegionShares_df)

        PreviousRegionShares_df = pd.concat(RegionShareFrames,axis=0)

        # Reset the index column
        PreviousRegionShares_df.reset_index(drop=True,inplace=True)

        # Where no candidates stood previously, divide by 1000 instead of 0 so the adjusted share
        # can still be calculated. The substitution is made on a separate series rather than in
        # place: an in-place replace on a selected column is not applied to the frame under pandas
        # Copy-on-Write, and reverting 1000 back to 0 afterwards would also zero a genuine 1000.
        SafePreviousCandidates = PreviousRegionShares_df['PreviousCandidates'].replace(0,1000)

        # Calculate the adjusted previous share based on the number of candidates actually standing
        PreviousRegionShares_df['PreviousShare'] = PreviousRegionShares_df['RawPreviousShare'] * PreviousRegionShares_df['CurrentCandidates']/SafePreviousCandidates

        # Create a column to join the two dataframes
        PollShares_df['RegionParty'] = PollShares_df['RegionName'] + PollShares_df['Party']
        PreviousRegionShares_df['RegionParty'] = PreviousRegionShares_df['RegionName'] + PreviousRegionShares_df['Party']

        # Join the dataframes on 'RegionParty'
        PreviousRegionShares_df = PreviousRegionShares_df.merge(PollShares_df[['PollDetailsID','RegionParty','PollShare']], how='left', on='RegionParty')

        # Determine swing
        PreviousRegionShares_df['Swing'] = PreviousRegionShares_df['PollShare'] - PreviousRegionShares_df['PreviousShare']

        # Query for pulling out previous election shares for applicable constituencies
        CandidatesQuery = """SELECT can.CandidateID, r.RegionName, can.Constituency, can.Party, can.PreviousShare
        FROM Candidates AS can
        INNER JOIN Constituencies as con ON con.ConstituencyName = can.Constituency
        INNER JOIN RegionConstituencies AS rc ON  rc.ConstituencyName = con.ConstituencyName
        INNER JOIN Regions AS r ON r.RegionName = rc.RegionName
        INNER JOIN RegionRegionTypes AS rrt ON rrt.RegionName = r.RegionName
        WHERE r.RegionName = '<RegionName>' AND rrt.RegionType = '<RegionType>'
        AND can.CurrentStanding = 1
        ORDER BY can.Constituency, can.Party"""

        CandidatesQuery = CandidatesQuery.replace("<RegionType>",_sql_quote(PollType))

        # Pull out the standing candidates for every region in turn, then concatenate once.
        # The template frame again fixes the columns, including the ones filled in further down.
        CandidateFrames = [pd.DataFrame(columns=["CandidateID","RegionName","Constituency","Party","PreviousShare","NewShareRaw","VoteShare"])]

        for Region in RegionsList:
            ModCandidatesQuery = CandidatesQuery.replace("<RegionName>",_sql_quote(Region))
            IndConstituencyShares_df = pd.read_sql(ModCandidatesQuery,conn)
            CandidateFrames.append(IndConstituencyShares_df)

        ConstituencyShares_df = pd.concat(CandidateFrames,axis=0)
        ConstituencyShares_df.reset_index(drop=True,inplace=True)

        # Create 'RegionParty' to allow merging with the previous region shares dataframe
        ConstituencyShares_df['RegionParty'] = ConstituencyShares_df['RegionName'] + ConstituencyShares_df['Party']

        # Join the dataframes on 'RegionParty' so every candidate receives their region's party swing
        ConstituencyShares_df = ConstituencyShares_df.merge(PreviousRegionShares_df[['PollDetailsID','RegionParty','Swing']], how='left', on='RegionParty')

    # Exit of Constituency poll IF statement
    # Ensure the datatypes are numeric of the columns to be used in the calculation
    ConstituencyShares_df["PreviousShare"] = pd.to_numeric(ConstituencyShares_df["PreviousShare"])
    ConstituencyShares_df["Swing"] = pd.to_numeric(ConstituencyShares_df["Swing"])

    # Apply the swing to the previous share, flooring negative results at zero
    ConstituencyShares_df["NewShareRaw"] = ConstituencyShares_df["PreviousShare"] + ConstituencyShares_df["Swing"]
    ConstituencyShares_df["NewShareRaw"] = np.where(ConstituencyShares_df["NewShareRaw"] < 0, 0,ConstituencyShares_df["NewShareRaw"])

    # Determine the factor needed to ensure vote shares for each constituency sum to 1
    ConstituencyShares_df['ConstRawShareTotals'] = ConstituencyShares_df['NewShareRaw'].groupby(ConstituencyShares_df['Constituency']).transform('sum')

    # Modify the raw vote shares to ensure they sum to 1
    ConstituencyShares_df['VoteShare'] = ConstituencyShares_df['NewShareRaw']/ConstituencyShares_df['ConstRawShareTotals']

    # Diagnostic column: per-constituency total of the normalised shares
    ConstituencyShares_df['VoteShareCheck'] = ConstituencyShares_df['VoteShare'].groupby(ConstituencyShares_df['Constituency']).transform('sum')

    # Create PollAnalysisMeta details for inserting into database
    PollAnalysisMeta_df = pd.DataFrame(columns=["PollID","PollAnalysisDate","PollAnalysisAlgorithm"])

    PollAnalysisMeta_df.at[0,"PollID"] = PollID
    PollAnalysisMeta_df.at[0,"PollAnalysisAlgorithm"] = PollAnalysisAlgorithm

    # The date of the analysis is always today's date
    PollAnalysisMeta_df.at[0,"PollAnalysisDate"] = datetime.date.today()

    PollAnalysisMeta_df.to_sql('PollAnalysisMeta', conn, if_exists='append', index=False)

    # Initial poll analysis values are now inserted into the database to allow these to be queried for the constituency shares
    PollAnalysisRegions_df = PreviousRegionShares_df[['PollDetailsID','Swing']].copy()

    # Get the recently inserted PollAnalysis ID from the database
    PollAnalysisIDQuery = "SELECT PollAnalysisID FROM PollAnalysisMeta WHERE PollID = '<PollID>'"
    PollAnalysisIDQuery = PollAnalysisIDQuery.replace("<PollID>",_sql_quote(PollID))

    PollAnalysisID = [i[0] for i in engine.execute(PollAnalysisIDQuery)][0]
    PollAnalysisRegions_df['PollAnalysisID'] = PollAnalysisID

    PollAnalysisRegions_df.to_sql('PollAnalysisRegions', conn, if_exists='append', index=False)

    # Create the dataframe for insertion into the database and insert
    # NOTE(review): PollAnalysisRegionID is formed by adding PollDetailsID and PollAnalysisID;
    # presumably this mirrors how the database derives the key in PollAnalysisRegions - confirm.
    ConstituencyShares_df['PollAnalysisRegionID'] = ConstituencyShares_df['PollDetailsID'] + PollAnalysisID
    PollAnalysisConstituencies_df = ConstituencyShares_df[['PollAnalysisRegionID','CandidateID','VoteShare']]
    PollAnalysisConstituencies_df.to_sql('PollAnalysisConstituencies', conn, if_exists='append', index=False)

CPU times: total: 46.9 ms
Wall time: 1.45 s


Separation between the previously segregated poll analysis and election analysis algorithms

In [8]:
# SQL template for removing an existing election analysis before it is regenerated.
# Two placeholders are filled in with str.replace before the statement is executed:
#   <ElectionPredictionDate> - the prediction date whose analysis is to be cleared
#   <Type>                   - text matched against ElectionPredictionID with LIKE (e.g. 'Core')
# The four child tables are cleared first and ElectionPredictionMeta last, because each
# child DELETE joins to ElectionPredictionMeta to find the rows it has to remove.
DeleteAnalysisQueryRaw = """DELETE eppu FROM ElectionPredictionPollsUsed AS eppu
INNER JOIN ElectionPredictionMeta AS epm ON epm.ElectionPredictionID = eppu.ElectionPredictionID
WHERE epm.ElectionPredictionDate = '<ElectionPredictionDate>' AND epm.ElectionPredictionID LIKE '%<Type>%'

DELETE epc FROM ElectionPredictionCandidates AS epc
INNER JOIN ElectionPredictionMeta AS epm ON epm.ElectionPredictionID = epc.ElectionPredictionID
WHERE epm.ElectionPredictionDate = '<ElectionPredictionDate>' AND epm.ElectionPredictionID LIKE '%<Type>%'

DELETE epcon FROM ElectionPredictionConstituencies AS epcon
INNER JOIN ElectionPredictionMeta AS epm ON epm.ElectionPredictionID = epcon.ElectionPredictionID
WHERE epm.ElectionPredictionDate = '<ElectionPredictionDate>' AND epm.ElectionPredictionID LIKE '%<Type>%'

DELETE epo FROM ElectionPredictionOverall AS epo
INNER JOIN ElectionPredictionMeta AS epm ON epm.ElectionPredictionID = epo.ElectionPredictionID
WHERE epm.ElectionPredictionDate = '<ElectionPredictionDate>' AND epm.ElectionPredictionID LIKE '%<Type>%'

DELETE FROM ElectionPredictionMeta WHERE ElectionPredictionDate = '<ElectionPredictionDate>' AND ElectionPredictionID LIKE '%<Type>%'"""

In [9]:
%%time
# Combined Election Analysis
# For every date from StartDate to EndDate: select the polls that are still valid on that
# date, average their candidate vote shares rank by rank, normalise per constituency,
# derive winners and party totals, and write the prediction to the database.

# Setup variables for the loop
ElectionPredictionDate = StartDate
Delta = timedelta(days=1)

# Loop around all of the analysis dates
while ElectionPredictionDate <= EndDate:

    # Polls stay applicable for a shorter period once the election is 35 days away or closer
    DaysToElection = ElectionDate - ElectionPredictionDate
    if DaysToElection.days <= 35:
        NationalValidPeriod = 7
        DetailedValidPeriod = 35
    else:
        NationalValidPeriod = 30
        DetailedValidPeriod = 100

    # Check if there is already an analysis for this analysis date
    ElectionPredictionDateStr = ElectionPredictionDate.strftime('%Y%m%d')
    AnalysisExistsQuery = "SELECT Count(ElectionPredictionID) FROM ElectionPredictionMeta WHERE ElectionPredictionDate = '<ElectionPredictionDate>' AND ElectionPredictionID LIKE '%Core%'"
    AnalysisExistsQuery = AnalysisExistsQuery.replace('<ElectionPredictionDate>',ElectionPredictionDateStr)
    AnalysisExistsList = [i[0] for i in engine.execute(AnalysisExistsQuery)]
    AnalysisExistsInt = AnalysisExistsList[0]

    if AnalysisExistsInt < 1 or DeleteAnalysesFlag:

        if AnalysisExistsInt >= 1:
            # Delete the existing 'Core' analysis for this date so that it can be regenerated
            DeleteAnalysisQueryType = DeleteAnalysisQueryRaw.replace('<Type>','Core')
            DeleteAnalysisQuery = DeleteAnalysisQueryType.replace('<ElectionPredictionDate>',ElectionPredictionDateStr)
            engine.execute(DeleteAnalysisQuery)

        # Get the list of polls that have actually been analysed and incorporated into the database
        # (MRP polls are excluded from this analysis)
        AnalysedPollsQuery = """SELECT pam.PollID, pm.Pollster, pm.PollType, pm.PollScope, pm.PollDate, rt.RegionTypeRank  FROM PollAnalysisMeta AS pam
        INNER JOIN PollMeta AS pm ON pm.PollID = pam.PollID
        INNER JOIN RegionTypes AS rt ON rt.RegionType = pm.PollType
        WHERE pm.PollType NOT LIKE '%MRP%'"""

        AnalysedPolls_df = pd.read_sql(AnalysedPollsQuery,conn)

        # Add prediction date and convert to datetime so that it can be used in a calculation
        AnalysedPolls_df['PredictionDate'] = ElectionPredictionDate
        AnalysedPolls_df['PredictionDate'] = pd.to_datetime(AnalysedPolls_df['PredictionDate'])

        # Convert the date column to datetime type
        AnalysedPolls_df['PollDate'] = pd.to_datetime(AnalysedPolls_df['PollDate'])

        # Determine how many days from the prediction date a poll was taken
        AnalysedPolls_df['DateDelta'] = AnalysedPolls_df['PredictionDate'] - AnalysedPolls_df['PollDate']

        # Assign a rank to each poll: a poll scoped to a single region ranks one below its region type
        AnalysedPolls_df['PollRank'] = np.where(AnalysedPolls_df['PollScope']=='All',AnalysedPolls_df['RegionTypeRank'],AnalysedPolls_df['RegionTypeRank']-1)

        # Determine the applicability of each poll based on the day delta.
        # A poll applies when it was taken on or before the prediction date and falls inside
        # the validity period for its kind (detailed: rank <= threshold, national: rank > threshold).
        NotInFuture = AnalysedPolls_df['DateDelta'] >= pd.Timedelta(0, unit="d")
        DetailedAndValid = (AnalysedPolls_df['PollRank'] <= DetailedRankThreshold) \
                           & (AnalysedPolls_df['DateDelta'] <= pd.Timedelta(DetailedValidPeriod, unit="d"))
        NationalAndValid = (AnalysedPolls_df['PollRank'] > DetailedRankThreshold) \
                           & (AnalysedPolls_df['DateDelta'] <= pd.Timedelta(NationalValidPeriod, unit="d"))
        AnalysedPolls_df['PollApplicability'] = np.where(NotInFuture & (DetailedAndValid | NationalAndValid),1,0)

        # Copy the polls that fall within the date delta to a new data frame
        ApplicablePolls_df = AnalysedPolls_df[AnalysedPolls_df['PollApplicability'] == 1].copy()
        ApplicablePolls_df.reset_index(drop=True,inplace=True)

        # Check for duplicate polls of the same type
        # Create column that would contain the info to check for duplicates
        ApplicablePolls_df['DuplicateCheck'] = ApplicablePolls_df['Pollster'] + ApplicablePolls_df['PollType'] + ApplicablePolls_df['PollScope']

        # The dataframe needs to be sorted to ensure the latest poll is kept and older duplicates are removed
        ApplicablePolls_df.sort_values(by='DateDelta',inplace=True)

        # Drop duplicates
        ApplicablePolls_df.drop_duplicates(subset=['DuplicateCheck'],inplace=True)

        # Reset the dataframe index after being sorted and duplicates removed
        ApplicablePolls_df.reset_index(drop=True,inplace=True)

        if ApplicablePolls_df.empty:
            # Without applicable polls the max/min rank below would be NaN and range() would
            # raise TypeError, so this date is skipped. Any existing analysis for the date has
            # already been deleted above. The date must be advanced here because 'continue'
            # bypasses the increment at the bottom of the loop.
            print("No applicable polls for",ElectionPredictionDate,"- no analysis created")
            ElectionPredictionDate += Delta
            continue

        # Main algorithm to create resultant candidate shares
        # Create dataframe with every candidate as rows
        SelectCandidatesQuery = "SELECT can.CandidateID, can.Constituency, can.Party, can.PreviousShare FROM Candidates AS can WHERE can.CurrentStanding = 1"
        Candidates_df = pd.read_sql(SelectCandidatesQuery,conn)

        # Descending Loop around every rank
        # start point = max rank from applicable polls df
        MaxRank = ApplicablePolls_df['PollRank'].max()
        MinRank = ApplicablePolls_df['PollRank'].min()

        # Boolean to record if this is the first run through for the running total
        FirstRun = True

        for Rank in range(MaxRank,MinRank-1,-1):

            # Create df from all polls relating to the current rank
            # First create list of applicable polls
            RankApplicablePolls_df = ApplicablePolls_df[ApplicablePolls_df['PollRank'] == Rank].copy()
            RankApplicablePolls = RankApplicablePolls_df['PollID'].tolist()

            for PollID in RankApplicablePolls:

                # Query for getting the poll analysis details; the vote share column is named after the poll
                SelectPollDetailsQuery = """SELECT pac.CandidateID, pac.VoteShare AS '<PollID>' FROM PollAnalysisConstituencies AS pac
                INNER JOIN PollAnalysisRegions AS par on par.PollAnalysisRegionID = pac.PollAnalysisRegionID
                INNER JOIN PollAnalysisMeta AS pam ON pam.PollAnalysisID = par.PollAnalysisID
                WHERE pam.PollID = '<PollID>'
                ORDER BY pac.CandidateID"""

                # Insert the PollID into the query, doubling any apostrophe so that it cannot
                # terminate the quoted literal / alias early
                SelectPollDetailsQuery = SelectPollDetailsQuery.replace("<PollID>",PollID.replace("'","''"))

                # Pull query into a data frame for the results
                PollAnalysis_df = pd.read_sql(SelectPollDetailsQuery,conn)

                # Merge with Candidates_df: adds one column per poll, keyed on the candidate
                Candidates_df = Candidates_df.merge(PollAnalysis_df, how='left', on='CandidateID')

            # Average the columns for each row (candidate) for this rank
            if RankApplicablePolls:
                if FirstRun:
                    Candidates_df[Rank] = Candidates_df[RankApplicablePolls].mean(axis=1)
                    # Set the first run to false now that the loop has been gone through
                    FirstRun = False
                else:
                    # Later ranks also average in the running result of the previous rank evaluated
                    RankApplicablePolls.append(PreviousRank)
                    Candidates_df[Rank] = Candidates_df[RankApplicablePolls].mean(axis=1)

                # Record the last rank to be evaluated
                PreviousRank = Rank

        # The loop ends on MinRank, which always has at least one poll, so this column exists
        Candidates_df['VoteShareRaw'] = Candidates_df[Rank].copy()

        # Determine the factor needed to ensure vote shares for each constituency sum to 1
        Candidates_df['ConstRawShareTotals'] = Candidates_df['VoteShareRaw'].groupby(Candidates_df['Constituency']).transform('sum')

        # Modify the raw vote shares to ensure they sum to 1
        Candidates_df['VoteShare'] = Candidates_df['VoteShareRaw']/Candidates_df['ConstRawShareTotals']

        # Diagnostic column: per-constituency total of the normalised shares
        Candidates_df['VoteShareCheck'] = Candidates_df['VoteShare'].groupby(Candidates_df['Constituency']).transform('sum')

        Candidates_df['VoteShareChange'] = Candidates_df['VoteShare'] - Candidates_df['PreviousShare']

        # Determine the constituency winners
        # Candidates are ordered by share (highest first) inside each constituency
        ConstWinners_df = Candidates_df[['Constituency','Party','VoteShare','VoteShareChange']].copy()
        ConstWinners_df = ConstWinners_df.sort_values(['Constituency','VoteShare'],ascending = [True, False])

        # The next row within the SAME constituency is the runner-up. Shifting per group stops
        # a single-candidate constituency borrowing the first row of the following constituency.
        RunnerUp_df = ConstWinners_df.groupby('Constituency')[['Party','VoteShare','VoteShareChange']].shift(-1)
        ConstWinners_df['Majority'] = ConstWinners_df['VoteShare'] - RunnerUp_df['VoteShare']
        ConstWinners_df['SecondParty'] = RunnerUp_df['Party']

        # Calculate the swing for each winner
        ConstWinners_df['Swing'] = (ConstWinners_df['VoteShareChange'] - RunnerUp_df['VoteShareChange'])/2

        # Keep only the winners of each constituency: the first row of each constituency in the
        # ordering above. Missing vote shares sort last, so a candidate without a share can
        # never be selected as the winner.
        ConstWinners_df = ConstWinners_df.drop_duplicates(['Constituency'], keep='first')

        ConstWinners_df.reset_index(drop=True,inplace=True)

        # Determine how many constituencies each party has won
        OverallResults_df = ConstWinners_df[['Party','Constituency']].groupby('Party').count()
        OverallResults_df.reset_index(drop=False,inplace=True)

        # Determine total vote shares for each party (sum of constituency shares, rescaled to total 1)
        OverallVoteShare_df = Candidates_df[['Party','VoteShare']].copy()
        OverallVoteShare_df['PartyShare'] = OverallVoteShare_df['VoteShare'].groupby(OverallVoteShare_df['Party']).transform('sum')
        OverallVoteShare_df = OverallVoteShare_df.drop_duplicates(['Party'], keep='last')
        OverallVoteShare_df.drop(['VoteShare'], axis=1, inplace=True)
        OverallVoteShare_df.sort_values(by='PartyShare', ascending=False, inplace=True)
        OverallVoteShare_df['PartyShare'] = OverallVoteShare_df['PartyShare']/OverallVoteShare_df['PartyShare'].sum()
        OverallVoteShare_df.reset_index(drop=True,inplace=True)

        # Combine the overall vote shares and constituency numbers into one dataframe
        # (parties that win no constituency get 0 after the fillna)
        Overall_df = OverallVoteShare_df.merge(OverallResults_df, how='left', on='Party')
        Overall_df = Overall_df.fillna(0)
        Overall_df.sort_values(by='Constituency', ascending=False, inplace=True)
        Overall_df.reset_index(drop=True,inplace=True)

        # Election prediction analysis date is today
        ElectionAnalysisDate = datetime.date.today()

        # Create the election prediction ID for the database: analysis date + prediction date + algorithm tag
        ElectionPredictionID = ElectionAnalysisDate.strftime('%Y%m%d') + ElectionPredictionDate.strftime('%Y%m%d') + ElectionAnalysisAlgorithm

        # Create the ElectionPredictionMeta table content for this analysis
        ElectionPredictionMeta_df = pd.DataFrame(columns=["ElectionAnalysisDate","ElectionPredictionDate","ElectionAnalysisAlgorithm"])
        ElectionPredictionMeta_df.at[0,"ElectionAnalysisDate"] = ElectionAnalysisDate
        ElectionPredictionMeta_df.at[0,"ElectionPredictionDate"] = ElectionPredictionDate
        ElectionPredictionMeta_df.at[0,"ElectionAnalysisAlgorithm"] = ElectionAnalysisAlgorithm

        # Create the ElectionPredictionPolls used content
        ElectionPredictionPollsUsed_df = pd.DataFrame(columns=['ElectionPredictionID','PollID'])
        ElectionPredictionPollsUsed_df['PollID'] = ApplicablePolls_df['PollID'].copy()
        ElectionPredictionPollsUsed_df['ElectionPredictionID'] = ElectionPredictionID

        # Create ElectionPredictionCandidates data
        ElectionPredictionCandidates_df = pd.DataFrame(columns=['ElectionPredictionID','CandidateID','VoteShare'])
        ElectionPredictionCandidates_df[['CandidateID','VoteShare']] = Candidates_df[['CandidateID','VoteShare']].copy()
        ElectionPredictionCandidates_df['ElectionPredictionID'] = ElectionPredictionID

        # Create ElectionPredictionConstituencies data
        ElectionPredictionConstituencies_df = pd.DataFrame(columns=['ElectionPredictionID','Constituency','WinningParty','SecondParty','VoteShare','Majority','GAIN','LOSS','Swing'])
        ElectionPredictionConstituencies_df[['Constituency','WinningParty','SecondParty','VoteShare','Majority','Swing']] = ConstWinners_df[['Constituency','Party','SecondParty','VoteShare','Majority','Swing']].copy()

        # Label each constituency as a HOLD or a GAIN/LOSS relative to the previous winner
        PreviousWinnersQuery = "SELECT ConstituencyName AS 'Constituency', FirstParty AS 'PreviousWinner' FROM Constituencies"
        PreviousWinners_df = pd.read_sql(PreviousWinnersQuery,conn)
        ElectionPredictionConstituencies_df = ElectionPredictionConstituencies_df.merge(PreviousWinners_df, how='left', on='Constituency')
        ElectionPredictionConstituencies_df['GAIN'] = np.where(ElectionPredictionConstituencies_df['WinningParty'] == ElectionPredictionConstituencies_df['PreviousWinner'],ElectionPredictionConstituencies_df['WinningParty']+" HOLD",ElectionPredictionConstituencies_df['WinningParty']+" GAIN")
        ElectionPredictionConstituencies_df['LOSS'] = np.where(ElectionPredictionConstituencies_df['WinningParty'] == ElectionPredictionConstituencies_df['PreviousWinner'],ElectionPredictionConstituencies_df['PreviousWinner']+" HOLD",ElectionPredictionConstituencies_df['PreviousWinner']+" LOSS")
        ElectionPredictionConstituencies_df['ElectionPredictionID'] = ElectionPredictionID

        # Create ElectionPredictionOverall data
        ElectionPredictionOverall_df = pd.DataFrame(columns=['ElectionPredictionID','Party','VoteShare','Constituencies'])
        ElectionPredictionOverall_df[['Party','VoteShare','Constituencies']] = Overall_df[['Party','PartyShare','Constituency']].copy()
        ElectionPredictionOverall_df['Constituencies'] = ElectionPredictionOverall_df['Constituencies'].astype('int')
        ElectionPredictionOverall_df['ElectionPredictionID'] = ElectionPredictionID

        # Insert data into the database (meta row first, then the tables that reference it)
        ElectionPredictionMeta_df.to_sql('ElectionPredictionMeta', conn, if_exists='append', index=False)
        ElectionPredictionPollsUsed_df.to_sql('ElectionPredictionPollsUsed', conn, if_exists='append', index=False)
        ElectionPredictionCandidates_df.to_sql('ElectionPredictionCandidates', conn, if_exists='append', index=False)
        ElectionPredictionConstituencies_df.to_sql('ElectionPredictionConstituencies', conn, if_exists='append', index=False)
        ElectionPredictionOverall_df.to_sql('ElectionPredictionOverall', conn, if_exists='append', index=False)

    print("End:",ElectionPredictionDate)
    ElectionPredictionDate += Delta # Increment ElectionPredictionDate

End: 2024-07-03
CPU times: total: 266 ms
Wall time: 14.4 s


In [10]:
# Show the overall party results left behind by the analysis cell above
try:
    print(Overall_df)
except NameError:
    # Overall_df is only created inside the analysis loop, so it is undefined
    # when no date was analysed; any other error is allowed to surface
    print('No Overall_df to show')

     Party  PartyShare  Constituency
0      Lab    0.396562         411.0
1      Con    0.215925         142.0
2       LD    0.106831          53.0
3      SNP    0.029277          18.0
4       SF    0.006485           7.0
5      DUP    0.006294           6.0
6    Other    0.019951           3.0
7       PC    0.006076           3.0
8     APNI    0.005289           2.0
9     SDLP    0.003572           2.0
10  Reform    0.139072           1.0
11   Green    0.060132           1.0
12     UUP    0.003699           1.0
13     TUV    0.000835           0.0


In [11]:
# Save the candidates dataframe to a csv for testing
try:
    Candidates_df.to_csv('C:/Users/danmu/Documents/Elections/2024_Python/candidates.csv',encoding='utf-8')
except NameError:
    # Candidates_df is created inside the analysis loop, so it is undefined when no date was analysed
    print('No Candidates_df to show')
except OSError as WriteError:
    # The dataframe exists but the file could not be written (e.g. missing folder or locked file);
    # report the real cause instead of claiming there is no dataframe
    print('Could not save candidates.csv:', WriteError)

In [12]:
# Switch the algorithm identity over to the MRP variant for the cells that follow.
#
# Version history:
#   0_1  New combined 'core' and 'MRP' algorithms, taking out the MRPs to be used in a different analysis
#   1_0  Combined core and MRP model
#
# The spelling "Alogrithm" is kept as-is: the identifier is written to the database
# as part of each prediction ID, so changing it would change stored values.
AlgorithmName, AlgorithmVersion, AlgorithmDate = "MRPComboPollElectionAlogrithm", "1_0", "20240703"

# Both identifiers have the form <name>_<version>_<date>
_AlgorithmParts = (AlgorithmName, AlgorithmVersion, AlgorithmDate)
ElectionAnalysisAlgorithm = "_".join(_AlgorithmParts)
PollAnalysisAlgorithm = "_".join(_AlgorithmParts)

In [13]:
%%time
# Combined Election Analysis (MRP variant)
#
# For every date from StartDate to EndDate (inclusive, one day at a time) this cell:
#   1. works out which analysed polls are recent enough to apply to that date,
#   2. blends their candidate vote shares rank by rank into one share per candidate,
#   3. derives constituency winners and overall party totals, and
#   4. appends the results to the ElectionPrediction* tables.
# StartDate, EndDate, engine, conn and DeleteAnalysisQueryRaw are defined outside this cell.

# Setup variables for the loop
ElectionPredictionDate = StartDate
Delta = timedelta(days=1)

# Loop around all of the analysis dates
while ElectionPredictionDate <= EndDate:

    # Polls are only valid for a shorter window once the election is 35 days away or less
    DaysToElection = ElectionDate - ElectionPredictionDate
    if DaysToElection.days <= 35:
        NationalValidPeriod = 7
        DetailedValidPeriod = 35
    else:
        NationalValidPeriod = 30
        DetailedValidPeriod = 100

    # Check if there is already an MRP analysis for this analysis date
    ElectionPredictionDateStr = datetime.datetime.strftime(ElectionPredictionDate, '%Y%m%d')
    AnalysisExistsQuery = "SELECT Count(ElectionPredictionID) FROM ElectionPredictionMeta WHERE ElectionPredictionDate = '<ElectionPredictionDate>' AND ElectionPredictionID LIKE '%MRP%'"
    AnalysisExistsQuery = AnalysisExistsQuery.replace('<ElectionPredictionDate>',ElectionPredictionDateStr)
    AnalysisExistsList = [i[0] for i in engine.execute(AnalysisExistsQuery)]
    AnalysisExistsInt = AnalysisExistsList[0]

    # Analyse the date if it has no analysis yet, or if existing analyses are to be replaced
    if AnalysisExistsInt < 1 or DeleteAnalysesFlag == True:

        if AnalysisExistsInt >=1:
            # Delete the existing MRP analysis for this date before it is recreated
            DeleteAnalysisQueryType = DeleteAnalysisQueryRaw.replace('<Type>','MRP')
            DeleteAnalysisQuery = DeleteAnalysisQueryType.replace('<ElectionPredictionDate>',ElectionPredictionDateStr)
            engine.execute(DeleteAnalysisQuery)

        # Get the list of polls that have actually been analysed and incorporated into the database
        AnalysedPollsQuery = """SELECT pam.PollID, pm.Pollster, pm.PollType, pm.PollScope, pm.PollDate, rt.RegionTypeRank  FROM PollAnalysisMeta AS pam
        INNER JOIN PollMeta AS pm ON pm.PollID = pam.PollID
        INNER JOIN RegionTypes AS rt ON rt.RegionType = pm.PollType"""

        AnalysedPolls_df = pd.read_sql(AnalysedPollsQuery,conn)

        # Add prediction date and convert to datetime so that it can be used in a calculation
        AnalysedPolls_df['PredictionDate'] = ElectionPredictionDate
        AnalysedPolls_df['PredictionDate'] = pd.to_datetime(AnalysedPolls_df['PredictionDate'])

        # Convert the date column to datetime type
        AnalysedPolls_df['PollDate'] = pd.to_datetime(AnalysedPolls_df['PollDate'])

        # Determine how many days before the prediction date a poll was taken
        AnalysedPolls_df['DateDelta'] = AnalysedPolls_df['PredictionDate'] - AnalysedPolls_df['PollDate']

        # Assign a rank to each poll: polls whose scope is not 'All' rank one place lower than their region type
        AnalysedPolls_df['PollRank'] = np.where(AnalysedPolls_df['PollScope']=='All',AnalysedPolls_df['RegionTypeRank'],AnalysedPolls_df['RegionTypeRank']-1)

        # Determine the applicability of each poll based on the day delta:
        # ranks up to DetailedRankThreshold use the detailed window, higher ranks the national window.
        # Polls dated after the prediction date (negative delta) are never applicable.
        AnalysedPolls_df['PollApplicability'] = np.where((AnalysedPolls_df['DateDelta'] <= pd.Timedelta(DetailedValidPeriod, unit="d")) \
                                                         & (AnalysedPolls_df['DateDelta'] >= pd.Timedelta(0, unit="d")) \
                                                         & (AnalysedPolls_df['PollRank'] <= DetailedRankThreshold),1, \
                                                         np.where((AnalysedPolls_df['DateDelta'] <= pd.Timedelta(NationalValidPeriod, unit="d")) \
                                                         & (AnalysedPolls_df['DateDelta'] >= pd.Timedelta(0, unit="d")) \
                                                         & (AnalysedPolls_df['PollRank'] > DetailedRankThreshold),1,0))

        # Copy the polls that fall within the date delta to a new data frame
        ApplicablePolls_df = AnalysedPolls_df[AnalysedPolls_df['PollApplicability'] == 1].copy()
        ApplicablePolls_df.reset_index(drop=True,inplace=True)

        # Without any applicable poll the max/min rank below would be NaN and range() would raise,
        # so skip this date explicitly. Any existing analysis for the date has already been deleted above.
        if ApplicablePolls_df.empty:
            print("No applicable polls for",ElectionPredictionDate,"- no prediction written")
            print("End:",ElectionPredictionDate)
            ElectionPredictionDate += Delta # Increment ElectionPredictionDate
            continue

        # Check for duplicate polls of the same type
        # Create column that contains the info to check for duplicates
        ApplicablePolls_df['DuplicateCheck'] = ApplicablePolls_df['Pollster'] + ApplicablePolls_df['PollType'] + ApplicablePolls_df['PollScope']

        # The dataframe needs to be sorted to ensure the latest poll is kept and older duplicates are removed
        ApplicablePolls_df.sort_values(by='DateDelta',inplace=True)

        # Drop duplicates (the first, i.e. most recent, poll of each pollster/type/scope is kept)
        ApplicablePolls_df.drop_duplicates(subset=['DuplicateCheck'],inplace=True)

        # Reset the dataframe index after being sorted and duplicates removed
        ApplicablePolls_df.reset_index(drop=True,inplace=True)

        # Main algorithm to create resultant candidate shares
        # Create dataframe with every candidate as rows
        SelectCandidatesQuery = "SELECT can.CandidateID, can.Constituency, can.Party, can.PreviousShare FROM Candidates AS can WHERE can.CurrentStanding = 1"
        Candidates_df = pd.read_sql(SelectCandidatesQuery,conn)

        # Descending loop around every rank
        # start point = max rank from applicable polls df
        MaxRank = ApplicablePolls_df['PollRank'].max()
        MinRank = ApplicablePolls_df['PollRank'].min()

        # Boolean to record if this is the first run through for the running total
        FirstRun = True

        for Rank in range(MaxRank,MinRank-1,-1):

            # Create list of the applicable polls relating to the current rank
            RankApplicablePolls_df = ApplicablePolls_df[ApplicablePolls_df['PollRank'] == Rank].copy()
            RankApplicablePolls = RankApplicablePolls_df['PollID'].tolist()

            for PollID in RankApplicablePolls:

                # Query for getting the poll analysis details; the vote share column is named after the PollID
                SelectPollDetailsQuery = """SELECT pac.CandidateID, pac.VoteShare AS '<PollID>' FROM PollAnalysisConstituencies AS pac
                INNER JOIN PollAnalysisRegions AS par on par.PollAnalysisRegionID = pac.PollAnalysisRegionID
                INNER JOIN PollAnalysisMeta AS pam ON pam.PollAnalysisID = par.PollAnalysisID
                WHERE pam.PollID = '<PollID>'
                ORDER BY pac.CandidateID"""

                # Insert the PollID into the query
                SelectPollDetailsQuery = SelectPollDetailsQuery.replace("<PollID>",PollID)

                # Pull query into a data frame for the results
                PollAnalysis_df = pd.read_sql(SelectPollDetailsQuery,conn)

                # Add this poll's vote shares as a new column on Candidates_df
                Candidates_df = Candidates_df.merge(PollAnalysis_df, how='left', on='CandidateID')

            # Average the columns for each row (candidate) for this rank
            if RankApplicablePolls:
                if FirstRun:
                    Candidates_df[Rank] = Candidates_df[RankApplicablePolls].mean(axis=1)
                    # Set the first run to false now that the loop has been gone through
                    FirstRun = False
                else:
                    # The running result of the previous rank is folded in as one extra column
                    RankApplicablePolls.append(PreviousRank)
                    Candidates_df[Rank] = Candidates_df[RankApplicablePolls].mean(axis=1)

                # Record the last rank to be evaluated
                PreviousRank = Rank

        # The column of the last rank actually evaluated holds the final blended share
        Candidates_df['VoteShareRaw'] = Candidates_df[PreviousRank].copy()

        # Determine the factor needed to ensure vote shares for each constituency sum to 1
        Candidates_df['ConstRawShareTotals'] = Candidates_df['VoteShareRaw'].groupby(Candidates_df['Constituency']).transform('sum')

        # Modify the raw vote shares to ensure they sum to 1
        Candidates_df['VoteShare'] = Candidates_df['VoteShareRaw']/Candidates_df['ConstRawShareTotals']

        # Per-constituency total of the normalised shares, kept as a sanity-check column
        Candidates_df['VoteShareCheck'] = Candidates_df['VoteShare'].groupby(Candidates_df['Constituency']).transform('sum')

        Candidates_df['VoteShareChange'] = Candidates_df['VoteShare'] - Candidates_df['PreviousShare']

        # Determine the constituency winners
        # https://stackoverflow.com/questions/15705630/get-the-rows-which-have-the-max-value-in-groups-using-groupby
        ConstWinners_df = Candidates_df[['Constituency','Party','VoteShare','VoteShareChange']].copy()
        ConstWinners_df = ConstWinners_df.sort_values(['Constituency','VoteShare'],ascending = [True, False])

        # With candidates sorted best-first within each constituency, the next row is the runner-up.
        # NOTE(review): shift(-1) is not grouped by constituency, so a constituency with a single
        # candidate would pick up the next constituency's leader here - confirm that cannot occur.
        ConstWinners_df['Majority'] = ConstWinners_df['VoteShare'] - ConstWinners_df['VoteShare'].shift(-1)
        ConstWinners_df['SecondParty'] = ConstWinners_df['Party'].shift(-1)

        # Calculate the swing for each winner
        ConstWinners_df['Swing'] = (ConstWinners_df['VoteShareChange'] - ConstWinners_df['VoteShareChange'].shift(-1))/2

        # Keep only the winners of each constituency
        ConstWinners_df = ConstWinners_df.sort_values('VoteShare').drop_duplicates(['Constituency'], keep='last')
        ConstWinners_df = ConstWinners_df.sort_values('Constituency')

        ConstWinners_df.reset_index(drop=True,inplace=True)

        # Determine how many constituencies each party has won
        OverallResults_df = ConstWinners_df[['Party','Constituency']].groupby('Party').count()
        OverallResults_df.reset_index(drop=False,inplace=True)

        # Determine total vote shares for each party (sum over constituencies, rescaled to sum to 1)
        OverallVoteShare_df = Candidates_df[['Party','VoteShare']].copy()
        OverallVoteShare_df['PartyShare'] = OverallVoteShare_df['VoteShare'].groupby(OverallVoteShare_df['Party']).transform('sum')
        OverallVoteShare_df = OverallVoteShare_df.drop_duplicates(['Party'], keep='last')
        OverallVoteShare_df.drop(['VoteShare'], axis=1, inplace=True)
        OverallVoteShare_df.sort_values(by='PartyShare', ascending=False, inplace=True)
        OverallVoteShare_df['PartyShare'] = OverallVoteShare_df['PartyShare']/OverallVoteShare_df['PartyShare'].sum()
        OverallVoteShare_df.reset_index(drop=True,inplace=True)

        # Combine the overall vote shares and constituency numbers into one dataframe
        # (parties that won nothing have no count after the left merge, hence the fillna)
        Overall_df = OverallVoteShare_df.merge(OverallResults_df, how='left', on='Party')
        Overall_df = Overall_df.fillna(0)
        Overall_df.sort_values(by='Constituency', ascending=False, inplace=True)
        Overall_df.reset_index(drop=True,inplace=True)

        # Election prediction analysis date is today
        ElectionAnalysisDate = datetime.date.today()

        # Create the election prediction ID for the database: <analysis date><prediction date><algorithm>
        ElectionPredictionID = datetime.datetime.strftime(ElectionAnalysisDate, '%Y%m%d') + datetime.datetime.strftime(ElectionPredictionDate, '%Y%m%d') + ElectionAnalysisAlgorithm

        # Create the ElectionPredictionMeta table content for this analysis
        ElectionPredictionMeta_df = pd.DataFrame(columns=["ElectionAnalysisDate","ElectionPredictionDate","ElectionAnalysisAlgorithm"])
        ElectionPredictionMeta_df.at[0,"ElectionAnalysisDate"] = ElectionAnalysisDate
        ElectionPredictionMeta_df.at[0,"ElectionPredictionDate"] = ElectionPredictionDate
        ElectionPredictionMeta_df.at[0,"ElectionAnalysisAlgorithm"] = ElectionAnalysisAlgorithm

        # Create the ElectionPredictionPollsUsed content
        ElectionPredictionPollsUsed_df = pd.DataFrame(columns=['ElectionPredictionID','PollID'])
        ElectionPredictionPollsUsed_df['PollID'] = ApplicablePolls_df['PollID'].copy()
        ElectionPredictionPollsUsed_df['ElectionPredictionID'] = ElectionPredictionID

        # Create ElectionPredictionCandidates data
        ElectionPredictionCandidates_df = pd.DataFrame(columns=['ElectionPredictionID','CandidateID','VoteShare'])
        ElectionPredictionCandidates_df[['CandidateID','VoteShare']] = Candidates_df[['CandidateID','VoteShare']].copy()
        ElectionPredictionCandidates_df['ElectionPredictionID'] = ElectionPredictionID

        # Create ElectionPredictionConstituencies data
        ElectionPredictionConstituencies_df = pd.DataFrame(columns=['ElectionPredictionID','Constituency','WinningParty','SecondParty','VoteShare','Majority','GAIN','LOSS','Swing'])
        ElectionPredictionConstituencies_df[['Constituency','WinningParty','SecondParty','VoteShare','Majority','Swing']] = ConstWinners_df[['Constituency','Party','SecondParty','VoteShare','Majority','Swing']].copy()

        # Compare each predicted winner with the previous winner to label holds, gains and losses
        PreviousWinnersQuery = "SELECT ConstituencyName AS 'Constituency', FirstParty AS 'PreviousWinner' FROM Constituencies"
        PreviousWinners_df = pd.read_sql(PreviousWinnersQuery,conn)
        ElectionPredictionConstituencies_df = ElectionPredictionConstituencies_df.merge(PreviousWinners_df, how='left', on='Constituency')
        ElectionPredictionConstituencies_df['GAIN'] = np.where(ElectionPredictionConstituencies_df['WinningParty'] == ElectionPredictionConstituencies_df['PreviousWinner'],ElectionPredictionConstituencies_df['WinningParty']+" HOLD",ElectionPredictionConstituencies_df['WinningParty']+" GAIN")
        ElectionPredictionConstituencies_df['LOSS'] = np.where(ElectionPredictionConstituencies_df['WinningParty'] == ElectionPredictionConstituencies_df['PreviousWinner'],ElectionPredictionConstituencies_df['PreviousWinner']+" HOLD",ElectionPredictionConstituencies_df['PreviousWinner']+" LOSS")
        ElectionPredictionConstituencies_df['ElectionPredictionID'] = ElectionPredictionID

        # Create ElectionPredictionOverall data
        ElectionPredictionOverall_df = pd.DataFrame(columns=['ElectionPredictionID','Party','VoteShare','Constituencies'])
        ElectionPredictionOverall_df[['Party','VoteShare','Constituencies']] = Overall_df[['Party','PartyShare','Constituency']].copy()
        ElectionPredictionOverall_df['Constituencies'] = ElectionPredictionOverall_df['Constituencies'].astype('int')
        ElectionPredictionOverall_df['ElectionPredictionID'] = ElectionPredictionID

        # Insert data into the database
        ElectionPredictionMeta_df.to_sql('ElectionPredictionMeta', conn, if_exists='append', index=False)
        ElectionPredictionPollsUsed_df.to_sql('ElectionPredictionPollsUsed', conn, if_exists='append', index=False)
        ElectionPredictionCandidates_df.to_sql('ElectionPredictionCandidates', conn, if_exists='append', index=False)
        ElectionPredictionConstituencies_df.to_sql('ElectionPredictionConstituencies', conn, if_exists='append', index=False)
        ElectionPredictionOverall_df.to_sql('ElectionPredictionOverall', conn, if_exists='append', index=False)

    print("End:",ElectionPredictionDate)
    ElectionPredictionDate += Delta # Increment ElectionPredictionDate

End: 2024-07-03
CPU times: total: 328 ms
Wall time: 15.6 s


In [14]:
# Show the overall party results left behind by the MRP analysis cell above
try:
    print(Overall_df)
except NameError:
    # Overall_df is only created inside the analysis loop, so it is undefined
    # when no date was analysed; any other error is allowed to surface
    print('No Overall_df to show')

     Party  PartyShare  Constituency
0      Lab    0.398644         453.0
1      Con    0.218897          91.0
2       LD    0.107911          63.0
3      SNP    0.027633          16.0
4       SF    0.006485           7.0
5      DUP    0.006294           6.0
6    Green    0.056056           3.0
7       PC    0.006177           3.0
8   Reform    0.142163           2.0
9     APNI    0.005289           2.0
10    SDLP    0.003572           2.0
11   Other    0.016346           1.0
12     UUP    0.003699           1.0
13     TUV    0.000835           0.0


In [15]:
# Close the connection with the database.
# conn is the connection object passed to the pd.read_sql / to_sql calls in the analysis cells,
# so no cell that reads from or writes to the database can be re-run after this one.
conn.close()