In [18]:
# Import required modules
import os
import numpy as np
import pandas as pd
import altair as alt
import streamlit as st
import plotly.express as px
import matplotlib.pyplot as plt

In [19]:
# Page setting: wide layout plus the browser-tab title and icon.
st.set_page_config(
    page_title="Elections 2022 Analysis",
    page_icon="🗳️",
    layout="wide",
)

# Inline the local stylesheet into the page; unsafe_allow_html is needed so the
# <style> tag is passed through instead of being escaped.
with open("style.css") as css_file:
    css_rules = css_file.read()
    st.markdown(f"<style>{css_rules}</style>", unsafe_allow_html=True)

In [20]:
# Sidebar: logo, usage instructions and the National vs. County/Diaspora picker.
with st.sidebar:
    st.image("data/LibranTechie.png", width=300)
    st.title("Select the type of data you want to retrieve")
    st.subheader("Instructions:")
    # The instruction text is spelled out line by line so its exact whitespace
    # (leading newline, four-space bullets, trailing indent) is explicit.
    st.markdown(
        "\n"
        "    - Select the data to view using the select box below.\n"
        "    - For National Results View the data on the main panel.\n"
        "    - For County & Diaspora votes: Select the county from the dropdown list in the main panel.\n"
        "        "
    )
    # `selection` holds the option currently chosen in the select box.
    selection = st.selectbox(
        "Select the type of data you want to retrieve",
        ["National", "County/Diaspora"],
    )


In [21]:
# Load data
def load_data():
    """Read the election results CSV into a DataFrame.

    Returns:
        pandas.DataFrame: the contents of ``data/elections_2022.csv`` parsed
        with the default ``pd.read_csv`` settings. The path is relative to the
        current working directory.
    """
    return pd.read_csv("data/elections_2022.csv")

In [22]:
# Load the per-county results table and preview its first five rows.
data_df = load_data()
data_df.head(5)

Unnamed: 0,County Name,Registered Voters,Raila Odinga,William Ruto,Waihiga Mwaure,George Wajackoyah,Total Valid Votes,Rejected Ballots,Total Voters,Percentage Turnout
0,MOMBASA,642362,161015,113700,482,2104,277301,3812,281113,43.762396
1,KWALE,328316,125541,51918,413,1137,179009,1359,180368,54.937316
2,KILIFI,588842,204536,77331,1067,2552,285496,3191,288687,49.026224
3,TANA RIVER,141110,51390,41505,198,412,93505,1068,94573,67.020764
4,LAMU,81468,26160,22876,186,848,50070,887,50957,62.548485


In [23]:
def calculate_national_vote(data_df=None):
    """Aggregate the per-county results into national totals per candidate.

    Args:
        data_df: Optional results frame with one row per county and one vote
            column per candidate. When omitted, the frame is read with
            ``load_data()``, which matches the original zero-argument call.

    Returns:
        pandas.DataFrame: columns ``Candidate``, ``Votes`` (int) and
        ``Percentage`` (share of the summed candidate votes, rounded to two
        decimals), sorted by ``Votes`` in descending order.

    Raises:
        KeyError: if any of the candidate columns is missing from the frame.
    """
    candidate_columns = [
        "Raila Odinga",
        "William Ruto",
        "Waihiga Mwaure",
        "George Wajackoyah",
    ]
    if data_df is None:
        data_df = load_data()

    # Pick the candidate columns by name instead of summing every column and
    # dropping rows by position: this does not depend on the CSV column order
    # and avoids "summing" the string column of county names.
    totals = data_df[candidate_columns].sum(skipna=True)

    total_df = totals.to_frame().reset_index()
    total_df = total_df.rename(columns={"index": "Candidate", 0: "Votes"})
    total_df["Votes"] = total_df["Votes"].astype(int)
    total_df["Percentage"] = (
        total_df["Votes"] / total_df["Votes"].sum() * 100
    ).round(2)
    return total_df.sort_values(by="Votes", ascending=False)

In [24]:
def calculate_county_vote(county_name, data_df=None):
    """Return the per-candidate votes and vote share for a single county.

    Args:
        county_name: Value to look up in the ``County Name`` column.
        data_df: Optional results frame with one row per county. When omitted,
            the frame is read with ``load_data()``, which matches the original
            one-argument call.

    Returns:
        pandas.DataFrame: columns ``Candidate``, ``<county_name>`` (the votes
        for that county) and ``Percentage`` (share of the summed candidate
        votes, rounded to two decimals), sorted by votes in descending order.

    Raises:
        KeyError: if ``county_name`` is not present in ``County Name`` or a
            candidate column is missing.
    """
    candidate_columns = [
        "Raila Odinga",
        "William Ruto",
        "Waihiga Mwaure",
        "George Wajackoyah",
    ]
    if data_df is None:
        data_df = load_data()

    # Extract the whole county row first, then keep the candidate entries by
    # name rather than dropping the other entries by position.
    county_row = data_df.set_index("County Name").loc[county_name]
    county_df = county_row.loc[candidate_columns].to_frame().reset_index()
    county_df = county_df.rename(columns={"index": "Candidate"})

    county_df["Percentage"] = (
        county_df[county_name] / county_df[county_name].sum() * 100
    ).round(2)
    return county_df.sort_values(by=county_name, ascending=False)

In [25]:
def calculate_county_voter_data(county_name, data_df=None):
    """Return the registration and ballot counts for a single county.

    Args:
        county_name: Value to look up in the ``County Name`` column.
        data_df: Optional results frame with one row per county. When omitted,
            the frame is read with ``load_data()``, which matches the original
            one-argument call.

    Returns:
        pandas.DataFrame: three rows, in this order: ``Registered Voters``,
        ``Total Valid Votes``, ``Rejected Ballots``. Columns are ``Metric``
        and ``<county_name>``.

    Raises:
        KeyError: if ``county_name`` is not present in ``County Name`` or one
            of the metric columns is missing.
    """
    metric_columns = ["Registered Voters", "Total Valid Votes", "Rejected Ballots"]
    if data_df is None:
        data_df = load_data()

    # Keep the wanted metrics by name rather than dropping the other entries
    # by position, so the result does not depend on the CSV column order.
    county_row = data_df.set_index("County Name").loc[county_name]
    county_df = county_row.loc[metric_columns].to_frame().reset_index()
    return county_df.rename(columns={"index": "Metric"})

In [26]:
# National vote share per candidate, one coloured bar per candidate.
national_df = calculate_national_vote()
fig = px.bar(
    data_frame=national_df,
    x="Candidate",
    y="Percentage",
    color="Candidate",
)
# No layout overrides are passed; the call is kept as the cell's final expression.
fig.update_layout()

In [27]:
# Vote share per candidate for one county, drawn as horizontal bars.
county_presidential_data = calculate_county_vote("UASIN GISHU")
fig = px.bar(
    data_frame=county_presidential_data,
    x="Percentage",
    y="Candidate",
)
# No layout overrides are passed; the call is kept as the cell's final expression.
fig.update_layout()

In [28]:
# Turnout for a single county: ballots cast (valid + rejected) as a percentage
# of registered voters.
turnout_county = "UASIN GISHU"
county_voting_df = calculate_county_voter_data(turnout_county)
# Rows are positional: 0 = registered voters, 1 = total valid votes,
# 2 = rejected ballots.
registered_voters = county_voting_df.iloc[0][turnout_county]
ballots_cast = (
    county_voting_df.iloc[1][turnout_county]
    + county_voting_df.iloc[2][turnout_county]
)
turn_out = ballots_cast / registered_voters * 100
turn_out


69.51099805062836

In [29]:
# Correlation between voter turnout and presidential votes
# Best voter turnout - Ruto won 4/5
# Build a new frame instead of aliasing `data_df`: an in-place drop on the
# alias would strip the columns from `data_df` as well and make this cell fail
# with a KeyError when it is run a second time.
turn_out_df = data_df.drop(
    columns=["Waihiga Mwaure", "George Wajackoyah"]
).sort_values(by="Percentage Turnout", ascending=False)
# The winner is whichever of the two leading candidates holds the row maximum;
# a tie is attributed to Raila Odinga because the comparison is made against
# his column.
turn_out_df["Winner"] = np.where(
    turn_out_df[["Raila Odinga", "William Ruto"]].max(axis=1)
    == turn_out_df["Raila Odinga"],
    "Raila Odinga",
    "William Ruto",
)
turn_out_df.head(10)

Unnamed: 0,County Name,Registered Voters,Raila Odinga,William Ruto,Total Valid Votes,Rejected Ballots,Total Voters,Percentage Turnout,Winner
35,BOMET,377023,13383,285428,299606,1545,301151,79.876029,William Ruto
23,WEST POKOT,220042,63092,109944,173705,1258,174963,79.513457,William Ruto
34,KERICHO,428126,15053,318861,334516,1821,336337,78.560284,William Ruto
27,ELGEYO/MARAKWET,213904,4893,160033,165224,1537,166761,77.960674,William Ruto
32,NAROK,398852,159455,148310,308432,1597,310029,77.730336,Raila Odinga
29,BARINGO,281107,41227,175170,217091,1014,218105,77.587894,William Ruto
28,NANDI,406393,26034,280813,307575,1492,309067,76.051261,William Ruto
43,MIGORI,469053,294136,52525,347773,1611,349384,74.487105,Raila Odinga
42,HOMA BAY,551111,399784,3497,404112,2045,406157,73.697858,Raila Odinga
41,KISUMU,607496,419997,10011,431005,2572,433577,71.37117,Raila Odinga


In [30]:
# Worst Voter Turnout - Raila Won all of them
# Re-order the same frame in place from lowest to highest turnout (ascending
# is the default); later cells see this ordering.
turn_out_df.sort_values("Percentage Turnout", inplace=True)
# Recompute the winner label: the candidate whose votes equal the row maximum
# of the two leading candidates, checked against Raila Odinga's column.
turn_out_df["Winner"] = np.where(
    turn_out_df[["Raila Odinga", "William Ruto"]].max(axis=1)
    == turn_out_df["Raila Odinga"],
    "Raila Odinga",
    "William Ruto",
)
turn_out_df.head(10)

Unnamed: 0,County Name,Registered Voters,Raila Odinga,William Ruto,Total Valid Votes,Rejected Ballots,Total Voters,Percentage Turnout,Winner
0,MOMBASA,642362,161015,113700,277301,3812,281113,43.762396,Raila Odinga
2,KILIFI,588842,204536,77331,285496,3191,288687,49.026224,Raila Odinga
6,GARISSA,201513,81376,28111,109960,590,110550,54.859984,Raila Odinga
1,KWALE,328316,125541,51918,179009,1359,180368,54.937316,Raila Odinga
46,NAIROBI,2416551,767395,561775,1339367,12869,1352236,55.957271,Raila Odinga
47,DIASPORA,10443,3727,2190,5988,44,6032,57.76118,Raila Odinga
37,VIHIGA,310063,114714,67633,184333,2115,186448,60.132296,Raila Odinga
15,MACHAKOS,687691,304809,101456,410238,3759,413997,60.201021,Raila Odinga
36,KAKAMEGA,844709,357857,141166,503719,5562,509281,60.290704,Raila Odinga
22,TURKANA,238554,96117,46696,143532,1099,144631,60.628202,Raila Odinga


In [31]:
# Turnout per county; marker colour shows the winner and marker size scales
# with the turnout percentage.
fig = px.scatter(
    data_frame=turn_out_df,
    x="County Name",
    y="Percentage Turnout",
    color="Winner",
    size="Percentage Turnout",
    size_max=60,
    hover_name="County Name",
)
# No layout overrides are passed; the call is kept as the cell's final expression.
fig.update_layout()

In [32]:

# Mean turnout percentage across the rows labelled as won by Raila Odinga.
raila_mean = turn_out_df[turn_out_df["Winner"] == "Raila Odinga"][
    "Percentage Turnout"
].mean()
raila_mean



63.041909711428566

In [33]:
# Mean turnout percentage across the rows labelled as won by William Ruto.
ruto_mean = turn_out_df[turn_out_df["Winner"] == "William Ruto"][
    "Percentage Turnout"
].mean()
ruto_mean

70.48692877900001