#### Import & Load Preprocessed/Filtered Data of 10 Constituencies

In [3]:
import pandas as pd

# Load the filtered turnout table and store YEAR as strings in the same step,
# so later group-bys and categorical plot axes treat it as a label.
df = pd.read_csv("voters_turnout_10_constituencies.csv").assign(
    YEAR=lambda frame: frame["YEAR"].astype(str)
)

# Gender-specific turnout columns used by the gender aggregations.
gender_cols = ["VOTER_TURNOUT_MALE_PERCENT", "VOTER_TURNOUT_FEMALE_PERCENT"]


#### Aggregate Data

In [4]:
# Column holding the overall turnout percentage.
turnout_col = "VOTER_TURN_OUT_PERCENT"

# 1) Mean overall turnout per election year
agg_over_time = df.groupby("YEAR", as_index=False)[turnout_col].mean()

# 2) Mean male / female turnout per election year
agg_gender_over_time = df.groupby("YEAR", as_index=False)[gender_cols].mean()

# 3) Mean overall turnout per (constituency, year) pair
dist_const_time = df.groupby(["PC_NAME", "YEAR"], as_index=False)[turnout_col].mean()

# 4) Mean male / female turnout per constituency, pooled over all years
dist_const_gender = df.groupby("PC_NAME", as_index=False)[gender_cols].mean()


### Bokeh Setup

#### Installing required packages

In [5]:
!pip install bokeh

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: C:\Users\vaibh\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


#### Importing required packages

In [7]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.transform import factor_cmap

# Configure Bokeh so that the show() calls in the cells below render inline
# in the notebook.
output_notebook()


### ▶ Change in Voter Turnout Over Time (Aggregate)

This visualization shows how the average voter turnout percentage changed
over election years across all 10 constituencies. The values were aggregated
using the mean turnout for each year. This helps understand whether overall
participation increased, decreased, or remained stable over time.

In [8]:

# Wrap the yearly averages so the glyphs can reference columns by name.
source = ColumnDataSource(agg_over_time)

# YEAR holds strings, so the x-axis is categorical; pass the factors as a
# plain list rather than a pandas Series.
p1 = figure(
    x_range=agg_over_time["YEAR"].tolist(),
    title="Change in Voter Turnout Over Time (Aggregate)",
    height=350,
    width=600,
)

p1.line(x="YEAR", y="VOTER_TURN_OUT_PERCENT", source=source, line_width=3)
# scatter() replaces circle(): passing size= to circle() is deprecated in
# recent Bokeh releases, and scatter() (default marker: circle) is what the
# constituency/time plot already uses.
p1.scatter(x="YEAR", y="VOTER_TURN_OUT_PERCENT", source=source, size=8)

p1.xaxis.axis_label = "Year"
p1.yaxis.axis_label = "Turnout %"

show(p1)




### ▶ Change in Voter Turnout Across Genders (Aggregate)

This grouped bar chart compares male and female voter turnout across different
years. The data was aggregated by computing the average gender-wise turnout
for all constituencies. This visualization helps identify gender participation
gaps and trends over time.




In [13]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.transform import factor_cmap

# Nested categorical x-axis: one (year, gender) factor per bar.
years = agg_gender_over_time["YEAR"].astype(str).tolist()
genders = ["Male", "Female"]
x = [(year, gender) for year in years for gender in genders]

# Interleave the values as male, female, male, female, ... so they line up
# with the factor order in `x`.
male_vals = agg_gender_over_time["VOTER_TURNOUT_MALE_PERCENT"].tolist()
female_vals = agg_gender_over_time["VOTER_TURNOUT_FEMALE_PERCENT"].tolist()
counts = tuple(
    value
    for pair in zip(male_vals, female_vals)
    for value in pair
)

source = ColumnDataSource(data={
    "x": x,
    "counts": counts,
    "gender": [gender for _, gender in x],  # drives the legend entries
})

# One colour per gender; start=1 makes the mapper look at the second level
# (the gender) of each (year, gender) factor.
colors = ["blue", "orange"]
gender_cmap = factor_cmap("x", palette=colors, factors=genders, start=1)

p = figure(
    title="Change in Voter Turnout Across Genders (Aggregate)",
    x_range=FactorRange(*x),
    width=700,
    height=400,
)

p.vbar(
    x="x",
    top="counts",
    width=0.8,
    source=source,
    fill_color=gender_cmap,
    legend_field="gender",
)

# Legend and axis cosmetics
p.legend.location = "top_left"
p.yaxis.axis_label = "Turnout %"
p.xaxis.axis_label = "Year & Gender"
p.xaxis.major_label_orientation = 1

show(p)


### ▶ Distribution of Voter Turnout Across Constituencies and Time

This visualization displays how turnout varies between constituencies and
across different election years. Each point represents the average turnout
of one constituency in a particular year. It highlights variability,
outliers, and consistency across regions over time.


In [10]:
# One row per (constituency, year) pair; glyphs reference the columns by name.
source = ColumnDataSource(dist_const_time)

# Sort the year factors and pass them as a plain list so colours are assigned
# in a stable, chronological order regardless of the row order of the frame.
year_factors = sorted(dist_const_time["YEAR"].unique())

p3 = figure(
    x_range=sorted(dist_const_time["PC_NAME"].unique()),
    title="Distribution Across Constituencies & Time",
    height=400,
    width=800,
)

p3.scatter(
    x="PC_NAME",
    y="VOTER_TURN_OUT_PERCENT",
    source=source,
    size=8,
    # NOTE(review): Category10_10 provides ten colours; with more than ten
    # distinct years the extra ones would not get a palette colour - confirm
    # the year count or switch to a larger palette.
    color=factor_cmap("YEAR", "Category10_10", year_factors),
    # Colour encodes the election year, so expose that mapping in a legend.
    legend_field="YEAR",
)

# Legend styled like the constituency/gender chart.
p3.legend.title = "Year"
p3.legend.location = "top_left"
p3.legend.orientation = "horizontal"

p3.xaxis.axis_label = "Constituency"
p3.xaxis.major_label_orientation = 1.0
p3.yaxis.axis_label = "Turnout %"

show(p3)


### ▶ Distribution of Voter Turnout Across Constituencies and Genders

This grouped bar chart shows average male and female voter turnout for each
constituency. The aggregation was performed at constituency level. This helps
identify constituencies with higher gender turnout gaps and provides a
comparative participation profile across regions.


In [11]:
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
from bokeh.palettes import Category10

# Nested categorical x-axis: one (constituency, gender) factor per bar.
constituencies = dist_const_gender["PC_NAME"].tolist()
genders = ["Male", "Female"]
x = [(pc, gender) for pc in constituencies for gender in genders]

# Interleave male / female averages so they follow the factor order in `x`.
counts = tuple(
    value
    for pair in zip(
        dist_const_gender["VOTER_TURNOUT_MALE_PERCENT"],
        dist_const_gender["VOTER_TURNOUT_FEMALE_PERCENT"],
    )
    for value in pair
)

source = ColumnDataSource(data={
    "x": x,
    "counts": counts,
    "gender": [gender for _, gender in x],  # used for colouring and the legend
})

# Map each gender label to a colour from the three-colour Category10 palette.
gender_cmap = factor_cmap("gender", palette=Category10[3], factors=genders)

p4 = figure(
    title="Distribution Across Constituencies & Genders",
    x_range=FactorRange(*x),
    width=900,
    height=400,
    toolbar_location=None,
)

# Grouped bars, coloured by gender, with one legend entry per gender.
p4.vbar(
    x="x",
    top="counts",
    width=0.8,
    source=source,
    fill_color=gender_cmap,
    legend_field="gender",
)

# Legend styling
p4.legend.title = "Gender"
p4.legend.orientation = "horizontal"
p4.legend.location = "top_left"

# Axis formatting
p4.yaxis.axis_label = "Turnout %"
p4.xaxis.major_label_orientation = 1.0

show(p4)
