# Submission Details:
### Author: Himanshu Soni
### Roll Number: 2025EM1100506

# Voter Turnout Dashboard

Interactive dashboard showing voter turnout data for 10 selected constituencies across three General Elections (2014, 2019, 2024).


## 1. Setup and Data Loading


In [1]:
import sys
import os
import importlib
# Add the relative 'src' directory to the module search path before the
# project imports below (presumably that is where those modules live — confirm).
sys.path.append('src')

import pandas as pd
import numpy as np
from bokeh.io import output_notebook, show
from bokeh.layouts import row, column, gridplot, grid
from bokeh.models import Div, Paragraph

# Import the project modules as module objects first, so they can be reloaded below
import data_loader
import data_processor
import create_dataset
import visualizations
import interactivity

# Reload modules to pick up any changes (useful during development).
# This must happen before the `from ... import ...` lines below so that the
# imported names are bound to the freshly reloaded definitions.
importlib.reload(data_loader)
importlib.reload(data_processor)
importlib.reload(create_dataset)
importlib.reload(visualizations)
importlib.reload(interactivity)

# Import the individual functions used by the rest of the notebook
from data_loader import load_all_data
from data_processor import create_standardized_dataset, aggregate_data, find_common_constituencies
from create_dataset import select_constituencies
from visualizations import (
    create_visualization_a,
    create_visualization_b,
    create_visualization_c,
    create_visualization_d,
    create_treemap
)
from interactivity import create_constituency_selector, create_year_filter, filter_data_by_selection

# Enable Bokeh in notebook (figures passed to show() render inline)
output_notebook()


In [2]:
# Load the cached dataset if it exists; otherwise build it from the raw data
# and cache it as both CSV and Excel next to each other.
dataset_path = 'data/voter_turnout_dataset.csv'

if os.path.exists(dataset_path):
    print("Loading existing dataset...")
    dataset = pd.read_csv(dataset_path)
else:
    print("Creating new dataset...")
    # Load all data
    data = load_all_data()

    # Keep only constituencies that appear in at least 3 election years
    common = find_common_constituencies(data, min_years=3)

    # Select 10 constituencies
    selected = select_constituencies(common, n=10)

    # Create standardized dataset
    dataset = create_standardized_dataset(data, selected)

    # Make sure the output directory exists before writing; without this the
    # first run fails with an OSError if the directory has not been created yet.
    output_dir = os.path.dirname(dataset_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save dataset; the Excel copy is derived from dataset_path so the two
    # files cannot drift apart if the path is changed.
    dataset.to_csv(dataset_path, index=False)
    dataset.to_excel(os.path.splitext(dataset_path)[0] + '.xlsx', index=False)
    print("Dataset created and saved.")

print(f"\nDataset loaded: {len(dataset)} rows")
print(f"Constituencies: {dataset['PC_NAME'].nunique()}")
print(f"Years: {sorted(dataset['Year'].unique())}")


Loading existing dataset...

Dataset loaded: 30 rows
Constituencies: 10
Years: [np.int64(2014), np.int64(2019), np.int64(2024)]


## 2. Create Aggregated Data


In [3]:
# Year-level aggregate of the dataset; this is the input for visualizations A and B
aggregated = aggregate_data(dataset, group_by=['Year'])

# Show only the turnout columns of the aggregate
turnout_columns = ['Year', 'Overall_Turnout', 'Male_Turnout', 'Female_Turnout']
print("Aggregated data by year:")
print(aggregated[turnout_columns])


Aggregated data by year:
   Year  Overall_Turnout  Male_Turnout  Female_Turnout
0  2014            71.27         73.05           71.93
1  2019            72.68         73.63           73.38
2  2024            72.76         72.06           71.85


## 3. Create Visualizations


In [4]:
# Visualizations A and B are built from the per-year aggregate;
# C, D and the treemap are built from the full constituency-level dataset.

# Visualization A: Change in voter turnout over time
viz_a = create_visualization_a(aggregated)

# Visualization B: Change in voter turnout across genders
viz_b = create_visualization_b(aggregated)

# Visualization C: Distribution across constituencies and time
viz_c = create_visualization_c(dataset)

# Visualization D: Distribution across constituencies and genders
viz_d = create_visualization_d(dataset)

# Treemap: Constituencies sized by total votes
viz_treemap = create_treemap(dataset)

print("All visualizations created.")


All visualizations created.


## 4. Create Interactive Dashboard


In [5]:
# Sorted, de-duplicated constituency names and election years
constituencies = sorted(dataset['PC_NAME'].unique().tolist())
years = sorted(dataset['Year'].unique().tolist())

# Pre-compute all data for filtering
print("Pre-computing data for all visualizations...")

# Slice the dataset once per constituency; every lookup table below reuses these slices
has_total_votes = 'Total_Votes' in dataset.columns
rows_by_constituency = {
    name: dataset[dataset['PC_NAME'] == name] for name in constituencies
}


def _first_float(frame, column):
    """Return the first value of *column* in *frame* as a plain Python float."""
    return float(frame[column].iloc[0])


# Master lookup keyed "<constituency>_<year>", one record per combination found in the dataset
all_aggregated_data = {}
for name, rows in rows_by_constituency.items():
    for election_year in years:
        match = rows[rows['Year'] == election_year]
        if match.empty:
            continue
        all_aggregated_data[f"{name}_{election_year}"] = {
            'Constituency': name,
            'Year': int(election_year),
            'Overall_Turnout': _first_float(match, 'Overall_Turnout'),
            'Male_Turnout': _first_float(match, 'Male_Turnout'),
            'Female_Turnout': _first_float(match, 'Female_Turnout'),
            'Total_Votes': _first_float(match, 'Total_Votes') if has_total_votes else 0.0,
        }

# Viz C lookup: year (as string) -> constituency -> overall turnout, 0.0 where no record exists
viz_c_data = {
    str(election_year): {
        name: (
            all_aggregated_data[f"{name}_{election_year}"]['Overall_Turnout']
            if f"{name}_{election_year}" in all_aggregated_data
            else 0.0
        )
        for name in constituencies
    }
    for election_year in years
}

# Viz D lookup: constituency -> mean male / female turnout over all of its rows
viz_d_data = {
    name: {
        'Male': float(rows['Male_Turnout'].mean()),
        'Female': float(rows['Female_Turnout'].mean()),
    }
    for name, rows in rows_by_constituency.items()
}

# Treemap lookup: constituency -> total votes summed over all of its rows
treemap_data = {
    name: float(rows['Total_Votes'].sum()) if has_total_votes else 0.0
    for name, rows in rows_by_constituency.items()
}

# Grab the data sources behind each figure so the JS callback can rewrite them.
# A, B, D and the treemap use their first renderer; C keeps one source per renderer.
viz_a_source = viz_a.renderers[0].data_source
viz_b_source = viz_b.renderers[0].data_source
viz_c_sources = [glyph_renderer.data_source for glyph_renderer in viz_c.renderers]
viz_d_source = viz_d.renderers[0].data_source
viz_treemap_source = viz_treemap.renderers[0].data_source

from bokeh.models import ColumnDataSource
import json

# Snapshot of the treemap's initial columns (copied into plain lists) so the
# callback can always filter from the unfiltered layout
treemap_fields = (
    'PC_NAME', 'x', 'y', 'dx', 'dy', 'color',
    'Total_Votes', 'votes_formatted', 'ytop',
)
treemap_original_data = {
    field: list(viz_treemap_source.data[field]) for field in treemap_fields
}

# Filter widgets, with every constituency and every year selected initially
constituency_selector, constituency_container = create_constituency_selector(
    constituencies, default_selection=constituencies
)
year_filter, year_filter_container = create_year_filter(
    years, default_selection=list(range(len(years)))
)

print(f"Constituencies: {len(constituencies)}")
print(f"Years: {years}")
print("Data sources and widgets ready")


Pre-computing data for all visualizations...
Constituencies: 10
Years: [2014, 2019, 2024]
Data sources and widgets ready


In [6]:
# Set up JavaScript callbacks for filtering
from bokeh.models import CustomJS
import json

# Create a shared callback function that updates all visualizations
def create_filter_callback():
    """Create a callback that filters all visualizations based on selections.

    Builds a Bokeh ``CustomJS`` from module-level objects defined in earlier
    cells (data sources, pre-computed lookup dicts and the two filter
    widgets). The JavaScript reads the ``active`` indices of
    ``constituency_selector`` and ``year_filter``, maps them to constituency
    names and years, and then replaces ``.data`` on each source:

    * ``source_a``: unweighted mean of ``Overall_Turnout`` per selected year.
    * ``source_b``: unweighted mean of ``Male_Turnout`` / ``Female_Turnout``
      per selected year.
    * ``source_c[i]``: turnout of each selected constituency for ``years[i]``,
      or empty columns when ``years[i]`` is not selected.
    * ``source_d``: per-constituency mean male / female turnout over the
      selected years, falling back to ``viz_d_data`` when a selected
      constituency has no record in the filtered data.
    * ``source_treemap``: the rows of ``treemap_original`` whose ``PC_NAME``
      is selected, with their original layout values.

    Returns:
        The ``CustomJS`` object, ready to be attached with ``js_on_change``.

    NOTE(review): the Viz C loop pairs ``source_c[i]`` with ``years[i]``, so it
    assumes ``viz_c.renderers`` holds one renderer per year in the same order
    as ``years`` — confirm against the visualizations module.
    NOTE(review): ``treemap_data`` is passed in ``args`` but is not referenced
    by the JavaScript code below.
    """
    return CustomJS(args={
        # Data sources rewritten by the callback
        'source_a': viz_a_source,
        'source_b': viz_b_source,
        'source_c': viz_c_sources,
        'source_d': viz_d_source,
        'source_treemap': viz_treemap_source,
        # Pre-computed lookup tables
        'all_data': all_aggregated_data,
        'viz_c_data': viz_c_data,
        'viz_d_data': viz_d_data,
        'treemap_data': treemap_data,
        'treemap_original': treemap_original_data,
        # Value lists that the widgets' `active` indices point into
        'years': years,
        'constituencies': constituencies,
        # Widgets whose current selection is read
        'constituency_selector': constituency_selector,
        'year_filter': year_filter
    }, code="""
        // Get current selections
        const selected_constituencies = constituency_selector.active.map(i => constituencies[i]);
        const selected_years = year_filter.active.map(i => years[i]);

        // Filter data based on selections
        const filtered_data = {};
        for (const key in all_data) {
            const item = all_data[key];
            if (selected_constituencies.includes(item.Constituency) &&
                selected_years.includes(item.Year)) {
                filtered_data[key] = item;
            }
        }

        // ===== Update Viz A (Overall Turnout Over Time) =====
        const by_year = {};
        for (const key in filtered_data) {
            const item = filtered_data[key];
            const year = item.Year.toString();
            if (!by_year[year]) {
                by_year[year] = {Year: year, Turnout: 0, Male: 0, Female: 0, count: 0};
            }
            by_year[year].Turnout += item.Overall_Turnout;
            by_year[year].Male += item.Male_Turnout;
            by_year[year].Female += item.Female_Turnout;
            by_year[year].count += 1;
        }

        const years_list = Object.keys(by_year).sort();
        const turnout_list = years_list.map(y => by_year[y].count > 0 ? by_year[y].Turnout / by_year[y].count : 0);
        source_a.data = {
            'Year': years_list,
            'Turnout': turnout_list,
            'Year_num': years_list.map(y => parseInt(y))
        };
        source_a.change.emit();

        // ===== Update Viz B (Gender Comparison Over Time) =====
        const male_list = years_list.map(y => by_year[y].count > 0 ? by_year[y].Male / by_year[y].count : 0);
        const female_list = years_list.map(y => by_year[y].count > 0 ? by_year[y].Female / by_year[y].count : 0);
        source_b.data = {
            'Year': years_list,
            'Male': male_list,
            'Female': female_list,
            'Year_num': years_list.map(y => parseInt(y))
        };
        source_b.change.emit();

        // ===== Update Viz C (Constituency x Year) =====
        // Update each year's data source
        for (let i = 0; i < source_c.length; i++) {
            const year = years[i].toString();
            if (selected_years.includes(years[i])) {
                const const_list = [];
                const turnout_list = [];
                for (const const_name of selected_constituencies) {
                    const_list.push(const_name);
                    const key = const_name + '_' + years[i];
                    if (key in all_data && all_data[key].Year === years[i]) {
                        turnout_list.push(all_data[key].Overall_Turnout);
                    } else if (year in viz_c_data && const_name in viz_c_data[year]) {
                        turnout_list.push(viz_c_data[year][const_name]);
                    } else {
                        turnout_list.push(0);
                    }
                }
                source_c[i].data = {
                    'Constituency': const_list,
                    'Turnout': turnout_list
                };
                source_c[i].change.emit();
            } else {
                // Hide data for unselected years
                source_c[i].data = {
                    'Constituency': [],
                    'Turnout': []
                };
                source_c[i].change.emit();
            }
        }

        // ===== Update Viz D (Constituency x Gender) =====
        // Calculate averages for selected constituencies and years
        const const_male = {};
        const const_female = {};
        const const_counts = {};

        for (const key in filtered_data) {
            const item = filtered_data[key];
            const const_name = item.Constituency;
            if (!const_male[const_name]) {
                const_male[const_name] = 0;
                const_female[const_name] = 0;
                const_counts[const_name] = 0;
            }
            const_male[const_name] += item.Male_Turnout;
            const_female[const_name] += item.Female_Turnout;
            const_counts[const_name] += 1;
        }

        const const_list = [];
        const male_list_d = [];
        const female_list_d = [];
        for (const const_name of selected_constituencies) {
            const_list.push(const_name);
            if (const_counts[const_name] > 0) {
                male_list_d.push(const_male[const_name] / const_counts[const_name]);
                female_list_d.push(const_female[const_name] / const_counts[const_name]);
            } else {
                // Fallback to original data if no filtered data
                male_list_d.push(viz_d_data[const_name] ? viz_d_data[const_name].Male : 0);
                female_list_d.push(viz_d_data[const_name] ? viz_d_data[const_name].Female : 0);
            }
        }

        source_d.data = {
            'Constituency': const_list,
            'Male': male_list_d,
            'Female': female_list_d
        };
        source_d.change.emit();

                // ===== Update Treemap =====
        // Filter treemap to show only selected constituencies
        // Keep original layout structure but filter by constituency
        const treemap_const_list = [];
        const treemap_votes_list = [];
        const treemap_x_list = [];
        const treemap_y_list = [];
        const treemap_dx_list = [];
        const treemap_dy_list = [];
        const treemap_color_list = [];
        const treemap_formatted_list = [];
        const treemap_ytop_list = [];

        // Filter original treemap data for selected constituencies
        for (let i = 0; i < treemap_original.PC_NAME.length; i++) {
            const const_name = treemap_original.PC_NAME[i];
            if (selected_constituencies.includes(const_name)) {
                treemap_const_list.push(const_name);
                treemap_votes_list.push(treemap_original.Total_Votes[i]);
                treemap_x_list.push(treemap_original.x[i]);
                treemap_y_list.push(treemap_original.y[i]);
                treemap_dx_list.push(treemap_original.dx[i]);
                treemap_dy_list.push(treemap_original.dy[i]);
                treemap_color_list.push(treemap_original.color[i]);
                treemap_formatted_list.push(treemap_original.votes_formatted[i]);
                treemap_ytop_list.push(treemap_original.ytop[i]);
            }
        }

        source_treemap.data = {
            'PC_NAME': treemap_const_list,
            'Total_Votes': treemap_votes_list,
            'x': treemap_x_list,
            'y': treemap_y_list,
            'dx': treemap_dx_list,
            'dy': treemap_dy_list,
            'color': treemap_color_list,
            'votes_formatted': treemap_formatted_list,
            'ytop': treemap_ytop_list
        };
        source_treemap.change.emit();
    """)

# One callback instance per widget; both run the same filtering code
constituency_callback = create_filter_callback()
year_callback = create_filter_callback()

# Re-run the filter whenever either widget's active selection changes
for widget, callback in (
    (constituency_selector, constituency_callback),
    (year_filter, year_callback),
):
    widget.js_on_change('active', callback)

# Sidebar holding both filter widgets
filters_panel = column(
    constituency_container,
    year_filter_container,
    width=250,
    sizing_mode='scale_width',
)

# Dashboard heading
title_styles = {
    'font_size': '22pt',
    'font_weight': 'bold',
}
dashboard_title = Paragraph(
    text="Voter Turnout Dashboard - Interactive analysis of voter turnout across 10 constituencies (2014, 2019, 2024)",
    styles=title_styles,
    width=1200,
)

# Grid structure (every row has the same two columns):
# Row 1: Filters | Time series charts A and B side by side
# Row 2: (empty) | Distribution chart C
# Row 3: (empty) | Distribution chart D
# Row 4: (empty) | Treemap
time_series_row = row(viz_a, viz_b, sizing_mode='scale_width')
grid_rows = [[filters_panel, time_series_row]]
grid_rows.extend([None, chart] for chart in (viz_c, viz_d, viz_treemap))
content_grid = grid(grid_rows, sizing_mode='scale_width')

# Stack the title above the grid so the title spans the full width
dashboard_layout = column(
    dashboard_title,
    content_grid,
    sizing_mode='scale_width',
    width=1200,
)

show(dashboard_layout)


## 5. Dataset Summary


In [7]:
# List each distinct (State, PC_NAME) pair once. The grouped frame is computed
# a single time and both columns are read from it (groupby sorts by its keys,
# so the listing is ordered by state, then constituency name).
constituency_pairs = dataset.groupby(['State', 'PC_NAME']).size().reset_index()

print("Selected Constituencies:")
for idx, (state, name) in enumerate(
    zip(constituency_pairs['State'], constituency_pairs['PC_NAME']), 1
):
    print(f"{idx}. {name}, {state}")

# Per-year totals of electors and votes, plus the unweighted mean of the
# per-constituency turnout values
print("\nDataset Summary by Year:")
summary = dataset.groupby('Year').agg({
    'Total_Electors': 'sum',
    'Total_Votes': 'sum',
    'Overall_Turnout': 'mean'
}).round(2)
print(summary)


Selected Constituencies:
1. Andaman & Nicobar Islands, Andaman & Nicobar Islands
2. Araku, Andhra Pradesh
3. Eluru, Andhra Pradesh
4. Guntur, Andhra Pradesh
5. Hindupur, Andhra Pradesh
6. Kadapa, Andhra Pradesh
7. Araria, Bihar
8. Akola, Maharashtra
9. Akbarpur, Uttar Pradesh
10. Asansol, West Bengal

Dataset Summary by Year:
      Total_Electors  Total_Votes  Overall_Turnout
Year                                              
2014      14036710.0   10003262.0            71.77
2019      15272100.0   11099654.0            72.53
2024      16166417.0   11763332.0            72.62
