# Exam Grade Distribution Dashboard

Interactive visualization of Portuguese national exam grade distributions.

**Features:**
- Compare by: Gender, School Type, NUTS2 Region, or Year
- Toggle between absolute counts and normalized percentages
- Filter by Gender, School Type, NUTS2, COVID period, and Year
- A status line above the plot lists the values currently excluded by the filters

In [1]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, output_file, save, show
from bokeh.models import (
    ColumnDataSource, HoverTool, Select, CheckboxButtonGroup,
    CustomJS, Div, Toggle, Range1d, NumeralTickFormatter, InlineStyleSheet
)
from bokeh.layouts import column, row
from bokeh.io import output_notebook
from bokeh.palettes import Category10, Category20

output_notebook()

## Load and Prepare Data

In [2]:
# Read the grade-distribution table that backs this dashboard
df = pd.read_csv('viz1_grade_distribution.csv')


def _sorted_unique(column):
    """Return the distinct non-null values of ``df[column]`` in ascending order."""
    return sorted(df[column].dropna().unique().tolist())


# Distinct values of every dimension the dashboard compares or filters by
genders = _sorted_unique('gender')
school_types = _sorted_unique('school_type')
nuts2_regions = _sorted_unique('nuts2')
years = _sorted_unique('year')
# Fixed list; its order is the button order of the COVID filter
covid_periods = ['Before', 'After']

print(f"Loaded {len(df):,} rows")
print(f"Years: {years[0]} - {years[-1]}")
print(f"NUTS2 regions: {len(nuts2_regions)}")

Loaded 486,295 rows
Years: 2008 - 2024
NUTS2 regions: 8


In [3]:
# Collapse the raw table to one row per combination of grade and filter
# dimensions; the JavaScript callback filters and re-aggregates these rows.
group_keys = ['grade_value', 'year', 'gender', 'school_type', 'nuts2', 'covid_period']
df_agg = df.groupby(group_keys)['num_exams'].sum().reset_index()

# Single source holding every aggregated row (handed to the callback as master_source)
master_source = ColumnDataSource(df_agg)
print(f"Aggregated to {len(df_agg):,} rows")

Aggregated to 81,246 rows


## Define Colors and Create Data Sources

In [4]:
# Hand-picked colors for the two-valued dimensions
colors = {
    # Gender
    'F': '#e74c3c',
    'M': '#3498db',
    # School type
    'PRI': '#9b59b6',
    'PUB': '#2ecc71',
}

# Regions cycle through Category10; years (keyed by their string form)
# cycle through Category20.
colors.update({
    region: Category10[10][idx % 10] for idx, region in enumerate(nuts2_regions)
})
colors.update({
    str(year): Category20[20][idx % 20] for idx, year in enumerate(years)
})

In [5]:
def create_initial_series_data(df, series_col, series_values):
    """Build one ColumnDataSource per series value holding its grade totals.

    For every entry of ``series_values`` the rows of ``df`` whose
    ``series_col`` equals that entry are summed over ``num_exams`` per
    ``grade_value`` and ordered by ascending grade.

    Returns a dict keyed by ``str(value)``; each source carries the columns
    ``grade_value`` and ``num_exams`` as plain lists.
    """
    sources = {}
    for value in series_values:
        matching_rows = df.loc[df[series_col] == value]
        # Series indexed by grade; sort_index keeps the grades ascending
        totals = matching_rows.groupby('grade_value')['num_exams'].sum().sort_index()
        sources[str(value)] = ColumnDataSource(data=dict(
            grade_value=totals.index.tolist(),
            num_exams=totals.tolist(),
        ))
    return sources

# Unfiltered per-dimension sources, each dict keyed by the stringified series value
_sources_gender = create_initial_series_data(df_agg, 'gender', genders)
_sources_school = create_initial_series_data(df_agg, 'school_type', school_types)
_sources_nuts2 = create_initial_series_data(df_agg, 'nuts2', nuts2_regions)
_sources_year = create_initial_series_data(df_agg, 'year', years)

# Merge into one flat dict keyed '<dimension>_<value>' so it can be handed to
# the JavaScript callback as a single argument.
sources_flat = {
    **{f'gender_{g}': _sources_gender[g] for g in genders},
    **{f'school_type_{s}': _sources_school[s] for s in school_types},
    **{f'nuts2_{n}': _sources_nuts2[n] for n in nuts2_regions},
    **{f'year_{y}': _sources_year[str(y)] for y in years},
}

## Create Figure and Renderers

In [6]:
# Initial y-range: derived from the series drawn by default (gender) with the
# same 5% headroom the JavaScript callback applies on every update, so the
# first view is scaled like every later one. Falls back to the previous fixed
# value when there is no data to measure.
FALLBACK_Y_END = 100000
initial_peak = max(
    (max(sources_flat[f'gender_{g}'].data['num_exams'], default=0) for g in genders),
    default=0,
)
initial_y_end = initial_peak * 1.05 if initial_peak > 0 else FALLBACK_Y_END

# Create figure
p = figure(
    width=900,
    height=550,
    title="Exam Grade Distribution",
    x_axis_label="Exam Grade",
    y_axis_label="Number of Exams",
    toolbar_location="above",
    tools="pan,wheel_zoom,box_zoom,reset,save",
    y_range=Range1d(start=0, end=initial_y_end)
)

# Styling
p.title.text_font_size = '16pt'
p.xaxis.major_label_text_font_size = '12pt'
p.yaxis.major_label_text_font_size = '12pt'
p.xaxis.axis_label_text_font_size = '13pt'
p.yaxis.axis_label_text_font_size = '13pt'
# Fixed x-axis span for the grade axis
p.x_range.start = 0
p.x_range.end = 20

# Y-axis formatters: the axis starts with thousands-separated counts; the two
# named formatters are the ones the callback swaps between when the metric
# toggle changes.
p.yaxis[0].formatter = NumeralTickFormatter(format='0,0')
formatter_absolute = NumeralTickFormatter(format='0,0')
formatter_normalized = NumeralTickFormatter(format='0.00')

In [7]:
# Create renderers (flat dict keyed '<dimension>_<value>' for JS serialization)
renderers_flat = {}

# One entry per comparable dimension:
#   (key prefix, legend prefix, series values, color lookup key, drawn initially?)
# Lines are created in this order: gender, school type, region, year.
# Only the gender lines start visible; the callback switches the others on.
# Years are looked up in `colors` by their string form, the rest by raw value.
_series_specs = [
    ('gender', 'Gender', genders, lambda value: value, True),
    ('school_type', 'School', school_types, lambda value: value, False),
    ('nuts2', 'Region', nuts2_regions, lambda value: value, False),
    ('year', 'Year', years, str, False),
]

for key_prefix, legend_prefix, series_values, color_key, initially_visible in _series_specs:
    for value in series_values:
        key = f'{key_prefix}_{value}'
        renderers_flat[key] = p.line(
            'grade_value', 'num_exams', source=sources_flat[key],
            line_width=2.5, color=colors[color_key(value)], alpha=0.9,
            legend_label=f"{legend_prefix}: {value}", visible=initially_visible
        )

# Legend configuration
p.legend.location = "top_right"
p.legend.click_policy = "hide"
p.legend.label_text_font_size = '10pt'

# Initial legend visibility - only Gender
for item in p.legend[0].items:
    item.visible = item.label.value.startswith('Gender: ')

# Hover tool (the callback rewrites its tooltips when the metric changes)
hover = HoverTool(tooltips=[('Grade', '@grade_value{0.1f}'), ('Count', '@num_exams{0,0}')])
p.add_tools(hover)

## Create Controls

In [8]:
# Dropdown choosing which dimension is drawn as separate lines
series_options = [
    ("gender", "Gender"),
    ("school_type", "School Type"),
    ("nuts2", "NUTS2 Region"),
    ("year", "Year"),
]
series_select = Select(title="", value="gender", options=series_options, width=150)

# Metric switch: inactive = absolute counts, active = normalized percentages
# (the callback reads `active` and rewrites the label and button type)
metric_toggle = Toggle(
    label="Absolute Counts",
    button_type="default",
    active=False,
    width=150,
)

In [9]:
# Inline styles for a filter row: dimmed rows are greyed out and ignore clicks
DIMMED_STYLE = {'opacity': '0.3', 'pointer-events': 'none'}
ACTIVE_STYLE = {'opacity': '1', 'pointer-events': 'auto'}

# Button CSS: gray unselected, blue selected
BUTTON_CSS = """
:host(.bk-CheckboxButtonGroup) .bk-btn {
    background-color: #e6e6e6 !important;
    border-color: #ccc !important;
    color: #333 !important;
}
:host(.bk-CheckboxButtonGroup) .bk-btn.bk-active {
    background-color: #3071a9 !important;
    border-color: #285e8e !important;
    color: white !important;
}
:host(.bk-CheckboxButtonGroup) .bk-btn:hover:not(.bk-active) {
    background-color: #d4d4d4 !important;
}
:host(.bk-CheckboxButtonGroup) .bk-btn.bk-active:hover {
    background-color: #265a88 !important;
}
"""
button_stylesheet = InlineStyleSheet(css=BUTTON_CSS)


def _all_selected_buttons(labels, **widget_kwargs):
    """Checkbox button group over ``labels`` with every button initially selected."""
    return CheckboxButtonGroup(
        labels=labels,
        active=list(range(len(labels))),
        stylesheets=[button_stylesheet],
        **widget_kwargs,
    )


# Filter controls. The gender row starts dimmed because gender is the initial
# "compare by" dimension; the COVID row gets no inline style.
gender_filter = _all_selected_buttons(genders, styles=DIMMED_STYLE)
school_filter = _all_selected_buttons(school_types, styles=ACTIVE_STYLE)
nuts2_filter = _all_selected_buttons(nuts2_regions, styles=ACTIVE_STYLE)
covid_filter = _all_selected_buttons(covid_periods)
year_filter = _all_selected_buttons([str(y) for y in years], styles=ACTIVE_STYLE)

# Year series picker (hidden until Year is the compared dimension); starts
# with 2019, 2020 and 2024 selected when those years exist in the data.
default_year_series = [idx for idx, yr in enumerate(years) if yr in (2019, 2020, 2024)]
year_series_filter = CheckboxButtonGroup(
    labels=[str(y) for y in years],
    active=default_year_series,
    stylesheets=[button_stylesheet],
    visible=False,
)

In [10]:
# Label styles: vertically centered text; the dimmed variant adds the same
# opacity / pointer-events pair used for dimmed filter rows.
LABEL_STYLE = {'display': 'flex', 'align-items': 'center', 'height': '30px'}
LABEL_STYLE_DIMMED = {**LABEL_STYLE, 'opacity': '0.3', 'pointer-events': 'none'}


def _control_label(html, styles=LABEL_STYLE):
    """Fixed-width (80px) caption placed in front of a control."""
    return Div(text=html, width=80, styles=styles)


series_label = _control_label("<b>Compare by:</b>")
yaxis_label = _control_label("<b>Y-axis:</b>")
# Gender starts dimmed, matching its filter row
gender_label = _control_label("<b>Gender:</b>", styles=LABEL_STYLE_DIMMED)
school_label = _control_label("<b>School:</b>")
nuts2_label = _control_label("<b>NUTS2:</b>")
covid_label = _control_label("<b>COVID:</b>")
year_label = _control_label("<b>Year:</b>")

# Status line; starts empty and is filled in by the callback with the
# values currently excluded by the filters
status_styles = {
    'color': '#e67e22',
    'font-size': '12px',
    'font-style': 'italic',
    'margin': '5px 0',
}
filter_status = Div(text="", width=900, styles=status_styles)

## JavaScript Callback

In [11]:
# JavaScript body of the CustomJS callback, run in the browser whenever a
# control changes. In order, it:
#   1. reads the compared dimension, the metric toggle and every filter;
#   2. filters the rows of master_source (the filter of the compared dimension
#      is ignored; in Year mode the year series picker is applied instead);
#   3. sums num_exams per series value and grade, optionally converting each
#      series to percentages of its own total;
#   4. hides every line, then refreshes and shows the lines of the compared
#      dimension that have data, and rescales the y-range to max * 1.05;
#   5. shows legend entries only for the lines drawn in step 4, so clicking
#      the legend can never reveal a line whose data was not refreshed;
#   6. updates axis label/formatter, toggle label, tooltips, row dimming and
#      the "Excluding: ..." status text.
js_code = """
const series_type = series_select.value;
const show_normalized = metric_toggle.active;

// Style constants
const DIMMED = {'opacity': '0.3', 'pointer-events': 'none'};
const ACTIVE = {'opacity': '1', 'pointer-events': 'auto'};
const LABEL_DIMMED = {'display': 'flex', 'align-items': 'center', 'height': '30px', 'opacity': '0.3', 'pointer-events': 'none'};
const LABEL_ACTIVE = {'display': 'flex', 'align-items': 'center', 'height': '30px', 'opacity': '1', 'pointer-events': 'auto'};

const series_values_map = {
    'gender': ['F', 'M'],
    'school_type': ['PRI', 'PUB'],
    'nuts2': nuts2_list,
    'year': years_list.map(String)
};

const genders_all = ['F', 'M'];
const schools_all = ['PRI', 'PUB'];
const nuts2_all = nuts2_list;
const years_all = years_list;
const covid_all = ['Before', 'After'];

const genders_selected = gender_filter.active.map(i => genders_all[i]);
const schools_selected = school_filter.active.map(i => schools_all[i]);
const nuts2_selected = nuts2_filter.active.map(i => nuts2_all[i]);
const covid_selected = covid_filter.active.map(i => covid_all[i]);
const years_selected = year_filter.active.map(i => years_all[i]);
const years_series_selected = year_series_filter.active.map(i => years_all[i]);

const data = master_source.data;
const n = data['grade_value'].length;

function filterData() {
    const filtered = [];
    for (let i = 0; i < n; i++) {
        if (series_type !== 'gender' && !genders_selected.includes(data['gender'][i])) continue;
        if (series_type !== 'school_type' && !schools_selected.includes(data['school_type'][i])) continue;
        if (series_type !== 'nuts2' && !nuts2_selected.includes(data['nuts2'][i])) continue;
        if (series_type !== 'year' && !years_selected.includes(data['year'][i])) continue;
        if (!covid_selected.includes(data['covid_period'][i])) continue;
        if (series_type === 'year' && !years_series_selected.includes(data['year'][i])) continue;
        filtered.push(i);
    }
    return filtered;
}

function aggregateBySeriesAndGrade(indices, series_col) {
    const agg = {};
    for (const i of indices) {
        const series_val = String(data[series_col][i]);
        const grade = data['grade_value'][i];
        const count = data['num_exams'][i];
        if (!agg[series_val]) agg[series_val] = {};
        if (!agg[series_val][grade]) agg[series_val][grade] = 0;
        agg[series_val][grade] += count;
    }
    return agg;
}

function normalizeAggregation(agg) {
    const normalized = {};
    for (const series_val in agg) {
        const total = Object.values(agg[series_val]).reduce((a, b) => a + b, 0);
        normalized[series_val] = {};
        for (const grade in agg[series_val]) {
            normalized[series_val][grade] = total > 0 ? (agg[series_val][grade] / total) * 100 : 0;
        }
    }
    return normalized;
}

const filtered_indices = filterData();
const aggregated = aggregateBySeriesAndGrade(filtered_indices, series_type);
const normalized = normalizeAggregation(aggregated);
const dataToUse = show_normalized ? normalized : aggregated;

// Hide all renderers
for (const stype in series_values_map) {
    for (const sval of series_values_map[stype]) {
        const key = stype + '_' + sval;
        if (all_renderers[key]) all_renderers[key].visible = false;
    }
}

// Show and update current series, remembering which ones were actually drawn
let maxValue = 0;
const shown_values = new Set();
for (const sval of series_values_map[series_type]) {
    const key = series_type + '_' + sval;
    const renderer = all_renderers[key];
    const source = all_sources[key];
    if (dataToUse[sval] && Object.keys(dataToUse[sval]).length > 0) {
        const grades = Object.keys(dataToUse[sval]).map(Number).sort((a, b) => a - b);
        const values = grades.map(g => dataToUse[sval][g] || 0);
        source.data = {'grade_value': grades, 'num_exams': values};
        source.change.emit();
        renderer.visible = true;
        shown_values.add(String(sval));
        const seriesMax = Math.max(...values);
        if (seriesMax > maxValue) maxValue = seriesMax;
    }
}

if (maxValue > 0) {
    plot.y_range.start = 0;
    plot.y_range.end = maxValue * 1.05;
}

// Update legend: only entries of the current dimension whose line was drawn
// above. Entries of hidden lines would otherwise stay clickable and reveal a
// line whose source was not refreshed for the current filters/metric.
const label_prefixes = {'gender': 'Gender: ', 'school_type': 'School: ', 'nuts2': 'Region: ', 'year': 'Year: '};
const label_prefix = label_prefixes[series_type];
for (const item of the_legend.items) {
    const label = item.label.value;
    item.visible = label.startsWith(label_prefix) && shown_values.has(label.slice(label_prefix.length));
}

// Update UI
y_axis.axis_label = show_normalized ? 'Percentage of Exams (%)' : 'Number of Exams';
y_axis.formatter = show_normalized ? formatter_normalized : formatter_absolute;
metric_toggle.label = show_normalized ? 'Normalized %' : 'Absolute Counts';
metric_toggle.button_type = show_normalized ? 'primary' : 'default';
hover_tool.tooltips = [['Grade', '@grade_value{0.1f}'], show_normalized ? ['Percentage', '@num_exams{0.2f}%'] : ['Count', '@num_exams{0,0}']];

// Update filter opacity
gender_filter.styles = (series_type === 'gender') ? DIMMED : ACTIVE;
gender_label.styles = (series_type === 'gender') ? LABEL_DIMMED : LABEL_ACTIVE;
school_filter.styles = (series_type === 'school_type') ? DIMMED : ACTIVE;
school_label.styles = (series_type === 'school_type') ? LABEL_DIMMED : LABEL_ACTIVE;
nuts2_filter.styles = (series_type === 'nuts2') ? DIMMED : ACTIVE;
nuts2_label.styles = (series_type === 'nuts2') ? LABEL_DIMMED : LABEL_ACTIVE;
year_filter.styles = (series_type === 'year') ? DIMMED : ACTIVE;
year_label.styles = (series_type === 'year') ? LABEL_DIMMED : LABEL_ACTIVE;

year_series_filter.visible = (series_type === 'year');
year_filter.visible = (series_type !== 'year');

// Build filter status
const excludedFilters = [];
if (series_type !== 'gender' && genders_selected.length < genders_all.length) {
    excludedFilters.push('Gender: ' + genders_all.filter(g => !genders_selected.includes(g)).join(', '));
}
if (series_type !== 'school_type' && schools_selected.length < schools_all.length) {
    excludedFilters.push('School: ' + schools_all.filter(s => !schools_selected.includes(s)).join(', '));
}
if (series_type !== 'nuts2' && nuts2_selected.length < nuts2_all.length) {
    const excluded = nuts2_all.filter(n => !nuts2_selected.includes(n));
    excludedFilters.push('NUTS2: ' + (excluded.length <= 3 ? excluded.join(', ') : excluded.length + ' regions'));
}
if (covid_selected.length < covid_all.length) {
    excludedFilters.push('COVID: ' + covid_all.filter(c => !covid_selected.includes(c)).join(', '));
}
if (series_type !== 'year' && years_selected.length < years_all.length) {
    const excluded = years_all.filter(y => !years_selected.includes(y));
    excludedFilters.push('Year: ' + (excluded.length <= 3 ? excluded.join(', ') : excluded.length + ' years'));
}
filter_status.text = excludedFilters.length > 0 ? 'Excluding: ' + excludedFilters.join(' | ') : '';
"""

In [12]:
# Everything the JavaScript code refers to by name: plot parts, formatters,
# the master data, every control and label, the flat renderer/source dicts,
# and the region/year value lists.
callback_args = {
    'plot': p,
    'the_legend': p.legend[0],
    'y_axis': p.yaxis[0],
    'hover_tool': hover,
    'formatter_absolute': formatter_absolute,
    'formatter_normalized': formatter_normalized,
    'master_source': master_source,
    'series_select': series_select,
    'metric_toggle': metric_toggle,
    'gender_filter': gender_filter,
    'school_filter': school_filter,
    'nuts2_filter': nuts2_filter,
    'covid_filter': covid_filter,
    'year_filter': year_filter,
    'year_series_filter': year_series_filter,
    'gender_label': gender_label,
    'school_label': school_label,
    'nuts2_label': nuts2_label,
    'year_label': year_label,
    'filter_status': filter_status,
    'all_renderers': renderers_flat,
    'all_sources': sources_flat,
    'nuts2_list': nuts2_regions,
    'years_list': years,
}
callback = CustomJS(args=callback_args, code=js_code)

# Re-run the callback whenever any control changes. Select widgets are
# watched through 'value'; widgets exposing 'active' through 'active'.
watched_controls = [
    series_select, metric_toggle,
    gender_filter, school_filter, nuts2_filter, covid_filter,
    year_filter, year_series_filter,
]
for control in watched_controls:
    if isinstance(control, Select) or not hasattr(control, 'active'):
        watched_property = 'value'
    else:
        watched_property = 'active'
    control.js_on_change(watched_property, callback)

## Assemble Layout

In [13]:
# Top row: series selector and metric toggle, separated from the filters by a rule
control_widgets = [series_label, series_select, Div(width=30), yaxis_label, metric_toggle]
row_controls = row(
    *control_widgets,
    sizing_mode="stretch_width",
    styles={'border-bottom': '1px solid #ccc', 'padding-bottom': '8px', 'margin-bottom': '8px'}
)

# Short filters share one row; empty Divs act as horizontal spacers
small_filter_widgets = [
    gender_label, gender_filter, Div(width=20),
    school_label, school_filter, Div(width=20),
    covid_label, covid_filter,
]
row_filters_small = row(*small_filter_widgets)

# Region and year filters each get their own row; the year row holds both
# year widgets, of which the callback shows only one at a time
row_nuts2 = row(nuts2_label, nuts2_filter)
row_year = row(year_label, year_filter, year_series_filter)
row_status = row(filter_status)
row_plot = row(p)

# Main layout: controls, filters, status line, then the plot
dashboard_rows = [row_controls, row_filters_small, row_nuts2, row_year, row_status, row_plot]
layout = column(
    *dashboard_rows,
    sizing_mode="fixed",
    styles={'border': '1px solid #ccc', 'padding': '10px'}
)

show(layout)



## Export to HTML

In [14]:
# Write a standalone HTML copy of the dashboard; the path is resolved
# relative to the working directory
export_path = "./../site/viz1.html"
output_file(export_path, title="Exam Grade Distribution")
save(layout)
print("Saved to viz1.html")



Saved to viz1.html
