In [333]:
# Cell 1: Setup and Data Loading

import pandas as pd
import altair as alt
import numpy as np
from vega_datasets import data

# Load the provided dataset
df = pd.read_csv('country_economics_data.csv')

# Keep only the columns the "Inequality of Nations" dashboard needs.
# 'ID' keeps its original name (nothing is renamed here); the map cell later joins it
# against the geographic features' `id`.
# NOTE(review): presumably 'ID' holds the ISO-numeric country code — confirm against the CSV.
required_columns = ['Name', 'ID', 'Region', 'GDP', 'Population']
df = df[required_columns].copy()

# Inspect the data to confirm units
# GDP is in Billions ($), Population is in Millions
print("Dataset Loaded Successfully.")
print(f"Total Countries: {len(df)}")
print("-" * 30)
print(df.head())
print("-" * 30)
# DataFrame.info() prints its own report and returns None, so wrapping it in
# print() only appended a stray "None" line to the output. Call it directly.
df.info()

Dataset Loaded Successfully.
Total Countries: 173
------------------------------
          Name  ID    Region  GDP  Population
0  Afghanistan   4      Asia   17       35.70
1      Albania   8    Europe   27        2.36
2      Algeria  12    Africa  264       46.81
3       Angola  24    Africa   80       35.12
4    Argentina  32  Americas  633       47.07
------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173 entries, 0 to 172
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Name        173 non-null    object 
 1   ID          173 non-null    int64  
 2   Region      173 non-null    object 
 3   GDP         173 non-null    int64  
 4   Population  173 non-null    float64
dtypes: float64(1), int64(2), object(2)
memory usage: 6.9+ KB
None


In [334]:
# Cell 2: Creating the "Representation" Metrics

# 1. World totals: the denominators for every share computed below
total_gdp = df['GDP'].sum()
total_pop = df['Population'].sum()

# 2. Each country's fraction of the world total, stored as a percentage
gdp_fraction = df['GDP'] / total_gdp
pop_fraction = df['Population'] / total_pop
df['% Global GDP'] = gdp_fraction * 100
df['% Global Pop'] = pop_fraction * 100

# 3. Representation Ratio = GDP share / population share.
#    1.0 means a country's GDP share equals its population share.
df['Representation Ratio'] = df['% Global GDP'] / df['% Global Pop']

# 4. The Classification: "Proportionality Status"
def classify_representation(ratio):
    """Translate a representation ratio into one of seven status labels.

    Args:
        ratio: GDP share divided by population share (1.0 = parity).

    Returns:
        One of 'Hyper-Represented', 'Over-Represented', 'Slight Surplus',
        'Proportional', 'Slight Deficit', 'Under-Represented' or 'Marginalized'.
        A value for which every comparison is false (e.g. NaN) also ends up
        as 'Marginalized'.
    """
    # The top band is strict: exactly 4.0 belongs to the band below it.
    if ratio > 4.0:
        return 'Hyper-Represented'

    # Remaining bands, highest first; every lower bound is inclusive.
    inclusive_floors = (
        (2.0, 'Over-Represented'),    # 2x - 4x fair share
        (1.25, 'Slight Surplus'),     # just above parity
        (0.8, 'Proportional'),        # roughly 1.0
        (0.5, 'Slight Deficit'),      # just below parity
        (0.25, 'Under-Represented'),  # 1/4 to 1/2 of fair share
    )
    for floor, label in inclusive_floors:
        if ratio >= floor:
            return label

    # Below 1/4 of fair share
    return 'Marginalized'

# Attach the status label that corresponds to each country's ratio
status_labels = df['Representation Ratio'].apply(classify_representation)
df['Representation Status'] = status_labels

# 5. Verify: show the five largest ratios with their assigned status
print("Top 5 'Hyper-Represented' Nations:")
verify_columns = ['Name', 'Representation Ratio', 'Representation Status']
ranked_by_ratio = df[verify_columns].sort_values(by='Representation Ratio', ascending=False)
print(ranked_by_ratio.head(5))

Top 5 'Hyper-Represented' Nations:
              Name  Representation Ratio Representation Status
102         Monaco             17.870368     Hyper-Represented
88   Liechtenstein             14.296294     Hyper-Represented
90      Luxembourg              9.922055     Hyper-Represented
70         Ireland              7.581766     Hyper-Represented
150    Switzerland              7.400899     Hyper-Represented


### Cell 3: The "Power Ratio" Choropleth Map

In [335]:
# Cell 3: The Map Setup (Interactive Legend)

# Base geography for the choropleth.
# `world_source` was used below without being defined anywhere in this notebook, so a
# fresh kernel failed with NameError; define it here from the already-imported
# vega_datasets handle (`data`).
# NOTE(review): assumes the 'countries' features carry the same numeric codes in `id`
# as df['ID'] — confirm the join on a few known countries.
world_source = alt.topo_feature(data.world_110m.url, 'countries')

# Define the precise order for the Legend (lowest ratio band first)
color_domain = [
    'Marginalized', 'Under-Represented', 'Slight Deficit',
    'Proportional',
    'Slight Surplus', 'Over-Represented', 'Hyper-Represented'
]

# High-Contrast Purple Scale: one colour per entry of color_domain, light to dark
color_range = [
    '#c4bedc', '#a99fcb', '#8f83ba', '#7668a9', '#5d4e98', '#463587', '#301d76'
]

# Selection tied to the legend:
# bind='legend' makes the legend symbols clickable buttons
select_status = alt.selection_point(fields=['Representation Status'], bind='legend')

# Create Map Layer
map_chart = alt.Chart(world_source).mark_geoshape(
    stroke='black', strokeWidth=0.2
).transform_lookup(
    # Join each geographic feature (key: `id`) to its row in df (key: 'ID')
    lookup='id',
    from_=alt.LookupData(df, 'ID', ['Name', 'Representation Ratio', 'Representation Status', 'Region']),
    # Features with no matching row receive this value for the looked-up fields
    default='Unknown'
).encode(
    color=alt.Color(
        'Representation Status:N',
        scale=alt.Scale(domain=color_domain, range=color_range),
        # orient='none' lets legendX/legendY place the legend at fixed pixel offsets
        legend=alt.Legend(
            title="Representation Status",
            orient='none',
            legendX=40, legendY=250,
            titleFontSize=14, titleFontWeight='bold',
            labelFontSize=12, symbolSize=200,
            direction='vertical', padding=10,
            fillColor='white', strokeColor='grey', cornerRadius=5
        )
    ),
    tooltip=[
        alt.Tooltip('Name:N', title='Country'),
        alt.Tooltip('Representation Ratio:Q', format='.2f', title='Ratio (1.0 = Fair)'),
        alt.Tooltip('Representation Status:N', title='Status')
    ]
).project(
    type='equalEarth'
).properties(
    width=900, height=450, title='The Global Split'
).add_params(
    select_status # Activate the legend selection
)

### Cell 4: The "Mechanics" Scatter Plot

In [336]:
# Cell 4: The "Leverage Curve" Scatter Plot (Fixed)

# 1. Feature Engineering
# Population share minus GDP share (percentage points -> fraction), multiplied by
# total_gdp / 1000. Positive values mean the population share exceeds the GDP share.
# NOTE(review): the "$T" label relies on GDP being recorded in billions — confirm.
share_gap_fraction = (df['% Global Pop'] - df['% Global GDP']) / 100
world_gdp_scaled = total_gdp / 1000
df['Fairness Gap ($T)'] = share_gap_fraction * world_gdp_scaled

# 2. Define Selections
# Both react to a click and are keyed on the country name; they differ only in how
# an empty selection is treated (empty=True vs empty=False).
select_highlight = alt.selection_point(fields=['Name'], on='click', empty=True)
select_calculator = alt.selection_point(fields=['Name'], on='click', empty=False)

# 3. Define the Scale (shared by both axes of the scatter plot)
grid_values = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
domain_range = [0.0001, 100]
scale_type = 'log'

# 4. EXPLAINABILITY: "Zone Labels" (one text layer per label, since each needs its
#    own horizontal alignment)

def _zone_label(label_data, align):
    """Build a faint text layer that annotates one side of the parity line.

    Args:
        label_data: DataFrame with columns 'x', 'y' (position in axis units)
            and 'text' (the label; '\\n' starts a new line).
        align: horizontal text alignment relative to the anchor point.

    Returns:
        An Altair text-mark chart using the scatter plot's scale_type and
        domain_range, so every layer agrees on a single pair of axes.
    """
    return alt.Chart(label_data).mark_text(
        align=align,
        fontSize=14, fontWeight='bold', opacity=0.35, color='purple', lineBreak='\n'
    ).encode(
        # Read the shared constants instead of repeating 'log' so a change to
        # scale_type cannot leave the labels on a different scale than the points.
        x=alt.X('x', scale=alt.Scale(type=scale_type, domain=domain_range)),
        y=alt.Y('y', scale=alt.Scale(type=scale_type, domain=domain_range)),
        text='text'
    )

# Label A: Top-Left (Amplified)
label_data_1 = pd.DataFrame({
    'x': [0.005], 'y': [1],
    'text': ['AMPLIFIED VOICE\n(Over-Represented)']
})
label_top_left = _zone_label(label_data_1, align='left')

# Label B: Bottom-Right (Diminished)
label_data_2 = pd.DataFrame({
    'x': [5], 'y': [0.005],
    'text': ['DIMINISHED VOICE\n(Under-Represented)']
})
label_bottom_right = _zone_label(label_data_2, align='right')

# 5. Layer 1: The Diagonal Line
# y == x across the whole axis domain; its endpoints are taken from domain_range
# so the line always spans the plot corner to corner.
line_data = pd.DataFrame({'x': domain_range, 'y': domain_range})
diagonal_line = alt.Chart(line_data).mark_line(
    color='grey', strokeDash=[5, 5], opacity=0.5
).encode(
    x=alt.X('x', scale=alt.Scale(type=scale_type, domain=domain_range)),
    y=alt.Y('y', scale=alt.Scale(type=scale_type, domain=domain_range))
)

# 6. Layer 2: The Scatter Points
# Each encoding channel is built separately, then assembled into the chart.

# Horizontal position: population share
scatter_x = alt.X(
    '% Global Pop',
    scale=alt.Scale(type=scale_type, domain=domain_range),
    axis=alt.Axis(values=grid_values),
    title='Global Population Share (%)',
)

# Vertical position: GDP share
scatter_y = alt.Y(
    '% Global GDP',
    scale=alt.Scale(type=scale_type, domain=domain_range),
    axis=alt.Axis(values=grid_values),
    title='Global Influence Share (GDP %)',
)

# Same status palette as the map; this layer does not draw a legend of its own
scatter_color = alt.Color(
    'Representation Status:N',
    scale=alt.Scale(domain=color_domain, range=color_range),
    legend=None,
)

scatter_tooltip = [
    alt.Tooltip('Name', title='Country'),
    alt.Tooltip('Representation Status', title='Status'),
    alt.Tooltip('% Global Pop', format='.2f'),
    alt.Tooltip('% Global GDP', format='.2f'),
    alt.Tooltip('Representation Ratio', format='.2f', title='Ratio'),
    alt.Tooltip('Fairness Gap ($T)', format='+.2f', title='Gap ($T)'),
]

scatter_points = alt.Chart(df).mark_circle(size=100, stroke='black').encode(
    x=scatter_x,
    y=scatter_y,
    color=scatter_color,
    # Points outside the highlight selection are dimmed
    opacity=alt.condition(select_highlight, alt.value(1.0), alt.value(0.3)),
    # Only the point in the calculator selection gets a visible outline
    strokeWidth=alt.condition(select_calculator, alt.value(2), alt.value(0)),
    tooltip=scatter_tooltip,
).add_params(select_highlight, select_calculator)

# 7. Layers 3 & 4: Calculator
# Both layers draw only the row(s) currently in the calculator selection.
selected_country = alt.Chart(df).transform_filter(select_calculator)

# Dashed vertical segment at the country's population share, running from its
# GDP share (y) to the value equal to its population share (y2), i.e. to the diagonal.
gap_rule = selected_country.mark_rule(
    color='black', strokeWidth=2, strokeDash=[4, 4]
).encode(
    x='% Global Pop',
    y='% Global GDP',
    y2='% Global Pop',
)

# Signed gap value printed just up and to the right of the selected point
gap_text = selected_country.mark_text(
    align='left', dx=10, dy=-10, fontSize=12, fontWeight='bold', color='black'
).encode(
    x='% Global Pop',
    y='% Global GDP',
    text=alt.Text('Fairness Gap ($T)', format='+.1f'),
)

# Combine: annotations first (drawn underneath), then points, then calculator marks
backdrop_layers = label_top_left + label_bottom_right + diagonal_line
all_scatter_layers = backdrop_layers + scatter_points + gap_rule + gap_text
final_scatter = all_scatter_layers.properties(
    width=700,
    height=450,
    title='The Representation Ratio Calculator',
)

final_scatter

### Cell 5: View 3 - The "Imbalance" Diverging Bar Chart

In [337]:
# Cell 5: The "Regional Split" Bar Chart (Final Narrative)

# 1. Prepare Regional Data: one row per region with summed GDP and population
regional_totals = df.groupby('Region')[['GDP', 'Population']].sum()
df_region = regional_totals.reset_index()

# 2. Recalculate Shares against the same world totals used for the countries
region_gdp_share = (df_region['GDP'] / total_gdp) * 100
region_pop_share = (df_region['Population'] / total_pop) * 100
df_region['% Global GDP'] = region_gdp_share
df_region['% Global Pop'] = region_pop_share

# 3. Calculate Ratio & Status with the same classifier applied to countries
region_ratio = region_gdp_share / region_pop_share
df_region['Representation Ratio'] = region_ratio
df_region['Representation Status'] = region_ratio.apply(classify_representation)

# 4. Prepare Plotting Data: negate the GDP share so its bars extend to the left of zero
df_region['GDP_Plot'] = -region_gdp_share

# 5. Interaction: clicking a mark selects its region
select_region = alt.selection_point(fields=['Region'])
# One symmetric x scale so the left (negative) and right (positive) bars are comparable
shared_x_scale = alt.Scale(domain=[-100, 100])

# Pieces reused by both bar layers
region_status_color = alt.Color(
    'Representation Status:N',
    scale=alt.Scale(domain=color_domain, range=color_range),
    legend=None,
)
bar_outline = {'stroke': 'black', 'strokeWidth': 1}

# 6. Base Layer: one band per region; unselected regions are dimmed
base = alt.Chart(df_region).encode(
    y=alt.Y('Region:N', axis=None),
    opacity=alt.condition(select_region, alt.value(1), alt.value(0.3)),
)

# LEFT BARS: GDP (Influence). Plotted from the negated column; tick labels show
# the absolute value.
left_bars = base.mark_bar(**bar_outline).encode(
    x=alt.X(
        'GDP_Plot:Q',
        scale=shared_x_scale,
        title=None,
        axis=alt.Axis(format='d', labelExpr="abs(datum.value)"),
    ),
    color=region_status_color,
    tooltip=[
        alt.Tooltip('Region'),
        alt.Tooltip('% Global GDP', format='.1f'),
        alt.Tooltip('Representation Ratio', format='.2f'),
        alt.Tooltip('Representation Status'),
    ],
)

# RIGHT BARS: Population
right_bars = base.mark_bar(**bar_outline).encode(
    x=alt.X('% Global Pop:Q', scale=shared_x_scale, title=None),
    color=region_status_color,
    tooltip=[
        alt.Tooltip('Region'),
        alt.Tooltip('% Global Pop', format='.1f'),
    ],
)

# Text Numbers: the share value printed just beyond the end of each bar
left_text = base.mark_text(align='right', dx=-5).encode(
    x='GDP_Plot:Q',
    text=alt.Text('% Global GDP', format='.1f'),
)
right_text = base.mark_text(align='left', dx=5).encode(
    x='% Global Pop:Q',
    text=alt.Text('% Global Pop', format='.1f'),
)

# Middle Region Labels
middle_labels = base.mark_text(align='center').encode(
    y=alt.Y('Region:N', axis=None),
    x=alt.value(0),
    text='Region',
)

# Custom Titles: stand-ins for axis titles, shifted down by dy pixels
title_data = pd.DataFrame({
    'x': [-50, 50],
    'label': ['Global Influence Share (GDP %)', 'Global Population Share (%)']
})
custom_titles = alt.Chart(title_data).mark_text(
    dy=180, fontSize=10, fontWeight='bold'
).encode(
    x=alt.X('x', scale=shared_x_scale),
    text='label',
)

# Title block for the combined chart
regional_title = alt.TitleParams(
    text='The Regional Split',
    subtitle=[
        "Visualizing how the Global Pie is divided. Ideally, a region's Influence Share (Left) should roughly match its Population Share (Right).",
        "CAUTION: Regional averages often hide extreme outliers. Click a bar to filter the dashboard and reveal which specific nations are skewing the results."
    ],
    fontSize=15,
    subtitleFontSize=12,
    subtitleColor='#525252',
    anchor='middle'
)

# Combine
regional_layers = left_bars + left_text + right_bars + right_text + middle_labels + custom_titles
regional_chart = regional_layers.properties(
    width=1700,
    height=300,
    title=regional_title,
).add_params(select_region)

regional_chart

In [338]:
# Cell 6: The Final Dashboard - Interactive Legend Logic

# ---------------------------------------------------------
# STEP 1: LINK THE INTERACTIONS
# ---------------------------------------------------------

# A mark stays fully opaque only when it satisfies BOTH the country selection
# (highlight) AND the legend selection (status).
country_and_status = select_highlight & select_status

# MAP: keep only features passing the region selection, dim everything outside the
# combined selection, and outline the feature in the calculator selection.
final_map_linked = map_chart.transform_filter(
    select_region
).encode(
    opacity=alt.condition(country_and_status, alt.value(1), alt.value(0.1)),
    strokeWidth=alt.condition(select_calculator, alt.value(2), alt.value(0.2)),
    stroke=alt.condition(select_calculator, alt.value('black'), alt.value('transparent'))
).add_params(
    # select_status was already attached when map_chart was built
    select_highlight,
    select_calculator
)

# SCATTER: same opacity rule, so the dots also respond to legend clicks
scatter_filtered = scatter_points.encode(
    opacity=alt.condition(country_and_status, alt.value(1.0), alt.value(0.1))
).transform_filter(select_region)

# The calculator marks follow the region selection as well
gap_rule_filtered = gap_rule.transform_filter(select_region)
gap_text_filtered = gap_text.transform_filter(select_region)

# Recombine the scatter layers in the original drawing order
scatter_title = alt.TitleParams(
    text='The Representation Ratio Calculator',
    subtitle=[
        "Visualizing how each country's ratio is derived. The dotted line represents a ratio of 1.0 (Perfect Equality).",
        "The distance from this line defines how over- or under-represented a nation is in the global market."
    ],
    fontSize=15,
    subtitleFontSize=12,
    subtitleColor='#525252',
)
linked_layers = (
    label_top_left + label_bottom_right + diagonal_line
    + scatter_filtered + gap_rule_filtered + gap_text_filtered
)
final_scatter_linked = linked_layers.properties(
    width=700,
    height=450,
    title=scatter_title,
)

# ---------------------------------------------------------
# STEP 2: ARRANGE THE LAYOUT
# ---------------------------------------------------------
# Map and scatter sit side by side and share one colour scale
top_row = alt.hconcat(final_map_linked, final_scatter_linked).resolve_scale(
    color='shared'
)

dashboard_title = alt.TitleParams(
    text='The Weight of Nations: People vs. Power',
    subtitle=[
        "In the global market, GDP acts as a proxy for influence: the larger the economy, the louder the voice.",
        "This contrasts with the democratic ideal where influence is distributed equally ('One Person, One Vote').",
        "This dashboard uses a baseline of perfect equality (Proportional representation) to measure exactly how far the current global order deviates from it.",
        "",
        "INSTRUCTIONS: Click a Region (Bottom) to filter, a Country (Map) to audit, or a Legend Item (Left) to highlight a group."
    ],
    fontSize=32,
    subtitleFontSize=16,
    subtitleColor='#525252',
    anchor='start',
    offset=0
)

# Regional bars go underneath the top row
stacked_views = alt.vconcat(top_row, regional_chart, center=True)
dashboard = stacked_views.properties(
    title=dashboard_title
).configure_view(stroke=None).configure_concat(spacing=0)

# ---------------------------------------------------------
# STEP 3: SAVE AND DISPLAY
# ---------------------------------------------------------
# Write the dashboard to an HTML file next to the notebook
dashboard.save('inequality_dashboard.html')

print("SUCCESS: Legend is now interactive!")
print("- Click a colored square in the Legend to highlight only those countries.")
print("- Double-click the Legend to reset.")
dashboard

  exec(code_obj, self.user_global_ns, self.user_ns)


SUCCESS: Legend is now interactive!
- Click a colored square in the Legend to highlight only those countries.
- Double-click the Legend to reset.
