In [94]:
# Cell 1: Setup and Data Loading

import pandas as pd
import altair as alt
import numpy as np
from vega_datasets import data

# Load the provided dataset.
df = pd.read_csv('country_economics_data.csv')

# Keep only the columns the "Inequality of Nations" dashboard needs.
# 'ID' keeps its original name; the map cell uses it as the lookup key that is
# matched against the `id` property of the world map geometry.
required_columns = ['Name', 'ID', 'Region', 'GDP', 'Population']
df = df[required_columns].copy()

# Inspect the data.
# Later cells divide the totals by 1000 on the assumption that GDP is recorded
# in billions ($) and Population in millions.
print("Dataset Loaded Successfully.")
print(f"Total Countries: {len(df)}")
print("-" * 30)
print(df.head())
print("-" * 30)
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

Dataset Loaded Successfully.
Total Countries: 173
------------------------------
          Name  ID    Region  GDP  Population
0  Afghanistan   4      Asia   17       35.70
1      Albania   8    Europe   27        2.36
2      Algeria  12    Africa  264       46.81
3       Angola  24    Africa   80       35.12
4    Argentina  32  Americas  633       47.07
------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173 entries, 0 to 172
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Name        173 non-null    object 
 1   ID          173 non-null    int64  
 2   Region      173 non-null    object 
 3   GDP         173 non-null    int64  
 4   Population  173 non-null    float64
dtypes: float64(1), int64(2), object(2)
memory usage: 6.9+ KB
None


In [95]:
# Cell 2: Creating the Inequality Metrics

# 1. Global totals: plain column sums over every country in the dataset.
total_gdp, total_pop = df['GDP'].sum(), df['Population'].sum()

print(total_gdp)
print(total_pop)

# 2. Percentage shares: each country's slice of the world total, in percent.
#    This puts GDP and population on the same 0-100 footing so they can be compared.
df['% Global GDP'] = df['GDP'].div(total_gdp).mul(100)
df['% Global Pop'] = df['Population'].div(total_pop).mul(100)

# 3. "Fair Share Score" = share of wealth / share of people.
#    1.0 -> GDP share equals population share
#    > 1 -> more GDP than the population share alone would give (surplus)
#    < 1 -> less GDP than the population share alone would give (deficit)
df['Fair Share Score'] = df['% Global GDP'].div(df['% Global Pop'])

# 4. Create the "Inequality Class" (Binning)
# We define a function to categorize the raw score into our 7 narrative bins
# This is crucial for the "Discrete Coloring" strategy in your plan.
def classify_inequality(score):
    """Map a Fair Share Score onto one of the seven named inequality classes.

    Thresholds, checked from the top down:
        score >  4.0   -> 'Elite'
        score >= 2.0   -> 'Wealthy'
        score >= 1.25  -> 'Above Avg'
        score >= 0.8   -> 'Fair Share'
        score >= 0.5   -> 'Below Avg'
        score >= 0.25  -> 'Poor'
        otherwise      -> 'Crisis'

    Args:
        score: Ratio of a share of global GDP to a share of global population.

    Returns:
        The class label as a string, or 'Unknown' when ``score`` is missing
        (None / NaN).
    """
    # Every comparison against NaN is False, so without this guard a missing
    # score would fall through to the final branch and be labelled 'Crisis'.
    # 'Unknown' is the same placeholder the map's lookup uses for countries
    # that have no matching row.
    if pd.isna(score):
        return 'Unknown'
    if score > 4.0:
        return 'Elite'
    elif score >= 2.0:
        return 'Wealthy'
    elif score >= 1.25:
        return 'Above Avg'
    elif score >= 0.8:
        return 'Fair Share'
    elif score >= 0.5:
        return 'Below Avg'
    elif score >= 0.25:
        return 'Poor'
    else:
        return 'Crisis'

# Label every country with its inequality class.
df['Inequality Class'] = df['Fair Share Score'].apply(classify_inequality)

# 5. Sanity-check the results by printing the totals and both ends of the ranking.
separator = "-" * 30
score_table = df[['Name', 'Fair Share Score', 'Inequality Class']]

print("Global Totals Calculated:")
# Dividing by 1000 shifts the unit one step up; .2f keeps two decimal places.
print(f"Total GDP: ${total_gdp/1000:.2f} Trillion")
print(f"Total Pop: {total_pop/1000:.2f} Billion")
print(separator)
print("Top 5 'Elite' Nations (Highest Score):")
print(score_table.sort_values(by='Fair Share Score', ascending=False).iloc[:5])
print(separator)
print("Bottom 5 'Crisis' Nations (Lowest Score):")
print(score_table.sort_values(by='Fair Share Score', ascending=True).iloc[:5])

110289
7883.620000000001
Global Totals Calculated:
Total GDP: $110.29 Trillion
Total Pop: 7.88 Billion
------------------------------
Top 5 'Elite' Nations (Highest Score):
              Name  Fair Share Score Inequality Class
102         Monaco         17.870368            Elite
88   Liechtenstein         14.296294            Elite
90      Luxembourg          9.922055            Elite
70         Ireland          7.581766            Elite
150    Switzerland          7.400899            Elite
------------------------------
Bottom 5 'Crisis' Nations (Lowest Score):
            Name  Fair Share Score Inequality Class
23       Burundi          0.010175           Crisis
170        Yemen          0.033468           Crisis
0    Afghanistan          0.034039           Crisis
92    Madagascar          0.038022           Crisis
93        Malawi          0.038734           Crisis


### Cell 3: The "Power Ratio" Choropleth Map

In [96]:
# 1. World map geometry: the 'countries' feature of the world_110m TopoJSON.
world_source = alt.topo_feature(data.world_110m.url, 'countries')

# 2. Discrete colour scale: one colour per inequality class, running from the
#    lightest shade ('Crisis') to the darkest ('Elite').
color_domain = ['Crisis', 'Poor', 'Below Avg', 'Fair Share', 'Above Avg', 'Wealthy', 'Elite']
color_range = ['#f2f0f7', '#dadaeb', '#bcbddc', '#9e9ac8', '#807dba', '#6a51a3', '#4a1486']

# 3. Map layer, assembled from named pieces.
# Each geometry's `id` is matched against df['ID']; the listed columns are
# attached to the shape, and 'Unknown' is used where no row matches.
country_lookup = alt.LookupData(df, 'ID', ['Name', 'Fair Share Score', 'Inequality Class', 'Region'])

map_class_color = alt.Color(
    'Inequality Class:N',
    scale=alt.Scale(domain=color_domain, range=color_range),
    legend=alt.Legend(title="Wealth Concentration"),
)

map_tooltip = [
    alt.Tooltip('Name:N', title='Country'),
    alt.Tooltip('Fair Share Score:Q', format='.2f'),
    alt.Tooltip('Inequality Class:N'),
]

map_chart = (
    alt.Chart(world_source)
    .mark_geoshape(stroke='black', strokeWidth=0.2)
    .transform_lookup(lookup='id', from_=country_lookup, default='Unknown')
    .encode(color=map_class_color, tooltip=map_tooltip)
    .project(type='equalEarth')
    .properties(
        width=900,   # wide canvas for the equalEarth projection
        height=500,
        title='The Inequality of Nations',
    )
)

map_chart

### Cell 4: The "Mechanics" Scatter Plot

In [97]:
# Cell 4: The "Mechanics" Scatter Plot (Log Scale with Clean Lines)

# 1. Feature engineering: the "Fairness Gap".
#    (population share - GDP share) as a fraction, multiplied by total_gdp / 1000.
share_gap_fraction = (df['% Global Pop'] - df['% Global GDP']) / 100
world_gdp_scaled = total_gdp / 1000
df['Fairness Gap ($T)'] = share_gap_fraction * world_gdp_scaled

# 2. Click selections, both keyed on the country name.
#    select_highlight  : an empty selection matches every point (empty=True)
#    select_calculator : an empty selection matches no point   (empty=False)
select_highlight = alt.selection_point(fields=['Name'], on='click', empty=True)
select_calculator = alt.selection_point(fields=['Name'], on='click', empty=False)

# 3. Both axes are logarithmic. A log scale cannot include 0, so the domain
#    starts at a small positive value instead.
scale_type, domain_range = 'log', [0.0001, 100]

# 4. Tick / grid positions: powers of ten only, spanning the whole domain.
grid_values = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]

# 5. Layer 1: dashed grey diagonal where GDP share equals population share.
#    Its two endpoints are the corners of the plotted domain.
line_data = pd.DataFrame({'x': domain_range, 'y': domain_range})
diagonal_scale = alt.Scale(type=scale_type, domain=domain_range)
diagonal_line = (
    alt.Chart(line_data)
    .mark_line(color='grey', strokeDash=[5, 5], opacity=0.5)
    .encode(
        x=alt.X('x', scale=diagonal_scale),
        y=alt.Y('y', scale=diagonal_scale),
    )
)

# 6. Layer 2: one circle per country, population share (x) against GDP share (y).
share_scale = alt.Scale(type=scale_type, domain=domain_range)
share_axis = alt.Axis(values=grid_values)  # ticks and gridlines only at grid_values

scatter_class_color = alt.Color(
    'Inequality Class:N',
    scale=alt.Scale(domain=color_domain, range=color_range),
    legend=None,
)

scatter_tooltip = [
    alt.Tooltip('Name', title='Country'),
    alt.Tooltip('% Global Pop', format='.2f'),
    alt.Tooltip('% Global GDP', format='.2f'),
    alt.Tooltip('Fairness Gap ($T)', format='+.2f', title='Gap ($T)'),
]

scatter_points = (
    alt.Chart(df)
    .mark_circle(size=100, stroke='black')
    .encode(
        x=alt.X('% Global Pop', scale=share_scale, axis=share_axis,
                title='% Global Population (Log Scale)'),
        y=alt.Y('% Global GDP', scale=share_scale, axis=share_axis,
                title='% Global GDP (Log Scale)'),
        color=scatter_class_color,
        # Interaction: points outside select_highlight are dimmed; the point in
        # select_calculator gets a visible outline (stroke width 2 instead of 0).
        opacity=alt.condition(select_highlight, alt.value(1.0), alt.value(0.3)),
        strokeWidth=alt.condition(select_calculator, alt.value(2), alt.value(0)),
        tooltip=scatter_tooltip,
    )
    .add_params(select_highlight, select_calculator)
)

# 7. Layers 3 & 4: drawn only for the country held in select_calculator.
calculator_pick = alt.Chart(df).transform_filter(select_calculator)

# Dashed vertical rule at the country's population share, running from its GDP
# share to y2 = its population share, i.e. to the point on the y = x diagonal.
gap_rule = calculator_pick.mark_rule(
    color='black', strokeWidth=2, strokeDash=[4, 4]
).encode(
    x='% Global Pop',
    y='% Global GDP',
    y2='% Global Pop',
)

# Signed gap value, offset slightly up and to the right of the country's point.
gap_text = calculator_pick.mark_text(
    align='left', dx=10, dy=-10, fontSize=12, fontWeight='bold', color='black'
).encode(
    x='% Global Pop',
    y='% Global GDP',
    text=alt.Text('Fairness Gap ($T)', format='+.1f'),
)

# Stack the four layers and make the result pannable / zoomable.
scatter_layers = diagonal_line + scatter_points + gap_rule + gap_text
final_scatter = scatter_layers.properties(
    width=700,
    height=500,
    title='Does Size Matter? (Log Scale - Cleaned)',
).interactive()

final_scatter

### Cell 5: View 3 - The "Imbalance" Diverging Bar Chart

In [98]:
# Cell 5: The "Imbalance" Diverging Bar Chart (Polished Labels)

# 1. Regional totals: sum GDP and population within each region.
df_region = df.groupby('Region', as_index=False)[['GDP', 'Population']].sum()

# 2. Shares, score and class per region, computed the same way as per country.
df_region['% Global GDP'] = df_region['GDP'].div(total_gdp).mul(100)
df_region['% Global Pop'] = df_region['Population'].div(total_pop).mul(100)
df_region['Fair Share Score'] = df_region['% Global GDP'].div(df_region['% Global Pop'])
df_region['Inequality Class'] = df_region['Fair Share Score'].apply(classify_inequality)

# 3. "Butterfly" layout: population is plotted as a negative value so its bars
#    grow to the left of zero while the GDP bars grow to the right.
df_region['Pop_Plot'] = df_region['% Global Pop'].mul(-1)

# 4. Interaction: a point selection keyed on the region name.
select_region = alt.selection_point(fields=['Region'])

# 5. One x scale for every layer; the symmetric domain puts 0 exactly in the middle.
shared_x_scale = alt.Scale(domain=[-100, 100])

# 6. Shared base: one row per region, dimmed unless it matches select_region.
base = alt.Chart(df_region).encode(
    y=alt.Y('Region:N', axis=None),
    opacity=alt.condition(select_region, alt.value(1), alt.value(0.3)),
)

# LEFT BARS: population share in grey, plotted from the negated column.
# labelExpr strips the sign so the axis labels read as positive numbers.
population_axis = alt.Axis(format='d', labelExpr="abs(datum.value)")
population_tooltip = [
    alt.Tooltip('Region'),
    alt.Tooltip('% Global Pop', format='.1f'),
]
left_bars = base.mark_bar(color='#525252').encode(
    x=alt.X('Pop_Plot:Q', scale=shared_x_scale, title=None, axis=population_axis),
    tooltip=population_tooltip,
)

# RIGHT BARS: GDP share, coloured by the region's inequality class.
gdp_class_color = alt.Color(
    'Inequality Class:N',
    scale=alt.Scale(domain=color_domain, range=color_range),
    legend=None,
)
gdp_tooltip = [
    alt.Tooltip('Region'),
    alt.Tooltip('% Global GDP', format='.1f'),
    alt.Tooltip('Fair Share Score', format='.2f'),
    alt.Tooltip('Inequality Class'),
]
right_bars = base.mark_bar().encode(
    x=alt.X('% Global GDP:Q', scale=shared_x_scale, title=None),
    color=gdp_class_color,
    tooltip=gdp_tooltip,
)

# NUMERIC LABELS: placed just beyond the outer end of each bar.
# The left label is positioned with the negated column but shows the positive share.
left_text = base.mark_text(align='right', dx=-5).encode(
    x='Pop_Plot:Q',
    text=alt.Text('% Global Pop', format='.1f'),
)

right_text = base.mark_text(align='left', dx=5).encode(
    x='% Global GDP:Q',
    text=alt.Text('% Global GDP', format='.1f'),
)

# MIDDLE LABELS: Region names, centred on the zero line between the two bar sets.
# alt.value(0) is a pixel position (the chart's left edge), not a data value, so
# it pinned the labels to the left border instead of the centre. alt.datum(0)
# sends the constant 0 through the x scale shared by the layers, whose
# [-100, 100] domain places 0 at the horizontal midpoint.
middle_labels = base.mark_text(align='center').encode(
    y=alt.Y('Region:N', axis=None),
    x=alt.datum(0),
    text='Region'
)

# Custom axis titles: a two-row helper frame places one caption under each
# half of the chart (x = -50 for the population side, x = 50 for the GDP side).
title_data = pd.DataFrame([
    {'x': -50, 'label': 'Global Population Share (%)'},
    {'x': 50, 'label': 'Global GDP Share (%)'},
])

custom_titles = (
    alt.Chart(title_data)
    .mark_text(
        dy=190,  # vertical pixel offset that pushes the captions below the bars
        fontSize=12,
        fontWeight='bold',
    )
    .encode(
        x=alt.X('x', scale=shared_x_scale),
        text='label',
    )
)

# 7. Stack every layer, size the chart, and attach the region selection.
regional_layers = (
    left_bars + left_text
    + right_bars + right_text
    + middle_labels + custom_titles
)
regional_chart = regional_layers.properties(
    width=1700,
    height=300,
    title='Regional Imbalance',
).add_params(select_region)

regional_chart

In [99]:
# Cell 6: The Final Dashboard - Fixed Layout & Filter Logic

# ---------------------------------------------------------
# STEP 1: LINK THE INTERACTIONS
# ---------------------------------------------------------

# MAP: keep only shapes matching the selected region, then layer the two
# country selections on top:
#   - shapes outside select_highlight are dimmed to 0.3 opacity
#   - the shape in select_calculator gets a black outline of width 2;
#     every other shape gets a transparent outline of width 0.2
map_in_region = map_chart.transform_filter(select_region)
final_map_linked = map_in_region.encode(
    opacity=alt.condition(select_highlight, alt.value(1), alt.value(0.3)),
    strokeWidth=alt.condition(select_calculator, alt.value(2), alt.value(0.2)),
    stroke=alt.condition(select_calculator, alt.value('black'), alt.value('transparent')),
).add_params(
    select_highlight,
    select_calculator,
)

# SCATTER: apply the same region filter to each data-driven layer.
# The diagonal reference line is left unfiltered.
scatter_filtered, gap_rule_filtered, gap_text_filtered = (
    layer.transform_filter(select_region)
    for layer in (scatter_points, gap_rule, gap_text)
)

# Re-layer the filtered pieces and set the size explicitly on the new chart.
linked_scatter_layers = diagonal_line + scatter_filtered + gap_rule_filtered + gap_text_filtered
final_scatter_linked = linked_scatter_layers.properties(
    width=700,
    height=500,
    title='Does Size Matter? (Log Scale)',
).interactive()

# ---------------------------------------------------------
# STEP 2: ARRANGE THE LAYOUT
# ---------------------------------------------------------
# Top row: map on the left, scatter on the right, sharing one colour scale.
top_row = alt.hconcat(final_map_linked, final_scatter_linked).resolve_scale(color='shared')

# Dashboard heading plus the two usage hints shown as a subtitle.
dashboard_title = alt.TitleParams(
    text='The Inequality of Nations',
    subtitle=[
        '1. Click a Region (Bottom) to filter the global view.',
        '2. Click a Country (Map or Scatter) to calculate its Fairness Gap.',
    ],
    fontSize=24,
    anchor='start',
    offset=20,
)

# Put the regional bar chart under the top row, centred, with no view border
# and 30px between the concatenated charts.
dashboard = (
    alt.vconcat(top_row, regional_chart, center=True)
    .properties(title=dashboard_title)
    .configure_view(stroke=None)
    .configure_concat(spacing=30)
)

# ---------------------------------------------------------
# STEP 3: SAVE AND DISPLAY
# ---------------------------------------------------------
# Write the dashboard to an HTML file, then render it as the cell output.
dashboard_html_path = 'inequality_dashboard.html'
dashboard.save(dashboard_html_path)

print("SUCCESS: Layout restored. Interaction working.")
dashboard

  exec(code_obj, self.user_global_ns, self.user_ns)


SUCCESS: Layout restored. Interaction working.
