In [3]:
import pandas as pd
import altair as alt

# Load the dataset from world_data_clean.csv in the working directory.
df = pd.read_csv('world_data_clean.csv')

# Coerce the GDP-per-capita and government-effectiveness columns to numbers.
# errors='coerce' turns any value that cannot be parsed into NaN instead of
# raising, so malformed cells are dropped later rather than aborting here.
for column in ('gdp_pc_2013', 'gdp_pc_2023', 'gov_eff_2013', 'gov_eff_2023'):
    df[column] = pd.to_numeric(df[column], errors='coerce')

# Map countries to regions (based on three-letter country codes).
#
# The table is written region -> codes so each region's membership can be
# read and audited in one place, then inverted into the code -> region
# lookup that the rest of the script uses.  A code that is absent from this
# table has no region, so keep the lists complete: Antigua and Barbuda
# ('ATG') was previously missing from the Americas.
_CODES_BY_REGION = {
    'Africa': (
        'DZA', 'AGO', 'BEN', 'BWA', 'BFA', 'BDI', 'CMR', 'CPV', 'CAF', 'TCD',
        'COM', 'COG', 'COD', 'CIV', 'DJI', 'EGY', 'GNQ', 'ERI', 'ETH', 'GAB',
        'GMB', 'GHA', 'GIN', 'GNB', 'KEN', 'LSO', 'LBR', 'LBY', 'MDG', 'MWI',
        'MLI', 'MRT', 'MUS', 'MAR', 'MOZ', 'NAM', 'NER', 'NGA', 'RWA', 'STP',
        'SEN', 'SYC', 'SLE', 'SOM', 'ZAF', 'SSD', 'SDN', 'SWZ', 'TZA', 'TGO',
        'TUN', 'UGA', 'ZMB', 'ZWE',
    ),
    'Americas': (
        'ATG', 'ARG', 'BHS', 'BRB', 'BLZ', 'BOL', 'BRA', 'CAN', 'CHL', 'COL',
        'CRI', 'CUB', 'DMA', 'DOM', 'ECU', 'SLV', 'GRD', 'GTM', 'GUY', 'HTI',
        'HND', 'JAM', 'MEX', 'NIC', 'PAN', 'PRY', 'PER', 'KNA', 'LCA', 'VCT',
        'SUR', 'TTO', 'USA', 'URY', 'VEN',
    ),
    'Asia': (
        'AFG', 'ARM', 'AZE', 'BHR', 'BGD', 'BTN', 'BRN', 'KHM', 'CHN', 'GEO',
        'IND', 'IDN', 'IRN', 'IRQ', 'ISR', 'JPN', 'JOR', 'KAZ', 'KWT', 'KGZ',
        'LAO', 'LBN', 'MYS', 'MDV', 'MNG', 'MMR', 'NPL', 'PRK', 'OMN', 'PAK',
        'PSE', 'PHL', 'QAT', 'SAU', 'SGP', 'KOR', 'LKA', 'SYR', 'TJK', 'THA',
        'TLS', 'TUR', 'TKM', 'ARE', 'UZB', 'VNM', 'YEM',
    ),
    'Europe': (
        'ALB', 'AND', 'AUT', 'BLR', 'BEL', 'BIH', 'BGR', 'HRV', 'CYP', 'CZE',
        'DNK', 'EST', 'FIN', 'FRA', 'DEU', 'GRC', 'HUN', 'ISL', 'IRL', 'ITA',
        'XKX', 'LVA', 'LIE', 'LTU', 'LUX', 'MKD', 'MLT', 'MDA', 'MCO', 'MNE',
        'NLD', 'NOR', 'POL', 'PRT', 'ROU', 'RUS', 'SMR', 'SRB', 'SVK', 'SVN',
        'ESP', 'SWE', 'CHE', 'UKR', 'GBR', 'VAT',
    ),
    'Oceania': (
        'AUS', 'FJI', 'KIR', 'MHL', 'FSM', 'NRU', 'NZL', 'PLW', 'PNG', 'WSM',
        'SLB', 'TON', 'TUV', 'VUT',
    ),
}

# Flat lookup: country code -> region name.
region_mapping = {
    code: region
    for region, codes in _CODES_BY_REGION.items()
    for code in codes
}

# Tag each row with its region; codes not present in region_mapping get NaN.
df['Region'] = df['Country Code'].map(region_mapping)

# Reshape to long format for Altair: one frame per year with identical
# column names plus a string 'Year' label, stacked into a single table.
id_columns = ['Country Name', 'Country Code', 'Region']

data_2013 = (
    df[id_columns + ['gdp_pc_2013', 'gov_eff_2013']]
    .rename(columns={'gdp_pc_2013': 'GDP per Capita',
                     'gov_eff_2013': 'Gov Effectiveness'})
    .assign(Year='2013')
)

data_2023 = (
    df[id_columns + ['gdp_pc_2023', 'gov_eff_2023']]
    .rename(columns={'gdp_pc_2023': 'GDP per Capita',
                     'gov_eff_2023': 'Gov Effectiveness'})
    .assign(Year='2023')
)

df_long = pd.concat([data_2013, data_2023], ignore_index=True)

# Keep only rows where both measures and the region are all present.
required_columns = ['GDP per Capita', 'Gov Effectiveness', 'Region']
is_complete = df_long[required_columns].notna().all(axis=1)
df_long = df_long[is_complete]

# Outliers above 150,000 GDP per capita are excluded from the charted data
# (df_long itself still contains them).
within_chart_range = df_long['GDP per Capita'].le(150000)
df_long_filtered = df_long.loc[within_chart_range].copy()

# Checkbox widgets, one per year, rendered with the chart.
year_2013_checkbox = alt.binding_checkbox(name='Show 2013 ')
year_2023_checkbox = alt.binding_checkbox(name='Show 2023 ')

# Boolean parameters bound to those checkboxes.  The names 'show_2013' and
# 'show_2023' are what the chart's filter expression refers to; 2013 starts
# checked and 2023 starts unchecked.
year_2013_select = alt.param(value=True, bind=year_2013_checkbox, name='show_2013')
year_2023_select = alt.param(value=False, bind=year_2023_checkbox, name='show_2023')

# Click selection keyed on the country name, used to highlight a country.
# empty=True: while nothing is clicked, every point is treated as selected.
click_selection = alt.selection_point(empty=True, fields=['Country Name'])

# Base chart: government effectiveness (x) against GDP per capita (y), one
# circle per country-year row of df_long_filtered.

# Axes use fixed domains.
x_encoding = alt.X(
    'Gov Effectiveness:Q',
    axis=alt.Axis(title='Government Effectiveness Index'),
    scale=alt.Scale(domain=[-2.5, 2.5]),
)
y_encoding = alt.Y(
    'GDP per Capita:Q',
    axis=alt.Axis(title='GDP per Capita (USD)', format='$,.0f'),
    scale=alt.Scale(domain=[0, 150000]),
)

# Selected points are coloured by year; unselected points are greyed out.
year_color = alt.Color(
    'Year:N',
    scale=alt.Scale(domain=['2013', '2023'], range=['#3182bd', '#e6550d']),
    legend=alt.Legend(title='Year'),
)
color_encoding = alt.condition(click_selection, year_color, alt.value('lightgray'))

# Unselected points are also faded.
opacity_encoding = alt.condition(click_selection, alt.value(0.8), alt.value(0.15))

hover_fields = [
    alt.Tooltip('Country Name:N', title='Country'),
    alt.Tooltip('GDP per Capita:Q', title='GDP per Capita (USD)', format='$,.0f'),
    alt.Tooltip('Gov Effectiveness:Q', title='Gov Effectiveness', format='.2f'),
    alt.Tooltip('Year:N', title='Year'),
]

# A row is drawn only while the checkbox parameter for its year is true.
year_visibility_expr = '(datum.Year == "2013" && show_2013) || (datum.Year == "2023" && show_2023)'

base = (
    alt.Chart(df_long_filtered)
    .mark_circle(size=100)
    .encode(
        x=x_encoding,
        y=y_encoding,
        color=color_encoding,
        opacity=opacity_encoding,
        tooltip=hover_fields,
    )
    .properties(width=250, height=250)
    .add_params(year_2013_select, year_2023_select, click_selection)
    .transform_filter(year_visibility_expr)
)

# Create faceted chart by region: one panel per Region value, laid out three
# panels per row, with x and y scales shared across panels.
chart = base.facet(
    facet=alt.Facet('Region:N', title=None),
    columns=3
).properties(
    title={
        "text": "GDP per Capita vs Government Effectiveness by Region",
        "subtitle": "Check boxes to show 2013 (blue) and/or 2023 (orange) | Click any point to highlight that country"
    }
).resolve_scale(
    x='shared',
    y='shared'
)

# Print outliers separately: these rows are in df_long but were excluded from
# the charted data, so list them, largest GDP per capita first.
print("\n--- Countries with GDP per Capita > $150,000 (not shown in chart) ---")
outliers = df_long[df_long['GDP per Capita'] > 150000].sort_values('GDP per Capita', ascending=False)
for country, year, gdp_per_capita in zip(
    outliers['Country Name'], outliers['Year'], outliers['GDP per Capita']
):
    print(f"{country} ({year}): ${gdp_per_capita:,.0f}")

# Save as HTML for your website
# NOTE(review): this is an absolute, machine-specific path; the save fails on
# any machine where this directory does not exist.
chart.save("/Users/arumehta/Desktop/DS 4200/gov_effectiveness_vs_gdp.html")

# Show in notebook (optional).  Jupyter only renders a bare expression when
# it is the last statement of the cell, so this must stay at the very end;
# placed earlier it is evaluated and silently discarded.
chart



--- Countries with GDP per Capita > $150,000 (not shown in chart) ---
Monaco (2023): $256,581
Liechtenstein (2023): $207,974
Liechtenstein (2013): $172,824
