<a href="https://colab.research.google.com/github/jadeacevedo/Global-Pulse-Inequality-Emissions-Atlas/blob/main/finalviz_jade.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [None]:
import pandas as pd
import altair as alt
import kagglehub

## Kaggle datasets for continent and latitude/longitude

In [None]:
# Download (or reuse the cached copy of) the countries-by-continent dataset.
# kagglehub returns the local directory that holds the dataset files, so the
# CSV is read relative to that directory instead of a hardcoded Colab mount
# point; the cell then works wherever the cache happens to live.
path = kagglehub.dataset_download("hserdaraltan/countries-by-continent")

print("Path to dataset files:", path)
continent_df = pd.read_csv(f"{path}/Countries by continents.csv")
continent_df.head()

Using Colab cache for faster access to the 'countries-by-continent' dataset.
Path to dataset files: /kaggle/input/countries-by-continent


Unnamed: 0,Continent,Country
0,Africa,Algeria
1,Africa,Angola
2,Africa,Benin
3,Africa,Botswana
4,Africa,Burkina


In [None]:
# Download (or reuse the cached copy of) the countries-of-the-world dataset.
# The CSV is read from the directory kagglehub reports rather than from a
# hardcoded Colab mount point, so the cell is not tied to one environment.
path = kagglehub.dataset_download("nelgiriyewithana/countries-of-the-world-2023")

print("Path to dataset files:", path)

# Lift Altair's default row limit for data embedded in charts.
alt.data_transformers.disable_max_rows()

kaggle_df = pd.read_csv(f"{path}/world-data-2023.csv")

kaggle_df.head()


Using Colab cache for faster access to the 'countries-of-the-world-2023' dataset.
Path to dataset files: /kaggle/input/countries-of-the-world-2023


Unnamed: 0,Country,Density\n(P/Km2),Abbreviation,Agricultural Land( %),Land Area(Km2),Armed Forces size,Birth Rate,Calling Code,Capital/Major City,Co2-Emissions,...,Out of pocket health expenditure,Physicians per thousand,Population,Population: Labor force participation (%),Tax revenue (%),Total tax rate,Unemployment rate,Urban_population,Latitude,Longitude
0,Afghanistan,60,AF,58.10%,652230,323000.0,32.49,93.0,Kabul,8672,...,78.40%,0.28,38041754,48.90%,9.30%,71.40%,11.12%,9797273,33.93911,67.709953
1,Albania,105,AL,43.10%,28748,9000.0,11.78,355.0,Tirana,4536,...,56.90%,1.2,2854191,55.70%,18.60%,36.60%,12.33%,1747593,41.153332,20.168331
2,Algeria,18,DZ,17.40%,2381741,317000.0,24.28,213.0,Algiers,150006,...,28.10%,1.72,43053054,41.20%,37.20%,66.10%,11.70%,31510100,28.033886,1.659626
3,Andorra,164,AD,40.00%,468,,7.2,376.0,Andorra la Vella,469,...,36.40%,3.33,77142,,,,,67873,42.506285,1.521801
4,Angola,26,AO,47.50%,1246700,117000.0,40.73,244.0,Luanda,34693,...,33.40%,0.21,31825295,77.50%,9.20%,49.10%,6.89%,21061025,-11.202692,17.873887


# Visualization curation

In [None]:
import pandas as pd
import altair as alt

alt.data_transformers.disable_max_rows()

# ======================================
# 1. Load data
# ======================================
# GINI and CO2 files are read from /content; the two Kaggle files are read
# from the directories the download cells above reported.
gini_df = pd.read_csv("/content/GINI.csv")
co2_df  = pd.read_csv("/content/owid-co2-data.csv")
coords  = pd.read_csv("/kaggle/input/countries-by-continent/Countries by continents.csv")
world_df = pd.read_csv("/kaggle/input/countries-of-the-world-2023/world-data-2023.csv")

# Columns the rest of this cell relies on:
# gini_df:  'Country Name', 'Country Code', 'Indicator Name', 'Indicator Code', '1960'..'2024'
# co2_df:   'country', 'year', 'co2', ...
# coords:   'Country', 'Continent', ...
# world_df: 'Country', 'Latitude', 'Longitude', ...

# ======================================
# 2. Normalize country names
# ======================================
def norm(x):
    """Return ``x`` as a lowercase string with surrounding whitespace removed.

    The value is passed through ``str`` first, so non-string inputs are
    accepted. Used below to build the ``country_norm`` join key.
    """
    text = str(x)
    trimmed = text.strip()
    return trimmed.lower()

# Rebind each name to a copy of its frame before adding the key column.
gini_df, co2_df, coords, world_df = (
    frame.copy() for frame in (gini_df, co2_df, coords, world_df)
)

# Add the shared join key: each source's country-name column run through norm().
for frame, name_column in (
    (gini_df, "Country Name"),
    (co2_df, "country"),
    (coords, "Country"),
    (world_df, "Country"),
):
    frame["country_norm"] = frame[name_column].apply(norm)

# ======================================
# 3. Latest GINI per country (from the per-year columns)
# ======================================
# Year columns are the ones whose header consists only of digits.
year_cols = [col for col in gini_df.columns if col.isdigit()]

# Wide -> long: one row per (country, year column) with the value in "GINI".
gini_long = pd.melt(
    gini_df,
    id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code", "country_norm"],
    value_vars=year_cols,
    var_name="year",
    value_name="GINI",
)

# "year" holds the column labels as strings here, so the sort is lexicographic.
# After dropping missing values, the last row of each country group is kept.
gini_by_year = gini_long.sort_values("year").dropna(subset=["GINI"])
gini_latest = gini_by_year.groupby("country_norm").tail(1)
gini_latest = gini_latest[["country_norm", "Country Name", "GINI", "year"]]

# ======================================
# 4. Latest CO2 per country
# ======================================
# Sort by year, drop rows without a co2 value, keep the last row per country,
# then expose the value under the column name "CO2".
co2_by_year = co2_df.sort_values("year").dropna(subset=["co2"])
co2_latest = co2_by_year.groupby("country_norm").tail(1)
co2_latest = co2_latest[["country_norm", "country", "year", "co2"]]
co2_latest = co2_latest.rename(columns={"co2": "CO2"})

# ======================================
# 5. Prep continent + coordinates
# ======================================
coords_slim = coords[["country_norm", "Continent"]]

world_coords = world_df[["country_norm", "Country", "Latitude", "Longitude"]]
world_coords = world_coords.rename(columns={"Country": "Country_latlon"})

# ======================================
# 6. Merge into ONE master dataframe
# ======================================
# Start from the latest GINI rows and left-join CO2, coordinates and continent,
# so every country that has a GINI value is kept.
df = gini_latest.rename(columns={"Country Name": "Country"})
df = df.merge(co2_latest[["country_norm", "CO2"]], on="country_norm", how="left")
df = df.merge(world_coords, on="country_norm", how="left")
df = df.merge(coords_slim, on="country_norm", how="left")

# Coerce coordinates to numbers (unparseable values become NaN).
for axis in ("Latitude", "Longitude"):
    df[axis] = pd.to_numeric(df[axis], errors="coerce")

# Longitudes above 180 are shifted down by 360.
beyond_180 = df["Longitude"] > 180
df.loc[beyond_180, "Longitude"] = df.loc[beyond_180, "Longitude"] - 360

# ======================================
# 6b. Clean final country names + harmonize for topojson
# ======================================
df["Country_final"] = df["Country"].astype(str).str.strip()

# Overrides from the name used in `df` to the name used as the lookup key
# against the map's `properties.name` (see the choropleth's transform_lookup).
# The Ivory Coast entry previously held a mis-decoded string ("CÃ´te ..."),
# which could never equal a correctly encoded feature name.
# NOTE(review): `df["Country"]` comes from the GINI file's "Country Name"
# column, while some keys below (e.g. "Micronesia (country)") look like another
# source's spelling. Confirm the keys against the GINI names and the values
# against the topojson's `properties.name` list.
topo_name_overrides = {
    "United States": "United States of America",
    "Democratic Republic of Congo": "Democratic Republic of the Congo",
    "Congo": "Republic of the Congo",
    "Czechia": "Czech Republic",
    "Eswatini": "Swaziland",
    "North Macedonia": "Macedonia",
    "South Korea": "Korea, South",
    "North Korea": "Korea, North",
    "Cape Verde": "Cabo Verde",
    "Ivory Coast": "Côte d'Ivoire",
    "Micronesia (country)": "Micronesia",
}

# Names without an override are used unchanged.
df["country_map"] = df["Country_final"].map(
    lambda name: topo_name_overrides.get(name, name)
)

# Metric for labels: CO2 divided by GINI
df["CO2_per_GINI"] = df["CO2"] / df["GINI"]

# Explicit domain for the GINI color scale
gini_min = float(df["GINI"].min())
gini_max = float(df["GINI"].max())

# ======================================
# 7. Choropleth: GINI (yellow→red) + graticule
# ======================================
# Country shapes come from the "countries" object of the world-atlas topojson.
world_topo = alt.topo_feature(
    "https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json",
    "countries"
)

# Background grid
graticule = (
    alt.Chart(alt.graticule())
      .mark_geoshape(
          fill=None,
          stroke="lightgray",
          strokeWidth=0.4
      )
)

# Main choropleth: each feature's properties.name is looked up in
# df["country_map"] to pull in the data fields used for color and tooltips.
choro = (
    alt.Chart(world_topo)
      .mark_geoshape(stroke="white", strokeWidth=0.4)
      .transform_lookup(
          lookup="properties.name",
          from_=alt.LookupData(
              df,
              key="country_map",
              fields=["Country_final", "Continent", "GINI", "CO2", "CO2_per_GINI"]
          )
      )
      .encode(
          # Features with a GINI value use the color scale; the rest are dark gray.
          color=alt.condition(
              "datum.GINI != null",
              alt.Color(
                  "GINI:Q",
                  title="Income inequality (GINI)",
                  scale=alt.Scale(
                      scheme="yelloworangered",
                      domain=[gini_min, gini_max],
                      nice=False
                  ),
                  legend=alt.Legend(
                      orient="bottom",
                      title="Income inequality (GINI)",
                      tickCount=5
                  )
              ),
              alt.value("#4d4d4d")
          ),
          # Titles use a properly encoded subscript two (previously mojibake).
          tooltip=[
              alt.Tooltip("Country_final:N",  title="Country"),
              alt.Tooltip("Continent:N"),
              alt.Tooltip("GINI:Q",          title="GINI",       format=".2f"),
              alt.Tooltip("CO2:Q",           title="CO₂",        format=".2f"),
              alt.Tooltip("CO2_per_GINI:Q",  title="CO₂ / GINI", format=".2f")
          ]
      )
)

# ======================================
# 8. Bubbles + labels for top 12 CO2_per_GINI
# ======================================
# Keep rows that can be placed and sized, then the 12 largest CO2_per_GINI values.
bubble_df = df.dropna(subset=["Latitude", "Longitude", "CO2_per_GINI"]).copy()
bubble_df = bubble_df.sort_values("CO2_per_GINI", ascending=False).head(12)

# Circles positioned by coordinates and sized by CO2_per_GINI.
# Legend and tooltip titles use a properly encoded "CO₂" (previously mojibake).
bubbles = (
    alt.Chart(bubble_df)
      .mark_circle(opacity=0.85)
      .encode(
          longitude="Longitude:Q",
          latitude="Latitude:Q",
          size=alt.Size(
              "CO2_per_GINI:Q",
              title="CO₂ per GINI index",
              scale=alt.Scale(range=[300, 2000]),
              legend=alt.Legend(
                  title="CO₂ per GINI index",
                  orient="right"
              )
          ),
          color=alt.value("#1a9850"),
          tooltip=[
              alt.Tooltip("Country_final:N", title="Country"),
              alt.Tooltip("Continent:N"),
              alt.Tooltip("GINI:Q",         title="GINI",         format=".2f"),
              alt.Tooltip("CO2:Q",          title="CO₂",          format=".2f"),
              alt.Tooltip("CO2_per_GINI:Q", title="CO₂ per GINI", format=".2f")
          ]
      )
)

# The numeric CO2_per_GINI value drawn at each bubble's position.
labels = (
    alt.Chart(bubble_df)
      .mark_text(
          align="center",
          baseline="middle",
          fontSize=9,
          fontWeight="bold",
          opacity=1
      )
      .encode(
          longitude="Longitude:Q",
          latitude="Latitude:Q",
          text=alt.Text("CO2_per_GINI:Q", format=".1f")
      )
)


# ======================================
# 9. Continent labels
# ======================================
# Hand-picked (latitude, longitude) anchor for each continent name.
continent_centers = pd.DataFrame(
    [
        ("North America", 50, -100),
        ("South America", -20, -60),
        ("Europe", 55, 15),
        ("Africa", 0, 20),
        ("Asia", 30, 90),
    ],
    columns=["Continent", "Latitude", "Longitude"],
)

# Faint text marks placed at those anchors.
label_marks = alt.Chart(continent_centers).mark_text(
    fontSize=12,
    fontWeight="lighter",
    opacity=0.35,
)
continent_labels = label_marks.encode(
    longitude="Longitude:Q",
    latitude="Latitude:Q",
    text="Continent:N",
)

# ======================================
# 10. Final full-world map
# ======================================
# Layers are drawn in the order listed: grid, choropleth, bubbles, bubble
# labels, continent labels. One equalEarth projection is applied to the whole
# layered chart.
# The title says "Top 12" to agree with the 12 rows kept for the bubble layer
# (it previously said "Top 10") and uses a properly encoded "CO₂".
final_map = (
    graticule + choro + bubbles + labels + continent_labels
).project(
    type="equalEarth"
).properties(
    width=1000,
    height=520,
    title="Top 12 Global Inequality vs CO₂ Emissions (Full-World View)"
).configure_view(
    stroke=None
).configure_legend(
    labelFontSize=11,
    titleFontSize=12
)

final_map


In [None]:
import altair as alt

# Geo-bubble view of the merged frame.
# Requires `df` from the previous cell (uses its Longitude, Latitude, CO2,
# GINI, Country_final and Continent columns).
# Note: this cell rebinds `world_topo`, `bubbles` and `final_map`, which the
# previous cell also defines.

# === METHOD 4: GEO-BUBBLE "FULL WORLD" MAP ===

# 1. The Background (The "World" Context)
# We draw the land in light gray so it doesn't compete with the data
world_topo = alt.topo_feature(
    "https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json",
    "countries"
)

background = alt.Chart(world_topo).mark_geoshape(
    fill='#f0f0f0',      # Very light gray fill
    stroke='white',      # White borders
    strokeWidth=0.5
).project(
    type='equalEarth'    # Projection type used for the background layer
)

# 2. The Data Layer (Bubbles)
# One circle per row of `df`, placed at the row's Latitude/Longitude
bubbles = alt.Chart(df).mark_circle(
    opacity=0.8,
    stroke='white',
    strokeWidth=0.5
).encode(
    longitude='Longitude:Q',
    latitude='Latitude:Q',

    # SIZE = CO2 Emissions
    size=alt.Size(
        'CO2:Q',
        scale=alt.Scale(range=[10, 1000]),
        title='CO2 Emissions',
        legend=alt.Legend(orient='bottom-right') # Move legend out of the way
    ),

    # COLOR = Inequality (GINI)
    color=alt.Color(
        'GINI:Q',
        scale=alt.Scale(scheme='magma'), # Sequential 'magma' color scheme
        title='Inequality (GINI)',
        legend=alt.Legend(orient='bottom-left')
    ),

    tooltip=[
        alt.Tooltip('Country_final', title='Country'),
        alt.Tooltip('GINI', format='.1f'),
        alt.Tooltip('CO2', format='.1f'),
        alt.Tooltip('Continent')
    ]
)

# 3. Add Interaction: Zoom & Pan
# Interval selection bound to the chart's scales.
# NOTE(review): scale-bound selections act on x/y scales; confirm this has any
# effect on a chart positioned by a geographic projection.
zoom = alt.selection_interval(bind='scales')

# 4. Combine
# Background first, bubbles on top; the parameter is attached to the layered chart.
final_map = (background + bubbles).add_params(
    zoom
).properties(
    width=800,
    height=500,
    title="Global Inequality vs CO2 (Geo-Bubble View)"
).configure_view(
    stroke=None
).configure_title(
    fontSize=16,
    anchor='start'
)

final_map

In [None]:
import pandas as pd
import altair as alt
import numpy as np

alt.data_transformers.disable_max_rows()

# ======================================
# 1. Load Data
# ======================================
try:
    gini_df = pd.read_csv("/content/GINI.csv")
    co2_df  = pd.read_csv("/content/owid-co2-data.csv")
    world_df = pd.read_csv("/kaggle/input/countries-of-the-world-2023/world-data-2023.csv")
    coords  = pd.read_csv("/kaggle/input/countries-by-continent/Countries by continents.csv")
except FileNotFoundError:
    print("ERROR: Please update file paths in Section 1 before running.")
    # Re-raise after the hint. Continuing would either fail later with a
    # NameError or silently reuse frames left over from an earlier cell.
    raise

# Helper for normalization
def norm(x):
    """Return ``str(x)`` stripped of surrounding whitespace and lowercased."""
    as_text = str(x)
    return as_text.strip().lower()

# ======================================
# 2. Process GINI (Long Format & Time Series Prep)
# ======================================
gini_df["country_norm"] = gini_df["Country Name"].apply(norm)

# Year columns are the ones whose header consists only of digits.
year_cols = [col for col in gini_df.columns if col.isdigit()]

# Wide -> long (country | year | GINI), with year as an integer and rows
# without a GINI value removed.
gini_long = (
    pd.melt(
        gini_df,
        id_vars=["country_norm", "Country Name"],
        value_vars=year_cols,
        var_name="year",
        value_name="GINI",
    )
    .assign(year=lambda frame: frame["year"].astype(int))
    .dropna(subset=["GINI"])
)

# ======================================
# 3. Process CO2 (Long Format)
# ======================================
co2_df["country_norm"] = co2_df["country"].apply(norm)

# Keep key, year and value; rename the value column to "CO2"; drop missing values.
co2_long = (
    co2_df[["country_norm", "year", "co2"]]
    .rename(columns={"co2": "CO2"})
    .dropna(subset=["CO2"])
)

# ======================================
# 4. The Crucial Step: Merging & Filling Time Gaps
# ======================================
# Timeframe covered by the year slider
min_year, max_year = 1990, 2020
years_range = pd.DataFrame({'year': range(min_year, max_year + 1)})

# Countries that have at least one GINI value
unique_countries = pd.DataFrame({'country_norm': gini_long['country_norm'].unique()})

# One display name per country, attached to every row further down
country_names = (
    gini_long[['country_norm', 'Country Name']]
    .drop_duplicates(subset='country_norm')
)

# Skeleton grid: every country for every year in the range
skeleton = unique_countries.merge(years_range, how='cross')

# Merge GINI onto the skeleton and forward-fill missing years per country
df_time = skeleton.merge(gini_long[['country_norm', 'year', 'GINI']],
                         on=['country_norm', 'year'], how='left')
df_time = df_time.sort_values(['country_norm', 'year'])
# Forward fill: a value is carried until the next measurement appears
df_time['GINI'] = df_time.groupby('country_norm')['GINI'].ffill()

# Attach the name per country rather than per measured row. Before this change
# the name came only from rows with an actual measurement, so the final dropna
# on 'Country Name' removed every forward-filled row and undid the gap filling.
df_time = df_time.merge(country_names, on='country_norm', how='left')

# Merge CO2 onto the time series
df_time = df_time.merge(co2_long, on=['country_norm', 'year'], how='left')

# Drop rows still missing a value (e.g. years before a country's first
# GINI measurement inside the range)
df_time = df_time.dropna(subset=['GINI', 'CO2', 'Country Name'])

# ======================================
# 5. Add Coordinates and Continents
# ======================================
world_df["country_norm"] = world_df["Country"].apply(norm)
world_coords = world_df[["country_norm", "Latitude", "Longitude"]].copy()

# Coerce coordinates to numbers (unparseable values become NaN).
for axis in ("Latitude", "Longitude"):
    world_coords[axis] = pd.to_numeric(world_coords[axis], errors="coerce")

# Longitudes above 180 are shifted down by 360.
beyond_180 = world_coords["Longitude"] > 180
world_coords.loc[beyond_180, "Longitude"] = world_coords.loc[beyond_180, "Longitude"] - 360

coords["country_norm"] = coords["Country"].apply(norm)
coords_slim = coords[["country_norm", "Continent"]]

# Inner join keeps only countries that have coordinates; the continent is
# then attached with a left join.
df_final = (
    df_time
    .merge(world_coords, on="country_norm", how="inner")
    .merge(coords_slim, on="country_norm", how="left")
)



# # ======================================
# # 6. VISUALIZATION: Animated Map with Continent Labels
# # ======================================

# # A. Create the Slider Widget mechanism
# slider = alt.binding_range(min=min_year, max=max_year, step=1, name='Select Year: ')
# select_year = alt.selection_point(fields=['year'], bind=slider, value=2000)

# # B. The Background Map (Static)
# world_topo = alt.topo_feature("https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json", "countries")
# background = alt.Chart(world_topo).mark_geoshape(
#     fill='#f0f0f0', stroke='white', strokeWidth=0.5
# ).project(type='equalEarth')

# # C. Continent Labels (Static Layer)
# # We define manual positions for clarity so they don't overlap with countries
# continent_data = pd.DataFrame({
#     'Continent': ['North America', 'South America', 'Europe', 'Africa', 'Asia', 'Oceania'],
#     'Latitude':  [50,              -15,             54,       5,        40,     -25],
#     'Longitude': [-100,            -60,             15,       20,       90,     140]
# })

# continent_labels = alt.Chart(continent_data).mark_text(
#     align='center',
#     baseline='middle',
#     fontSize=14,
#     fontWeight='bold',
#     opacity=0.3,   # Low opacity so they look like a watermark
#     color='gray'
# ).encode(
#     longitude='Longitude:Q',
#     latitude='Latitude:Q',
#     text='Continent:N'
# )

# # D. The Bubbles (Dynamic - Filtered by Slider)
# bubbles = alt.Chart(df_final).mark_circle(
#     stroke='white',
#     strokeWidth=0.5,
#     opacity=0.8
# ).encode(
#     longitude='Longitude:Q',
#     latitude='Latitude:Q',
#     # SIZE = CO2
#     size=alt.Size('CO2:Q', scale=alt.Scale(domain=[0, df_final['CO2'].max()], range=[10, 1500]), legend=None),
#     # COLOR = GINI
#     color=alt.Color('GINI:Q', scale=alt.Scale(scheme='magma', domain=[25, 65]), title='Inequality (GINI)')
#     tooltip=[
#         alt.Tooltip('Country Name', title='Country'),
#         alt.Tooltip('year', title='Year'),
#         alt.Tooltip('GINI', format='.1f'),
#         alt.Tooltip('CO2', format=',.0f', title='CO2 Emissions (Mt)')
#     ]
# ).add_params(
#     select_year
# ).transform_filter(
#     select_year
# )

# # E. Combine (Background + Labels + Bubbles)
# final_animated_map = (background + continent_labels + bubbles).properties(
#     width=800,
#     height=500,
#     title="Evolution of Inequality vs CO2 Emissions (1990-2020)"
# ).configure_view(
#     stroke=None
# ).configure_title(
#     fontSize=16
# )

# final_animated_map

In [None]:
# ... [Keep Sections 1-5 exactly the same] ...

# ======================================
# 6. VISUALIZATION: Enhanced Animated Map
# ======================================

# A. Year slider bound to a point selection on the 'year' field (starts at 2000)
slider = alt.binding_range(min=min_year, max=max_year, step=1, name='Select Year: ')
select_year = alt.selection_point(fields=['year'], bind=slider, value=2000)

# Projection type shared by the three base layers
proj_type = 'equalEarth'

# B.1 Ocean: a filled sphere drawn behind everything else
sphere = (
    alt.Chart(alt.sphere())
    .mark_geoshape(fill='#e3f2fd')
    .project(type=proj_type)
)

# B.2 Graticule: thin white grid lines
graticule = (
    alt.Chart(alt.graticule())
    .mark_geoshape(stroke='#ffffff', strokeWidth=0.3)
    .project(type=proj_type)
)

# B.3 Land: country shapes in gray with white borders
world_topo = alt.topo_feature("https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json", "countries")
background = (
    alt.Chart(world_topo)
    .mark_geoshape(fill='#e0e0e0', stroke='white', strokeWidth=0.5)
    .project(type=proj_type)
)

# C. Continent labels at hand-picked (latitude, longitude) anchors
continent_data = pd.DataFrame(
    [
        ('North America', 50, -100),
        ('South America', -15, -60),
        ('Europe', 54, 15),
        ('Africa', 5, 20),
        ('Asia', 40, 90),
        ('Oceania', -25, 140),
    ],
    columns=['Continent', 'Latitude', 'Longitude'],
)

continent_labels = (
    alt.Chart(continent_data)
    .mark_text(
        align='center',
        baseline='middle',
        fontSize=14,
        fontWeight='bold',
        opacity=0.4,
        color='#616161',
    )
    .encode(
        longitude='Longitude:Q',
        latitude='Latitude:Q',
        text='Continent:N',
    )
)

# D. Bubbles: size encodes CO2, color encodes GINI; only rows matching the
# slider's year are shown.
co2_size = alt.Size(
    'CO2:Q',
    scale=alt.Scale(domain=[0, df_final['CO2'].max()], range=[10, 1500]),
    legend=alt.Legend(title='CO2 Emissions (Mt)', orient='bottom-right', labelColor='gray', titleColor='gray'),
)
gini_color = alt.Color(
    'GINI:Q',
    scale=alt.Scale(scheme='magma', domain=[25, 65]),
    title='Inequality (GINI)',
)
hover_fields = [
    alt.Tooltip('Country Name', title='Country'),
    alt.Tooltip('year', title='Year'),
    alt.Tooltip('GINI', format='.1f'),
    alt.Tooltip('CO2', format=',.0f', title='CO2 Emissions (Mt)'),
]

bubbles = (
    alt.Chart(df_final)
    .mark_circle(stroke='white', strokeWidth=0.75, opacity=0.85)
    .encode(
        longitude='Longitude:Q',
        latitude='Latitude:Q',
        size=co2_size,
        color=gini_color,
        tooltip=hover_fields,
    )
    .add_params(select_year)
    .transform_filter(select_year)
)

# E. Stack the layers bottom to top: sphere, graticule, land, labels, bubbles
map_layers = [sphere, graticule, background, continent_labels, bubbles]
final_animated_map = (
    alt.layer(*map_layers)
    .properties(
        width=800,
        height=500,
        title="Evolution of Inequality vs CO2 Emissions (1990-2020)",
    )
    .configure_view(stroke=None)
    .configure_title(fontSize=16, anchor='start')
    .configure_legend(gradientThickness=15, labelFontSize=11, titleFontSize=12)
)

final_animated_map

In [None]:
# This cell used to repeat the previous cell's "Enhanced Animated Map" code
# verbatim: the same slider, layers and chart were rebuilt under the same
# names. The duplicate definitions are dropped; the chart built by the
# previous cell is displayed again instead.
final_animated_map



---
update 2


# final viz

## Processed data
This dataset contains the merged GINI (inequality) and CO2 emissions data, joined with geographic coordinates for a set of major countries, ready for visualization.


---


Dataset Columns:

country_norm: Normalized country name key.

year: Year of observation (1990-2020).

GINI: The GINI inequality index (Forward-filled to handle gaps).

Country Name: Standardized country label.

CO2: Annual CO2 emissions in Million Tonnes.

Latitude / Longitude: Geographic center for mapping.


---



In [None]:
import pandas as pd
import altair as alt

# 1. Configuration
# ----------------
# Colors for the map layers and the color scheme used for the bubbles.
ocean_color = "#e6f2ff"    # fill of the sphere (ocean) layer
land_color = "#f0f0f0"     # fill of the country shapes
bubble_scheme = "magma"    # scheme name passed to the GINI color scale

# Lift Altair's default row limit for data embedded in charts.
alt.data_transformers.disable_max_rows()

# 2. Load Data (Robust Load)
# --------------------------
# Both CSVs are read from the working directory. If either is missing, a hint
# is printed and both frames are set to empty so the processing step below is
# skipped by its emptiness check.
try:
    gini_df = pd.read_csv("GINI.csv")
    co2_df = pd.read_csv("owid-co2-data.csv")
except FileNotFoundError:
    print("ERROR: Could not find 'GINI.csv' or 'owid-co2-data.csv'.")
    print("Please ensure these files are in the same folder as this script.")
    gini_df = pd.DataFrame()
    co2_df = pd.DataFrame()
else:
    print("Files loaded successfully.")

# 3. Process Data
# ---------------
# Runs only when both input frames were loaded; otherwise a message is printed.
if not gini_df.empty and not co2_df.empty:
    # Helper to normalize names
    def norm(x):
        """Return ``str(x)`` stripped of surrounding whitespace and lowercased."""
        return str(x).strip().lower()

    # A. Process GINI: wide -> long (country | year | GINI), integer years,
    # rows without a value removed.
    gini_df["country_norm"] = gini_df["Country Name"].apply(norm)
    year_cols = [c for c in gini_df.columns if c.isdigit()]
    gini_long = gini_df.melt(
        id_vars=["country_norm", "Country Name"],
        value_vars=year_cols,
        var_name="year",
        value_name="GINI"
    )
    gini_long['year'] = pd.to_numeric(gini_long['year'], errors='coerce')
    gini_long = gini_long.dropna(subset=['GINI', 'year'])
    gini_long['year'] = gini_long['year'].astype(int)

    # B. Process CO2
    co2_df["country_norm"] = co2_df["country"].apply(norm)
    co2_long = co2_df[['country_norm', 'year', 'co2']].rename(columns={'co2': 'CO2'})
    co2_long = co2_long.dropna(subset=['CO2'])

    # C. Merge and Time-Fill
    min_year, max_year = 1990, 2020
    years_range = pd.DataFrame({'year': range(min_year, max_year + 1)})

    # All country keys that appear in either source
    unique_countries = pd.DataFrame({'country_norm': pd.concat([gini_long['country_norm'], co2_long['country_norm']]).unique()})

    # Create skeleton (Country x Year)
    skeleton = unique_countries.merge(years_range, how='cross')

    # Merge GINI (Left Join + Forward Fill per country)
    df_time = skeleton.merge(gini_long[['country_norm', 'year', 'GINI', 'Country Name']],
                             on=['country_norm', 'year'], how='left')
    df_time = df_time.sort_values(['country_norm', 'year'])
    df_time['GINI'] = df_time.groupby('country_norm')['GINI'].ffill()

    # Fill missing Country Names inside each country group. Both directions run
    # per group; the earlier `groupby(...).ffill().bfill()` applied the
    # back-fill to the whole column, which could copy the next country's name
    # into the previous country's rows.
    df_time['Country Name'] = (
        df_time.groupby('country_norm')['Country Name']
               .transform(lambda names: names.ffill().bfill())
    )

    # Merge CO2
    df_time = df_time.merge(co2_long, on=['country_norm', 'year'], how='left')

    # Keep only rows with valid data for both
    df_final = df_time.dropna(subset=['GINI', 'CO2'])

    # 4. Add Coordinates (Fallback Dictionary)
    # ----------------------------------------
    # Hardcoded [latitude, longitude] per normalized country name. Only the
    # countries listed here survive the inner join below.
    coords_fallback = {
        'united states': [37.09, -95.71], 'china': [35.86, 104.19], 'india': [20.59, 78.96],
        'brazil': [-14.23, -51.92], 'russia': [61.52, 105.31], 'germany': [51.16, 10.45],
        'united kingdom': [55.37, -3.43], 'france': [46.22, 2.21], 'japan': [36.20, 138.25],
        'canada': [56.13, -106.34], 'australia': [-25.27, 133.77], 'south africa': [-30.55, 22.93],
        'nigeria': [9.08, 8.67], 'egypt': [26.82, 30.80], 'mexico': [23.63, -102.55],
        'indonesia': [-0.78, 113.92], 'turkey': [38.96, 35.24], 'saudi arabia': [23.88, 45.07],
        'argentina': [-38.41, -63.61], 'colombia': [4.57, -74.29], 'spain': [40.46, -3.74],
        'italy': [41.87, 12.56], 'iran': [32.42, 53.68], 'pakistan': [30.37, 69.34],
        'thailand': [15.87, 100.99], 'vietnam': [14.05, 108.27], 'philippines': [12.87, 121.77],
        'bangladesh': [23.68, 90.35], 'ukraine': [48.37, 31.16], 'poland': [51.91, 19.14],
        'sweden': [60.12, 18.64], 'norway': [60.47, 8.46], 'finland': [61.92, 25.74],
        'chile': [-35.67, -71.54], 'peru': [-9.19, -75.01], 'kenya': [-0.02, 37.90],
        'ethiopia': [9.14, 40.48], 'angola': [-11.20, 17.87]
    }
    coords_df = pd.DataFrame.from_dict(coords_fallback, orient='index', columns=['Latitude', 'Longitude'])
    coords_df['country_norm'] = coords_df.index

    # Final Merge
    df_final = df_final.merge(coords_df, on="country_norm", how="inner")

    # Save the processed table next to the notebook
    df_final.to_csv('processed_data.csv', index=False)
    print(f"Success! Processed data saved to 'processed_data.csv' with {len(df_final)} rows.")

    # 5. Visualization (Clean Theme & Altair 5 Syntax)
    # ------------------------------------------------
    slider = alt.binding_range(min=min_year, max=max_year, step=1, name='Select Year: ')

    # Altair 5: point selection on 'year', bound to the slider, starting at 2000
    select_year = alt.selection_point(
        fields=['year'],
        bind=slider,
        value=[{'year': 2000}]
    )

    # Layers
    sphere = alt.Chart(alt.sphere()).mark_geoshape(fill=ocean_color).project(type='equalEarth')

    graticule = alt.Chart(alt.graticule()).mark_geoshape(
        stroke='#ffffff', strokeWidth=0.5, opacity=0.5
    ).project(type='equalEarth')

    world_topo = alt.topo_feature("https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json", "countries")
    background = alt.Chart(world_topo).mark_geoshape(
        fill=land_color, stroke='#d0d0d0', strokeWidth=0.5
    ).project(type='equalEarth')

    # Bubbles: size encodes CO2, color encodes GINI; filtered to the slider's year
    bubbles = alt.Chart(df_final).mark_circle(
        stroke='white', strokeWidth=0.5, opacity=0.85
    ).encode(
        longitude='Longitude:Q',
        latitude='Latitude:Q',
        size=alt.Size('CO2:Q',
                      scale=alt.Scale(domain=[0, df_final['CO2'].max()], range=[20, 1500]),
                      legend=alt.Legend(title='CO2 Emissions (Mt)', orient='right')),
        color=alt.Color('GINI:Q',
                        scale=alt.Scale(scheme=bubble_scheme, domain=[25, 65]),
                        title='Inequality (GINI)',
                        legend=alt.Legend(orient='right')),
        tooltip=['Country Name', 'year', 'GINI', 'CO2']
    ).add_params(  # Altair 5 syntax
        select_year
    ).transform_filter(
        select_year
    )

    # `.configure(...)` replaces the chart's whole config object, so it has to
    # come BEFORE `.configure_view(...)`. In the previous order the
    # `stroke=None` view setting was discarded.
    final_chart = alt.layer(
        sphere, graticule, background, bubbles
    ).properties(
        width=800, height=500,
        title="Global Inequality vs CO2 Emissions"
    ).configure(
        background='white'
    ).configure_view(
        stroke=None
    )

    # Display and Save Chart
    final_chart.save('final_chart.json')
    final_chart.display()

else:
    print("Skipping chart generation because data could not be loaded.")

Files loaded successfully.
Success! Processed data saved to 'processed_data.csv' with 924 rows.


In [None]:
import pandas as pd
import altair as alt

# 1. Load Data
# ----------------
# Read the CO2 table from the working directory. When the file is absent we
# fall back to an empty frame, which the processing step checks via `.empty`.
try:
    co2_df = pd.read_csv("owid-co2-data.csv")
except FileNotFoundError:
    co2_df = pd.DataFrame()
    print("Error: 'owid-co2-data.csv' not found. Please ensure it is in the same directory.")
else:
    print("Data loaded successfully.")

# 2. Process Data for Choropleth
# ------------------------------
if not co2_df.empty:
    # Snapshot year shown on the map (also interpolated into the chart title).
    target_year = 2020
    df_map = co2_df[co2_df['year'] == target_year].copy()

    # Keep the identifying columns plus population.
    df_map = df_map[['country', 'population', 'iso_code']]

    # Rows without a population value cannot be coloured, so drop them.
    df_map = df_map.dropna(subset=['population'])

    # --- CRITICAL STEP: STANDARDISING NAMES ---
    # The lookup below joins on `properties.name` of each world-atlas feature.
    # world-atlas takes those names from Natural Earth, which uses short labels
    # ("Russia", "Iran", "Vietnam", "South Korea", "Tanzania", "Taiwan", ...)
    # plus a handful of abbreviations ("Dem. Rep. Congo", "Dominican Rep.", ...).
    # Only names that differ between our data and the map are listed here.
    # Renaming short names to formal UN-style names (e.g. "Russian Federation")
    # would make those countries fail the join and lose their colour.
    name_corrections = {
        'United States': 'United States of America',
        'Democratic Republic of Congo': 'Dem. Rep. Congo',
        'Dominican Republic': 'Dominican Rep.',
        'Central African Republic': 'Central African Rep.',
        'South Sudan': 'S. Sudan',
        'Bosnia and Herzegovina': 'Bosnia and Herz.',
        'Equatorial Guinea': 'Eq. Guinea',
        'Western Sahara': 'W. Sahara',
        'Solomon Islands': 'Solomon Is.',
        "Cote d'Ivoire": "Côte d'Ivoire",
    }
    df_map['country_map_name'] = df_map['country'].replace(name_corrections)

    # 3. Create Choropleth Map
    # ------------------------
    # Source of map shapes (TopoJSON)
    world_topo = alt.topo_feature("https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json", "countries")

    # The Map Chart
    choropleth = alt.Chart(world_topo).mark_geoshape(
        stroke='white',
        strokeWidth=0.5
    ).transform_lookup(
        # Match 'properties.name' in the map file to 'country_map_name' in our data
        lookup='properties.name',
        from_=alt.LookupData(df_map, key='country_map_name', fields=['population', 'country'])
    ).encode(
        # Color by Population using the Green-Brown scheme
        color=alt.Color(
            'population:Q',
            # Log scale handles the huge population differences (China/India vs others)
            scale=alt.Scale(scheme='brownbluegreen', type='log'),
            title='Population (Log Scale)'
        ),
        tooltip=[
            alt.Tooltip('properties.name:N', title='Region'),
            alt.Tooltip('population:Q', format=',.0f', title='Population')
        ]
    ).project(
        type='equalEarth'
    ).properties(
        width=800,
        height=500,
        title=f"Global Population Distribution ({target_year})"
    ).configure_view(
        stroke=None
    ).configure_legend(
        orient='bottom',
        gradientLength=400,
        titleFontSize=12
    )

    # Save and Display
    choropleth.save('population_choropleth.json')
    choropleth.display()

else:
    print("Could not process data: Dataframe is empty.")

Data loaded successfully.


# Inequality vs CO2 visualization

In [None]:
import pandas as pd
import altair as alt

# 1. Configuration
# ----------------
# Base map fills: ocean sphere and land without data.
ocean_color, land_color = '#e6f2ff', '#f0f0f0'

# Two distinct color schemes to avoid confusion:
#   bubble_scheme -> GINI (Purple to Yellow)
#   map_scheme    -> Population (Brown to Green)
bubble_scheme, map_scheme = 'magma', 'brownbluegreen'

# Lift Altair's row limit so the merged frame can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# 2. Load Data
# --------------------------
# Both CSVs are required. If either one is missing, both names are bound to
# empty frames so the processing step can detect the failure and skip.
try:
    gini_df = pd.read_csv("GINI.csv")
    co2_df = pd.read_csv("owid-co2-data.csv")
except FileNotFoundError:
    print("ERROR: Could not find 'GINI.csv' or 'owid-co2-data.csv'.")
    gini_df, co2_df = pd.DataFrame(), pd.DataFrame()
else:
    print("Files loaded successfully.")

# 3. Process Data
# ---------------
if not gini_df.empty and not co2_df.empty:

    # --- Helper: Name Normalization ---
    def norm(x):
        """Return ``x`` as a stripped, lower-cased string (the join key)."""
        return str(x).strip().lower()

    # --- A. Process GINI (Time Series) ---
    # Reshape the wide table (one column per year) into long form.
    gini_df["country_norm"] = gini_df["Country Name"].apply(norm)
    year_cols = [c for c in gini_df.columns if c.isdigit()]
    gini_long = gini_df.melt(
        id_vars=["country_norm", "Country Name"],
        value_vars=year_cols,
        var_name="year",
        value_name="GINI"
    )
    gini_long['year'] = pd.to_numeric(gini_long['year'], errors='coerce')
    gini_long = gini_long.dropna(subset=['GINI', 'year'])
    gini_long['year'] = gini_long['year'].astype(int)

    # --- B. Process CO2 & Population (Time Series) ---
    # NOTE(review): the two files are joined purely on the lower-cased country
    # name; spellings that differ between them will not match - confirm coverage.
    co2_df["country_norm"] = co2_df["country"].apply(norm)

    # Extract columns needed for Bubbles (CO2) and Map (Population)
    # We rename 'co2' to 'CO2' for the chart labels
    co2_clean = co2_df[['country_norm', 'country', 'year', 'co2', 'population']].rename(columns={'co2': 'CO2'})
    co2_clean = co2_clean.dropna(subset=['CO2'])

    # --- C. Prepare Map Data (Static Year 2020 for Context) ---
    # We use a static year for the background map to ensure performance and stability
    target_year = 2020
    df_map = co2_df[co2_df['year'] == target_year].copy()
    df_map = df_map[['country', 'population']].dropna(subset=['population'])

    # Name corrections for the TopoJSON map matching.
    # world-atlas feature names come from Natural Earth, which uses short labels
    # ("Russia", "Iran", "Vietnam", "South Korea", "Tanzania", ...) plus a few
    # abbreviations. Only names that differ from our data are listed; mapping
    # short names to formal UN-style names would break the join for them.
    name_corrections = {
        'United States': 'United States of America',
        'Democratic Republic of Congo': 'Dem. Rep. Congo',
        'Dominican Republic': 'Dominican Rep.',
        'Central African Republic': 'Central African Rep.',
        'South Sudan': 'S. Sudan',
        'Bosnia and Herzegovina': 'Bosnia and Herz.',
        'Equatorial Guinea': 'Eq. Guinea',
        'Western Sahara': 'W. Sahara',
        'Solomon Islands': 'Solomon Is.',
        "Cote d'Ivoire": "Côte d'Ivoire",
    }
    df_map['country_map_name'] = df_map['country'].replace(name_corrections)

    # --- D. Prepare Bubble Data (Merged Time Series) ---
    min_year, max_year = 1990, 2020
    years_range = pd.DataFrame({'year': range(min_year, max_year + 1)})

    # Unique countries list
    unique_countries = pd.DataFrame({'country_norm': pd.concat([gini_long['country_norm'], co2_clean['country_norm']]).unique()})

    # Skeleton (Country x Year)
    skeleton = unique_countries.merge(years_range, how='cross')

    # Merge GINI (Forward Fill): carry the last observed value forward per country.
    df_time = skeleton.merge(gini_long[['country_norm', 'year', 'GINI', 'Country Name']],
                             on=['country_norm', 'year'], how='left')
    df_time = df_time.sort_values(['country_norm', 'year'])
    df_time['GINI'] = df_time.groupby('country_norm')['GINI'].ffill()

    # Fill Country Names. Both fills are done per country so that a country
    # with no name of its own never inherits the name of the next country.
    df_time['Country Name'] = df_time.groupby('country_norm')['Country Name'].ffill()
    df_time['Country Name'] = df_time.groupby('country_norm')['Country Name'].bfill()

    # Merge CO2
    df_time = df_time.merge(co2_clean[['country_norm', 'year', 'CO2']], on=['country_norm', 'year'], how='left')

    # Keep valid rows
    df_bubbles = df_time.dropna(subset=['GINI', 'CO2'])

    # --- E. Add Coordinates to Bubbles ---
    # Hard-coded [latitude, longitude] per normalized country name. The inner
    # merge below means only countries listed here get a bubble.
    coords_fallback = {
        'united states': [37.09, -95.71], 'china': [35.86, 104.19], 'india': [20.59, 78.96],
        'brazil': [-14.23, -51.92], 'russia': [61.52, 105.31], 'germany': [51.16, 10.45],
        'united kingdom': [55.37, -3.43], 'france': [46.22, 2.21], 'japan': [36.20, 138.25],
        'canada': [56.13, -106.34], 'australia': [-25.27, 133.77], 'south africa': [-30.55, 22.93],
        'nigeria': [9.08, 8.67], 'egypt': [26.82, 30.80], 'mexico': [23.63, -102.55],
        'indonesia': [-0.78, 113.92], 'turkey': [38.96, 35.24], 'saudi arabia': [23.88, 45.07],
        'argentina': [-38.41, -63.61], 'colombia': [4.57, -74.29], 'spain': [40.46, -3.74],
        'italy': [41.87, 12.56], 'iran': [32.42, 53.68], 'pakistan': [30.37, 69.34],
        'thailand': [15.87, 100.99], 'vietnam': [14.05, 108.27], 'philippines': [12.87, 121.77],
        'bangladesh': [23.68, 90.35], 'ukraine': [48.37, 31.16], 'poland': [51.91, 19.14],
        'sweden': [60.12, 18.64], 'norway': [60.47, 8.46], 'finland': [61.92, 25.74],
        'chile': [-35.67, -71.54], 'peru': [-9.19, -75.01], 'kenya': [-0.02, 37.90],
        'ethiopia': [9.14, 40.48], 'angola': [-11.20, 17.87]
    }
    coords_df = pd.DataFrame.from_dict(coords_fallback, orient='index', columns=['Latitude', 'Longitude'])
    coords_df['country_norm'] = coords_df.index

    df_bubbles = df_bubbles.merge(coords_df, on="country_norm", how="inner")

    # 4. Visualization
    # ----------------

    # UI Elements: a slider-bound point selection on `year`, starting at 2000.
    slider = alt.binding_range(min=min_year, max=max_year, step=1, name='Select Year: ')
    select_year = alt.selection_point(fields=['year'], bind=slider, value=[{'year': 2000}])

    # Geo Sources
    world_topo = alt.topo_feature("https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json", "countries")

    # Layer 1: Base Sphere (Ocean)
    sphere = alt.Chart(alt.sphere()).mark_geoshape(fill=ocean_color).project(type='equalEarth')

    # Layer 2: Graticule
    graticule = alt.Chart(alt.graticule()).mark_geoshape(
        stroke='#ffffff', strokeWidth=0.5, opacity=0.5
    ).project(type='equalEarth')

    # Layer 3: Land Background (Gray for missing data)
    land_base = alt.Chart(world_topo).mark_geoshape(
        fill=land_color, stroke='#d0d0d0', strokeWidth=0.5
    ).project(type='equalEarth')

    # Layer 4: Choropleth (Population 2020)
    # Note: We use the static df_map here.
    choropleth = alt.Chart(world_topo).mark_geoshape(
        stroke='white', strokeWidth=0.2
    ).transform_lookup(
        lookup='properties.name',
        from_=alt.LookupData(df_map, key='country_map_name', fields=['population', 'country'])
    ).encode(
        color=alt.Color('population:Q',
                        scale=alt.Scale(scheme=map_scheme, type='log'),
                        title='Population (2020)',
                        legend=alt.Legend(orient='bottom', gradientLength=300)),
        tooltip=[alt.Tooltip('properties.name:N', title='Region'),
                 alt.Tooltip('population:Q', format=',.0f', title='Population')]
    ).project(type='equalEarth')

    # Layer 5: Bubbles (Dynamic GINI vs CO2)
    bubbles = alt.Chart(df_bubbles).mark_circle(
        stroke='white', strokeWidth=0.5, opacity=0.85
    ).encode(
        longitude='Longitude:Q',
        latitude='Latitude:Q',
        size=alt.Size('CO2:Q',
                      scale=alt.Scale(domain=[0, df_bubbles['CO2'].max()], range=[20, 1500]),
                      legend=alt.Legend(title='CO2 Emissions (Mt)', orient='right')),
        color=alt.Color('GINI:Q',
                        scale=alt.Scale(scheme=bubble_scheme, domain=[25, 65]),
                        title='Inequality (GINI)',
                        legend=alt.Legend(orient='right')),
        tooltip=['Country Name', 'year', 'GINI', 'CO2']
    ).add_params(
        select_year
    ).transform_filter(
        select_year
    )

    # Combine All
    # We resolve scale color to 'independent' so Population and GINI get separate legends.
    # `configure(...)` replaces the whole config object, so it must come BEFORE
    # `configure_view(...)`; in the opposite order the view stroke setting is lost.
    final_chart = alt.layer(
        sphere, graticule, land_base, choropleth, bubbles
    ).resolve_scale(
        color='independent'
    ).properties(
        width=900, height=550,
        title="Inequality (GINI) vs CO2 Emissions over Population Context"
    ).configure(
        background='white'
    ).configure_view(
        stroke=None
    )

    # Save and Display
    final_chart.save('combined_visualization.json')
    final_chart.display()
    print("Chart generated and saved to 'combined_visualization.json'.")

else:
    print("Skipping chart generation due to missing data.")

Files loaded successfully.


Chart generated and saved to 'combined_visualization.json'.


In [None]:
import pandas as pd
import altair as alt

# 1. Configuration
# ----------------
# Base map fills: ocean sphere and land without data.
ocean_color, land_color = '#e6f2ff', '#f0f0f0'

# bubble_scheme -> GINI (Purple to Yellow)
# map_scheme    -> Population (Brown to Green)
bubble_scheme, map_scheme = 'magma', 'brownbluegreen'

# Lift Altair's row limit so the full country-by-year frames can be embedded.
alt.data_transformers.disable_max_rows()

# 2. Load Data
# ----------------
# Both CSVs are required. If either one is missing, both names are bound to
# empty frames so the processing step can detect the failure and skip.
try:
    gini_df = pd.read_csv("GINI.csv")
    co2_df = pd.read_csv("owid-co2-data.csv")
except FileNotFoundError:
    print("Error: Files not found. Please ensure 'GINI.csv' and 'owid-co2-data.csv' are present.")
    gini_df, co2_df = pd.DataFrame(), pd.DataFrame()
else:
    print("Files loaded successfully.")

# 3. Process Data
# ----------------
if not gini_df.empty and not co2_df.empty:

    # --- Helper ---
    def norm(x):
        """Return ``x`` as a stripped, lower-cased string (the join key)."""
        return str(x).strip().lower()

    # --- A. Process GINI (Time Series) ---
    # Reshape the wide table (one column per year) into long form.
    gini_df["country_norm"] = gini_df["Country Name"].apply(norm)
    year_cols = [c for c in gini_df.columns if c.isdigit()]
    gini_long = gini_df.melt(
        id_vars=["country_norm", "Country Name"],
        value_vars=year_cols,
        var_name="year",
        value_name="GINI"
    )
    gini_long['year'] = pd.to_numeric(gini_long['year'], errors='coerce')
    gini_long = gini_long.dropna(subset=['GINI', 'year'])
    gini_long['year'] = gini_long['year'].astype(int)

    # --- B. Process CO2 & Population (Time Series) ---
    # NOTE(review): the two files are joined purely on the lower-cased country
    # name; spellings that differ between them will not match - confirm coverage.
    co2_df["country_norm"] = co2_df["country"].apply(norm)

    # Map Name Corrections (for TopoJSON matching).
    # world-atlas feature names come from Natural Earth, which uses short labels
    # ("Russia", "Iran", "Vietnam", "South Korea", "Tanzania", ...) plus a few
    # abbreviations. Only names that differ from our data are listed; mapping
    # short names to formal UN-style names would break the join for them.
    name_corrections = {
        'United States': 'United States of America',
        'Democratic Republic of Congo': 'Dem. Rep. Congo',
        'Dominican Republic': 'Dominican Rep.',
        'Central African Republic': 'Central African Rep.',
        'South Sudan': 'S. Sudan',
        'Bosnia and Herzegovina': 'Bosnia and Herz.',
        'Equatorial Guinea': 'Eq. Guinea',
        'Western Sahara': 'W. Sahara',
        'Solomon Islands': 'Solomon Is.',
        "Cote d'Ivoire": "Côte d'Ivoire",
    }
    co2_df['country_map_name'] = co2_df['country'].replace(name_corrections)

    # --- C. Create Unified Time Structure ---
    min_year, max_year = 1990, 2020
    years_range = pd.DataFrame({'year': range(min_year, max_year + 1)})

    # Get all unique countries from both datasets
    unique_countries = pd.DataFrame({'country_norm': pd.concat([gini_long['country_norm'], co2_df['country_norm']]).unique()})

    # Create Skeleton (Country x Year)
    skeleton = unique_countries.merge(years_range, how='cross')

    # Merge GINI (Forward Fill to handle missing years)
    df_merged = skeleton.merge(gini_long[['country_norm', 'year', 'GINI', 'Country Name']],
                               on=['country_norm', 'year'], how='left')
    df_merged = df_merged.sort_values(['country_norm', 'year'])
    df_merged['GINI'] = df_merged.groupby('country_norm')['GINI'].ffill()

    # Fill Country Names. Both fills are done per country so that a country
    # with no name of its own never inherits the name of the next country.
    df_merged['Country Name'] = df_merged.groupby('country_norm')['Country Name'].ffill()
    df_merged['Country Name'] = df_merged.groupby('country_norm')['Country Name'].bfill()

    # Merge CO2 and Population
    df_merged = df_merged.merge(co2_df[['country_norm', 'year', 'co2', 'population', 'country_map_name']],
                                on=['country_norm', 'year'], how='left')

    # --- D. Split into Specialized Datasets ---

    # Dataset 1: Map Data (Needs Population, Year, Map Name)
    # We drop rows where population is missing to avoid blank/error polygons,
    # but the background layer will handle true missing data.
    df_map_time = df_merged[['country_map_name', 'year', 'population']].dropna(subset=['population', 'country_map_name'])

    # Dataset 2: Bubble Data (Needs CO2, GINI, Lat/Lon)
    df_bubbles = df_merged.dropna(subset=['co2', 'GINI']).rename(columns={'co2': 'CO2'})

    # Add Coordinates to Bubbles
    # Hard-coded [latitude, longitude] per normalized country name. The inner
    # merge below means only countries listed here get a bubble.
    coords_fallback = {
        'united states': [37.09, -95.71], 'china': [35.86, 104.19], 'india': [20.59, 78.96],
        'brazil': [-14.23, -51.92], 'russia': [61.52, 105.31], 'germany': [51.16, 10.45],
        'united kingdom': [55.37, -3.43], 'france': [46.22, 2.21], 'japan': [36.20, 138.25],
        'canada': [56.13, -106.34], 'australia': [-25.27, 133.77], 'south africa': [-30.55, 22.93],
        'nigeria': [9.08, 8.67], 'egypt': [26.82, 30.80], 'mexico': [23.63, -102.55],
        'indonesia': [-0.78, 113.92], 'turkey': [38.96, 35.24], 'saudi arabia': [23.88, 45.07],
        'argentina': [-38.41, -63.61], 'colombia': [4.57, -74.29], 'spain': [40.46, -3.74],
        'italy': [41.87, 12.56], 'iran': [32.42, 53.68], 'pakistan': [30.37, 69.34],
        'thailand': [15.87, 100.99], 'vietnam': [14.05, 108.27], 'philippines': [12.87, 121.77],
        'bangladesh': [23.68, 90.35], 'ukraine': [48.37, 31.16], 'poland': [51.91, 19.14],
        'sweden': [60.12, 18.64], 'norway': [60.47, 8.46], 'finland': [61.92, 25.74],
        'chile': [-35.67, -71.54], 'peru': [-9.19, -75.01], 'kenya': [-0.02, 37.90],
        'ethiopia': [9.14, 40.48], 'angola': [-11.20, 17.87]
    }
    coords_df = pd.DataFrame.from_dict(coords_fallback, orient='index', columns=['Latitude', 'Longitude'])
    coords_df['country_norm'] = coords_df.index
    df_bubbles = df_bubbles.merge(coords_df, on="country_norm", how="inner")

    # 4. Visualization
    # ----------------
    slider = alt.binding_range(min=min_year, max=max_year, step=1, name='Select Year: ')

    # Create the selection: a slider-bound point selection on `year`, starting at 2000.
    select_year = alt.selection_point(fields=['year'], bind=slider, value=[{'year': 2000}])

    # Geo Sources
    world_topo = alt.topo_feature("https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json", "countries")

    # Layer 1: Ocean
    sphere = alt.Chart(alt.sphere()).mark_geoshape(fill=ocean_color).project(type='equalEarth')

    # Layer 2: Graticule
    graticule = alt.Chart(alt.graticule()).mark_geoshape(
        stroke='#ffffff', strokeWidth=0.5, opacity=0.5
    ).project(type='equalEarth')

    # Layer 3: Static Land Base (Gray) - Shows when data is missing
    land_base = alt.Chart(world_topo).mark_geoshape(
        fill=land_color, stroke='#d0d0d0', strokeWidth=0.5
    ).project(type='equalEarth')

    # Layer 4: Dynamic Choropleth (Population)
    # The year filter runs first so the geometry lookup only touches the rows
    # for the selected year; the lookup then pulls each country's shape INTO
    # the remaining rows.
    map_layer = alt.Chart(df_map_time).mark_geoshape(
        stroke='white', strokeWidth=0.2
    ).transform_filter(
        select_year
    ).transform_lookup(
        lookup='country_map_name',
        from_=alt.LookupData(world_topo, key='properties.name', fields=['type', 'geometry'])
    ).encode(
        color=alt.Color('population:Q',
                        scale=alt.Scale(scheme=map_scheme, type='log'),
                        title='Population',
                        legend=alt.Legend(orient='bottom', gradientLength=300)),
        tooltip=['country_map_name', 'population', 'year']
    ).project(type='equalEarth')

    # Layer 5: Dynamic Bubbles (CO2 & GINI)
    bubble_layer = alt.Chart(df_bubbles).mark_circle(
        stroke='white', strokeWidth=0.5, opacity=0.85
    ).transform_filter(
        select_year
    ).encode(
        longitude='Longitude:Q',
        latitude='Latitude:Q',
        size=alt.Size('CO2:Q',
                      scale=alt.Scale(domain=[0, df_bubbles['CO2'].max()], range=[20, 1500]),
                      legend=alt.Legend(title='CO2 Emissions (Mt)', orient='right')),
        color=alt.Color('GINI:Q',
                        scale=alt.Scale(scheme=bubble_scheme, domain=[25, 65]),
                        title='Inequality (GINI)',
                        legend=alt.Legend(orient='right')),
        tooltip=['Country Name', 'year', 'GINI', 'CO2']
    )

    # Combine All
    # `configure(...)` replaces the whole config object, so it must come BEFORE
    # `configure_view(...)`; in the opposite order the view stroke setting is lost.
    final_chart = alt.layer(
        sphere, graticule, land_base, map_layer, bubble_layer
    ).resolve_scale(
        color='independent' # Keep Population and GINI legends separate
    ).add_params(
        select_year
    ).properties(
        width=900, height=550,
        title="Global Inequality vs CO2 (Bubbles) & Population (Map)"
    ).configure(
        background='white'
    ).configure_view(
        stroke=None
    )

    # Save and Display
    final_chart.save('combined_dynamic.json')
    final_chart.display()
    print("Success! Dynamic map and bubble chart generated.")

else:
    print("Skipping due to missing data.")

Files loaded successfully.


Success! Dynamic map and bubble chart generated.
