In [34]:
# Ensure the 'folium' library is installed before running this script.
import pandas as pd
import geopandas as gpd
import numpy as np
import folium
import ast
import difflib
import re

# Auto-detect geo name column
geo_name_candidates = ['NAME', 'ADMIN', 'Country', 'country', 'ADMIN_NAME', 'NAME_EN', 'SOVEREIGNT']
for candidate in geo_name_candidates:
    if candidate in geo_df.columns:
        geo_name_col = candidate
        break
else:
    geo_name_col = geo_df.columns[0]

# Auto-detect country name column in country_df
country_name_candidates = ['Country', 'country', 'country_name', 'name']
for c in country_name_candidates:
    if c in country_df.columns:
        country_name_col = c
        break
else:
    country_name_col = country_df.columns[0]

# Create canonical original-name column and a stable alias 'Country'
# Avoid evaluating defaults that access missing columns (pd.DataFrame.get evaluates default eagerly),
# so test column existence first to prevent KeyError.
if '_orig_country_for_debug' in country_df.columns:
    country_df['_orig_country_for_debug'] = country_df['_orig_country_for_debug'].astype(str)
else:
    if country_name_col in country_df.columns:
        country_df['_orig_country_for_debug'] = country_df[country_name_col].astype(str)
    else:
        # fallback to the first column if the expected name column is missing
        country_df['_orig_country_for_debug'] = country_df.iloc[:, 0].astype(str)

# Ensure there is a stable 'Country' column (create it from the canonical column if missing)
if 'Country' in country_df.columns:
    country_df['Country'] = country_df['Country'].astype(str)
else:
    country_df['Country'] = country_df['_orig_country_for_debug'].astype(str)

print(f"Using geo_df name column: {geo_name_col}")
print(f"Using country_df country column: {country_name_col} (aliased to 'Country')")

# Ensure geo_df is in WGS84
if getattr(geo_df, "crs", None) is not None and geo_df.crs.to_string() != "EPSG:4326":
    geo_df = geo_df.to_crs(epsg=4326)

# Normalizer
def normalize_name(s):
    s = str(s).lower().strip()
    s = re.sub(r"[\.\'\",&()]", " ", s)
    s = re.sub(r"[-/]", " ", s)
    s = re.sub(r"\s+", " ", s)
    s = s.replace("the ", "")
    return s.strip()

# Prepare normalized keys
country_df['Country_norm'] = country_df['_orig_country_for_debug'].map(normalize_name)
geo_df[geo_name_col] = geo_df[geo_name_col].astype(str)
geo_df['geo_norm'] = geo_df[geo_name_col].map(normalize_name)

# Attempt normalized exact merge
merged = country_df.merge(
    geo_df[[geo_name_col, 'geo_norm', 'geometry']],
    left_on='Country_norm',
    right_on='geo_norm',
    how='left'
)

# Report missing using the canonical '_orig_country_for_debug' to avoid KeyError
missing = merged[merged['geometry'].isna()]['_orig_country_for_debug'].unique().tolist()
print("Missing after normalized exact match:", missing)

# Suggestions for missing with difflib
suggestions = {}
unique_geo_norms = list(geo_df['geo_norm'].unique())
for name in missing:
    norm = normalize_name(name)
    close = difflib.get_close_matches(norm, unique_geo_norms, n=3, cutoff=0.55)
    suggestions[name] = [geo_df[geo_df['geo_norm'] == c][geo_name_col].iloc[0] for c in close] if close else []
print("Suggestions for missing names (review and edit mapping):")
for k, v in suggestions.items():
    print(f"  {k} -> {v}")

# Manual mapping (edit as needed)
manual_map = {
    "Bahamas": "The Bahamas",
    "Republic of Korea": "Korea, Rep.",
    "Timor Leste": "Timor-Leste",
    "Micronesia": "Federated States of Micronesia",
    "Brunei Darussalam": "Brunei",
    "Serbia": "Republic of Serbia",
    # add additional mappings as needed
}

# Add the following countries on the map, in green
new_countries = [
    "Central African Republic",
    "Democratic Republic of the Congo",
    "Cameroon",
    "Chad",
    "Colombia",
    "Haiti",
    "Mali",
    "Mozambique",
    "Myanmar",
    "Niger",
    "Nigeria",
    "Somalia",
    "South Sudan",
    "Sudan",
    "Syria",
    "Uganda",
    "Venezuela",
    "Yemen",
]

# Add the following countries to manual_map and ensure they're present in country_df
for country in new_countries:
    manual_map[country] = country
    if country not in country_df['Country'].values:
        print(f"Warning: {country} not found in country_df['Country']")
        # add a minimal row so merges can work; other columns will be NaN
        country_df = pd.concat([
            country_df,
            pd.DataFrame({'Country': [country], '_orig_country_for_debug': [country]})
        ], ignore_index=True)


# Apply manual mapping and re-merge
country_df['Country_for_merge'] = country_df['Country'].replace(manual_map)
country_df['Country_norm2'] = country_df['Country_for_merge'].map(normalize_name)

# Merge only the necessary columns to avoid column name conflicts
merged_df = country_df[['Country', 'Country_norm2', '_orig_country_for_debug']].merge(
    geo_df[['geo_norm', 'geometry']],
    left_on='Country_norm2',
    right_on='geo_norm',
    how='left'
)


# Report remaining missing using '_orig_country_for_debug'
missing2 = merged_df[merged_df['geometry'].isna()]['_orig_country_for_debug'].unique().tolist()
print("Remaining missing after manual mapping:", missing2)
if missing2:
    print("Please adjust manual_map entries above using the suggestions printed earlier.")

# Convert to GeoDataFrame if geometry present
if 'geometry' in merged_df.columns:
    merged_gdf = gpd.GeoDataFrame(merged_df, geometry='geometry', crs=geo_df.crs if getattr(geo_df, "crs", None) is not None else "EPSG:4326")
else:
    merged_gdf = gpd.GeoDataFrame(merged_df)

# Save for inspection
try:
    merged_gdf.to_file("merged_countries_debug.geojson", driver="GeoJSON")
    print("Exported merged_countries_debug.geojson")
except Exception as e:
    print("Could not write GeoJSON (this may be fine for now):", e)

# 9) Create a folium map centered roughly at the world centroid of available geometries
valid_geoms = merged_gdf['geometry'].dropna()
if not valid_geoms.empty:
    union_geom = gpd.GeoSeries(valid_geoms).unary_union
    centroid = union_geom.centroid
    m = folium.Map(location=[centroid.y, centroid.x], zoom_start=2)
else:
    m = folium.Map(location=[0, 0], zoom_start=2)

# m is ready for adding layers using merged_gdf
print("Map object 'm' created. Use merged_gdf for plotting.")

# Add each country's geometry to the map
for _, row in merged_gdf.iterrows():
    country = row['Country']
    geom = row.get('geometry', None)
    # Use green for new_countries, otherwise use the 'Color' column (fallback to red)
    color = 'green' if country in new_countries else row.get('Color', '#ff0000')
    if geom is not None:
        try:
            folium.GeoJson(
                data=geom.__geo_interface__,
                name=country,
                style_function=lambda feature, col=color: {
                    'fillColor': col,
                    'color': col,
                    'weight': 1,
                    'fillOpacity': 0.5,
                },
                tooltip=country
            ).add_to(m)
        except Exception as e:
            print(f"Could not add geometry for {country}: {e}")

# Save the map
m.save('world_map.html')
print("Map saved as world_map.html")


Using geo_df name column: Country
Using country_df country column: Country (aliased to 'Country')
Missing after normalized exact match: ['Antigua and Barbuda', 'Asia Pacific network', 'Brunei Darussalam', 'Kiribati', 'Maldives', 'Micronesia', 'Republic of Korea', 'Samoa', 'Singapore', 'Timor Leste', 'Tonga', 'Tuvalu', 'DK', 'Serbia', 'Global', 'IFRC', 'IFRC network', 'Lead to Change']
Suggestions for missing names (review and edit mapping):
  Antigua and Barbuda -> []
  Asia Pacific network -> []
  Brunei Darussalam -> []
  Kiribati -> []
  Maldives -> ['Mali', 'Malawi', 'Bangladesh']
  Micronesia -> ['Indonesia', 'Macedonia', 'Romania']
  Republic of Korea -> ['Republic of Serbia', 'Republic of the Congo', 'United Republic of Tanzania']
  Samoa -> ['Cambodia']
  Singapore -> ['Suriname']
  Timor Leste -> []
  Tonga -> ['Togo', 'Estonia', 'Mongolia']
  Tuvalu -> ['Portugal']
  DK -> []
  Serbia -> ['Syria', 'Nigeria', 'Liberia']
  Global -> ['Colombia']
  IFRC -> ['France']
  IFRC netw

  union_geom = gpd.GeoSeries(valid_geoms).unary_union


Map saved as world_map.html
