In [1]:
import pandas as pd
import numpy as np
import pycountry

# ── 1) Load your NOAA earthquake CSV into a DataFrame ─────────────────────────────
df = pd.read_csv("earthquakes.csv")
# Header cells may carry stray whitespace; trim it so the lookups below match.
df.columns = df.columns.str.strip()

# ── 2) Fill NaNs with zero for our outcome fields ────────────────────────────────
required = [
    "Total Deaths",
    "Total Injuries",
    "Total Damage ($Mil)",
    "Total Houses Destroyed",
    "Mag",
]
# Walk the required columns in order: fill gaps with 0 where the column exists,
# and stop with a KeyError at the first one that is absent.
for col in required:
    if col in df.columns:
        df[col] = df[col].fillna(0)
    else:
        raise KeyError(f"Column '{col}' not found in DataFrame.")

# ── 3) Extract ISO3 country codes from "Location Name" ───────────────────────────
def extract_iso3(location):
    """Return the ISO 3166-1 alpha-3 code for the country named in *location*.

    The country is taken to be the text after the last comma of *location*
    (the whole string when there is no comma) and is resolved with
    ``pycountry.countries.lookup``.

    Args:
        location: Free-text location name. Non-string values (for example the
            NaN pandas uses for missing cells) are treated as unmappable.

    Returns:
        The three-letter country code, or ``np.nan`` when *location* is not a
        string or pycountry cannot resolve the country name.
    """
    # NOTE(review): assumes the country is the last comma-separated token;
    # confirm against the actual "Location Name" format in the CSV.
    if not isinstance(location, str):
        return np.nan
    country_name = location.rsplit(",", 1)[-1].strip()
    try:
        return pycountry.countries.lookup(country_name).alpha_3
    except LookupError:
        # Only "name not found" is an expected outcome. Anything else (e.g. a
        # broken pycountry install) should surface instead of silently
        # turning every row into NaN.
        return np.nan

# Resolve each location to an ISO3 code; unmappable locations come back as NaN.
df["CountryISO3"] = df["Location Name"].map(extract_iso3)

# ── 4) Filter out low-magnitude events (only true stress tests) ──────────────────
# Keep a row only if it was mapped to a country AND has magnitude >= 4.5.
has_country = df["CountryISO3"].notna()
is_strong = df["Mag"] >= 4.5
df = df.loc[has_country & is_strong].copy()

# ── 5) Min–max normalize each series to [0,1] ───────────────────────────────────
def normalize(s: pd.Series) -> pd.Series:
    """Min-max scale *s* to the interval [0, 1].

    Args:
        s: Numeric series to rescale.

    Returns:
        A float series on the same index, and with the same name, as *s*.
        When the series has no spread (constant, empty, or all-NaN), every
        entry is 0.0.
    """
    mn, mx = s.min(), s.max()
    if mx > mn:
        return (s - mn) / (mx - mn)
    # Zero or undefined range: dividing would yield NaN/inf, so return zeros.
    # Float zeros plus the original name keep both branches returning the same
    # kind of series.
    return pd.Series(0.0, index=s.index, name=s.name)

# Scale every outcome column (and magnitude) to [0, 1]. The mapping's insertion
# order fixes the order in which the new columns are appended.
scaled_from = {
    "Deaths_n": "Total Deaths",
    "Injuries_n": "Total Injuries",
    "Damage_n": "Total Damage ($Mil)",
    "HousesDestroyed_n": "Total Houses Destroyed",
    "Mag_n": "Mag",
}
for scaled_col, raw_col in scaled_from.items():
    df[scaled_col] = normalize(df[raw_col])

# ── 6) Compute raw vulnerability & event-level InfraScore ────────────────────────
# V_raw: unweighted mean of the four normalized outcome columns.
outcome_total = df["Deaths_n"] + df["Injuries_n"] + df["Damage_n"] + df["HousesDestroyed_n"]
df["V_raw"] = outcome_total / 4
# V_event: vulnerability per unit of normalized magnitude. eps keeps the
# denominator non-zero for the weakest event(s), whose Mag_n is 0.
eps = 1e-6
df["V_event"] = df["V_raw"] / (df["Mag_n"] + eps)
# InfraScore_event: decreasing in V_event, equal to 1 when V_event is 0.
df["InfraScore_event"] = 1.0 / (df["V_event"] + 1.0)

# ── 7) Aggregate to country level with magnitude-weighted averaging ─────────────
# Weighted mean per country = sum(score * weight) / sum(weight), with Mag_n as
# the weight. Because Mag_n is min-max normalized, the weakest event(s) have
# weight 0; for a country whose events all have weight 0 the weighted mean is
# undefined (np.average raises ZeroDivisionError there), so those countries
# fall back to the plain, unweighted mean of their event scores.
country_key = df["CountryISO3"]
weight_sum = df["Mag_n"].groupby(country_key).sum()
weighted_score_sum = (df["InfraScore_event"] * df["Mag_n"]).groupby(country_key).sum()
unweighted_mean = df["InfraScore_event"].groupby(country_key).mean()

df_country = (
    (weighted_score_sum / weight_sum)
    .where(weight_sum > 0, unweighted_mean)
    .rename("InfraScore_country")
    .reset_index()
)

# ── 8) Inspect top/bottom countries ──────────────────────────────────────────────
# Each entry pairs a heading with the sort direction that puts the rows of
# interest first; the ten leading rows of each ordering are printed.
for heading, ascending in (
    ("Top 10 countries by InfraScore:", False),
    ("\nBottom 10 countries by InfraScore:", True),
):
    print(heading)
    print(df_country.sort_values("InfraScore_country", ascending=ascending).head(10))


ModuleNotFoundError: No module named 'pycountry'