import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.graph_objects as go
import re

# ---------------------------------------------------------
# HIGHLIGHT COUNTRIES MANUALLY
# ---------------------------------------------------------
# Primary country to highlight, paired with the colour used for it.
USER_COUNTRY, USER_COLOR = "Philippines", "red"

# Secondary (comparison) country, paired with the colour used for it.
HIGHLIGHT_COUNTRY, HIGHLIGHT_COLOR = "Japan", "orange"

# ---------------------------------------------------------
# FETCH NATIONAL DEBT DATA
# ---------------------------------------------------------
url = "https://worldpopulationreview.com/country-rankings/countries-by-national-debt"

# Bound the wait so an unresponsive server cannot hang the script, and fail
# loudly on an HTTP error instead of parsing an error page as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, "html.parser")

table = soup.find("table")
if table is None:
    # Without this check the next line fails with an opaque AttributeError.
    raise RuntimeError(f"No <table> element found in the page at {url}")

# Drop the first <tr>, which is treated as the header row.
rows = table.find_all("tr")[1:]

# Multipliers for the magnitude suffix in the debt column (e.g. "$1.2T").
DEBT_MULT = {"T": 1e12, "B": 1e9, "M": 1e6}
# Multipliers for the optional suffix in the per-capita column.
_PER_CAPITA_MULT = {"Mn": 1_000_000, "Bn": 1_000_000_000}

# Compiled once here rather than looked up on every loop iteration.
_DEBT_RE = re.compile(r"\$(\d+(\.\d+)?)([TBM])")
_PER_CAPITA_RE = re.compile(r"(\d+(\.\d+)?)\s*(Mn|Bn)?")

records = []

for row in rows:
    cols = row.find_all("td")
    # Rows with fewer than five cells cannot hold all the fields read below.
    if len(cols) < 5:
        continue

    country = cols[1].text.strip()
    if country.upper() == "TOTAL":
        continue

    debt_text = cols[2].text.strip()
    gdp_pct_text = cols[3].text.strip()
    per_capita_text = cols[4].text.strip()

    # Debt USD: rows whose debt cell does not look like "$<number><T|B|M>"
    # are skipped entirely.
    m = _DEBT_RE.match(debt_text)
    if not m:
        continue
    debt_val = float(m.group(1))
    debt_unit = m.group(3)
    # round() instead of int(): the float product can land a hair below the
    # intended whole number, and int() would truncate that to one dollar less.
    debt_usd = round(debt_val * DEBT_MULT[debt_unit])

    # Debt % GDP: None when the cell has no "%" or is not numeric.
    debt_pct_gdp = None
    if "%" in gdp_pct_text:
        try:
            debt_pct_gdp = float(gdp_pct_text.replace("%", "").replace(",", ""))
        except ValueError:
            debt_pct_gdp = None

    # Debt per Capita: None when the cell does not start with a number.
    per_capita = None
    pc_text = per_capita_text.replace("$", "").replace(",", "").strip()
    pc_match = _PER_CAPITA_RE.match(pc_text)
    if pc_match:
        pc_val = float(pc_match.group(1))
        pc_unit = pc_match.group(3)
        if pc_unit in _PER_CAPITA_MULT:
            # Same float-truncation concern as for the national debt above.
            per_capita = round(pc_val * _PER_CAPITA_MULT[pc_unit])
        else:
            per_capita = int(pc_val)

    records.append({
        "Country": country,
        "Debt Label": debt_text,
        "Debt USD": debt_usd,
        "Debt % GDP": debt_pct_gdp,
        "Debt Per Capita": per_capita
    })

# Explicit columns keep the schema intact even when no row was parsed, so
# later lookups such as df["Country"] do not raise KeyError on an empty frame.
df = pd.DataFrame(
    records,
    columns=["Country", "Debt Label", "Debt USD", "Debt % GDP", "Debt Per Capita"],
)

# ---------------------------------------------------------
# HELPER TO CREATE RANK LABELS
# ---------------------------------------------------------
def create_rank_labels(df, value_col, value_format="raw"):
    """Rank rows by ``value_col`` (largest first) and attach a text label.

    Args:
        df: DataFrame containing ``value_col`` (and ``"Debt Label"`` when
            ``value_format`` is ``"debt"``).
        value_col: Name of the column to rank by.
        value_format: How the value is rendered inside the label:
            ``"debt"`` uses the row's ``"Debt Label"`` text, ``"pct"`` appends
            ``%``, ``"percap"`` renders dollars scaled to M/B, and anything
            else falls back to ``str(value)``.

    Returns:
        A new DataFrame with added ``"Rank"`` (1 = largest) and
        ``"Plot Label"`` (``"rank N, <value>"``) columns, sorted ascending
        by ``value_col``.
    """
    ranked = df.sort_values(value_col, ascending=False).reset_index(drop=True)
    ranked["Rank"] = ranked.index + 1

    def _scaled_dollars(amount):
        # Billions first, then millions; smaller amounts are shown unscaled.
        if amount >= 1e9:
            return f"${amount/1e9:.2f} B"
        if amount >= 1e6:
            return f"${amount/1e6:.2f} M"
        return f"${amount}"

    def _value_text(record):
        if value_format == "debt":
            return f"{record['Debt Label']}"
        if value_format == "pct":
            return f"{record[value_col]}%"
        if value_format == "percap":
            return _scaled_dollars(record[value_col])
        return str(record[value_col])

    def _plot_label(record):
        return f"rank {record['Rank']}, {_value_text(record)}"

    ranked["Plot Label"] = ranked.apply(_plot_label, axis=1)
    return ranked.sort_values(value_col)

# ---------------------------------------------------------
# HELPERS TO HIGHLIGHT COUNTRIES
# ---------------------------------------------------------
def _build_highlight_colors(countries, default_color, user_country, user_color,
                            extra_country, extra_color):
    """Return one colour per country, highlighting up to two of them.

    ``user_country`` takes precedence over ``extra_country`` when both name
    the same country. A falsy country name (``None`` or ``""``) never matches.
    """
    def pick(name):
        if user_country and name == user_country:
            return user_color
        if extra_country and name == extra_country:
            return extra_color
        return default_color

    return [pick(name) for name in countries]


def build_bar_colors(df, country_col="Country", user_country=None, user_color=None,
                     extra_country=None, extra_color=None):
    """Build the list of bar colours, one per row of ``df[country_col]``.

    Args:
        df: Object supporting ``df[country_col]`` iteration (e.g. a DataFrame).
        country_col: Name of the column holding country names.
        user_country: Country coloured with ``user_color``, if given.
        user_color: Colour for ``user_country``.
        extra_country: Second country, coloured with ``extra_color``, if given.
        extra_color: Colour for ``extra_country``.

    Returns:
        A list of colours; rows that are not highlighted get ``"#1f77b4"``.
    """
    return _build_highlight_colors(df[country_col], "#1f77b4", user_country,
                                   user_color, extra_country, extra_color)


def build_text_colors(df, country_col="Country", user_country=None, user_color=None,
                      extra_country=None, extra_color=None):
    """Build the list of text colours, one per row of ``df[country_col]``.

    Takes the same arguments as ``build_bar_colors``.

    Returns:
        A list of colours; rows that are not highlighted get ``"black"``.
    """
    return _build_highlight_colors(df[country_col], "black", user_country,
                                   user_color, extra_country, extra_color)

# ---------------------------------------------------------
# CHARTS (omitted repeated code for brevity; same as before)
# ---------------------------------------------------------
# ... (charts code remains identical to previous example)

# ---------------------------------------------------------
# SUMMARY DATAFRAME WITH UNITS, ABSOLUTE DIFFERENCE AND RATIO
# ---------------------------------------------------------
main_country = USER_COUNTRY
compare_country = HIGHLIGHT_COUNTRY

main_row = df[df["Country"] == main_country]
compare_row = df[df["Country"] == compare_country]


def _summary_value(row, column, cast):
    """Return ``cast`` of the first ``column`` value in ``row``, or None if missing.

    A value is missing when the country was not found (``row`` is empty) or
    the cell is NaN/None; ``int(NaN)`` would otherwise raise ValueError.
    """
    if row.empty:
        return None
    value = row[column].iloc[0]
    if pd.isna(value):
        return None
    return cast(value)


# (source column, type to cast to), in the same order as the "Metric" labels.
_SUMMARY_COLUMNS = (
    ("Debt USD", int),
    ("Debt % GDP", float),
    ("Debt Per Capita", int),
)

summary_data = {
    "Metric": ["National Debt (USD)", "Debt % GDP", "Debt Per Capita"],
    main_country: [
        _summary_value(main_row, column, cast) for column, cast in _SUMMARY_COLUMNS
    ],
    compare_country: [
        _summary_value(compare_row, column, cast) for column, cast in _SUMMARY_COLUMNS
    ],
}

# Difference (absolute) and ratio
diff_abs = []
ratio = []
for val1, val2 in zip(summary_data[main_country], summary_data[compare_country]):
    if val1 is None or val2 is None:
        diff_abs.append(None)
        ratio.append(None)
        continue
    diff_abs.append(abs(val1 - val2))
    big = max(val1, val2)
    small = min(val1, val2)
    # A ratio against zero is undefined; record it as missing, not a crash.
    ratio.append(round(big / small, 2) if small else None)

# dtype=object keeps ints as ints and None as None. Without it pandas coerces
# each mixed int/float column to float64, turning None into NaN and whole
# dollar amounts into floats that later print with a stray ".0".
summary_df = pd.DataFrame(summary_data, dtype=object)
summary_df["Difference"] = pd.Series(diff_abs, dtype=object)
summary_df["Ratio (big/small)"] = ratio

# Helper to convert to readable units
def format_value_units(val, metric):
    """Render a summary value as a human-readable string for ``metric``.

    Args:
        val: The numeric value, or None/NaN when the value is missing.
        metric: One of ``"National Debt (USD)"``, ``"Debt % GDP"`` or
            ``"Debt Per Capita"``; any other metric returns ``val`` unchanged.

    Returns:
        None for a missing value; ``"<val>%"`` for the percentage metric; a
        dollar string scaled to T/B/M (two decimals) or with thousands
        separators for the dollar metrics; otherwise ``val`` itself.
    """
    # pandas stores missing numbers as NaN, which an "is None" check misses.
    if val is None or pd.isna(val):
        return None
    if metric == "Debt % GDP":
        return f"{val}%"
    if metric in ("National Debt (USD)", "Debt Per Capita"):
        # Whole-dollar amounts that were coerced to float (e.g. 2893.0) are
        # shown as "$2,893", not "$2,893.0".
        if isinstance(val, float) and val.is_integer():
            val = int(val)
        if val >= 1e12:
            return f"${val/1e12:.2f} T"
        if val >= 1e9:
            return f"${val/1e9:.2f} B"
        if val >= 1e6:
            return f"${val/1e6:.2f} M"
        return f"${val:,}"
    return val

# Replace the numbers in each value column with display strings, choosing the
# format from the metric named on the same row.
for col in (main_country, compare_country, "Difference"):
    summary_df[col] = list(
        map(format_value_units, summary_df[col], summary_df["Metric"])
    )

# Display options: do not wrap the frame across lines, do not truncate cells.
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.max_colwidth", None)

print("\n--- Country Debt Comparison ---")
print(summary_df)

# ---------------------------------------------------------
# NARRATIVE STATEMENTS
# ---------------------------------------------------------
def _parse_dollar_amount(text):
    """Convert a formatted amount such as "$1.50 T" or "$2,893" to a float.

    Returns None when ``text`` is not a string in that format.
    """
    if not isinstance(text, str):
        return None
    match = re.fullmatch(r"\$\s*(-?[\d,]+(?:\.\d+)?)\s*([TBM])?", text.strip())
    if match is None:
        return None
    try:
        number = float(match.group(1).replace(",", ""))
    except ValueError:
        return None
    return number * {"T": 1e12, "B": 1e9, "M": 1e6}.get(match.group(2), 1)


def narrative_statement(df, main_country, compare_country):
    """Build a short plain-English comparison of two countries' debt.

    Args:
        df: Summary table with a ``"Metric"`` column, one column per country
            and a ``"Difference"`` column, holding display strings (e.g.
            "$1.50 T", "60.5%").
        main_country: Column name of the primary country.
        compare_country: Column name of the country compared against.

    Returns:
        Newline-joined sentences: one profile per country followed by one
        comparative statement about national debt.
    """
    def metric_value(metric, column):
        return df.loc[df["Metric"] == metric, column].values[0]

    statements = []

    # Individual country profiles
    for country in [main_country, compare_country]:
        debt = metric_value("National Debt (USD)", country)
        pct_gdp = metric_value("Debt % GDP", country)
        per_capita = metric_value("Debt Per Capita", country)
        statements.append(f"{country} has a national debt of {debt}, which is {pct_gdp} of its GDP, and a per capita debt of {per_capita}.")

    # Comparative statement. The direction ("less"/"more") is decided from the
    # parsed amounts; merely checking for "$" in the text is always true.
    diff_debt = metric_value("National Debt (USD)", "Difference")
    main_debt = _parse_dollar_amount(metric_value("National Debt (USD)", main_country))
    compare_debt = _parse_dollar_amount(metric_value("National Debt (USD)", compare_country))

    if diff_debt is None or diff_debt != diff_debt:  # None or NaN: no data
        statements.append(f"A national debt comparison between {main_country} and {compare_country} is not available.")
    elif (
        isinstance(diff_debt, str)
        and "$" in diff_debt
        and main_debt is not None
        and compare_debt is not None
        and main_debt != compare_debt
    ):
        direction = "less" if main_debt < compare_debt else "more"
        statements.append(f"In comparison, {main_country} has {direction} debt than {compare_country} by {diff_debt}.")
    else:
        # Direction unknown (unparsable amounts, or equal after rounding).
        statements.append(f"{main_country} differs from {compare_country} by {diff_debt} in national debt.")

    return "\n".join(statements)

# Print a heading, then the narrative built from the summary table for the
# two configured countries.
print("\n--- Narrative ---")
print(narrative_statement(summary_df, main_country, compare_country))
