import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.graph_objects as go
import re

# ---------------------------------------------------------
# HIGHLIGHT COUNTRIES MANUALLY
# ---------------------------------------------------------
# Primary country of the comparison. Its column in the summary table is
# filled with USER_COLOR.
USER_COUNTRY = "Philippines"  # red
USER_COLOR = "red"

# Country that USER_COUNTRY is compared against. Its column in the summary
# table is filled with HIGHLIGHT_COLOR.
HIGHLIGHT_COUNTRY = "Japan"  # orange
HIGHLIGHT_COLOR = "orange"

# ---------------------------------------------------------
# FETCH NATIONAL DEBT DATA
# ---------------------------------------------------------
url = "https://worldpopulationreview.com/country-rankings/countries-by-national-debt"

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, "html.parser")

table = soup.find("table")
if table is None:
    # Fail with an actionable message instead of an AttributeError on None.
    raise RuntimeError(f"No <table> element found at {url}; the page layout may have changed")
rows = table.find_all("tr")[1:]  # skip the first row (presumably the header)

DEBT_MULT = {"T": 1e12, "B": 1e9, "M": 1e6}
_PER_CAPITA_MULT = {"Mn": 1_000_000, "Bn": 1_000_000_000}

# Compiled once here rather than on every row of the loop below.
_DEBT_RE = re.compile(r"\$(\d+(\.\d+)?)([TBM])")
_PER_CAPITA_RE = re.compile(r"(\d+(\.\d+)?)\s*(Mn|Bn)?")

records = []

for row in rows:
    cols = row.find_all("td")
    # Cells 1-4 are read below, so a usable row needs at least five cells.
    if len(cols) < 5:
        continue

    country = cols[1].text.strip()
    if country.upper() == "TOTAL":
        continue

    debt_text = cols[2].text.strip()
    gdp_pct_text = cols[3].text.strip()
    per_capita_text = cols[4].text.strip()

    # Debt USD: text such as "$1.2T"; rows that do not match are skipped.
    m = _DEBT_RE.match(debt_text)
    if not m:
        continue
    debt_val = float(m.group(1))
    debt_unit = m.group(3)
    # round() instead of int(): float multiplication can land a hair below
    # the intended whole number, and int() would then truncate it by one.
    debt_usd = round(debt_val * DEBT_MULT[debt_unit])

    # Debt % GDP: None when the cell has no "%" or is not a valid number.
    debt_pct_gdp = None
    if "%" in gdp_pct_text:
        try:
            debt_pct_gdp = float(gdp_pct_text.replace("%", "").replace(",", ""))
        except ValueError:
            debt_pct_gdp = None

    # Debt per Capita: optional "Mn"/"Bn" suffix scales the number.
    per_capita = None
    pc_text = per_capita_text.replace("$", "").replace(",", "").strip()
    pc_match = _PER_CAPITA_RE.match(pc_text)
    if pc_match:
        pc_val = float(pc_match.group(1))
        pc_unit = pc_match.group(3)
        if pc_unit in _PER_CAPITA_MULT:
            per_capita = round(pc_val * _PER_CAPITA_MULT[pc_unit])
        else:
            # Plain dollar amounts keep their whole-dollar part only.
            per_capita = int(pc_val)

    records.append({
        "Country": country,
        "Debt Label": debt_text,
        "Debt USD": debt_usd,
        "Debt % GDP": debt_pct_gdp,
        "Debt Per Capita": per_capita
    })

df = pd.DataFrame(records)

# ---------------------------------------------------------
# HELPER TO CREATE RANK LABELS
# ---------------------------------------------------------
def create_rank_labels(df, value_col, value_format="raw"):
    """Return a copy of *df* ordered by *value_col* with a 1-based "Rank" column.

    Args:
        df: DataFrame containing *value_col*.
        value_col: Name of the column to rank by, largest value first.
        value_format: Accepted for compatibility; not used by this function.

    Returns:
        A new DataFrame sorted descending on *value_col*, re-indexed from 0,
        with "Rank" equal to the row position plus one. *df* is not modified.
    """
    ordered = df.sort_values(by=value_col, ascending=False)
    ordered = ordered.reset_index(drop=True)
    # After the reset the index is 0..n-1, so index + 1 is the rank.
    return ordered.assign(Rank=ordered.index + 1)

# ---------------------------------------------------------
# ADDITIONAL DATAFRAME & NARRATIVE LOGIC WITH RANK
# ---------------------------------------------------------
# Aliases used by the summary/narrative code: the "main" country is the user
# country and the "compare" country is the highlighted one.
main_country, compare_country = USER_COUNTRY, HIGHLIGHT_COUNTRY

# Get ranks from original sorted data
def get_rank(df_sorted, country):
    """Look up the rank of *country* in a ranked DataFrame.

    Args:
        df_sorted: DataFrame with "Country" and "Rank" columns.
        country: Country name to match exactly against "Country".

    Returns:
        The "Rank" of the first matching row as an int, or None when no row
        matches.
    """
    matching_rows = df_sorted[df_sorted["Country"] == country]
    if matching_rows.empty:
        return None
    first_rank = matching_rows["Rank"].iloc[0]
    return int(first_rank)

# One ranked frame per metric: each is sorted descending on its metric and
# carries the "Rank" column that get_rank() reads.
df1_plot = create_rank_labels(df, "Debt USD")
df2_plot = create_rank_labels(df, "Debt % GDP")
df3_plot = create_rank_labels(df, "Debt Per Capita")


def _metric_value(df_sorted, country, column, cast):
    """Return ``cast(value)`` of *column* for *country*, or None if unavailable.

    Mirrors get_rank(): a country that is absent from the data, or whose
    value is missing (NaN/None), yields None instead of raising IndexError
    or "cannot convert float NaN to integer".
    """
    matches = df_sorted.loc[df_sorted["Country"] == country, column]
    if matches.empty:
        return None
    value = matches.iloc[0]
    if pd.isna(value):
        return None
    return cast(value)


# (ranked frame, source column, Python type) in the same order as "Metric".
_metric_sources = [
    (df1_plot, "Debt USD", int),
    (df2_plot, "Debt % GDP", float),
    (df3_plot, "Debt Per Capita", int),
]

summary_data = {
    "Metric": ["National Debt (USD)", "Debt % GDP", "Debt Per Capita"],
    main_country: [
        _metric_value(frame, main_country, column, cast)
        for frame, column, cast in _metric_sources
    ],
    compare_country: [
        _metric_value(frame, compare_country, column, cast)
        for frame, column, cast in _metric_sources
    ],
    f"{main_country} Rank": [
        get_rank(frame, main_country) for frame, _, _ in _metric_sources
    ],
    f"{compare_country} Rank": [
        get_rank(frame, compare_country) for frame, _, _ in _metric_sources
    ],
}

# Difference, ratios, inverse ratios
diff_abs = []
ratio_main_other = []
ratio_other_main = []

for val_main, val_other in zip(summary_data[main_country], summary_data[compare_country]):
    if val_main is None or val_other is None:
        # Nothing to compare when either side is missing.
        diff_abs.append(None)
        ratio_main_other.append(None)
        ratio_other_main.append(None)
        continue
    # Rounded like the ratios so float subtraction noise
    # (e.g. 194.70000000000002) does not reach the table; ints stay ints.
    diff_abs.append(round(abs(val_main - val_other), 2))
    ratio_main_other.append(round(val_main / val_other, 2) if val_other != 0 else None)
    ratio_other_main.append(round(val_other / val_main, 2) if val_main != 0 else None)

summary_df = pd.DataFrame(summary_data)
summary_df["Difference"] = diff_abs
summary_df["Ratio (main/other)"] = ratio_main_other
summary_df["Ratio (other/main)"] = ratio_other_main

# Format units
def format_value_units(val, metric):
    """Format a summary value for display according to its metric.

    Args:
        val: Numeric value to format, or None/NaN when the value is missing.
        metric: Row label from the summary table. "Debt % GDP" is rendered
            as a percentage; "National Debt (USD)" and "Debt Per Capita"
            are rendered as dollar amounts scaled to T/B/M where applicable.

    Returns:
        A display string, None for a missing value, or *val* unchanged when
        *metric* is not one of the known labels.
    """
    import math

    # NaN is how a missing number appears in a float column; treat it like
    # None rather than rendering "$nan" or "nan%".
    if val is None or (isinstance(val, float) and math.isnan(val)):
        return None
    if metric == "Debt % GDP":
        return f"{val}%"
    if metric in ("National Debt (USD)", "Debt Per Capita"):
        # Pick the unit from the magnitude so negative amounts scale too.
        magnitude = abs(val)
        if magnitude >= 1e12:
            return f"${val/1e12:.2f} T"
        if magnitude >= 1e9:
            return f"${val/1e9:.2f} B"
        if magnitude >= 1e6:
            return f"${val/1e6:.2f} M"
        # Ints stored in a mixed int/float column come back as whole-number
        # floats; print them without the spurious ".0".
        if isinstance(val, float) and val.is_integer():
            val = int(val)
        return f"${val:,}"
    return val

# Replace the numeric cells of the two country columns and the difference
# column with display strings, choosing the format from each row's metric.
for col in (main_country, compare_country, "Difference"):
    formatted_cells = []
    for val, metric in zip(summary_df[col], summary_df["Metric"]):
        formatted_cells.append(format_value_units(val, metric))
    summary_df[col] = formatted_cells

# ---------------------------------------------------------
# PLOTLY TABLE FOR SUMMARY DATAFRAME
# ---------------------------------------------------------
# Fill colour per column name; any column not listed here is white.
# USER_COUNTRY is inserted last so it wins if both names are the same.
_column_fill = {HIGHLIGHT_COUNTRY: HIGHLIGHT_COLOR, USER_COUNTRY: USER_COLOR}
_row_count = len(summary_df)

# go.Table takes one list of cell texts and one list of cell colours per
# column, so both are built column by column in a single pass.
table_values = []
colors = []
for col in summary_df.columns:
    table_values.append(summary_df[col].astype(str).tolist())
    colors.append([_column_fill.get(col, "white")] * _row_count)

_header_spec = {
    "values": list(summary_df.columns),
    "fill_color": 'lightgrey',
    "align": 'center',
    "font": {"size": 14, "color": 'black'},
}
_cells_spec = {
    "values": table_values,
    "fill_color": colors,
    "align": 'center',
    "font": {"color": 'black', "size": 12},
}

fig_table = go.Figure(data=[go.Table(header=_header_spec, cells=_cells_spec)])

fig_table.update_layout(title="Country Debt Comparison Table")
fig_table.show()

# ---------------------------------------------------------
# NARRATIVE WITH RANKS, RATIOS, INVERSE STATEMENTS
# ---------------------------------------------------------
def narrative_with_ranks(df, main_country, compare_country):
    statements = []

    # Individual country profiles with rank
    for country in [main_country, compare_country]:
        debt = df.loc[df['Metric']=="National Debt (USD)", country].values[0]
        pct_gdp = df.loc[df['Metric']=="Debt % GDP", country].values[0]
        per_capita = df.loc[df['Metric']=="Debt Per Capita", country].values[0]
        rank_debt = df.loc[df['Metric']=="National Debt (USD)", f"{country} Rank"].values[0]
        rank_gdp = df.loc[df['Metric']=="Debt % GDP", f"{country} Rank"].values[0]
        rank_pc = df.loc[df['Metric']=="Debt Per Capita", f"{country} Rank"].values[0]
        statements.append(f"{country} has a national debt of {debt} (Rank {rank_debt}), which is {pct_gdp} of its GDP (Rank {rank_gdp}), and a per capita debt of {per_capita} (Rank {rank_pc}).\n")

    # Comparative statements with ratios and inverse ratios
    for metric in df['Metric']:
        val_main = df.loc[df['Metric']==metric, main_country].values[0]
