In [1]:
import html
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup

# --------------------------------------------------
# 1️⃣ Define output folder (relative to project)
# --------------------------------------------------
# exist_ok=True makes re-running the script safe when the folder is present.
output_folder = "output"
os.makedirs(output_folder, exist_ok=True)

# --------------------------------------------------
# 2️⃣ Fetch HTML content from Sailwave results page
# --------------------------------------------------
url = "https://www.sailwave.com/results/POR-Clubes/1PAN_ILCA_Oeiras.htm"

# requests applies no timeout by default, so a stalled server would hang the
# script indefinitely. Values are (connect timeout, read timeout) in seconds.
REQUEST_TIMEOUT = (10, 30)
response = requests.get(url, timeout=REQUEST_TIMEOUT)
# Abort on 4xx/5xx responses instead of parsing an error page.
response.raise_for_status()

# The raw bytes are handed to BeautifulSoup, which decodes the document itself.
soup = BeautifulSoup(response.content, "html.parser")
# Kept under a second name; used as the link target of the
# "Original / Official Table" heading when the page is built.
site_url = url

# --------------------------------------------------
# 3️⃣ Locate summary tables and corresponding titles
# --------------------------------------------------
# Tables and titles are paired by position: the i-th "summarytitle" heading
# is used as the name of the i-th "summarytable".
tables = soup.find_all("table", class_="summarytable")
titles = soup.find_all("h3", class_="summarytitle")

# competition name -> {"df", "by_division", "by_club", "club_collective"}
competitions = {}

# Race-score columns that must stay textual (e.g. "(6.0)").
race_columns = ["R1", "R2", "R3", "R4"]

# Number of best-placed sailors per club whose Nett points are summed into
# the club's collective score.
TOP_N = 3

# --------------------------------------------------
# 4️⃣ Process each competition table
# --------------------------------------------------
for i, table in enumerate(tables):

    # Fall back to a generated name when there are fewer titles than tables.
    competition_name = (
        titles[i].get_text(strip=True)
        if i < len(titles)
        else f"Competition_{i+1}"
    )

    # Extract headers
    headers = [th.get_text(strip=True) for th in table.find_all("th")]

    # Extract rows; <tr> elements without any <td> cell are skipped.
    rows = []
    for tr in table.find_all("tr"):
        cols = [td.get_text(strip=True) for td in tr.find_all("td")]
        if cols:
            rows.append(cols)

    df = pd.DataFrame(rows, columns=headers)

    # Rename general ranking column if needed
    if df.columns[0] == "" or df.columns[0].lower() == "rank":
        df = df.rename(columns={df.columns[0]: "classif_geral"})

    # --------------------------------------------------
    # Preserve race columns as strings (e.g. "(6.0)")
    # --------------------------------------------------
    for col in race_columns:
        if col in df.columns:
            df[col] = df[col].astype(str)

    # Convert Total and Nett to numeric; unparseable values become NaN.
    for col in ["Total", "Nett"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    # One stable sort by Nett, reused for both rank columns. The default
    # sort algorithm is not stable, so sailors tied on Nett could be ordered
    # arbitrarily (and differently between the rank columns and the grouped
    # views below). A stable sort keeps tied rows in the order of the scraped
    # table. NaN Nett values sort last.
    by_nett = df.sort_values("Nett", kind="mergesort")

    # Internal ranking by division (assignment aligns on the row index).
    df["classif_no_escalao"] = by_nett.groupby("Division").cumcount() + 1

    # Internal ranking by club
    df["Rank_Clube"] = by_nett.groupby("Yacht Club").cumcount() + 1

    # Rankings by division
    by_division = {
        div: data.sort_values("Nett", kind="mergesort").reset_index(drop=True)
        for div, data in df.groupby("Division")
    }

    # Rankings by club
    by_club = {
        club: data.sort_values("Nett", kind="mergesort").reset_index(drop=True)
        for club, data in df.groupby("Yacht Club")
    }

    # --------------------------------------------------
    # Club collective ranking (Top N sailors per club)
    # --------------------------------------------------
    # NOTE(review): a club with fewer than TOP_N sailors sums fewer scores and
    # so gets a lower (better-ranked) total than a full club; confirm this is
    # intended. Atletas_Contam records how many sailors were counted.
    club_collective = (
        df[df["Rank_Clube"] <= TOP_N]
        .groupby("Yacht Club")
        .agg(
            Pontos_Coletivos=("Nett", "sum"),
            Atletas_Contam=("HelmName", "count")
        )
        .sort_values("Pontos_Coletivos", kind="mergesort")
        .reset_index()
    )
    club_collective["Rank_Coletivo"] = range(1, len(club_collective) + 1)

    competitions[competition_name] = {
        "df": df,
        "by_division": by_division,
        "by_club": by_club,
        "club_collective": club_collective
    }

# --------------------------------------------------
# 5️⃣ Build HTML output
# --------------------------------------------------
# Opening part of the page; the sections that follow are appended to
# html_content.
html_content = """
<html>
<head>
<meta charset="utf-8">
<title>Sailing Results</title>
<link rel="stylesheet" type="text/css" href="estilos_south.css">
</head>
<body>
"""

# Index
# Competition names are text scraped from the results page, so they are
# HTML-escaped before being placed in attribute values and link text.
# Escaping does not change the anchor a browser resolves: the entity-decoded
# href fragment is still the competition name.
html_content += '<h1 id="topo">Index</h1><ul>'
html_content += "".join(
    '<li><a href="#{0}">{0}</a></li>'.format(html.escape(comp, quote=True))
    for comp in competitions
)
html_content += '<li><a href="#ranking_clubes_total">Overall Club Ranking</a></li>'
html_content += '</ul>'

# --------------------------------------------------
# Helper: apply alternating row styles
# --------------------------------------------------
def html_table_with_stripes(df):
    """Render *df* as an HTML table whose rows carry alternating CSS classes.

    The frame is rendered with ``DataFrame.to_html`` without the index
    column, with the extra CSS class ``summarytable`` and with cell contents
    left unescaped, so markup stored in cells is emitted as-is. Every
    ``<tr>`` after the first one is then given ``class="even"`` or
    ``class="odd"``, starting with ``even``.

    Returns the resulting markup as a string.
    """
    markup = df.to_html(classes="summarytable", escape=False, index=False)
    parsed = BeautifulSoup(markup, "html.parser")

    stripe_names = ("even", "odd")
    all_rows = parsed.find_all("tr")
    # The first <tr> is skipped; striping starts on the row after it.
    for position, row in enumerate(all_rows[1:]):
        row["class"] = stripe_names[position % 2]

    return str(parsed)

# --------------------------------------------------
# Insert competitions
# --------------------------------------------------
# Markup substituted for the first three places within a division; any other
# rank is left as the plain number.
podium_markup = {
    1: '<span class="place1">1</span>',
    2: '<span class="place2">2</span>',
    3: '<span class="place3">3</span>',
}

# Fragments are collected in a list and joined once at the end instead of
# growing html_content with repeated string concatenation.
section_parts = []

for comp, data in competitions.items():

    # Competition, division and club names are scraped text: escape them
    # before interpolating into headings and attribute values.
    # NOTE(review): table cells are rendered by html_table_with_stripes with
    # escape=False (the podium <span> markup relies on it), so scraped cell
    # text itself is not escaped by this block.
    comp_html = html.escape(comp, quote=True)

    # The wrapper id is chosen from the (upper-cased) competition name.
    comp_upper = comp.upper()
    if "ILCA4" in comp_upper:
        section_parts.append('<div id="ilca4">')
    elif "ILCA6" in comp_upper:
        section_parts.append('<div id="ilca6">')
    else:
        section_parts.append('<div>')

    section_parts.append(f'<hr><h2 id="{comp_html}">{comp_html}</h2>')

    # Original table (without the two derived rank columns)
    df_original = data["df"].drop(columns=["classif_no_escalao", "Rank_Clube"])
    section_parts.append(
        f'<a href="{site_url}" target="_blank"><h3>Original / Official Table</h3></a>'
    )
    section_parts.append('<p><a href="#topo">Back to top</a></p>')
    section_parts.append(html_table_with_stripes(df_original))

    # Rankings by division
    section_parts.append('<h3>Ranking by Division</h3>')
    for div, df_div in data["by_division"].items():
        df_div_clean = df_div.drop(columns=["Rank_Clube"])
        # Object dtype lets the column hold both markup strings and numbers.
        df_div_clean["classif_no_escalao"] = (
            df_div_clean["classif_no_escalao"]
            .astype(object)
            .map(lambda rank: podium_markup.get(rank, rank))
        )

        section_parts.append(f'<h4>{html.escape(str(div))}</h4>')
        section_parts.append(html_table_with_stripes(df_div_clean))

    # Rankings by club
    section_parts.append('<h3>Ranking by Club</h3>')
    for club, df_cl in data["by_club"].items():
        df_cl_clean = df_cl.drop(columns=["classif_no_escalao", "Rank_Clube"])
        section_parts.append(f'<h4>{html.escape(str(club))}</h4>')
        section_parts.append(html_table_with_stripes(df_cl_clean))

    section_parts.append('</div>')

html_content += "".join(section_parts)

# --------------------------------------------------
# Overall club ranking (combined competitions)
# --------------------------------------------------
# Per-competition collective points, reduced to the two columns needed here.
collective_frames = [
    entry["club_collective"][["Yacht Club", "Pontos_Coletivos"]]
    for entry in competitions.values()
]

if collective_frames:
    overall = pd.concat(collective_frames)
else:
    # pd.concat raises ValueError on an empty list; when no competition was
    # parsed, fall back to an empty frame so an empty table is rendered.
    overall = pd.DataFrame(
        {
            "Yacht Club": pd.Series(dtype="object"),
            "Pontos_Coletivos": pd.Series(dtype="float64"),
        }
    )

# Sum each club's points over all competitions; lowest total ranks first.
# The stable sort keeps clubs tied on points in a deterministic order.
# NOTE(review): a club missing from one competition adds nothing for it and
# therefore ends up with a lower total; confirm this is intended.
overall = (
    overall.groupby("Yacht Club")
    .sum()
    .reset_index()
    .sort_values("Pontos_Coletivos", kind="mergesort")
    .reset_index(drop=True)
)
overall["Rank_Coletivo_Total"] = range(1, len(overall) + 1)

html_content += '<h2 id="ranking_clubes_total">Overall Club Ranking</h2>'
html_content += '<p><a href="#topo">Back to top</a></p>'
html_content += html_table_with_stripes(overall)

html_content += "</body></html>"

# --------------------------------------------------
# Save HTML file
# --------------------------------------------------
# The finished page is written as UTF-8 into the output folder.
output_file = os.path.join(output_folder, "classificacoes_vela.html")
with open(output_file, mode="w", encoding="utf-8") as page_handle:
    page_handle.write(html_content)

# Report the location with forward slashes regardless of the OS separator.
printable_path = output_file.replace(os.sep, "/")
print(f"HTML page created at: {printable_path}")



<class 'ModuleNotFoundError'>: No module named 'requests'