# World Athletics Senior Women's Triple Jump
<img src="https://trackandfieldnews.com/wp-content/uploads/2023/12/yulimar-rojas-01-24.jpg" alt="Yulimar Rojas" width="250" height="166.75">

In [1]:
import requests
import pandas as pd
import time
from bs4 import BeautifulSoup

In [None]:
# Install the lxml dependency (HTML parser used by pandas.read_html)
!pip install lxml
import lxml
print(("lxml successfully installed"))

In [None]:
def fetch_toplist_page(page_num, timeout=30):
    """Download one page of the women's senior triple jump all-time toplist.

    Args:
        page_num: Page number to request; sent as the ``page`` query
            parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole scrape.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        requests.RequestException: On connection problems or timeout.
    """
    url = (
        "https://worldathletics.org/records/all-time-toplists"
        "/jumps/triple-jump/all/women/senior"
    )
    # Pass the query as a mapping so ``page`` appears exactly once; the
    # previous hand-built URL contained both ``page=1`` and the requested
    # page, leaving it to the server which of the two it honoured.
    params = {
        "regionType": "world",
        "windReading": "all",
        "page": page_num,
        "bestResultsOnly": "false",
        "firstDay": "1899-12-31",
        "lastDay": "2025-10-21",
        "maxResultsByCountry": "all",
        "eventId": "10229529",
        "ageCategory": "senior",
    }
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    return resp.text

In [None]:
def parse_table_from_html(html):
    """Extract the results table from a toplist HTML page.

    Every table in the document is inspected; the first one that has at
    least one of the expected toplist column names is returned. If none
    match, the table with the most rows is returned as a fallback.

    Args:
        html: HTML markup as a string (or any file-like object accepted by
            ``pandas.read_html``).

    Returns:
        A ``pandas.DataFrame`` for the chosen table, or ``None`` when the
        document contains no tables at all.
    """
    # Local import keeps this cell self-contained in the notebook.
    from io import StringIO

    expected = {"Rank", "Mark", "WIND", "Competitor", "DOB", "Country", "Pos", "Venue", "Date", "Results Score"}

    # Newer pandas versions deprecate passing literal HTML as a plain string;
    # wrapping it in StringIO works on both old and new versions.
    source = StringIO(html) if isinstance(html, str) else html
    try:
        dfs = pd.read_html(source)  # use pandas to extract tables
    except ValueError:
        # pandas raises ValueError when no <table> is found; report that as
        # "no data" so the caller's ``df is None`` check can stop cleanly.
        return None

    for df in dfs:
        if expected.intersection(set(df.columns)):
            return df

    # Fallback only after *all* tables were checked (it used to run inside
    # the loop, so only the first table was ever examined).
    return max(dfs, key=lambda d: d.shape[0])

In [None]:
def scrape_all_pages(max_pages=80, delay=1.0):
    """Fetch toplist pages in order and combine them into one DataFrame.

    Pages are requested from 1 up to ``max_pages``. Scraping stops early at
    the first page whose parsed table is missing or has no rows.

    Args:
        max_pages: Highest page number to request.
        delay: Seconds to sleep after each successfully scraped page, to
            avoid hammering the server.

    Returns:
        A ``pandas.DataFrame`` with the rows of all scraped pages and a
        fresh 0..n-1 index. An empty DataFrame is returned when no page
        yielded any rows.
    """
    all_dfs = []
    for page in range(1, max_pages + 1):
        print(f"Fetching page {page}...")
        html = fetch_toplist_page(page)
        df = parse_table_from_html(html)
        if df is None or df.shape[0] == 0:
            print("No more data or empty table on this page. Stopping.")
            break
        all_dfs.append(df)
        time.sleep(delay)

    if not all_dfs:
        # pd.concat raises ValueError on an empty list; hand back an empty
        # frame instead so callers can still use len() / to_csv().
        return pd.DataFrame()

    return pd.concat(all_dfs, ignore_index=True)

In [None]:
def main():
    """Scrape the full toplist, report the row count and write it to CSV."""
    toplist = scrape_all_pages(max_pages=80, delay=1.0)

    row_count = len(toplist)
    print("Scraped rows:", row_count)

    output_path = "women_senior_triple_jump.csv"
    # index=False keeps the DataFrame's positional index out of the file.
    toplist.to_csv(output_path, index=False)
    print("Saved CSV.")


# Run the scrape only when executed directly, not when imported.
if __name__ == "__main__":
    main()