In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import date
import os



In [8]:
def obtener_listas_disponibles(timeout=10):
    """Return the ranking lists offered by the FIDE top-lists page.

    Downloads ``top_lists.phtml`` and reads every ``<option>`` found inside
    the ``select#select_top`` element.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        dict mapping each option's ``value`` attribute (the list code) to the
        option's visible text (the list name).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://ratings.fide.com/top_lists.phtml"
    r = requests.get(url, timeout=timeout)
    # Fail loudly here instead of parsing an error page into an empty dict.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    listas = {}
    for opt in soup.select("select#select_top option"):
        codigo = opt.get("value")
        if codigo is None:
            # An <option> without a value attribute cannot identify a list.
            continue
        listas[codigo] = opt.get_text(strip=True)
    return listas


In [11]:
def obtener_ranking(codigo, nombre, timeout=10):
    """Scrape one FIDE ranking table into a DataFrame.

    Args:
        codigo: List code inserted into the ``list`` query parameter.
        nombre: Human-readable list name, stored in the ``list`` column.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        DataFrame with one row per table row that has exactly five cells and
        the columns ``rank``, ``name``, ``federation``, ``elo``,
        ``birth_year``, ``list`` and ``scraping_date``. Scraped values are
        kept as stripped text. An empty DataFrame is returned when the
        response status is not 200, when the table is not found, or when no
        row matches.

    Raises:
        requests.RequestException: On connection problems or timeout.
    """
    url = f"https://ratings.fide.com/a_top.php?list={codigo}"
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        # Same best-effort contract as a missing table: nothing to scrape.
        return pd.DataFrame()
    soup = BeautifulSoup(r.text, "html.parser")

    # NOTE(review): the class name is kept exactly as written; presumably it
    # mirrors the spelling used in the page markup -- confirm before "fixing".
    tabla = soup.find("table", {"class": "top_recors_table"})
    if tabla is None:
        return pd.DataFrame()

    # Computed once so every row of this scrape carries the same date, even
    # if the loop happens to run across midnight.
    scraping_date = date.today().isoformat()
    columnas = ("rank", "name", "federation", "elo", "birth_year")

    jugadores = []
    # The first <tr> is skipped (presumably the header row -- verify).
    for fila in tabla.find_all("tr")[1:]:
        celdas = [c.get_text(strip=True) for c in fila.find_all("td")]
        if len(celdas) != len(columnas):
            # Only rows with exactly one cell per expected column are kept.
            continue
        jugador = dict(zip(columnas, celdas))
        jugador["list"] = nombre
        jugador["scraping_date"] = scraping_date
        jugadores.append(jugador)
    return pd.DataFrame(jugadores)

In [12]:
# Fetch the {code: name} mapping of ranking lists once; the scraping loop
# below iterates over it.
listas = obtener_listas_disponibles()

In [13]:
# Scrape every available list in turn, announcing progress, and collect the
# resulting frames so they can be merged with a single concat.
dfs = []
for codigo in listas:
    nombre = listas[codigo]
    print(f"Scrapeando {nombre}...")
    dfs += [obtener_ranking(codigo, nombre)]

# One combined table; the per-list indexes are discarded and renumbered.
df_final = pd.concat(objs=dfs, ignore_index=True)

Scrapeando Open...
Scrapeando Women...
Scrapeando Juniors...
Scrapeando Girls...
Scrapeando Rapid Open...
Scrapeando Rapid Women...
Scrapeando Rapid Juniors...
Scrapeando Rapid Girls...
Scrapeando Blitz Open...
Scrapeando Blitz Women...
Scrapeando Blitz Juniors...
Scrapeando Blitz Girls...


In [18]:
# Destination of the combined rankings (path is relative to the current
# working directory).
ruta = os.path.join("..", "data", "fide_rankings.csv")
# to_csv does not create missing parent folders, so make sure the target
# directory exists before writing.
os.makedirs(os.path.dirname(ruta), exist_ok=True)
df_final.to_csv(ruta, index=False, encoding="utf-8")

In [9]:
def get_chesscom_games(username, year, month, timeout=10):
    """Download one monthly archive of a player's games from the Chess.com API.

    Args:
        username: Chess.com username placed in the request path.
        year: Archive year.
        month: Archive month; an int or a numeric string, zero-padded to two
            digits in the URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        DataFrame built from the ``games`` list of the JSON response. An
        empty DataFrame is returned when the status is not 200, the body is
        not valid JSON, or there is no non-empty ``games`` entry.

    Raises:
        requests.RequestException: On connection problems or timeout.
    """
    url = f"https://api.chess.com/pub/player/{username}/games/{year}/{int(month):02d}"
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    r = requests.get(url, headers=headers, timeout=timeout)
    if r.status_code != 200:
        return pd.DataFrame()

    try:
        data = r.json()
    except ValueError:
        # A 200 response with a non-JSON body is treated like any other
        # failed download: nothing to return.
        return pd.DataFrame()

    games = data.get("games") if isinstance(data, dict) else None
    if not games:
        return pd.DataFrame()

    return pd.DataFrame(games)


In [10]:
# Download one month of games (December 2024) for a single player.
df_cc = get_chesscom_games(username="MagnusCarlsen", year=2024, month=12)


In [12]:
# Preview the first two rows to inspect the columns of the downloaded games.
df_cc.head(2)

Unnamed: 0,url,pgn,time_control,end_time,rated,accuracies,tcn,uuid,initial_setup,fen,time_class,rules,white,black,eco,tournament
0,https://www.chess.com/game/live/126898219067,"[Event ""Live Chess""]\n[Site ""Chess.com""]\n[Dat...",180,1733159429,True,"{'white': 87.95, 'black': 73.23}",lB!TgvYIBJXHcM7PiyHybsPjsyjzkszCyP6XP4X4dbCbab...,13fe8d0b-b0d0-11ef-90b3-6cfe544c0428,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,b1R2b1r/p1np1ppp/3k1p2/2pP4/4P3/2P5/3N1PPP/4KB...,blitz,chess,"{'rating': 3221, 'result': 'win', '@id': 'http...","{'rating': 3318, 'result': 'resigned', '@id': ...",https://www.chess.com/openings/Indian-Game-Spi...,
1,https://www.chess.com/game/live/126898243379,"[Event ""Live Chess""]\n[Site ""Chess.com""]\n[Dat...",180,1733159767,True,"{'white': 92.82, 'black': 92.36}",mC0Kgv5QfA!Tlt90eg8!ksZJCJTJfe6EblJPpxENAr7tvK...,5713b539-b0d0-11ef-90b3-6cfe544c0428,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,8/1Kp5/4k3/4P2R/1P6/8/8/1r6 w - -,blitz,chess,"{'rating': 3316, 'result': 'agreed', '@id': 'h...","{'rating': 3223, 'result': 'agreed', '@id': 'h...",https://www.chess.com/openings/Italian-Game-Tw...,
