In [6]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from io import StringIO


def get_basketball_reference_table(url: str) -> pd.DataFrame:
    """Download a page and return its first ``stats_table`` table, cleaned.

    The first ``<table>`` element carrying the ``stats_table`` class is parsed
    into a DataFrame. Rows that merely repeat the header in the middle of the
    table are dropped, and asterisks are stripped from the ``Player`` column.

    Args:
        url: Address of the page to fetch.

    Returns:
        The cleaned table with a fresh ``0..n-1`` index.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with a 4xx/5xx status.
        ValueError: If the page contains no table with class ``stats_table``.
        KeyError: If the parsed table has no ``Player`` column.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    # Bound the wait so a stalled connection cannot hang the caller forever.
    response = requests.get(url, headers=headers, timeout=30)
    # Stop on an HTTP error status instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Find the first <table> element with class "stats_table".
    table = soup.find("table", {"class": "stats_table"})
    if table is None:
        # Without this guard str(None) would be handed to read_html, which
        # fails with a message that does not say which page was at fault.
        raise ValueError(f"No table with class 'stats_table' found at {url}")
    df = pd.read_html(StringIO(str(table)))[0]

    # Drop rows that duplicate the header (repeated in the middle of the
    # table): in those rows the first cell equals the first column's name.
    # .copy() detaches the result so the column assignment below writes to an
    # independent frame and cannot trigger SettingWithCopyWarning.
    first_col = df.columns[0]
    df = df[df[first_col] != first_col].copy()

    # Clean-up: player names without asterisks.
    df["Player"] = df["Player"].str.replace("*", "", regex=False)

    return df.reset_index(drop=True)

# Source pages for the 2024-25 season: per-game and advanced statistics.
per_game_url = "https://www.basketball-reference.com/leagues/NBA_2025_per_game.html"
advanced_url = "https://www.basketball-reference.com/leagues/NBA_2025_advanced.html"

# Download and clean one table per page.
df_per_game = get_basketball_reference_table(per_game_url)
df_advanced = get_basketball_reference_table(advanced_url)

# Report the column names of each table before joining them.
print("Columnas en df_per_game:", list(df_per_game.columns))
print("Columnas en df_advanced:", list(df_advanced.columns))


# Join the two tables on Player + Team; any other column present in both
# tables is disambiguated with the '_per_game' / '_adv' suffixes.
merged_df = df_per_game.merge(
    df_advanced,
    on=["Player", "Team"],
    suffixes=('_per_game', '_adv'),
)

# Display up to the first 800 rows of the merged table.
merged_df.head(800)


Columnas en df_per_game: ['Rk', 'Player', 'Age', 'Team', 'Pos', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'Awards']
Columnas en df_advanced: ['Rk', 'Player', 'Age', 'Team', 'Pos', 'G', 'GS', 'MP', 'PER', 'TS%', '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%', 'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM', 'BPM', 'VORP', 'Awards']


Unnamed: 0,Rk_per_game,Player,Age_per_game,Team,Pos_per_game,G_per_game,GS_per_game,MP_per_game,FG,FGA,...,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Awards_adv
0,1.0,Shai Gilgeous-Alexander,26.0,OKC,PG,76.0,76.0,34.2,11.3,21.8,...,34.8,11.9,4.8,16.7,0.309,8.9,2.6,11.5,8.9,
1,2.0,Giannis Antetokounmpo,30.0,MIL,PF,67.0,67.0,34.2,11.8,19.7,...,35.2,7.8,3.7,11.5,0.241,6.9,2.5,9.5,6.6,
2,3.0,Nikola Jokić,29.0,DEN,C,70.0,70.0,36.7,11.2,19.5,...,29.5,12.7,3.8,16.4,0.307,9.9,3.3,13.3,9.8,
3,4.0,Luka Dončić,25.0,2TM,PG,50.0,50.0,35.4,9.2,20.5,...,33.9,3.5,2.4,5.9,0.160,5.5,1.2,6.7,3.9,
4,4.0,Luka Dončić,25.0,DAL,PG,22.0,22.0,35.7,9.8,21.2,...,33.1,1.5,1.1,2.5,0.155,5.5,1.4,6.9,1.8,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
731,566.0,Jahlil Okafor,29.0,IND,C,1.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.196,2.5,4.9,7.5,0.0,
732,567.0,Zyon Pullin,23.0,MEM,SG,3.0,0.0,1.0,0.0,0.3,...,13.4,0.0,0.0,0.0,-0.304,-15.0,-7.7,-22.7,0.0,
733,568.0,Isaiah Stevens,24.0,MIA,PG,3.0,0.0,2.0,0.0,0.7,...,14.8,0.0,0.0,0.0,-0.196,-19.5,7.7,-11.8,0.0,
734,569.0,Terry Taylor,25.0,SAC,PF,3.0,0.0,2.0,0.0,0.3,...,7.2,0.0,0.0,0.0,0.049,3.2,0.1,3.3,0.0,
