
Pro Sports Transactions Data: https://www.prosportstransactions.com/basketball/Search/Search.php

Scraping Source Code: https://github.com/logan-lauton/nba_webscrape?tab=readme-ov-file 


In [None]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import time
import requests
import warnings
import io

In [None]:
# URL template for one page of search results; `{start}` is filled in with the
# row offset of the page to request.
# NOTE(review): ILChkBx=yes presumably restricts results to injured-list
# transactions — confirm against the site's search form.
BASE = (
    "https://www.prosportstransactions.com/basketball/Search/SearchResults.php"
    "?Player=&Team=&BeginDate=&EndDate=&ILChkBx=yes&Submit=Search"
    "&start={start}"
)

# Request headers sent with every page fetch (browser-like User-Agent).
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/140.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
}

def fetch_page(start: int, session: requests.Session) -> pd.DataFrame:
    """Download one search-results page and return its table as a DataFrame.

    Args:
        start: Value substituted into the ``start`` query parameter of ``BASE``.
        session: Session used to issue the GET request.

    Returns:
        The table matching ``table.datatable.center`` when the page contains
        one, otherwise the first table pandas finds in the raw response text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If no table can be parsed from the response.
    """
    url = BASE.format(start=start)
    r = session.get(url, headers=HEADERS, timeout=30)
    r.raise_for_status()

    soup = BeautifulSoup(r.content, "lxml")
    table = soup.select_one("table.datatable.center")

    # Prefer the dedicated results table; fall back to scanning the whole page.
    html = str(table) if table is not None else r.text

    try:
        dfs = pd.read_html(io.StringIO(html), flavor="lxml")
    except ValueError as err:
        # pd.read_html never returns an empty list: it raises ValueError when
        # it finds no table, so that is the case to translate into the
        # descriptive error below.
        raise RuntimeError(f"No tables found at start={start}. "
                           f"Status={r.status_code}, url={url}") from err
    return dfs[0]

def single(num_pages: int, *, delay: float = 2.38) -> pd.DataFrame:
    """Scrape ``num_pages`` consecutive result pages into one cleaned DataFrame.

    Pages are fetched sequentially with ``fetch_page`` using offsets
    0, 25, 50, ... and a pause of ``delay`` seconds between requests.

    Args:
        num_pages: Number of result pages to fetch.
        delay: Seconds to wait between consecutive requests (rate limiting).

    Returns:
        A DataFrame with columns Date, Team, Acquired, Relinquished and Notes.
        Repeated header rows are dropped, the leading bullet is stripped from
        the player columns, and Date is parsed as ``%Y-%m-%d`` (unparseable
        values become NaT). An empty frame with the same columns is returned
        when ``num_pages`` is not positive.
    """
    columns = ["Date", "Team", "Acquired", "Relinquished", "Notes"]

    # Kept from the original code; nothing here calls DataFrame.append, so this
    # filter is likely a leftover — it is retained to avoid changing the
    # process-wide warning configuration callers may rely on.
    warnings.filterwarnings("ignore", message="The frame.append method is deprecated")

    if num_pages <= 0:
        # pd.concat([]) would raise; return a correctly shaped empty result.
        return pd.DataFrame(columns=columns).astype({"Date": "datetime64[ns]"})

    frames = []
    with requests.Session() as s:
        for page in range(num_pages):
            if page:
                # Pause only between requests, not after the final one.
                time.sleep(delay)  # keep your rate limit
            frames.append(fetch_page(page * 25, s))

    df = pd.concat(frames, ignore_index=True)
    df.columns = columns

    # Each page's table carries its own header row as data; drop those rows.
    df = df[df["Date"] != "Date"].reset_index(drop=True)

    # Strip the bullet prefix value-by-value so that a column consisting only
    # of NaN (non-string dtype) does not break the `.str` accessor.
    for col in ("Acquired", "Relinquished"):
        df[col] = df[col].map(
            lambda v: v.replace("• ", "") if isinstance(v, str) else v
        )

    df["Date"] = pd.to_datetime(df["Date"].str.strip(), format="%Y-%m-%d", errors="coerce")
    return df


In [None]:
# Scrape the first 5 result pages and keep the cleaned table in `df`.
df = single(5)

In [None]:
# Write the scraped table to CSV; index=True also writes the row index as the
# first column of the file.
# NOTE(review): the filename advertises 1947-2025 coverage, but only the rows
# currently in `df` are written — confirm enough pages were scraped first.
df.to_csv('NBA_Player_Injury_Stats_1947_2025.csv', index=True)