# Web scraping NBA transaction data from [basketball-reference.com](https://www.basketball-reference.com/)

In [167]:
import bs4
from bs4 import BeautifulSoup
import requests
import re
import csv
import pandas as pd

In [175]:
# Seasons to process; the range end is exclusive, hence the + 1.
first_season = 1991
last_season = 2024
years = [season for season in range(first_season, last_season + 1)]

# URL template for a season's transactions page; fill in the year with .format().
url_blank = "https://www.basketball-reference.com/leagues/NBA_{}_transactions.html"

In [None]:
# save HTML files for each year
import time  # only this download cell needs it

# Pause between requests so the loop does not hammer the site.
# NOTE(review): Sports Reference publishes a limit of roughly 20 requests per
# minute; 4 seconds keeps this loop under that. Confirm the current policy.
request_delay_seconds = 4

for year in years:
    url = url_blank.format(year)
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    # Stop on 4xx/5xx instead of saving an error page as if it were data.
    response.raise_for_status()

    # Write with an explicit encoding so the saved file does not depend on the
    # platform's default (which may be unable to encode every character).
    with open("./Data/transaction_htmls/transactions_{}.html".format(year), "w", encoding="utf-8") as f:
        f.write(response.text)

    time.sleep(request_delay_seconds)

In [187]:
# extract waiver data from HTML file for each year and save as CSV

# Compiled once and reused for every entry in every year.
player_href = re.compile("/players/")

for year in years:
    # Read with an explicit encoding rather than the platform default.
    # NOTE(review): assumes the saved pages are UTF-8 -- confirm against how
    # the files were written.
    with open("./Data/transaction_htmls/transactions_{}.html".format(year), encoding="utf-8") as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")

    page_index = soup.find("ul", attrs={"class": "page_index"})
    if page_index is None:
        # Without this check the next line fails with an AttributeError that
        # does not say which year's file is the problem.
        raise ValueError(
            "no <ul class=\"page_index\"> found in the saved page for {}".format(year)
        )

    # One row per transaction paragraph whose text mentions "waived":
    # (player name, value of the team link's data-attr-from, date text).
    rows = []
    for entry in page_index.find_all("p"):
        if "waived" not in entry.get_text():
            continue

        team_link = entry.find("a", attrs={"data-attr-from": True})
        player_link = entry.find("a", attrs={"href": player_href})
        # The date is the first <span> under the paragraph's parent element.
        date_span = entry.parent.find("span")

        if team_link is None or player_link is None or date_span is None:
            # Name the offending entry instead of failing on a None lookup.
            raise ValueError(
                "unexpected waived entry for {}: {!r}".format(year, entry.get_text())
            )

        rows.append((player_link.get_text(), team_link["data-attr-from"], date_span.get_text()))

    df = pd.DataFrame(rows, columns=["player", "team_from", "date"])

    # The index column is written on purpose; the per-year files are read back
    # with index_col=0.
    df.to_csv("./Data/waived/waived_{}.csv".format(year))

In [213]:
# Stack the per-year waiver CSVs into a single table and write it to disk.
waived_dfs = []
for year in years:
    yearly_csv = "./Data/waived/waived_{}.csv".format(year)
    # The first column of each per-year file is its saved index.
    waived_dfs.append(pd.read_csv(yearly_csv, index_col=0))

# ignore_index=True renumbers the combined rows from 0.
waived_df = pd.concat(waived_dfs, ignore_index=True)
waived_df.to_csv("./Data/waived_data.csv")