In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [18]:
url = 'https://fightingillini.com/sports/2021/4/30/mens-basketball-history'

# Scrape the webpage
res = requests.get(url)
soup = BeautifulSoup(res.text, 'html.parser')



# Grab all <table class="release">
all_tables = soup.find_all("table", class_="release")

# Extract team honor tables
titles_dict = {}
for table in all_tables:
    rows = table.find_all("tr")
    if not rows:
        continue

    # First row should contain <th> with title
    header_cells = rows[0].find_all("th")
    if not header_cells:
        continue

    title = header_cells[0].get_text(strip=True)

    # Get second row <td> values (could be 1 or many)
    if len(rows) > 1:
        data_cells = rows[1].find_all("td")
        data = [cell.get_text(strip=True) for cell in data_cells]

        # Turn into a DataFrame
        df = pd.DataFrame([data])  # one row of data

        # Rename columns only if there's exactly 1 column
        if len(df.columns) == 1:
            df.columns = [title]

        titles_dict[title] = df


In [22]:
titles_dict

{'1* National Title':   1* National Title
 0              1915,
 '5 NCAA Final Fours':              5 NCAA Final Fours
 0  1949, 1951, 1952, 1989, 2005,
 '35* NCAA Tournament Appearances':                      35* NCAA Tournament Appearances
 0  1942, 1949, 1951, 1952, 1963, 1981, 1983, 1984...,
 '18* Big Ten Regular Season Championships':             18* Big Ten Regular Season Championships
 0  1915, 1917, 1924, 1935, 1937, 1942, 1943, 1949...,
 '4 Big Ten Tournament Championships':   4 Big Ten Tournament Championships
 0             2003, 2005, 2021, 2024,
 'Winningest Seasons – By Total Wins':   Rank Wins  Year
 0   1.   37  2005,
 'Winningest Seasons – By Win Percentage':   Rank  Win % Record  Year
 0   1.  1.000   16-0  1915,
 'Winningest\xa0Big Ten Seasons – By Total Wins':   Rank Wins  Year
 0   1.   16  2021,
 'Winningest\xa0Big Ten Seasons – By Win Percentage':   Rank  Win % Record  Year
 0   1.  1.000   12-0  1943}

In [23]:
cleaned_dict = {}

for title, df in titles_dict.items():
    # 1. Fix weird spacing in titles
    clean_title = title.replace('\xa0', ' ').strip()

    # 2. Rename columns if clearly unlabelled
    if len(df.columns) == 3 and list(df.columns) == [0, 1, 2]:
        df.columns = ['Rank', 'Wins', 'Year']
    elif len(df.columns) == 4 and list(df.columns) == [0, 1, 2, 3]:
        df.columns = ['Rank', 'Win %', 'Record', 'Year']

    cleaned_dict[clean_title] = df

In [24]:
titles_dict

{'1* National Title':   1* National Title
 0              1915,
 '5 NCAA Final Fours':              5 NCAA Final Fours
 0  1949, 1951, 1952, 1989, 2005,
 '35* NCAA Tournament Appearances':                      35* NCAA Tournament Appearances
 0  1942, 1949, 1951, 1952, 1963, 1981, 1983, 1984...,
 '18* Big Ten Regular Season Championships':             18* Big Ten Regular Season Championships
 0  1915, 1917, 1924, 1935, 1937, 1942, 1943, 1949...,
 '4 Big Ten Tournament Championships':   4 Big Ten Tournament Championships
 0             2003, 2005, 2021, 2024,
 'Winningest Seasons – By Total Wins':   Rank Wins  Year
 0   1.   37  2005,
 'Winningest Seasons – By Win Percentage':   Rank  Win % Record  Year
 0   1.  1.000   16-0  1915,
 'Winningest\xa0Big Ten Seasons – By Total Wins':   Rank Wins  Year
 0   1.   16  2021,
 'Winningest\xa0Big Ten Seasons – By Win Percentage':   Rank  Win % Record  Year
 0   1.  1.000   12-0  1943}