# In [None]:
import pandas as pd
from bs4 import BeautifulSoup
import re

def clean_college_name(raw_text):
    """Return the text preceding the first 'More Details' or 'Close||' marker.

    If neither marker occurs, the whole string is used. Surrounding
    whitespace is stripped from the result.
    """
    marker = re.search(r'More Details|Close\|\|', raw_text)
    if marker is None:
        name_part = raw_text
    else:
        # Keep only what comes before the earliest marker.
        name_part = raw_text[:marker.start()]
    return name_part.strip()

def parse_table(html_content, year):
    """Parse the NIRF HTML table and return a structured DataFrame.

    Only the first ``<table>`` in the document is used. Every ``<tr>`` after
    the first is treated as a candidate data row; rows with fewer than seven
    ``<td>`` cells are skipped. For each kept row, cell 1 is cleaned with
    ``clean_college_name`` and cells 2-6 (treated as the five score values)
    are moved to the end of the row, after all remaining cells.

    Column names are the text of every ``<th>`` found in the table, followed
    by ``TLR_<year>``, ``RPC_<year>``, ``GO_<year>``, ``OI_<year>`` and
    ``Perspective_<year>``. That list is padded with ``Extra_<i>`` names or
    truncated so that it matches the widest parsed row.

    Args:
        html_content: HTML markup containing the table.
        year: Value interpolated into the names of the five score columns.

    Returns:
        A ``pandas.DataFrame`` with one row per parsed table row, an empty
        DataFrame (with the collected headers) when the table contains no
        usable rows, or ``None`` when no ``<table>`` element is present.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    if table is None:
        print(" Table not found.")
        return None

    # NOTE: find_all() searches all descendants, so <th>/<tr>/<td> elements
    # belonging to any nested table are picked up here and below as well.
    headers = [th.get_text(strip=True) for th in table.find_all('th')]
    headers += [f'TLR_{year}', f'RPC_{year}', f'GO_{year}', f'OI_{year}', f'Perspective_{year}']

    data = []
    # The first <tr> is assumed to be the header row and is skipped.
    rows = table.find_all('tr')[1:]

    for row in rows:
        cols = row.find_all('td')
        if len(cols) < 7:
            continue
        try:
            base_data = [col.get_text(strip=True) for col in cols]

            base_data[1] = clean_college_name(base_data[1])

            # Reorder: leading two cells + trailing cells first, scores last.
            scores = base_data[2:7]
            other_data = base_data[:2] + base_data[7:]
            data.append(other_data + scores)
        except Exception as e:
            # Best-effort parsing: report the bad row and keep going.
            print(f" Error parsing row: {e}")
            continue

    if not data:
        # max() over an empty sequence raises ValueError; a table without any
        # usable rows yields an empty frame instead of crashing.
        return pd.DataFrame(columns=headers)

    max_cols = max(len(row) for row in data)
    if len(headers) < max_cols:
        # Invent placeholder names for cells that have no header.
        headers += [f"Extra_{i}" for i in range(len(headers), max_cols)]

    return pd.DataFrame(data, columns=headers[:max_cols])
