# Import Libraries

In [None]:
from abc import ABC, abstractmethod
from bs4 import BeautifulSoup

import regex as re
import json
import heapq
import os

import numpy as np
import pandas as pd

# Define Team Class

In [None]:
class Team:
    """One team's row from a contest standings table."""

    def __init__(self, rank, name, institution, solved, penalty, first_solve_count):
        # Keep this assignment order: __repr__ serialises the instance
        # dictionary, so the order here is the key order of the JSON output.
        self.rank = rank
        self.name = name
        # Institution names are stored upper-cased.
        self.institution = institution.upper()
        self.solved = solved
        self.penalty = penalty
        self.first_solve_count = first_solve_count

    def __lt__(self, other):
        """Order teams by ``rank``; return NotImplemented for non-Team operands."""
        if isinstance(other, Team):
            return self.rank < other.rank
        return NotImplemented

    def __repr__(self):
        """Return the instance attributes as a JSON object string."""
        return json.dumps(vars(self))

# Define ContestParser Class

In [None]:
class ContestParser(ABC):
    """Abstract standings parser that also holds a registry of parser instances."""

    # Registry of parser instances, keyed by upper-cased key. The dict lives on
    # this class and is mutated in place by register_parser.
    _parsers = {}

    def parse(self, filepaths):
        """
        Parses several standings files and concatenates their teams.

        Args:
            filepaths (iterable[str]): Paths handed one by one to parseFile.

        Returns:
            list[Team]: Teams of all files, in the order the files were given.
        """
        return [team for filepath in filepaths for team in self.parseFile(filepath)]

    @abstractmethod
    def parseFile(self, filepath):
        """
        Parses a single HTML file containing contest standings and returns a list of Team objects.

        Args:
            filepath (str): Path to the HTML file containing the contest data.

        Returns:
            list[Team]: A list of Team objects.
        """

    @classmethod
    def get_parser(cls, key):
        """
        Looks up a registered parser instance; the lookup is case-insensitive.

        Args:
            key (str): The key identifying the parser.

        Returns:
            ContestParser: The parser instance registered under the key.

        Raises:
            ValueError: If nothing is registered under the key.
        """
        normalized_key = key.upper()
        if normalized_key in cls._parsers:
            return cls._parsers[normalized_key]

        raise ValueError(f"No parser found for key: {normalized_key}")

    @classmethod
    def register_parser(cls, key, parser_instance):
        """
        Registers a parser instance under the upper-cased form of the key.

        Args:
            key (str): The key to associate with the parser.
            parser_instance (ContestParser): The parser instance to register.

        Raises:
            ValueError: If the key (ignoring case) is already registered.
        """
        normalized_key = key.upper()
        if normalized_key in cls._parsers:
            raise ValueError(f"Parser for key '{key}' is already registered.")

        cls._parsers[normalized_key] = parser_instance

## Define Implementations of ContestParser Class

In [None]:
class TophParser(ContestParser):
    """Parser for standings pages handled under the "toph" key."""

    def parseFile(self, filepath):
        """
        Reads one HTML standings file and builds a Team per table row.

        The first table in the document is used and its first row is skipped.
        Rows with fewer than three <td> cells are ignored. A row that raises
        while being converted is printed together with the error and skipped.

        Args:
            filepath (str): Path to the HTML file, read as UTF-8.

        Returns:
            list[Team]: Teams in table order.

        Raises:
            ValueError: If the document contains no <table>.
        """
        with open(filepath, "r", encoding="utf-8") as handle:
            markup = handle.read()

        standings = BeautifulSoup(markup, "html.parser").find("table")
        if not standings:
            raise ValueError("No table found in the HTML document.")

        parsed_teams = []
        for row in standings.find_all("tr")[1:]:
            cells = row.find_all("td")
            if len(cells) >= 3:
                try:
                    parsed_teams.append(self._team_from_cells(cells))
                except Exception as e:
                    # Best effort: report the offending row and keep going.
                    print(f"Error processing row: {row}")
                    print(e)

        return parsed_teams

    def _team_from_cells(self, cells):
        """Converts the <td> cells of one standings row into a Team."""
        rank = int(cells[0].get_text(strip=True))

        # Second cell: team name is the first child node, institution sits in
        # a <div class="adjunct"> (empty string when that div is missing).
        identity_cell = cells[1]
        team_name = identity_cell.contents[0].strip()
        institution_div = identity_cell.find("div", class_="adjunct")
        if institution_div:
            institution = institution_div.get_text(strip=True)
        else:
            institution = ""

        # Third cell: solve count in <strong>, penalty inside the tooltip
        # attribute of the adjunct div.
        score_cell = cells[2]
        solve_count = int(score_cell.find("strong").get_text(strip=True))
        penalty_text = score_cell.find("div", class_="adjunct").get("data-tippy-content")
        penalty = int(re.search(r"Penalty: (\d+)", penalty_text).group(1))

        # Remaining cells: count those whose green icon carries the
        # "First to Solve" tooltip.
        first_solve_count = 0
        for cell in cells[3:]:
            badge = cell.find("img", class_="icon green")
            if badge and badge.get("data-tippy-content") == "First to Solve":
                first_solve_count += 1

        return Team(
            name=team_name,
            institution=institution,
            rank=rank,
            solved=solve_count,
            penalty=penalty,
            first_solve_count=first_solve_count,
        )

ContestParser.register_parser("toph", TophParser())

In [None]:
class BAPSparser(ContestParser):
    """Parser for standings pages handled under the "baps" key."""

    def parseFile(self, filepath):
        """
        Reads one HTML standings file and builds a Team per table row.

        The first table in the document is used and its first row is skipped.
        Rows with fewer than three <td> cells are ignored. A row that raises
        while being converted is printed together with the error and skipped.

        Args:
            filepath (str): Path to the HTML file, read as UTF-8.

        Returns:
            list[Team]: Teams in table order.

        Raises:
            ValueError: If the document contains no <table>.
        """
        with open(filepath, "r", encoding="utf-8") as handle:
            markup = handle.read()

        standings = BeautifulSoup(markup, "html.parser").find("table")
        if not standings:
            raise ValueError("No table found in the HTML document.")

        parsed_teams = []
        for row in standings.find_all("tr")[1:]:
            cells = row.find_all("td")
            if len(cells) >= 3:
                try:
                    parsed_teams.append(self._team_from_cells(cells))
                except Exception as e:
                    # Best effort: report the offending row and keep going.
                    print(f"Error processing row: {row}")
                    print(e)

        return parsed_teams

    def _team_from_cells(self, cells):
        """Converts the <td> cells of one standings row into a Team."""
        rank = int(cells[0].get_text(strip=True))

        # Second cell: team name in <strong>, institution in the first <div>
        # (empty string when there is no div).
        identity_cell = cells[1]
        team_name = identity_cell.find("strong").get_text(strip=True)
        institution_div = identity_cell.find("div")
        if institution_div:
            institution = institution_div.get_text(strip=True)
        else:
            institution = ""

        # Third cell text: the first run of digits is the solve count, the
        # digits inside parentheses are the penalty.
        score_text = cells[2].get_text(strip=True)
        solve_count = int(re.search(r"(\d+)", score_text).group(1))
        penalty = int(re.search(r"\((\d+)\)", score_text).group(1))

        # Remaining cells: count those containing a <div> whose inline style
        # matches the "shine" animation pattern.
        shine_style = re.compile(r"animation:.*shine.*")
        first_solve_count = 0
        for cell in cells[3:]:
            if cell.find("div", style=shine_style):
                first_solve_count += 1

        return Team(
            name=team_name,
            institution=institution,
            rank=rank,
            solved=solve_count,
            penalty=penalty,
            first_solve_count=first_solve_count,
        )

ContestParser.register_parser("baps", BAPSparser())

# Define Contest Class

In [None]:
class Contest:
    """A contest: its parsed teams, grouped by institution, plus the top solve count."""

    def __init__(self, name, filepaths, parser):
        """
        Parses the standings files and indexes the resulting teams.

        Args:
            name (str): Contest name; stored upper-cased.
            filepaths (list[str]): Standings files passed to ``parser.parse``.
            parser (ContestParser): Parser used to read the files.

        Raises:
            TypeError: If ``parser`` is not a ContestParser instance.
        """
        if not isinstance(parser, ContestParser):
            raise TypeError("parser must be an instance of ContestParser")

        self.name = name.upper()
        self.team_list = parser.parse(filepaths)

        # Highest solve count among all teams, never below 0.
        self.max_solved = max([0, *(team.solved for team in self.team_list)])

        # Institution name -> teams of that institution, in standings order.
        teams_by_institution = {}
        for team in self.team_list:
            teams_by_institution.setdefault(team.institution, []).append(team)
        self.institution_map = teams_by_institution

    def __repr__(self):
        """Return the contest as an indented JSON object string."""
        def as_dicts(teams):
            return [team.__dict__ for team in teams]

        payload = {
            "name": self.name,
            "max_solved": self.max_solved,
            "team_list": as_dicts(self.team_list),
            "institution_map": {
                institution: as_dicts(teams)
                for institution, teams in self.institution_map.items()
            },
        }
        return json.dumps(payload, indent=4)

# Define Institution Class

In [None]:
class Institution:
    """An institution, its alternative spellings, and its teams per contest."""

    def __init__(self, name, alt_names=None):
        """
        Args:
            name (str): Primary institution name; stored upper-cased.
            alt_names (list[str] | None): Other spellings under which the
                institution may appear in standings; stored upper-cased.
                ``None`` (the default) means no alternative names.
        """
        self.name = name.upper()
        # A None default avoids sharing one mutable list between all calls.
        self.alt_names = [alt_name.upper() for alt_name in (alt_names or [])]
        # Upper-cased contest name -> this institution's teams, sorted by rank.
        self.contest_map = {}

    def add_contest(self, contest):
        """
        Collects this institution's teams from ``contest``.

        Teams listed under the primary name or any alternative name are
        gathered, sorted, and stored under ``contest.name``. Nothing is
        stored when the institution has no team in the contest.

        Args:
            contest (Contest): Object exposing ``name`` and ``institution_map``.
        """
        institution_map = contest.institution_map

        # De-duplicate the lookup names (order preserved). Otherwise an alias
        # that equals the primary name, or is listed twice, would add the same
        # teams more than once.
        lookup_names = dict.fromkeys([self.name, *self.alt_names])

        contest_team_list = [
            team
            for lookup_name in lookup_names
            for team in institution_map.get(lookup_name, [])
        ]

        if contest_team_list:
            self.contest_map[contest.name] = sorted(contest_team_list)

    def get_contest_teams(self, contest_name):
        """Return the sorted teams stored for ``contest_name``, or None if absent."""
        return self.contest_map.get(contest_name)

    def __repr__(self):
        """Return the institution as an indented JSON object string."""
        return json.dumps({
            "name": self.name,
            "alt_names": self.alt_names,
            "contest_map": {
                contest_name: [team.__dict__ for team in teams]
                for contest_name, teams in self.contest_map.items()
            }
        }, indent=4)

# Load Data

In [None]:
def load_contests_from_json(file_path):
    """
    Builds Contest objects from a JSON description file.

    The file must contain a top-level "contests" list whose entries have
    "name", "filenames" (relative to ./input/contest_files/) and "parser"
    (a key registered with ContestParser).

    Args:
        file_path (str): Path to the JSON file.

    Returns:
        dict[str, Contest]: Contests keyed by their upper-cased name.
    """
    contest_dir = "./input/contest_files/"

    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which differs between systems.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    contests = {}
    for entry in data["contests"]:
        filepaths = [os.path.join(contest_dir, filename) for filename in entry["filenames"]]
        parser = ContestParser.get_parser(entry["parser"])
        contest = Contest(entry["name"], filepaths, parser)
        # Key by the upper-cased Contest.name. Institution.contest_map uses
        # that same key, so both lookups by contest name agree even when the
        # JSON spells the name in mixed case.
        contests[contest.name] = contest

    return contests

def load_institutions_from_json(file_path):
    """
    Builds Institution objects from a JSON description file.

    The file must contain a top-level "institutions" list whose entries have
    a "name" and, optionally, an "alt_names" list.

    Args:
        file_path (str): Path to the JSON file.

    Returns:
        list[Institution]: Institutions in file order.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which differs between systems.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    return [
        Institution(entry["name"], entry.get("alt_names", []))
        for entry in data["institutions"]
    ]

In [None]:
# Load every contest described in contests.json; `contests` is read later by
# GradeCalculator and by the export cells.
contests_file_path = "./input/contests.json"
contests = load_contests_from_json(contests_file_path)

# Load the institutions (with their alternative names) to be ranked.
institutions_file_path = "./input/institutions.json"
institutions = load_institutions_from_json(institutions_file_path)

In [None]:
# Let every institution pick up its own teams from every loaded contest.
for institution in institutions:
    for contest in contests.values():
        institution.add_contest(contest)

In [None]:
# credits_map is consumed by GradeCalculator.get_cgpa: its keys are used as
# contest names and its values as the credit of each contest.
credits_file_path = "./input/credits.json"
# Decode explicitly as UTF-8 rather than with the platform's locale encoding.
with open(credits_file_path, "r", encoding="utf-8") as file:
    credits_map = json.load(file)

In [None]:
# options_map supplies the 'Total Slots' and 'Max Slots' settings used by the
# slot distribution cell.
options_file_path = "./input/options.json"
# Decode explicitly as UTF-8 rather than with the platform's locale encoding.
with open(options_file_path, "r", encoding="utf-8") as file:
    options_map = json.load(file)

# Define GradeCalculator Class

In [None]:
class GradeCalculator:
    """Computes per-contest grade points and a credit-weighted CGPA per institution."""

    def get_grade_point(self, institution, contest_name):
        """
        Computes an institution's grade point for one contest.

        Up to four best-ranked teams are graded individually as
        ``4 * (1 - 0.02) ** (rank - 1) * sqrt(solved / max_solved)`` and the
        team grades are combined with a power mean of exponent ``k``.

        NOTE: the contest is looked up in the module-level ``contests`` dict.

        Args:
            institution (Institution): Institution to grade.
            contest_name (str): Key of the contest, as used by both
                ``institution.get_contest_teams`` and ``contests``.

        Returns:
            float | None: The grade point, or None when the institution has
            no teams in the contest.
        """
        contest_teams = institution.get_contest_teams(contest_name)
        if not contest_teams:
            return None

        contest = contests[contest_name]
        max_solved = contest.max_solved

        decay_rate = 0.02
        team_grades = []
        for team in sorted(contest_teams)[:4]:
            # When no team solved anything, max_solved is 0 and the ratio
            # would divide by zero. A team with zero solves earns a zero
            # grade in any other contest, so use a ratio of 0 here too.
            solved_ratio = team.solved / max_solved if max_solved else 0.0
            grade = 4 * (1 - decay_rate) ** (team.rank - 1) * solved_ratio ** (1/2)
            team_grades.append(grade)

        # Power mean with exponent k (> 1, so higher team grades weigh more).
        k = 3.14159
        lk_norm = np.mean(np.array(team_grades) ** k) ** (1 / k)
        return lk_norm

    def get_cgpa(self, institution_list, credits_map):
        """
        Builds the marksheet of all institutions.

        CGPA is ``sqrt(sum((gp * credit) ** 2) / sum(credit ** 2))`` over the
        contests in which the institution has a grade point, and 0 when it
        has none.

        Args:
            institution_list (list[Institution]): Institutions to grade.
            credits_map (dict): Contest name -> credit of that contest.

        Returns:
            pandas.DataFrame: Indexed by institution name, with one
            "<contest> GP" column per contest and a final "CGPA" column.
        """
        contest_name_list = list(credits_map.keys())

        marksheet = pd.DataFrame(
            index=[inst.name for inst in institution_list],
            columns=[contest_name + " GP" for contest_name in contest_name_list]
        )

        # Running sums of squares; temporary columns dropped before returning.
        marksheet["GP L2"] = 0.0
        marksheet["Credit L2"] = 0.0

        for institution in institution_list:
            for contest_name in contest_name_list:
                credit = credits_map[contest_name]
                gp = self.get_grade_point(institution, contest_name)
                marksheet.at[institution.name, contest_name + " GP"] = gp

                # Contests without a grade contribute neither grade nor credit.
                if gp is not None:
                    marksheet.at[institution.name, "GP L2"] += (gp * credit) ** 2
                    marksheet.at[institution.name, "Credit L2"] += credit ** 2

        marksheet.loc[marksheet["Credit L2"] > 0, "CGPA"] = (marksheet["GP L2"] / marksheet["Credit L2"]) ** (1 / 2)
        marksheet.loc[marksheet["Credit L2"] == 0, "CGPA"] = 0

        marksheet.drop(columns=["GP L2", "Credit L2"], inplace=True)
        marksheet = marksheet.infer_objects(copy=False)

        return marksheet

# Perform Calculations

In [None]:
calculator = GradeCalculator()

# Grade every institution, then order the marksheet from highest to lowest CGPA.
marksheet = calculator.get_cgpa(institutions, credits_map)
ranked_df = marksheet.sort_values(by="CGPA", ascending=False)
display(ranked_df)

# Export Results

In [None]:
# Snapshot of all loaded Contest objects, in dictionary order.
contest_list = list(contests.values())

In [None]:
# Create the target directory first; opening a file inside a directory that
# does not exist yet raises FileNotFoundError.
os.makedirs("output", exist_ok=True)
with open("./output/contests.json", "w") as file:
    # repr() of the list is a JSON array because every element's __repr__
    # returns a JSON object; it is re-parsed and written with indentation.
    json.dump(json.loads(repr(contest_list)), file, indent=4)

In [None]:
# Create the target directory first; opening a file inside a directory that
# does not exist yet raises FileNotFoundError.
os.makedirs("output", exist_ok=True)
with open("./output/institutions.json", "w") as file:
    # repr() of the list is a JSON array because every element's __repr__
    # returns a JSON object; it is re-parsed and written with indentation.
    json.dump(json.loads(repr(institutions)), file, indent=4)

In [None]:
# Turn the institution-name index into a regular column (reset_index calls it
# "index" when the index is unnamed) and label it "Institution".
marksheet_df = ranked_df.reset_index()
marksheet_df.rename(columns={"index": "Institution"}, inplace=True)

# Make sure the output directory exists before writing the CSV.
os.makedirs('output', exist_ok=True)
marksheet_df.to_csv("./output/marksheet.csv", index=False)

# Automatic Slot Distribution

In [None]:
def distribute_slots(marksheet_df, total_slots, max_slots):
    """
    Distributes slots among institutions according to their CGPA.

    Every institution starts with one slot. The remaining slots are handed
    out one at a time to the institution with the highest quotient
    ``CGPA / (slots held + 1)``. An institution that already holds
    ``max_slots`` receives no further slot. Ties are broken by institution
    name, because the heap compares ``(quotient, name)`` tuples.

    Args:
        marksheet_df (pandas.DataFrame): Needs 'Institution' and 'CGPA' columns.
        total_slots (int): Target number of slots over all institutions.
        max_slots (int): Cap per institution for the slots added here.

    Returns:
        pandas.DataFrame: Columns 'Institution', 'CGPA' and 'Slots'.
    """
    cgpa_by_institution = dict(zip(marksheet_df['Institution'], marksheet_df['CGPA']))

    # One guaranteed slot each.
    slots = dict.fromkeys(cgpa_by_institution, 1)
    handed_out = len(slots)

    # heapq is a min-heap, so quotients are stored negated.
    queue = [(-cgpa / 2, institution) for institution, cgpa in cgpa_by_institution.items()]
    heapq.heapify(queue)

    while handed_out < total_slots and queue:
        institution = heapq.heappop(queue)[1]

        # At the cap: drop the institution from the queue for good.
        if slots[institution] >= max_slots:
            continue

        slots[institution] += 1
        handed_out += 1
        next_quotient = -cgpa_by_institution[institution] / (slots[institution] + 1)
        heapq.heappush(queue, (next_quotient, institution))

    return pd.DataFrame({
        'Institution': slots.keys(),
        'CGPA': cgpa_by_institution.values(),
        'Slots': slots.values(),
    })

In [None]:
# Slot limits come from options.json.
total_slots = options_map['Total Slots']
max_slots = options_map['Max Slots']
slots_df = distribute_slots(marksheet_df, total_slots, max_slots)
display(slots_df)

In [None]:
# Make sure the output directory exists before writing the CSV.
os.makedirs('output', exist_ok=True)
slots_df.to_csv("./output/slots.csv", index=False)

## Generate Excel Sheet

In [None]:
def round_floats_in_df(df, precision=6):
    """
    Returns a copy of ``df`` whose float columns are rounded.

    Args:
        df (pandas.DataFrame): Source frame; left unmodified.
        precision (int): Number of decimals to keep.

    Returns:
        pandas.DataFrame: Copy with the columns selected by
        ``select_dtypes(include='float')`` rounded; other columns untouched.
    """
    result = df.copy()
    float_columns = result.select_dtypes(include='float').columns
    for column_name in float_columns:
        result[column_name] = result[column_name].round(precision)
    return result

def col_letter(col_num):
    """
    Converts a zero-based column index into spreadsheet column letters.

    0 -> 'A', 25 -> 'Z', 26 -> 'AA', ... A negative index yields ''.
    """
    letters = []
    remaining = col_num

    while remaining >= 0:
        remaining, offset = divmod(remaining, 26)
        letters.append(chr(ord('A') + offset))
        # Column letters have no zero digit, so shift by one before
        # producing the next, more significant letter.
        remaining -= 1

    return ''.join(reversed(letters))

In [None]:
def write_header(worksheet, df):
    """Writes the column labels of ``df`` into row 0 of ``worksheet``, left to right."""
    header_row = 0
    for position, label in enumerate(df.columns):
        worksheet.write(header_row, position, label)

def write_rows(slots_ws, df, general_col, reserved_col, total_col):
    """
    Writes a "general + reserved" formula into the total column of every data row.

    Args:
        slots_ws: Worksheet offering ``write_formula(row, col, formula)``.
        df: Frame whose length gives the number of data rows.
        general_col (int): Zero-based column index of the general slots.
        reserved_col (int): Zero-based column index of the reserved slots.
        total_col (int): Zero-based column index receiving the formula.
    """
    # Data starts one row below the header: zero-based row 1, which is row 2
    # in the A1 notation used inside the formula.
    for zero_based_row, a1_row in enumerate(range(2, len(df) + 2), start=1):
        general_cell = col_letter(general_col) + str(a1_row)
        reserved_cell = col_letter(reserved_col) + str(a1_row)
        slots_ws.write_formula(zero_based_row, total_col, f"={general_cell}+{reserved_cell}")

In [None]:
def apply_alternating_colors(workbook, worksheet, df, colors):
    """
    Styles the header row and bands the data rows with two background colours.

    Args:
        workbook: Workbook used to create formats via ``add_format``.
        worksheet: Worksheet to style.
        df: Frame that was written to the sheet; provides the data extent.
        colors (list[str]): ``colors[0]`` header background,
            ``colors[1]`` background of even sheet rows,
            ``colors[2]`` background of odd sheet rows.
    """
    # Header: coloured background with white text, applied to the whole row.
    worksheet.set_row(
        0,
        None,
        workbook.add_format({'bg_color': colors[0], 'font_color': 'white'}),
    )

    # Data area in A1 notation: from A2 down to the last column / last data row.
    last_column = col_letter(len(df.columns) - 1)
    last_row = len(df) + 1
    data_range = f"A2:{last_column}{last_row}"

    banding_rules = (
        ('=ISEVEN(ROW())', 1),
        ('=ISODD(ROW())', 2),
    )
    for criteria, color_index in banding_rules:
        worksheet.conditional_format(data_range, {
            'type': 'formula',
            'criteria': criteria,
            'format': workbook.add_format({'bg_color': colors[color_index]}),
        })

def autofit_columns(worksheet, df):
    """
    Sets each column's width to its longest text plus two characters of padding.

    The longest text is the longer of the column label and the widest cell,
    both measured on their ``str`` form.

    Args:
        worksheet: Worksheet offering ``set_column(first_col, last_col, width)``.
        df (pandas.DataFrame): Frame that was written to the sheet.
    """
    for col_num, column in enumerate(df.columns):
        # Labels are not necessarily strings, so measure their text form.
        header_width = len(str(column))

        cell_widths = df[column].astype(str).map(len)
        # max() of an empty Series is NaN, which would turn the width into
        # NaN; a frame without rows is sized by its header alone.
        content_width = int(cell_widths.max()) if len(cell_widths) else 0

        worksheet.set_column(col_num, col_num, max(content_width, header_width) + 2)

In [None]:
def lock_sheet(worksheet):
    """Turns on sheet protection by calling ``worksheet.protect()`` with no arguments."""
    worksheet.protect()

def unlock_columns(worksheet, col_indices, start_row, end_row, df):
    """
    Rewrites the cells of the given columns with an unlocked cell format.

    Args:
        worksheet: Worksheet with a ``book`` attribute offering ``add_format``.
        col_indices (list[int]): Zero-based columns to unlock.
        start_row (int): First zero-based sheet row to rewrite (inclusive).
        end_row (int): Last zero-based sheet row to rewrite (inclusive).
        df (pandas.DataFrame): Source of the cell values; sheet row ``r``
            maps to frame row ``r - 1`` because of the header row.
    """
    editable_format = worksheet.book.add_format({'locked': False})

    for sheet_row in range(start_row, end_row + 1):
        frame_row = sheet_row - 1
        for col in col_indices:
            cell_value = df.iloc[frame_row, col]
            worksheet.write(sheet_row, col, cell_value, editable_format)

def generateExcelSheet(marksheet_df, slots_df):
    """
    Writes 'output/Slot Distribution.xlsx' with a 'Slots' and a 'Marksheet' sheet.

    Both sheets get a coloured header, banded rows, fitted column widths and
    sheet protection. On the 'Slots' sheet the 'Reserved Slots' and
    'Explanation for Reserved Slots' columns are left editable, and
    'Total Slots' is a formula adding general and reserved slots.

    Args:
        marksheet_df (pandas.DataFrame): Marksheet to export.
        slots_df (pandas.DataFrame): Slot distribution containing a 'Slots' column.
    """
    # Work on a copy and add the columns that exist only in the spreadsheet.
    slots_sheet_df = slots_df.copy()
    slots_sheet_df.rename(columns={'Slots': 'General Slots'}, inplace=True)
    slots_sheet_df['Reserved Slots'] = 0
    # Placeholder; the cells are overwritten with formulas by write_rows.
    slots_sheet_df['Total Slots'] = ''
    slots_sheet_df['Explanation for Reserved Slots'] = ''

    slots_sheet_df = round_floats_in_df(slots_sheet_df)
    rounded_marksheet_df = round_floats_in_df(marksheet_df)

    os.makedirs('output', exist_ok=True)
    output_file = 'output/Slot Distribution.xlsx'

    # Header background, even-row background, odd-row background.
    colors = ['#535FC1', '#C9DAF8', '#FFFFFF']

    with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
        workbook = writer.book

        # Data goes below row 0; the header row is written by write_header.
        slots_sheet_df.to_excel(writer, sheet_name='Slots', index=False, header=False, startrow=1)
        slots_ws = writer.sheets['Slots']
        # unlock_columns creates its format through worksheet.book.
        slots_ws.book = workbook

        write_header(slots_ws, slots_sheet_df)

        general_col = slots_sheet_df.columns.get_loc('General Slots')
        reserved_col = slots_sheet_df.columns.get_loc('Reserved Slots')
        total_col = slots_sheet_df.columns.get_loc('Total Slots')
        explanation_col = slots_sheet_df.columns.get_loc('Explanation for Reserved Slots')

        write_rows(slots_ws, slots_sheet_df, general_col, reserved_col, total_col)
        
        apply_alternating_colors(workbook, slots_ws, slots_sheet_df, colors)
        autofit_columns(slots_ws, slots_sheet_df)

        # Protect the sheet, then rewrite the editable columns with an
        # unlocked format for every data row.
        lock_sheet(slots_ws)
        editable_cols = [reserved_col, explanation_col]
        unlock_columns(slots_ws, editable_cols, 1, len(slots_sheet_df), slots_sheet_df)

        # Second sheet: the marksheet, with no unlocked columns.
        rounded_marksheet_df.to_excel(writer, sheet_name='Marksheet', index=False, header=False, startrow=1)
        marksheet_ws = writer.sheets['Marksheet']
        marksheet_ws.book = workbook

        write_header(marksheet_ws, rounded_marksheet_df)
        apply_alternating_colors(workbook, marksheet_ws, rounded_marksheet_df, colors)
        autofit_columns(marksheet_ws, rounded_marksheet_df)

        lock_sheet(marksheet_ws)

In [None]:
# Write the Excel workbook from the exported marksheet and slot distribution.
generateExcelSheet(marksheet_df, slots_df)