# Import Libraries

In [None]:
from abc import ABC, abstractmethod
from bs4 import BeautifulSoup

import regex as re
import json
import os
from datetime import date

import numpy as np
import pandas as pd

# Define Team Class

In [None]:
class Team:
    """One row of a contest standings table.

    Attributes:
        rank: Position of the team in the standings.
        name: Team name as shown in the standings.
        institution: Institution name, stored upper-cased.
        solved: Number of problems solved.
        penalty: Penalty value reported by the standings.
        first_solve_count: Number of problems the team solved first.
    """

    def __init__(self, rank, name, institution, solved, penalty, first_solve_count):
        # Attribute order is also the key order of the JSON produced by __repr__.
        self.rank = rank
        self.name = name
        # Upper-cased so institution lookups do not depend on letter case.
        self.institution = institution.upper()
        self.solved = solved
        self.penalty = penalty
        self.first_solve_count = first_solve_count

    def __lt__(self, other):
        """Order teams by rank; defer to the other operand for non-Team values."""
        if isinstance(other, Team):
            return self.rank < other.rank
        return NotImplemented

    def __repr__(self):
        """Return the instance attributes serialized as a JSON object."""
        return json.dumps(vars(self))

# Define ContestParser Class

In [None]:
class ContestParser(ABC):
    """Base class for standings parsers, plus a registry of parser instances.

    Subclasses implement ``parseFile``; instances are registered under a
    case-insensitive key and retrieved with ``get_parser``.
    """

    # Registry shared by every subclass: upper-cased key -> parser instance.
    _parsers = {}

    def parse(self, filepaths):
        """
        Parses several standings files and concatenates their teams.

        Args:
            filepaths (iterable[str]): Paths of the files to parse, in order.

        Returns:
            list[Team]: Teams from all files, in file order.
        """
        return [team for path in filepaths for team in self.parseFile(path)]

    @abstractmethod
    def parseFile(self, filepath):
        """
        Parses one HTML standings file into Team objects.

        Args:
            filepath (str): Path to the HTML file containing the contest data.

        Returns:
            list[Team]: A list of Team objects.
        """

    @classmethod
    def get_parser(cls, key):
        """
        Looks up the parser registered under ``key`` (case-insensitive).

        Args:
            key (str): The key identifying the parser.

        Returns:
            ContestParser: The parser instance corresponding to the key.

        Raises:
            ValueError: If no parser is registered under the key.
        """
        normalized = key.upper()
        try:
            return cls._parsers[normalized]
        except KeyError:
            raise ValueError(f"No parser found for key: {normalized}") from None

    @classmethod
    def register_parser(cls, key, parser_instance):
        """
        Registers a parser instance under ``key`` (stored upper-cased).

        Args:
            key (str): The key to associate with the parser.
            parser_instance (ContestParser): The parser instance to register.

        Raises:
            ValueError: If the key is already taken.
        """
        normalized = key.upper()
        if normalized in cls._parsers:
            raise ValueError(f"Parser for key '{key}' is already registered.")

        cls._parsers[normalized] = parser_instance

## Define Implementations of ContestParser Class

In [None]:
class TophParser(ContestParser):
    """Parser for standings pages in the "toph" HTML layout."""

    def parseFile(self, filepath):
        """
        Reads one standings file and converts each table row into a Team.

        Rows with fewer than three cells are skipped. A row that cannot be
        parsed is reported on stdout and skipped, so one malformed row does
        not abort the whole file.

        Args:
            filepath (str): Path to the UTF-8 encoded HTML file.

        Returns:
            list[Team]: Teams in table order.

        Raises:
            ValueError: If the document contains no <table>.
        """
        with open(filepath, "r", encoding="utf-8") as handle:
            soup = BeautifulSoup(handle.read(), "html.parser")

        standings = soup.find("table")
        if not standings:
            raise ValueError("No table found in the HTML document.")

        parsed_teams = []
        # The first <tr> is skipped (treated as the header row).
        for row in standings.find_all("tr")[1:]:
            cells = row.find_all("td")
            if len(cells) < 3:
                continue

            try:
                parsed_teams.append(self._team_from_cells(cells))
            except Exception as e:
                print(f"Error processing row: {row}")
                print(e)

        return parsed_teams

    def _team_from_cells(self, cells):
        """Build a Team from the <td> cells of one standings row."""
        rank_cell, team_cell, score_cell = cells[0], cells[1], cells[2]

        rank = int(rank_cell.get_text(strip=True))

        # The team name is the first child node of the cell; the institution
        # sits in an optional <div class="adjunct"> inside the same cell.
        team_name = team_cell.contents[0].strip()
        adjunct = team_cell.find("div", class_="adjunct")
        institution = adjunct.get_text(strip=True) if adjunct else ""

        solve_count = int(score_cell.find("strong").get_text(strip=True))
        # The penalty is only present inside the tooltip text of the score cell.
        tooltip = score_cell.find("div", class_="adjunct").get("data-tippy-content")
        penalty = int(re.search(r"Penalty: (\d+)", tooltip).group(1))

        # Problem cells start at index 3; a green icon whose tooltip reads
        # "First to Solve" marks a first solve.
        first_solve_count = 0
        for cell in cells[3:]:
            badge = cell.find("img", class_="icon green")
            if badge and badge.get("data-tippy-content") == "First to Solve":
                first_solve_count += 1

        return Team(
            rank=rank,
            name=team_name,
            institution=institution,
            solved=solve_count,
            penalty=penalty,
            first_solve_count=first_solve_count,
        )

# Make the parser available under the (case-insensitive) key "toph".
ContestParser.register_parser("toph", TophParser())

In [None]:
class BAPSparser(ContestParser):
    """Parser for standings pages in the "baps" HTML layout."""

    def parseFile(self, filepath):
        """
        Reads one standings file and converts each table row into a Team.

        Rows with fewer than three cells are skipped. A row that cannot be
        parsed is reported on stdout and skipped, so one malformed row does
        not abort the whole file.

        Args:
            filepath (str): Path to the UTF-8 encoded HTML file.

        Returns:
            list[Team]: Teams in table order.

        Raises:
            ValueError: If the document contains no <table>.
        """
        with open(filepath, "r", encoding="utf-8") as handle:
            soup = BeautifulSoup(handle.read(), "html.parser")

        standings = soup.find("table")
        if not standings:
            raise ValueError("No table found in the HTML document.")

        parsed_teams = []
        # The first <tr> is skipped (treated as the header row).
        for row in standings.find_all("tr")[1:]:
            cells = row.find_all("td")
            if len(cells) < 3:
                continue

            try:
                parsed_teams.append(self._team_from_cells(cells))
            except Exception as e:
                print(f"Error processing row: {row}")
                print(e)

        return parsed_teams

    def _team_from_cells(self, cells):
        """Build a Team from the <td> cells of one standings row."""
        rank_cell, team_cell, score_cell = cells[0], cells[1], cells[2]

        rank = int(rank_cell.get_text(strip=True))
        team_name = team_cell.find("strong").get_text(strip=True)
        institution_div = team_cell.find("div")
        institution = institution_div.get_text(strip=True) if institution_div else ""

        # The score cell text holds the solve count (first number) and the
        # penalty (number inside parentheses).
        score_text = score_cell.get_text(strip=True)
        solve_count = int(re.search(r"(\d+)", score_text).group(1))
        penalty = int(re.search(r"\((\d+)\)", score_text).group(1))

        # Problem cells start at index 3; a <div> whose inline style has a
        # "shine" animation is counted as a first solve.
        shine_style = re.compile(r"animation:.*shine.*")
        first_solve_count = 0
        for cell in cells[3:]:
            if cell.find("div", style=shine_style):
                first_solve_count += 1

        return Team(
            rank=rank,
            name=team_name,
            institution=institution,
            solved=solve_count,
            penalty=penalty,
            first_solve_count=first_solve_count,
        )

# Make the parser available under the (case-insensitive) key "baps".
ContestParser.register_parser("baps", BAPSparser())

# Define Contest Class

In [None]:
class Contest:
    """A contest: its parsed teams, grouped by institution, and its date.

    Attributes:
        name: Contest name, stored upper-cased.
        team_list: Teams returned by the parser for all given files.
        max_solved: Highest ``solved`` among the teams (0 when there are none).
        institution_map: Institution name -> list of that institution's teams.
        date: Contest date as a ``datetime.date``.
    """

    def __init__(self, name, filepaths, parser, date_string):
        """
        Args:
            name (str): Contest name.
            filepaths (iterable[str]): Standings files to parse.
            parser (ContestParser): Parser used to read the files.
            date_string (str): Contest date in ISO format (YYYY-MM-DD).

        Raises:
            TypeError: If ``parser`` is not a ContestParser.
        """
        if not isinstance(parser, ContestParser):
            raise TypeError("parser must be an instance of ContestParser")

        self.name = name.upper()
        self.team_list = parser.parse(filepaths)

        self.max_solved = 0
        self.institution_map = {}
        self.date = date.fromisoformat(date_string)

        for team in self.team_list:
            if team.solved > self.max_solved:
                self.max_solved = team.solved
            # Group teams under their (already upper-cased) institution name.
            self.institution_map.setdefault(team.institution, []).append(team)

    def __lt__(self, other):
        """Order contests chronologically."""
        if isinstance(other, Contest):
            return self.date < other.date
        return NotImplemented

    def __repr__(self):
        """Return the contest, including all teams, as indented JSON."""
        teams_as_dicts = [team.__dict__ for team in self.team_list]
        grouped = {}
        for institution, members in self.institution_map.items():
            grouped[institution] = [member.__dict__ for member in members]

        payload = {
            "name": self.name,
            "date": self.date.isoformat(),
            "max_solved": self.max_solved,
            "team_list": teams_as_dicts,
            "institution_map": grouped,
        }
        return json.dumps(payload, indent=4)

# Define Institution Class

In [None]:
class Institution:
    """An institution, its alternative spellings, and its teams per contest.

    Attributes:
        name: Canonical institution name, stored upper-cased.
        alt_names: Alternative names, stored upper-cased.
        contest_map: Contest name -> sorted list of this institution's teams.
    """

    def __init__(self, name, alt_names=None):
        """
        Args:
            name (str): Canonical institution name.
            alt_names (iterable[str] | None): Other spellings under which the
                institution may appear in standings. Defaults to none.
        """
        self.name = name.upper()
        # None instead of a mutable [] default, which would be shared by all calls.
        self.alt_names = [alt_name.upper() for alt_name in (alt_names or [])]
        self.contest_map = {}

    def add_contest(self, contest):
        """
        Records the teams this institution fielded in ``contest``.

        Teams registered under the canonical name or any alternative name are
        collected and stored sorted. Nothing is stored when the institution
        has no team in the contest.

        Args:
            contest: Object exposing ``name`` and ``institution_map``
                (institution name -> list of teams).
        """
        institution_map = contest.institution_map

        # De-duplicate the lookup names (keeping their order): an alias equal
        # to the canonical name, or listed twice, must not count teams twice.
        lookup_names = dict.fromkeys([self.name, *self.alt_names])

        contest_team_list = []
        for lookup_name in lookup_names:
            contest_team_list.extend(institution_map.get(lookup_name, []))

        if contest_team_list:
            self.contest_map[contest.name] = sorted(contest_team_list)

    def get_contest_teams(self, contest_name):
        """Return the sorted teams for ``contest_name``, or None if there are none."""
        return self.contest_map.get(contest_name)

    def __repr__(self):
        """Return the institution and its teams per contest as indented JSON."""
        return json.dumps({
            "name": self.name,
            "alt_names": self.alt_names,
            "contest_map": {
                contest_name: [team.__dict__ for team in teams]
                for contest_name, teams in self.contest_map.items()
            }
        }, indent=4)

# Load Data

In [None]:
def load_contests_from_json(file_path, contest_dir="./input/grading/contest_files/"):
    """
    Builds Contest objects from a JSON description file.

    The file must contain a top-level "contests" list whose entries have the
    keys "name", "filenames", "parser" and "date".

    Args:
        file_path (str): Path to the JSON file (read as UTF-8).
        contest_dir (str): Directory holding the standings files named in
            "filenames". Defaults to the project's contest_files directory.

    Returns:
        dict[str, Contest]: Contests keyed by the "name" given in the file.
            A later entry with the same name replaces an earlier one.

    Raises:
        ValueError: If an entry names a parser key that is not registered.
    """
    # Finish reading before parsing the (many) standings files, so the JSON
    # file is not held open longer than needed.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    contests = {}
    for entry in data["contests"]:
        name = entry["name"]
        filepaths = [os.path.join(contest_dir, filename) for filename in entry["filenames"]]
        parser = ContestParser.get_parser(entry["parser"])
        contests[name] = Contest(name, filepaths, parser, entry["date"])

    return contests

def load_institutions_from_json(file_path):
    """
    Builds Institution objects from a JSON description file.

    The file must contain a top-level "institutions" list whose entries have
    a "name" and, optionally, an "alt_names" list.

    Args:
        file_path (str): Path to the JSON file (read as UTF-8).

    Returns:
        list[Institution]: Institutions in file order.
    """
    # Explicit UTF-8: institution names may contain non-ASCII characters and
    # the platform default encoding is not UTF-8 everywhere.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    return [
        Institution(entry["name"], entry.get("alt_names", []))
        for entry in data["institutions"]
    ]

In [None]:
# Parse every contest described in contests.json into a dict of Contest
# objects keyed by contest name.
contests_file_path = "./input/grading/contests.json"
contests = load_contests_from_json(contests_file_path)

# Build one Institution per entry of institutions.json.
institutions_file_path = "./input/grading/institutions.json"
institutions = load_institutions_from_json(institutions_file_path)

In [None]:
# Contest.__lt__ compares dates, so this orders the contests oldest first.
contest_list = sorted(contests.values())

In [None]:
# Record, for each institution, the teams it fielded in every contest
# (matched by canonical name or alternative name inside add_contest).
for institution in institutions:
    for contest in contest_list:
        institution.add_contest(contest)

In [None]:
# Load the per-contest credit values; the calculators look them up by
# contest name.
credits_file_path = "./input/grading/credits.json"
with open(credits_file_path, "r") as file: credits_map = json.load(file)

# Define Calculator Class

In [None]:
class GradeCalculator:
    """Rates institutions from per-contest grade points combined over time.

    Each contest yields a grade point per institution; an institution's
    rating after a contest is a weighted quadratic mean of all its grade
    points so far, with older contests weighted down in 3-month steps.
    """

    def get_grade_point(self, institution, contest, contest_name):
        """
        Computes an institution's grade point for one contest.

        The best four teams (by sort order, i.e. rank) are each graded as
        ``4000 * 0.98 ** (rank - 1) * sqrt(solved / max_solved)`` and the
        grades are combined with a power mean of exponent 3.14159.

        Args:
            institution: Object exposing ``get_contest_teams(contest_name)``.
            contest: Object exposing ``max_solved``.
            contest_name (str): Name under which the teams are looked up.

        Returns:
            float | None: The grade point, or None when the institution has
            no team in the contest.
        """
        contest_teams = institution.get_contest_teams(contest_name)
        if not contest_teams:
            return None

        rank_decay_rate = 0.02
        k = 3.14159
        max_solved = contest.max_solved

        team_grades = []
        for team in sorted(contest_teams)[:4]:
            # When nobody solved anything max_solved is 0; treat the solved
            # ratio as 0 instead of dividing by zero.
            solved_ratio = team.solved / max_solved if max_solved else 0.0
            grade = 4000 * (1 - rank_decay_rate) ** (team.rank - 1) * solved_ratio ** (1 / 2)
            team_grades.append(grade)

        return np.mean(np.array(team_grades) ** k) ** (1 / k)

    def get_cgpa(self, institution_list, contest_list, credits_map):
        """
        Computes ratings and ranks of all institutions after every contest.

        Args:
            institution_list: Institutions to rate.
            contest_list: Contests ordered oldest first; must not be empty
                (the newest contest supplies the final "Rating" column).
            credits_map (dict): Contest name -> credit (base weight).

        Returns:
            tuple[DataFrame, DataFrame, DataFrame]: ``(marksheet,
            history_cgpa, history_rank)``, all indexed by institution name,
            sorted by final rating (descending), with newest contests first
            and values as nullable integers. ``marksheet`` holds the final
            "Rating" plus one "<contest> Point" column per contest;
            ``history_cgpa`` the "Rating After <contest>" columns;
            ``history_rank`` the matching "Rank After <contest>" columns.

        Raises:
            KeyError: If a contest name is missing from ``credits_map``.
        """
        contest_name_list = [c.name for c in reversed(contest_list)]
        institution_names = [inst.name for inst in institution_list]

        marksheet = pd.DataFrame(
            index=institution_names,
            columns=[contest_name + " Point" for contest_name in contest_name_list]
        )
        history_cgpa = pd.DataFrame(
            index=institution_names,
            columns=["Rating After " + contest_name for contest_name in contest_name_list]
        )

        time_decay_rate = 0.25
        time_decay_pulse = 91.3125  # 3 months

        # (contest, grade points aligned with institution_list) for every
        # contest seen so far. Grade points never change, so each is computed
        # once rather than again for every later contest.
        processed = []
        for contest in contest_list:
            grade_points = []
            for institution in institution_list:
                gp = self.get_grade_point(institution, contest, contest.name)
                marksheet.at[institution.name, contest.name + " Point"] = gp
                grade_points.append(gp)
            processed.append((contest, grade_points))

            gp_l2 = {name: 0.0 for name in institution_names}
            credit_l2 = {name: 0.0 for name in institution_names}

            for earlier, earlier_points in processed:
                # The weight loses 25% for every full 3-month period between
                # the earlier contest and the one being rated.
                period = (contest.date - earlier.date).days // time_decay_pulse
                weight = credits_map[earlier.name] * ((1 - time_decay_rate) ** period)

                for institution, gp in zip(institution_list, earlier_points):
                    if gp is not None:
                        gp_l2[institution.name] += (gp * weight) ** 2
                        credit_l2[institution.name] += weight ** 2

            for name in institution_names:
                if credit_l2[name] > 0:
                    cgpa = (gp_l2[name] / credit_l2[name]) ** 0.5
                else:
                    # No (weighted) participation so far: no rating yet.
                    cgpa = None
                history_cgpa.at[name, "Rating After " + contest.name] = cgpa

        # Columns run newest first, so the first one is the current rating.
        last_col = history_cgpa.columns[0]
        marksheet.insert(0, "Rating", history_cgpa[last_col])

        marksheet = marksheet.infer_objects(copy=False)
        history_cgpa = history_cgpa.infer_objects(copy=False)

        marksheet.sort_values(by="Rating", ascending=False, inplace=True)
        history_cgpa = history_cgpa.loc[marksheet.index]

        # Ranks are taken from the unrounded ratings.
        history_rank = history_cgpa.rank(axis=0, ascending=False, method='min')
        history_rank.columns = ["Rank After " + contest_name for contest_name in contest_name_list]

        marksheet = marksheet.round(0).astype('Int64')
        history_cgpa = history_cgpa.round(0).astype('Int64')
        history_rank = history_rank.astype('Int64')

        return marksheet, history_cgpa, history_rank

## Extra Calculators (Unused)

In [None]:
class ELOcalculator:
    """Alternative calculator that updates ratings with an Elo-style rule.

    Attributes:
        initial_rating: Rating every institution starts with.
        k: Step size of a rating update (scaled by the contest's credit).
    """

    def __init__(self, initial_rating=1500, k=32):
        self.initial_rating = initial_rating
        self.k = k

    def get_performance(self, institution, contest, contest_name):
        """
        Computes an institution's performance score for one contest.

        The best four teams are each graded as
        ``4000 * 0.98 ** (rank - 1) * sqrt(solved / max_solved)`` and the
        grades are combined with a power mean of exponent 3.14159.

        Returns:
            float | None: The score, or None when the institution has no
            team in the contest.
        """
        contest_teams = institution.get_contest_teams(contest_name)
        if not contest_teams:
            return None

        rank_decay_rate = 0.02
        k_norm = 3.14159
        max_solved = contest.max_solved

        team_grades = []
        for team in sorted(contest_teams)[:4]:
            # When nobody solved anything max_solved is 0; treat the solved
            # ratio as 0 instead of dividing by zero.
            solved_ratio = team.solved / max_solved if max_solved else 0.0
            grade = 4000 * (1 - rank_decay_rate) ** (team.rank - 1) * solved_ratio ** 0.5
            team_grades.append(grade)

        return (np.mean(np.array(team_grades) ** k_norm)) ** (1 / k_norm)

    def get_expected_performance(self, inst_rating, others_ratings):
        """
        Returns the mean Elo win expectancy of ``inst_rating`` against each
        rating in ``others_ratings`` (0.0 when there are no opponents).
        """
        expected = 0.0
        for other_rating in others_ratings:
            expected += 1 / (1 + 10 ** ((other_rating - inst_rating) / 400))
        return expected / len(others_ratings) if others_ratings else 0.0

    def get_elo_rating(self, institution_list, contest_list, credits_map):
        """
        Computes Elo-style ratings and ranks after every contest.

        Args:
            institution_list: Institutions to rate.
            contest_list: Contests in the order they are to be processed.
            credits_map (dict): Contest name -> credit multiplying ``k``;
                contests missing from the map use 1.0.

        Returns:
            tuple[DataFrame, DataFrame, DataFrame]: ``(marksheet,
            history_elo, history_rank)`` indexed by institution name, sorted
            by final rating (descending), columns ordered newest contest
            first, values as nullable integers.
        """
        contest_name_list = [c.name for c in reversed(contest_list)]
        institution_names = [inst.name for inst in institution_list]

        ratings = {name: self.initial_rating for name in institution_names}
        marksheet = pd.DataFrame(index=institution_names,
                                 columns=[contest_name + " Point" for contest_name in contest_name_list])
        history_elo = pd.DataFrame(index=institution_names,
                                   columns=["Rating After " + c.name for c in contest_list])

        for contest in contest_list:
            contest_name = contest.name
            credit = credits_map.get(contest_name, 1.0)
            actual_scores = {}
            current_ratings = {}

            for inst in institution_list:
                perf = self.get_performance(inst, contest, contest_name)
                if perf is not None:
                    actual_scores[inst.name] = perf
                    # Snapshot of pre-contest ratings: every update below
                    # must use these, not ratings already changed this round.
                    current_ratings[inst.name] = ratings[inst.name]
                    marksheet.at[inst.name, contest_name + " Point"] = perf

            if actual_scores:
                # Min-max normalise the scores to [0, 1] so they are
                # comparable with the expected score.
                min_score = min(actual_scores.values())
                max_score = max(actual_scores.values())
                if max_score > min_score:
                    for name in actual_scores:
                        actual_scores[name] = (actual_scores[name] - min_score) / (max_score - min_score)
                else:
                    for name in actual_scores:
                        actual_scores[name] = 0.5

            for name in actual_scores:
                others = [r for n, r in current_ratings.items() if n != name]
                expected = self.get_expected_performance(current_ratings[name], others)
                actual = actual_scores[name]
                delta = self.k * credit * (actual - expected)
                ratings[name] += delta

            # Non-participants keep their rating; it is still recorded.
            for name in institution_names:
                history_elo.at[name, "Rating After " + contest_name] = ratings[name]

        marksheet.insert(0, "Rating", history_elo.iloc[:, -1])
        marksheet = marksheet.infer_objects(copy=False)
        history_elo = history_elo.infer_objects(copy=False)

        marksheet.sort_values(by="Rating", ascending=False, inplace=True)
        history_elo = history_elo.loc[marksheet.index]

        history_rank = history_elo.rank(axis=0, ascending=False, method='min')
        history_rank.columns = ["Rank After " + c.name for c in contest_list]

        marksheet = marksheet.round(0).astype('Int64')
        history_elo = history_elo.round(0).astype('Int64')
        history_rank = history_rank.astype('Int64')

        # Present the newest contest first in all three tables.
        contest_by_date = sorted(contest_list, key=lambda c: c.date, reverse=True)

        sorted_point_cols = [c.name + " Point" for c in contest_by_date]
        sorted_rating_cols = ["Rating After " + c.name for c in contest_by_date]
        sorted_rank_cols = ["Rank After " + c.name for c in contest_by_date]

        marksheet = marksheet[["Rating"] + sorted_point_cols]
        history_elo = history_elo[sorted_rating_cols]
        history_rank = history_rank[sorted_rank_cols]

        return marksheet, history_elo, history_rank

In [None]:
class AtCoderRatingCalculator:
    """Alternative calculator: decayed average of performances minus a
    penalty that shrinks as an institution takes part in more contests.

    Attributes:
        center: Average performance used when all weights are zero.
        decay: Per-contest decay applied to older performances.
        max_perf: Optional upper cap on a single performance.
    """

    def __init__(self, center=1500, decay=0.9, max_perf=None):
        self.center = center
        self.decay = decay
        self.max_perf = max_perf

    def get_performance(self, institution, contest, contest_name):
        """
        Computes an institution's performance score for one contest.

        The best four teams are each graded as
        ``4000 * 0.98 ** (rank - 1) * sqrt(solved / max_solved)`` and the
        grades are combined with a power mean of exponent 3.14159.

        Returns:
            float | None: The score, or None when the institution has no
            team in the contest.
        """
        contest_teams = institution.get_contest_teams(contest_name)
        if not contest_teams:
            return None

        rank_decay_rate = 0.02
        k_norm = 3.14159
        max_solved = contest.max_solved

        team_grades = []
        for team in sorted(contest_teams)[:4]:
            # When nobody solved anything max_solved is 0; treat the solved
            # ratio as 0 instead of dividing by zero.
            solved_ratio = team.solved / max_solved if max_solved else 0.0
            grade = 4000 * (1 - rank_decay_rate) ** (team.rank - 1) * solved_ratio ** 0.5
            team_grades.append(grade)

        return (np.mean(np.array(team_grades) ** k_norm)) ** (1 / k_norm)

    def compute_f_penalty(self, n):
        """
        Returns the rating penalty after ``n`` participations.

        The penalty is 1200 for ``n`` of 0 or 1 and decreases towards 0 as
        ``n`` grows: F(n) is rescaled so that F(1) maps to 1200 and its
        limit for large ``n`` maps to 0.
        """
        if n == 0:
            return 1200
        numer = sum((0.81 ** i) for i in range(1, n + 1)) ** 0.5
        denom = sum((self.decay ** i) for i in range(1, n + 1))
        F_n = numer / denom

        F_1 = (0.81 ** 1) ** 0.5 / (self.decay ** 1)
        F_inf = (1 / (1 - 0.81)) ** 0.5 / (1 / (1 - self.decay))

        return ((F_n - F_inf) / (F_1 - F_inf)) * 1200

    def get_rating(self, institution_list, contest_list, credits_map):
        """
        Computes ratings and ranks after every contest.

        An institution's rating changes only when it takes part in a
        contest; otherwise its previous rating is carried forward. An
        institution that has never taken part has no rating (missing value).

        Args:
            institution_list: Institutions to rate.
            contest_list: Contests in the order they are to be processed.
            credits_map (dict): Contest name -> credit used as performance
                weight; contests missing from the map use 1.0.

        Returns:
            tuple[DataFrame, DataFrame, DataFrame]: ``(marksheet,
            history_rating, history_rank)`` indexed by institution name,
            sorted by final rating (descending), columns ordered newest
            contest first, values as nullable integers.
        """
        institution_names = [inst.name for inst in institution_list]

        # Per institution: (performance, credit) pairs, newest first.
        history = {name: [] for name in institution_names}
        # Last computed rating per institution; absent until it first competes.
        latest_rating = {}

        marksheet = pd.DataFrame(index=institution_names,
                                 columns=[contest.name + " Point" for contest in contest_list])
        history_rating = pd.DataFrame(index=institution_names,
                                      columns=["Rating After " + c.name for c in contest_list])

        for contest in contest_list:
            contest_name = contest.name
            credit = credits_map.get(contest_name, 1.0)

            for inst in institution_list:
                perf = self.get_performance(inst, contest, contest_name)

                if perf is not None:
                    if self.max_perf is not None:
                        perf = min(perf, self.max_perf)

                    history[inst.name].insert(0, (perf, credit))  # newest first
                    marksheet.at[inst.name, contest_name + " Point"] = perf

                    # Credit-weighted average in which the i-th newest
                    # performance is scaled by decay ** i.
                    weighted_sum = 0
                    weight_total = 0
                    for i, (rperf, c) in enumerate(history[inst.name], 1):
                        weight = c * (self.decay ** i)
                        weighted_sum += rperf * weight
                        weight_total += weight

                    if weight_total == 0:
                        avg_perf = self.center
                    else:
                        avg_perf = weighted_sum / weight_total

                    penalty = self.compute_f_penalty(len(history[inst.name]))
                    latest_rating[inst.name] = avg_perf - penalty

                # Record the rating even when the institution skipped this
                # contest, so the final "Rating" column is not empty for
                # everyone absent from the latest contest.
                if inst.name in latest_rating:
                    history_rating.at[inst.name, "Rating After " + contest_name] = latest_rating[inst.name]

        marksheet.insert(0, "Rating", history_rating.iloc[:, -1])
        marksheet = marksheet.infer_objects(copy=False)
        history_rating = history_rating.infer_objects(copy=False)

        marksheet.sort_values(by="Rating", ascending=False, inplace=True)
        history_rating = history_rating.loc[marksheet.index]

        history_rank = history_rating.rank(axis=0, ascending=False, method='min')
        history_rank.columns = ["Rank After " + c.name for c in contest_list]

        marksheet = marksheet.round(0).astype('Int64')
        history_rating = history_rating.round(0).astype('Int64')
        history_rank = history_rank.astype('Int64')

        # Present the newest contest first in all three tables.
        contest_by_date = sorted(contest_list, key=lambda c: c.date, reverse=True)

        sorted_point_cols = [c.name + " Point" for c in contest_by_date]
        sorted_rating_cols = ["Rating After " + c.name for c in contest_by_date]
        sorted_rank_cols = ["Rank After " + c.name for c in contest_by_date]

        marksheet = marksheet[["Rating"] + sorted_point_cols]
        history_rating = history_rating[sorted_rating_cols]
        history_rank = history_rank[sorted_rank_cols]

        return marksheet, history_rating, history_rank

# Perform Calculations

In [None]:
# Rate every institution with the grade-point calculator. contest_list is
# sorted oldest first, which get_cgpa relies on for its time decay.
calculator = GradeCalculator()
marksheet, history_cgpa, history_rank = calculator.get_cgpa(institutions, contest_list, credits_map)

In [None]:
# Final rating plus per-contest points.
# NOTE(review): display is not imported here — presumably the notebook's built-in; confirm.
display(marksheet)

In [None]:
# Rating of every institution after each contest.
# NOTE(review): display is not imported here — presumably the notebook's built-in; confirm.
display(history_cgpa)

# Export Results

In [None]:
# Create the output directory if it does not exist yet.
os.makedirs('result', exist_ok=True)

In [None]:
# Contest.__repr__ returns JSON, so the repr of the list ("[..., ...]") is
# itself a JSON array; it is parsed back and re-dumped with indentation.
with open("./result/contests.json", "w") as file:
    json.dump(json.loads(contest_list.__repr__()), file, indent=4)

In [None]:
# Institution.__repr__ returns JSON, so the repr of the list ("[..., ...]")
# is itself a JSON array; it is parsed back and re-dumped with indentation.
with open("./result/institutions.json", "w") as file:
    json.dump(json.loads(institutions.__repr__()), file, indent=4)

In [None]:
# Turn the institution-name index into a regular "Institution" column
# (reset_index names it "index") and write the marksheet as CSV.
marksheet_df = marksheet.reset_index()
marksheet_df.rename(columns={"index": "Institution"}, inplace=True)

marksheet_df.to_csv("./result/marksheet.csv", index=False)

In [None]:
# Turn the institution-name index into a regular "Institution" column
# (reset_index names it "index") and write the rating history as CSV.
history_cgpa_df = history_cgpa.reset_index()
history_cgpa_df.rename(columns={"index": "Institution"}, inplace=True)

history_cgpa_df.to_csv("./result/history_cgpa.csv", index=False)

In [None]:
# Turn the institution-name index into a regular "Institution" column
# (reset_index names it "index") and write the rank history as CSV.
history_rank_df = history_rank.reset_index()
history_rank_df.rename(columns={"index": "Institution"}, inplace=True)

history_rank_df.to_csv("./result/history_rank.csv", index=False)