In [None]:
import math

import pandas as pd

In [None]:
def save_dataset(dataset, path, sep=';', index=False, encoding='utf-8-sig'):
    """Write ``dataset`` to ``path`` as a CSV file.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame to serialise (anything exposing ``to_csv`` works).
    path : str or path-like or file-like
        Destination handed straight to ``DataFrame.to_csv``.
    sep : str, default ';'
        Field delimiter.
    index : bool, default False
        Whether the frame's index is written as a column.
    encoding : str, default 'utf-8-sig'
        Text encoding of the output file.
    """
    csv_options = {"sep": sep, "index": index, "encoding": encoding}
    dataset.to_csv(path, **csv_options)

def read_dataset(path, sep=';', encoding='utf-8-sig', low_memory=False):
    """Load a CSV file into a DataFrame.

    Parameters
    ----------
    path : str or path-like or file-like
        Source handed straight to ``pandas.read_csv``.
    sep : str, default ';'
        Field delimiter.
    encoding : str, default 'utf-8-sig'
        Text encoding of the input file.
    low_memory : bool, default False
        Forwarded to ``pandas.read_csv`` unchanged.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    reader_options = {"sep": sep, "encoding": encoding, "low_memory": low_memory}
    frame = pd.read_csv(path, **reader_options)
    return frame

In [None]:
# Fallback lookup used when a row has no numeric score: maps the textual grade
# label (as found in the "Оценка" column) to a tuple whose first element is the
# substitute score. Only that first element is read by the functions in this
# notebook.
# NOTE(review): the meaning of the second tuple element (always 1 here) is not
# visible in this notebook — confirm before relying on it.
is_na = {
    'зач.': (70, 1),
    'неуваж.': (0, 1),
    'недсд.': (0, 1),
    '4': (70, 1),
    'недоп.': (0, 1),
    '5': (90, 1),
    'незач.': (20, 1),
    '2': (20, 1),
    '3': (50, 1),
}

In [None]:
def power_penalty_score(student_scores, subject_stats, p=2.0):
    """Average a per-subject penalty value over one student's subjects.

    For each subject the student has a score in, the subject's score list is
    split into non-zero scores and "fails" (everything else, counted as
    ``total_students - number_of_non_zero_scores``). Two formulas are used:

    * student score below 40:
      ``mean_non_zero ** p * (1 + ln(1 / (fail_ratio + 1e-6)))``
    * otherwise:
      ``sign(d) * |d| ** p * fail_ratio`` where ``d = mean_non_zero - score``

    Subjects with no non-zero scores, or with ``total_students == 0``, are
    skipped and do not count towards the average.

    Parameters
    ----------
    student_scores : dict
        Maps subject name -> this student's score for the subject.
    subject_stats : dict
        Maps subject name -> ``{'scores': [...], 'total_students': int}``.
    p : float, default 2.0
        Exponent applied in both formulas.

    Returns
    -------
    float or None
        Mean of the per-subject values, or ``None`` when every subject was
        skipped (or ``student_scores`` is empty).

    Raises
    ------
    KeyError
        If a subject from ``student_scores`` is missing in ``subject_stats``.
    ValueError
        If a subject's statistics are inconsistent (more non-zero scores than
        ``total_students``), which makes the logarithm undefined.
    """
    total_score = 0
    subject_count = 0

    for subject, student_score in student_scores.items():
        stats = subject_stats[subject]
        total_students = stats['total_students']
        non_zero_scores = [s for s in stats['scores'] if s > 0]

        # Nothing to compare against: skip the subject entirely.
        if not non_zero_scores or total_students == 0:
            continue

        mean_clean = sum(non_zero_scores) / len(non_zero_scores)
        num_fails = total_students - len(non_zero_scores)
        fail_ratio = num_fails / total_students

        if student_score < 40:
            # The small epsilon keeps the logarithm finite when nobody failed.
            smoothed_ratio = fail_ratio + 1e-6
            if smoothed_ratio <= 0:
                # Only reachable when total_students is smaller than the number
                # of non-zero scores; report the inputs instead of letting
                # math.log fail with a bare "math domain error".
                raise ValueError(
                    f"inconsistent statistics for subject {subject!r}: "
                    f"fail_ratio={fail_ratio}, num_fails={num_fails}, "
                    f"non_zero_scores={len(non_zero_scores)}, "
                    f"total_students={total_students}"
                )
            multiplier = 1 + math.log(1 / smoothed_ratio)
            adjusted = (mean_clean ** p) * multiplier
        else:
            delta = mean_clean - student_score
            # Raise |delta| to the power p but keep the sign of delta.
            signed_power_delta = math.copysign(abs(delta) ** p, delta)
            adjusted = signed_power_delta * fail_ratio

        total_score += adjusted
        subject_count += 1

    if subject_count == 0:
        return None

    return total_score / subject_count

In [None]:
def make_subject_stats(df):
    """Collect the list of scores and the row count for every subject.

    Rows are read from the columns "Наименование дисциплины" (subject name),
    "Балл" (numeric score) and "Оценка" (textual grade). When the numeric
    score is missing, the substitute score for the row's grade label is taken
    from the module-level ``is_na`` table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the three columns listed above.

    Returns
    -------
    dict
        Maps subject name -> ``{'scores': [...], 'total_students': int}``,
        where ``total_students`` is the number of rows seen for the subject.

    Raises
    ------
    KeyError
        If a required column is absent, or a row with a missing score has a
        grade label that is not a key of ``is_na``.
    """
    stats = {}
    # Iterate only over the columns that are actually needed; this avoids
    # building a Series object per row as DataFrame.iterrows does.
    rows = zip(df["Наименование дисциплины"], df["Балл"], df["Оценка"])
    for subject, score, grade in rows:
        if pd.isna(score):
            score = is_na[grade][0]
        entry = stats.setdefault(subject, {'scores': [], 'total_students': 0})
        entry['scores'].append(score)
        entry['total_students'] += 1
    return stats

In [None]:
def process_scores(df):
    """Compute the power-penalty score of every student in ``df``.

    Subject statistics are built once from the whole table with
    ``make_subject_stats``; then each student's subject -> score mapping is
    assembled and passed to ``power_penalty_score``. A missing numeric score
    ("Балл") is replaced by the substitute for the row's grade label
    ("Оценка") from the module-level ``is_na`` table. If a student has
    several rows for the same subject, the last row wins.

    Rows are grouped by student id regardless of their order in the table, so
    the result does not depend on the frame being sorted by student.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with columns "UUID студента", "Наименование дисциплины",
        "Балл" and "Оценка".

    Returns
    -------
    dict
        Maps student id -> value returned by ``power_penalty_score`` (a float,
        or ``None`` when no subject could be scored). Keys appear in order of
        each student's first row.
    """
    subject_stats = make_subject_stats(df)

    # student id -> {subject: score}; built in one pass so interleaved rows of
    # the same student are merged instead of overwriting each other.
    scores_by_student = {}
    rows = zip(
        df["UUID студента"],
        df["Наименование дисциплины"],
        df["Балл"],
        df["Оценка"],
    )
    for student, subject, score, grade in rows:
        if pd.isna(score):
            score = is_na[grade][0]
        scores_by_student.setdefault(student, {})[subject] = score

    return {
        student: power_penalty_score(student_data, subject_stats)
        for student, student_data in scores_by_student.items()
    }

In [None]:
def check_nuniques(df):
    """Print, per column, how the number of distinct values varies by student.

    The frame is grouped by "UUID студента" and the distinct values of every
    other column are counted within each group. For each column one line is
    printed: the column name, how many different per-student counts occur,
    and the array of those counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a "UUID студента" column.
    """
    per_student_counts = df.groupby("UUID студента").nunique()
    for name in list(per_student_counts.columns):
        counts = per_student_counts[name]
        print(f"{name} - {counts.nunique()} - {counts.unique()}")

In [None]:
import bisect

def get_position_with_binary_search(new_rating, sorted_ratings_desc):
    """Return the 1-based rank a new rating would take in a descending list.

    The rank equals one plus the number of existing ratings that are strictly
    greater than ``new_rating``; a rating equal to existing ones therefore
    shares the best position among them.

    Parameters
    ----------
    new_rating : float or int
        Rating of the new student.
    sorted_ratings_desc : list
        Existing ratings sorted in descending order.

    Returns
    -------
    int
        Position in the ranking, starting at 1 (an empty list yields 1).
    """
    # bisect.bisect_left only works on ascending sequences, so the search is
    # written out with the comparison reversed for descending order.
    low, high = 0, len(sorted_ratings_desc)
    while low < high:
        middle = (low + high) // 2
        if sorted_ratings_desc[middle] > new_rating:
            # Everything up to and including `middle` ranks above the newcomer.
            low = middle + 1
        else:
            high = middle

    return low + 1  # positions are 1-based