In [17]:
import math
import typing as t
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

In [18]:
# Enable tqdm's pandas integration for this kernel.
# NOTE(review): presumably this is here for `progress_apply`; no such call is
# visible in this section (the cells below use plain `.apply`) — confirm it is
# still needed.
tqdm.pandas()

In [19]:
# Directory layout: <ROOT_DIR>/datasets/<dataset name>/...
# DATASET_DIR_PATH is the directory handed to the CSV loader below.
ROOT_DIR = Path('/root/data')
DATASET_ROOT_DIR_PATH = ROOT_DIR.joinpath('datasets')
DATASET_DIR_PATH = DATASET_ROOT_DIR_PATH.joinpath('fp-0p925')

In [20]:
def _load_train_test_df(data_dir: Path) -> t.Tuple[pd.DataFrame, pd.DataFrame]:
    """Read the train and test tables and attach the model scores to each.

    For each split, the discourse table and the score table are read from
    ``data_dir`` and joined with ``DataFrame.merge`` on
    ``discourse_id`` (discourse table) == ``id`` (score table). The merge uses
    pandas' default join type (inner), so rows without a partner in the other
    table do not appear in the result.

    Args:
        data_dir: Directory that contains the four CSV files named below.

    Returns:
        ``(train_df, test_df)``, each the merged frame for its split.
    """
    # (discourse table, score table) per split, in the order train, test.
    split_files = (
        (
            'TRAIN_DB.csv',
            'microsoft-deberta-v3-base-v7-meta-tti-valfreq_0p1-focal_gamma_0p3-valid.csv',
        ),
        (
            'TEST_DB.csv',
            'microsoft-deberta-v3-base-v7-meta-tti-valfreq_0p1-focal_gamma_0p3.csv',
        ),
    )
    train_df, test_df = (
        pd.read_csv(data_dir / db_name).merge(
            pd.read_csv(data_dir / score_name),
            left_on='discourse_id',
            right_on='id',
        )
        for db_name, score_name in split_files
    )
    return train_df, test_df

# Load both splits once at notebook scope. The cells shown below only read
# train_df; test_df is not used in this section.
train_df, test_df = _load_train_test_df(DATASET_DIR_PATH)

In [21]:
def _get_loss(row: t.Dict[str, t.Any]) -> float:
    """Return the negative log of the score assigned to the row's true label.

    Args:
        row: Mapping (a dict or a DataFrame row) with the keys
            ``discourse_effectiveness`` (one of ``'Ineffective'``,
            ``'Adequate'``, ``'Effective'``) and ``score_ineffective``,
            ``score_adequate``, ``score_effective``.

    Returns:
        ``-log(score)`` for the score that belongs to the true label. Scores
        below ``1e-15`` (including 0 and negative values) are treated as
        ``1e-15``, so the result is always finite for non-NaN scores.

    Raises:
        ValueError: If ``discourse_effectiveness`` is not one of the three
            known labels.
    """
    # Smallest score passed to math.log. math.log raises
    # "ValueError: math domain error" for 0 or negative input, which would
    # abort a whole DataFrame.apply because of a single row.
    min_score = 1e-15

    label = str(row['discourse_effectiveness'])
    # All three scores are read and converted up front, so a missing or
    # non-numeric score fails the same way whatever the label is.
    score_by_label = {
        'Ineffective': float(row['score_ineffective']),
        'Adequate': float(row['score_adequate']),
        'Effective': float(row['score_effective']),
    }
    if label not in score_by_label:
        raise ValueError(f'Unknown disc_eff = {label}')
    return -math.log(max(score_by_label[label], min_score))


def get_loss(df: pd.DataFrame) -> float:
    """Return the mean per-row loss of ``df``.

    Each row is scored with ``_get_loss`` (negative log of the score given to
    the row's true label) and the row losses are averaged.

    Args:
        df: Frame with the columns ``_get_loss`` reads:
            ``discourse_effectiveness``, ``score_ineffective``,
            ``score_adequate`` and ``score_effective``.

    Returns:
        The mean loss, or NaN when ``df`` has no rows.
    """
    if len(df) == 0:
        # With zero rows, DataFrame.apply(axis=1) takes pandas' empty-frame
        # path and does not produce a per-row Series, so `.mean()` would not
        # be a scalar. The mean of nothing is reported as NaN instead.
        return float('nan')
    return df.apply(_get_loss, axis=1).mean()

In [22]:
# Baseline: mean negative log of the true-label score over train_df, using the
# scores exactly as loaded.
get_loss(train_df)

0.6015403506132903

In [45]:
def up_lowest_if_uncertain(df: pd.DataFrame, min_certainty: float, min_score_threshold: float, dt_set: t.Set[str]) -> pd.DataFrame:
    """Raise the lowest class score of "uncertain" rows up to a floor.

    A row is adjusted only when all of the following hold:

    * its largest score is below ``min_certainty``;
    * its smallest score is below ``min_score_threshold``;
    * its ``discourse_type`` is in ``dt_set``.

    For such a row the smallest of ``score_ineffective``, ``score_adequate``
    and ``score_effective`` is raised to ``min_score_threshold``, and half of
    the added amount is subtracted from each of the other two scores, so the
    three scores keep their sum. When several scores tie for the minimum, the
    first one in the order ineffective, adequate, effective is raised. All
    other rows are copied unchanged.

    While iterating, the tqdm bar's description shows the running fraction of
    adjusted rows (``affected: ...``).

    Args:
        df: Input frame with ``discourse_type`` and the three score columns.
            It is not modified.
        min_certainty: Rows whose largest score is at least this are skipped.
        min_score_threshold: Floor for the smallest score of adjusted rows.
        dt_set: Discourse types eligible for adjustment.

    Returns:
        A new DataFrame with the same index and columns as ``df``. This also
        holds for an empty ``df``, which yields an empty frame that still has
        the input's columns.
    """
    score_cols = ('score_ineffective', 'score_adequate', 'score_effective')
    total = len(df)
    num_affected = 0
    new_row_list: t.List[t.Dict[t.Any, t.Any]] = []
    it = tqdm(df.iterrows(), total=total)
    for _, row in it:  # each row is a pd.Series keyed by column label
        dt = str(row['discourse_type'])
        scores = [float(row[col]) for col in score_cols]
        new_row: t.Dict[t.Any, t.Any] = dict(row)
        max_score = max(scores)
        min_score = min(scores)
        if max_score >= min_certainty or min_score >= min_score_threshold or dt not in dt_set:
            new_row_list.append(new_row)
            continue
        min_delta = min_score_threshold - min_score
        assert min_delta > 0
        # list.index returns the first position holding the minimum, which is
        # the ineffective -> adequate -> effective tie-break described above.
        lowest = scores.index(min_score)
        for pos, col in enumerate(score_cols):
            if pos == lowest:
                new_row[col] = scores[pos] + min_delta
            else:
                new_row[col] = scores[pos] - min_delta / 2
        num_affected += 1
        it.set_description(f'affected: {num_affected / total:.4f}')
        new_row_list.append(new_row)
    # Passing index/columns keeps the caller's row labels (instead of a fresh
    # 0..n-1 index) and keeps the column set when there are no rows at all.
    return pd.DataFrame(new_row_list, index=df.index, columns=df.columns)


In [53]:
# Mean loss after raising the lowest score of uncertain 'Claim' rows to 0.05;
# compare with the baseline get_loss(train_df).
# NOTE(review): in the recorded outputs this is 0.60188 against a recorded
# baseline of 0.60154, i.e. slightly worse — confirm on a fresh run.
get_loss(up_lowest_if_uncertain(train_df, min_certainty=0.55, min_score_threshold=0.05, dt_set={'Claim'}))

  0%|          | 0/34088 [00:00<?, ?it/s]

0.6018796304756154

In [40]:
# Copy of train_df with one extra column, `loss`, holding each row's
# _get_loss value; train_df itself is left untouched.
debug_train_df = train_df.assign(loss=train_df.apply(_get_loss, axis=1))

In [42]:
# The 25 rows with the highest loss, i.e. where the true label received the
# lowest score.
(
    debug_train_df
    .sort_values('loss', ascending=False)
    .loc[:, ['discourse_type', 'discourse_effectiveness', 'score_ineffective', 'score_adequate', 'score_effective', 'loss']]
    .head(n=25)
)

Unnamed: 0,discourse_type,discourse_effectiveness,score_ineffective,score_adequate,score_effective,loss
10418,Lead,Ineffective,0.001432,0.141906,0.856662,6.548365
4578,Claim,Ineffective,0.001733,0.094916,0.903351,6.358136
6457,Claim,Ineffective,0.002047,0.117202,0.880752,6.191499
6456,Claim,Ineffective,0.002205,0.128517,0.869278,6.117067
17339,Evidence,Ineffective,0.003247,0.133073,0.86368,5.730129
29330,Evidence,Ineffective,0.003978,0.205648,0.790374,5.526866
12751,Claim,Effective,0.293273,0.702706,0.004022,5.516078
15502,Lead,Effective,0.462632,0.533292,0.004076,5.502739
12748,Evidence,Effective,0.200464,0.795432,0.004104,5.495737
29822,Position,Ineffective,0.004162,0.401045,0.594793,5.481864


In [43]:
# The 25 rows with the lowest loss, i.e. where the true label received the
# highest score.
(
    debug_train_df
    .sort_values('loss', ascending=True)
    .loc[:, ['discourse_type', 'discourse_effectiveness', 'score_ineffective', 'score_adequate', 'score_effective', 'loss']]
    .head(n=25)
)

Unnamed: 0,discourse_type,discourse_effectiveness,score_ineffective,score_adequate,score_effective,loss
631,Evidence,Ineffective,0.97801,0.019312,0.002678,0.022235
8689,Evidence,Ineffective,0.97644,0.020885,0.002675,0.023842
3527,Evidence,Ineffective,0.976099,0.020848,0.003054,0.024192
3689,Evidence,Ineffective,0.973964,0.022356,0.00368,0.026381
10232,Evidence,Ineffective,0.972112,0.025142,0.002746,0.028284
12952,Evidence,Ineffective,0.971955,0.025326,0.002719,0.028445
14510,Evidence,Ineffective,0.971619,0.025111,0.00327,0.028791
9992,Evidence,Ineffective,0.97152,0.025106,0.003373,0.028893
2763,Evidence,Ineffective,0.971497,0.024766,0.003737,0.028917
856,Evidence,Effective,0.000902,0.02763,0.971468,0.028947
