In [1]:
from enum import Enum
from typing import Tuple, Union

import numpy as np
import pandas as pd
from scipy.optimize import brentq

class ScoreMode(str, Enum):
    """
    Contributor weighting strategies accepted by `get_global_scores`.

    This is an enumeration, so `for mode in ScoreMode` visits every mode.
    Members are also strings: `ScoreMode.DEFAULT == "default"` is True and
    `str(ScoreMode.DEFAULT)` is `"default"`.
    """

    # Trusted rows get fixed weights; the other rows get a per-entity weight
    DEFAULT = "default"
    # Every row has the same weight
    ALL_EQUAL = "all_equal"
    # Only rows flagged `is_trusted` are kept, all with the same weight
    TRUSTED_ONLY = "trusted_only"

    def __str__(self):
        # Render as the raw value on every Python version
        return self.value
    
EPSILON = 1e-6  # absolute tolerance (`xtol`) of the root search in QrMed


def QrMed(W: float, w: Union[pd.Series, float], x: pd.Series, delta: pd.Series):
    """
    Quadratically regularized median

    The result is the root of `L_prime` (defined below), located with
    Brent's method. An empty `x` yields 0.0.

    Parameters:
        * `W`: Byzantine resilience parameter.
            The influence of a single contributor 'i' is bounded by (w_i/W)
        * `w`: voting rights, as a vector or as a single value
        * `x`: partial scores vector
        * `delta`: partial scores uncertainties vector
    """
    if len(x) == 0:
        return 0.0

    # pandas inputs are turned into arrays so the arithmetic is positional
    w, x, delta = [
        arg.to_numpy() if isinstance(arg, pd.Series) else arg
        for arg in (w, x, delta)
    ]
    delta_2 = delta ** 2

    def L_prime(m: float):
        diff = x - m
        return W * m - np.sum(w * diff / np.sqrt(delta_2 + diff ** 2))

    # Double each bound until `L_prime` is no longer positive at the lower
    # bound and no longer negative at the upper bound
    lower = -1.0
    while L_prime(lower) > 0:
        lower *= 2

    upper = 1.0
    while L_prime(upper) < 0:
        upper *= 2

    # Brent's method is used as a faster alternative to usual bisection
    return brentq(L_prime, lower, upper, xtol=EPSILON)


def QrDev(
    W: float,
    default_dev: float,
    w: Union[pd.Series, float],
    x: pd.Series,
    delta: pd.Series,
    qr_med=None,
):
    """
    Quadratically regularized deviation, between x and their QrMed.
    Can be understood as a measure of polarization.

    `qr_med` may be supplied when already known; otherwise it is computed
    with `QrMed(W, w, x, delta)`.
    """
    center = QrMed(W, w, x, delta) if qr_med is None else qr_med
    # Absolute distances to the center, measured relative to `default_dev`
    shifted_distances = np.abs(x - center) - default_dev
    return default_dev + QrMed(W, w, shifted_distances, delta)


def QrUnc(
    W: float,
    default_dev: float,
    w: pd.Series,
    x: pd.Series,
    delta: pd.Series,
    qr_med=None,
):
    """
    Quadratically regularized uncertainty

    Returns `QrDev` alone when the weighted sum computed below is not
    positive; otherwise returns the average of `QrDev` and
    `k = (h - W) ** (-1 / 2)`, weighted by `exp(-QrDev)` and `exp(-k)`.
    """
    w, x, delta = [
        arg.to_numpy() if isinstance(arg, pd.Series) else arg
        for arg in (w, x, delta)
    ]

    if qr_med is None:
        qr_med = QrMed(W, w, x, delta)
    qr_dev = QrDev(W, default_dev, w, x, delta, qr_med=qr_med)

    delta_2 = delta ** 2
    # Each contribution is capped at 1 before being weighted
    capped_terms = np.minimum(1, delta_2 * (delta_2 + (x - qr_med) ** 2) ** (-3 / 2))
    h = W + np.sum(w * capped_terms)

    if h <= W:
        return qr_dev

    k = (h - W) ** (-1 / 2)
    exp_dev = np.exp(-qr_dev)
    exp_k = np.exp(-k)
    return (exp_dev * qr_dev + exp_k * k) / (exp_dev + exp_k)


def Clip(x: np.ndarray, center: float, radius: float):
    """Limit the values of `x` to the interval [center - radius, center + radius]."""
    lower_bound = center - radius
    upper_bound = center + radius
    return x.clip(lower_bound, upper_bound)


def ClipMean(w: np.ndarray, x: np.ndarray, center: float, radius: float):
    """Mean of `x` weighted by `w`, after clipping `x` with `Clip(x, center, radius)`."""
    clipped = Clip(x, center, radius)
    weighted_total = np.sum(w * clipped)
    return weighted_total / np.sum(w)


def BrMean(W: float, w: Union[float, np.ndarray], x: np.ndarray, delta: np.ndarray):
    """
    Byzantine-robustified mean

    Weighted mean of `x` after clipping it around `QrMed(4 * W, w, x, delta)`
    with radius `sum(w) / (4 * W)`. An empty `x` yields 0.0.

    Parameters:
        * `W`: Byzantine resilience parameter
        * `w`: voting rights, as a vector or as a single value applied to
            every element of `x`
        * `x`: partial scores vector
        * `delta`: partial scores uncertainties vector
    """
    if len(x) == 0:
        return 0.0
    if np.ndim(w) == 0:
        # Any scalar weight (float, int, numpy scalar) is expanded to one
        # weight per score, so that `np.sum(w)` below is the total weight.
        w = np.full(x.shape, w, dtype=float)
    return ClipMean(w, x, center=QrMed(4 * W, w, x, delta), radius=np.sum(w) / (4 * W))


In [None]:
from abc import ABC, abstractmethod
from typing import Optional

import pandas as pd


class MlInput(ABC):
    """Abstract source of the data consumed by the functions of this notebook."""

    @abstractmethod
    def get_comparisons(
        self,
        trusted_only=False,
        criteria: Optional[str] = None,
        user_id: Optional[int] = None,
    ) -> pd.DataFrame:
        """Fetch data about comparisons submitted by users

        The keyword arguments are optional filters; how they are applied is
        up to each implementation.

        Returns:
        - comparisons_df: DataFrame with columns
            * `user_id`: int
            * `entity_a`: int or str
            * `entity_b`: int or str
            * `criteria`: str
            * `score`: float
            * `weight`: float
        """

    @abstractmethod
    def get_ratings_properties(self) -> pd.DataFrame:
        """Fetch data about contributor ratings properties

        Returns:
        - ratings_df: DataFrame with columns
            * `user_id`: int
            * `entity_id`: int or str
            * `is_public`: bool
            * `is_trusted`: bool
            * `is_supertrusted`: bool
        """
class MlInputFromPublicDataset(MlInput):
    """
    `MlInput` reading its data from a CSV file.

    The code below reads the columns `video_a`, `video_b`, `public_username`,
    `criteria`, `score` and `weight` from that file.
    """

    def __init__(self, csv_file):
        dataset = pd.read_csv(csv_file).rename(
            columns={"video_a": "entity_a", "video_b": "entity_b"}
        )
        # Usernames become integer codes; `user_indices` holds the distinct
        # usernames returned by `factorize`.
        user_codes, self.user_indices = dataset["public_username"].factorize()
        dataset["user_id"] = user_codes
        self.public_dataset = dataset

    def get_comparisons(
        self, trusted_only=False, criteria=None, user_id=None
    ) -> pd.DataFrame:
        # NOTE: `trusted_only` is accepted but not used by this implementation
        selected = self.public_dataset.copy(deep=False)
        if criteria is not None:
            selected = selected[selected.criteria == criteria]
        if user_id is not None:
            selected = selected[selected.user_id == user_id]
        output_columns = ["user_id", "entity_a", "entity_b", "criteria", "score", "weight"]
        return selected[output_columns]

    def get_ratings_properties(self):
        dataset = self.public_dataset
        # Distinct (user, entity) pairs, whichever side of a comparison the
        # entity appeared on
        pairs_as_a = set(dataset.groupby(["user_id", "entity_a"]).indices.keys())
        pairs_as_b = set(dataset.groupby(["user_id", "entity_b"]).indices.keys())
        df = pd.DataFrame(list(pairs_as_a | pairs_as_b), columns=["user_id", "entity_id"])
        df["is_public"] = True
        # The 6 users with the most rows are flagged trusted and supertrusted
        top_users = df.value_counts("user_id").index[:6]
        is_top_user = df["user_id"].isin(top_users)
        df["is_trusted"] = df["is_supertrusted"] = is_top_user
        return df

In [None]:
# Byzantine resilience parameter; passed (with a multiplier) as the first
# argument of QrMed / QrUnc / QrDev / BrMean by the functions below
W = 20.0

# Per-user weights returned by `get_user_scaling_weights`
SCALING_WEIGHT_SUPERTRUSTED = W
SCALING_WEIGHT_TRUSTED = 1.0
SCALING_WEIGHT_NONTRUSTED = 0.0

# Voting weights of trusted rows in `get_global_scores` (DEFAULT mode);
# they are also the caps applied to the weights of the other rows
VOTE_WEIGHT_TRUSTED_PUBLIC = 1.0
VOTE_WEIGHT_TRUSTED_PRIVATE = 0.5

# Per entity, the total weight shared by non-trusted rows is
# DEFAULT + FRACTION * (sum of the trusted voting weights)
TOTAL_VOTE_WEIGHT_NONTRUSTED_DEFAULT = 2.0  # w_⨯,default
TOTAL_VOTE_WEIGHT_NONTRUSTED_FRACTION = 0.1  # f_⨯

def get_global_scores(scaled_individual_scores: pd.DataFrame, score_mode: ScoreMode):
    """
    Aggregate scaled individual scores into one global score per entity.

    Parameters:
        * `scaled_individual_scores`: DataFrame with columns `entity_id`,
            `score`, `uncertainty` and, depending on the mode, `is_trusted`
            and `is_public`. The input frame is not modified.
        * `score_mode`: how rows are weighted
            - `ScoreMode.TRUSTED_ONLY`: only rows flagged `is_trusted`, weight 1
            - `ScoreMode.ALL_EQUAL`: every row, weight 1
            - `ScoreMode.DEFAULT`: trusted rows get a fixed weight depending
              on `is_public`; for each entity, the rows still at weight 0 then
              get a capped share of a total derived from the trusted weight

    Returns:
        DataFrame with columns `entity_id`, `score` (QrMed), `uncertainty`
        (QrUnc) and `deviation` (QrDev); empty when there is no entity.
    """

    def as_mask(flags: pd.Series) -> np.ndarray:
        # Plain boolean array, whatever the column dtype (missing => False).
        # Avoids applying `~` to an object column, where it would not be a
        # logical negation.
        return flags.eq(True).to_numpy(dtype=bool)

    df = scaled_individual_scores.copy(deep=False)

    # Weights are assigned to whole columns instead of chained in-place
    # `mask` calls, which do not update the frame under pandas Copy-on-Write.
    if score_mode == ScoreMode.TRUSTED_ONLY:
        df = df[as_mask(df["is_trusted"])].copy()
        df["voting_weight"] = 1

    if score_mode == ScoreMode.ALL_EQUAL:
        df["voting_weight"] = 1

    if score_mode == ScoreMode.DEFAULT:
        trusted = as_mask(df["is_trusted"])
        public = as_mask(df["is_public"])
        # Voting weight for non trusted users will be computed per entity
        df["voting_weight"] = np.select(
            [trusted & public, trusted & ~public],
            [VOTE_WEIGHT_TRUSTED_PUBLIC, VOTE_WEIGHT_TRUSTED_PRIVATE],
            default=0.0,
        )

    global_scores = {}
    for (entity_id, scores) in df.groupby("entity_id"):
        w = scores["voting_weight"].to_numpy()

        if score_mode == ScoreMode.DEFAULT:
            public = as_mask(scores["is_public"])
            non_trusted = ~as_mask(scores["is_trusted"])
            nb_non_trusted_public = (public & non_trusted).sum()
            nb_non_trusted_private = (~public & non_trusted).sum()

            if (nb_non_trusted_private > 0) or (nb_non_trusted_public > 0):
                # At this point only trusted rows have a non-zero weight
                trusted_weight = w.sum()
                non_trusted_weight = (
                    TOTAL_VOTE_WEIGHT_NONTRUSTED_DEFAULT
                    + TOTAL_VOTE_WEIGHT_NONTRUSTED_FRACTION * trusted_weight
                )
                # A public row counts for two shares, a private row for one
                nb_shares = 2 * nb_non_trusted_public + nb_non_trusted_private
                w = np.where(
                    public & (w == 0),
                    min(VOTE_WEIGHT_TRUSTED_PUBLIC, 2 * non_trusted_weight / nb_shares),
                    w,
                )
                w = np.where(
                    ~public & (w == 0),
                    min(VOTE_WEIGHT_TRUSTED_PRIVATE, non_trusted_weight / nb_shares),
                    w,
                )

        theta = scores["score"]
        delta = scores["uncertainty"]
        rho = QrMed(2 * W, w, theta, delta)
        rho_uncertainty = QrUnc(2 * W, 1, w, theta, delta, qr_med=rho)
        rho_deviation = QrDev(2 * W, 1, w, theta, delta, qr_med=rho)
        global_scores[entity_id] = {
            "score": rho,
            "uncertainty": rho_uncertainty,
            "deviation": rho_deviation,
        }

    if len(global_scores) == 0:
        return pd.DataFrame(columns=["entity_id", "score", "uncertainty", "deviation"])

    result = pd.DataFrame.from_dict(global_scores, orient="index")
    result.index.name = "entity_id"
    return result.reset_index()

In [None]:
def get_user_scaling_weights(ml_input: MlInput):
    """
    Weight given to each user when their scores serve as a scaling reference.

    Each user's flags are taken from the first non-null `is_trusted` /
    `is_supertrusted` values among their rows in
    `ml_input.get_ratings_properties()`.

    Returns:
        dict mapping `user_id` to `SCALING_WEIGHT_SUPERTRUSTED` for
        supertrusted users, `SCALING_WEIGHT_TRUSTED` for other trusted users,
        and `SCALING_WEIGHT_NONTRUSTED` for everyone else.
    """
    ratings_properties = ml_input.get_ratings_properties()[
        ["user_id", "is_trusted", "is_supertrusted"]
    ]
    df = ratings_properties.groupby("user_id").first()
    # `np.select` keeps the first matching condition, so "supertrusted" takes
    # precedence over "trusted". Building the values in one step avoids
    # chained in-place `mask` calls, which do not update the frame under
    # pandas Copy-on-Write.
    scaling_weight = np.select(
        [
            df["is_supertrusted"].eq(True).to_numpy(dtype=bool),
            df["is_trusted"].eq(True).to_numpy(dtype=bool),
        ],
        [SCALING_WEIGHT_SUPERTRUSTED, SCALING_WEIGHT_TRUSTED],
        default=SCALING_WEIGHT_NONTRUSTED,
    )
    return pd.Series(scaling_weight, index=df.index, dtype=float).to_dict()

def get_significantly_different_pairs(scores: pd.DataFrame):
    """
    Find the set of pairs of alternatives
    that are significantly different, according to the contributor scores.
    (Used for collaborative preference scaling)

    A pair is kept when the absolute difference of the two scores is at least
    twice the sum of the two uncertainties. The result is indexed by
    (`uid_a`, `uid_b`) with columns `score_a`, `uncertainty_a`, `score_b`
    and `uncertainty_b`.
    """
    selected = scores[["uid", "score", "uncertainty"]]
    # Row positions (i, j) with i < j: each unordered pair of rows once
    first_pos, second_pos = np.triu_indices(len(selected), k=1)
    first = selected.iloc[first_pos].reset_index(drop=True)
    second = selected.iloc[second_pos].reset_index(drop=True)
    pairs = first.join(second, lsuffix="_a", rsuffix="_b").set_index(["uid_a", "uid_b"])

    score_gap = np.abs(pairs.score_a - pairs.score_b)
    threshold = 2 * (pairs.uncertainty_a + pairs.uncertainty_b)
    return pairs.loc[score_gap >= threshold]

def compute_scaling(
    df: pd.DataFrame,
    ml_input: MlInput,
    users_to_compute=None,
    reference_users=None,
    compute_uncertainties=False,
):
    """
    Compute a scaling factor `s` and a translation `tau` for each user of
    `users_to_compute`, by comparing their scores with the scores of
    `reference_users`.

    Parameters:
        * `df`: individual scores, with columns `user_id`, `entity_id`,
            `score` and `uncertainty`
        * `ml_input`: used to fetch the weight of each reference user
            (see `get_user_scaling_weights`)
        * `users_to_compute`: ids of the users to process;
            defaults to every user present in `df`
        * `reference_users`: ids of the users to compare against;
            defaults to every user present in `df`
        * `compute_uncertainties`: when True, aggregate with `QrMed` and also
            compute `delta_s` / `delta_tau` with `QrUnc`;
            when False, aggregate with `BrMean`

    Returns:
        DataFrame built from dicts keyed by user id, with columns `s` and
        `tau`, plus `delta_s` and `delta_tau` when `compute_uncertainties`.
    """
    scaling_weights = get_user_scaling_weights(ml_input)
    df = df.rename({"entity_id": "uid"}, axis=1)

    if users_to_compute is None:
        users_to_compute = set(df.user_id.unique())
    else:
        users_to_compute = set(users_to_compute)

    if reference_users is None:
        reference_users = set(df.user_id.unique())
    else:
        reference_users = set(reference_users)

    # First pass: scaling factor `s` of each user n
    s_dict = {}
    delta_s_dict = {}

    for (user_n, user_scores) in df[df.user_id.isin(users_to_compute)].groupby(
        "user_id"
    ):
        # One entry per reference user m that shares usable pairs with n
        s_nqm = []
        delta_s_nqm = []
        s_weights = []

        ABn_all = get_significantly_different_pairs(user_scores)
        # Entities appearing in at least one significantly different pair of n
        user_scores_uids = set(ABn_all.index.get_level_values("uid_a")) | set(
            ABn_all.index.get_level_values("uid_b")
        )

        for (user_m, m_scores) in df[
            df.user_id.isin(reference_users - {user_n})
        ].groupby("user_id"):
            common_uids = user_scores_uids.intersection(m_scores.uid)

            if len(common_uids) == 0:
                continue

            m_scores = m_scores[m_scores.uid.isin(common_uids)]
            ABm = get_significantly_different_pairs(m_scores)
            # Pairs (uid_a, uid_b) significantly different for both n and m
            ABnm = ABn_all.join(ABm, how="inner", lsuffix="_n", rsuffix="_m")
            if len(ABnm) == 0:
                continue
            # Per pair: ratio of m's score difference to n's score difference
            s_nqmab = np.abs(ABnm.score_a_m - ABnm.score_b_m) / np.abs(
                ABnm.score_a_n - ABnm.score_b_n
            )

            # Per pair: the same ratio with m's difference widened and n's
            # difference narrowed by their uncertainties, minus the ratio
            delta_s_nqmab = (
                (
                    np.abs(ABnm.score_a_m - ABnm.score_b_m)
                    + ABnm.uncertainty_a_m
                    + ABnm.uncertainty_b_m
                )
                / (
                    np.abs(ABnm.score_a_n - ABnm.score_b_n)
                    - ABnm.uncertainty_a_n
                    - ABnm.uncertainty_b_n
                )
            ) - s_nqmab

            # Aggregate the per-pair ratios into one value for the couple (n, m)
            s = QrMed(1, 1, s_nqmab, delta_s_nqmab)
            s_nqm.append(s)
            delta_s_nqm.append(QrUnc(1, 1, 1, s_nqmab, delta_s_nqmab, qr_med=s))
            s_weights.append(scaling_weights[user_m])

        s_weights = np.array(s_weights)
        # Largest absolute score of user n
        theta_inf = np.max(user_scores.score.abs())
        s_nqm = np.array(s_nqm)
        delta_s_nqm = np.array(delta_s_nqm)
        # The aggregation is done on (s - 1), and 1 is added back afterwards
        if compute_uncertainties:
            qr_med = QrMed(8 * W * theta_inf, s_weights, s_nqm - 1, delta_s_nqm)
            s_dict[user_n] = 1 + qr_med
            delta_s_dict[user_n] = QrUnc(
                8 * W * theta_inf, 1, s_weights, s_nqm - 1, delta_s_nqm, qr_med=qr_med
            )
        else:
            # When dealing with a sufficiently trustworthy set of users
            # and we don't need to compute uncertainties, `BrMean` can be used
            # to be closer to the "sparse unanimity conditions" discussed in
            # [Robust sparse voting](https://arxiv.org/abs/2202.08656)
            s_dict[user_n] = 1 + BrMean(
                8 * W * theta_inf, s_weights, s_nqm - 1, delta_s_nqm
            )

    # Second pass: translation `tau` of each user n, using the scaling
    # factors computed above
    tau_dict = {}
    delta_tau_dict = {}
    for (user_n, user_scores) in df[df.user_id.isin(users_to_compute)].groupby(
        "user_id"
    ):
        tau_nqm = []
        delta_tau_nqm = []
        s_weights = []
        for (user_m, m_scores) in df[
            df.user_id.isin(reference_users - {user_n})
        ].groupby("user_id"):
            # Entities scored by both n and m
            common_uids = list(set(user_scores.uid).intersection(m_scores.uid))

            if len(common_uids) == 0:
                continue

            m_scores = m_scores.set_index("uid").loc[common_uids]
            n_scores = user_scores.set_index("uid").loc[common_uids]

            # Per entity: difference between the scaled scores of m and n.
            # A reference user without a computed factor is scaled by 1.
            tau_nqmab = (
                s_dict.get(user_m, 1) * m_scores.score - s_dict[user_n] * n_scores.score
            )
            # Per entity: sum of the two scaled uncertainties
            delta_tau_nqmab = (
                s_dict[user_n] * n_scores.uncertainty
                + s_dict.get(user_m, 1) * m_scores.uncertainty
            )

            tau = QrMed(1, 1, tau_nqmab, delta_tau_nqmab)
            tau_nqm.append(tau)
            delta_tau_nqm.append(QrUnc(1, 1, 1, tau_nqmab, delta_tau_nqmab, qr_med=tau))
            s_weights.append(scaling_weights[user_m])

        s_weights = np.array(s_weights)
        tau_nqm = np.array(tau_nqm)
        delta_tau_nqm = np.array(delta_tau_nqm)

        if compute_uncertainties:
            qr_med = QrMed(8 * W, s_weights, tau_nqm, delta_tau_nqm)
            tau_dict[user_n] = qr_med
            delta_tau_dict[user_n] = QrUnc(
                8 * W, 1, s_weights, tau_nqm, delta_tau_nqm, qr_med=qr_med
            )
        else:
            tau_dict[user_n] = BrMean(8 * W, s_weights, tau_nqm, delta_tau_nqm)

    # One row per user; the uncertainty columns exist only when requested
    return pd.DataFrame(
        {
            "s": s_dict,
            "tau": tau_dict,
            **(
                {"delta_s": delta_s_dict, "delta_tau": delta_tau_dict}
                if compute_uncertainties
                else {}
            ),
        }
    )

def get_scaling_for_supertrusted(ml_input: MlInput, individual_scores: pd.DataFrame):
    """
    Run `compute_scaling` on the individual scores whose (user, entity)
    rating is flagged `is_supertrusted`.
    """
    ratings = ml_input.get_ratings_properties().set_index(["user_id", "entity_id"])
    supertrusted_ratings = ratings[ratings.is_supertrusted]
    # The inner join drops every score without a supertrusted rating
    supertrusted_scores = individual_scores.join(
        supertrusted_ratings, on=["user_id", "entity_id"], how="inner"
    )
    return compute_scaling(supertrusted_scores, ml_input=ml_input)
    
def compute_scaled_scores(
    ml_input: MlInput, individual_scores: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Scale the individual scores in two steps: first with the scaling computed
    among supertrusted ratings, then with a scaling of the non-supertrusted
    users computed against the supertrusted users.

    Returns:
        - scaled individual scores: Dataframe with columns
            * `user_id`
            * `entity_id`
            * `score`
            * `uncertainty`
            * `is_public`
            * `is_trusted`
            * `is_supertrusted`
        - scalings: DataFrame indexed by user id, with columns:
            * `s`: scaling factor
            * `tau`: translation value
            * `delta_s`: uncertainty on `s`
            * `delta_tau`: uncertainty on `tau`
    """
    if len(individual_scores) == 0:
        scores = pd.DataFrame(
            columns=[
                "user_id",
                "entity_id",
                "score",
                "uncertainty",
                "is_public",
                "is_trusted",
                "is_supertrusted",
            ]
        )
        scalings = pd.DataFrame(columns=["s", "tau", "delta_s", "delta_tau"])
        return scores, scalings

    supertrusted_scaling = get_scaling_for_supertrusted(ml_input, individual_scores)
    rp = ml_input.get_ratings_properties()

    non_supertrusted_users = rp["user_id"][~rp.is_supertrusted].unique()
    supertrusted_users = rp["user_id"][rp.is_supertrusted].unique()

    rp = rp.set_index(["user_id", "entity_id"])
    df = individual_scores.join(rp, on=["user_id", "entity_id"], how="left")
    # Columns are reassigned rather than filled through a chained in-place
    # call, which does not update the frame under pandas Copy-on-Write.
    # Scores without a matching rating get False flags; the cast keeps the
    # columns boolean so that `~` and `&` behave as logical operators later.
    for flag in ("is_public", "is_trusted", "is_supertrusted"):
        df[flag] = df[flag].fillna(False).astype(bool)

    # Step 1: apply the scaling of supertrusted users (identity for others).
    # The float cast guards against object columns when a scaling is empty.
    df = df.join(supertrusted_scaling, on="user_id")
    df["s"] = df["s"].astype(float).fillna(1)
    df["tau"] = df["tau"].astype(float).fillna(0)
    df["score"] = df["s"] * df["score"] + df["tau"]
    df["uncertainty"] = df["uncertainty"] * df["s"]
    df = df.drop(columns=["s", "tau"])

    # Step 2: scale the non-supertrusted users against the supertrusted ones
    non_supertrusted_scaling = compute_scaling(
        df,
        ml_input=ml_input,
        users_to_compute=non_supertrusted_users,
        reference_users=supertrusted_users,
        compute_uncertainties=True,
    )

    df = df.join(non_supertrusted_scaling, on="user_id")
    df["s"] = df["s"].astype(float).fillna(1)
    df["tau"] = df["tau"].astype(float).fillna(0)
    df["delta_s"] = df["delta_s"].astype(float).fillna(0)
    df["delta_tau"] = df["delta_tau"].astype(float).fillna(0)
    # The uncertainty must be updated before the score, since it uses the
    # score as it was before this second scaling
    df["uncertainty"] = (
        df["s"] * df["uncertainty"]
        + df["delta_s"] * df["score"].abs()
        + df["delta_tau"]
    )
    df["score"] = df["score"] * df["s"] + df["tau"]
    df = df.drop(columns=["s", "tau", "delta_s", "delta_tau"])

    all_scalings = pd.concat([supertrusted_scaling, non_supertrusted_scaling])
    return df, all_scalings

In [None]:
R_MAX = 10  # Maximum score for a comparison in the input
ALPHA = 0.01  # Signal-to-noise hyperparameter


def compute_individual_score(scores: pd.DataFrame):
    """
    Computation of contributor scores and score uncertainties,
    based on their comparisons.

    At this stage, scores will not be normalized between contributors.

    `scores` must provide the columns `entity_a`, `entity_b` and `score`.
    The result is indexed by `entity_id`, with columns `score` and
    `uncertainty`.
    """
    comparisons = scores[["entity_a", "entity_b", "score"]]
    # Each comparison is also recorded in the other direction, with the
    # opposite score
    mirrored = pd.DataFrame(
        {
            "entity_a": comparisons.entity_b,
            "entity_b": comparisons.entity_a,
            "score": -1 * comparisons.score,
        }
    )
    scores_sym = pd.concat([comparisons, mirrored])

    # "Comparison tensor": matrix with all comparisons, values in [-R_MAX, R_MAX]
    r = scores_sym.pivot(index="entity_a", columns="entity_b", values="score")

    r_tilde = r / (1.0 + R_MAX)
    r_tilde2 = r_tilde ** 2

    # r.loc[a:b] is negative when a is preferred to b.
    l = -1.0 * r_tilde / np.sqrt(1.0 - r_tilde2)  # noqa: E741
    k = (1.0 - r_tilde2) ** 3

    L = k.mul(l).sum(axis=1)
    diagonal = np.diag(k.sum(axis=1) + ALPHA)
    K_diag = pd.DataFrame(data=diagonal, index=k.index, columns=k.index)
    # Entries missing from `k` (pairs never compared) count as 0
    K = K_diag.sub(k, fill_value=0)

    # theta_star = K^-1 * L
    theta_values = np.linalg.solve(K, L)
    theta_star = pd.Series(theta_values, index=L.index)

    # Compute uncertainties
    theta_star_ab = pd.DataFrame(
        np.subtract.outer(theta_values, theta_values),
        index=theta_star.index,
        columns=theta_star.index,
    )
    residual = np.nansum(k * (l - theta_star_ab) ** 2) / 2
    sigma2 = (1.0 + residual) / len(comparisons)
    delta_star = pd.Series(np.sqrt(sigma2) / np.sqrt(np.diag(K)), index=K.index)

    result = pd.DataFrame({"score": theta_star, "uncertainty": delta_star})
    result.index.name = "entity_id"
    return result


def get_individual_scores(
    ml_input: MlInput, criteria: str, single_user_id: Optional[int] = None
) -> pd.DataFrame:
    """
    Compute the individual scores of each user for one criterion.

    Comparisons are fetched with `ml_input.get_comparisons`, optionally for a
    single user, and passed user by user to `compute_individual_score`.

    Returns:
        DataFrame with columns `user_id`, `entity_id`, `score`, `uncertainty`
    """
    output_columns = ["user_id", "entity_id", "score", "uncertainty"]
    comparisons_df = ml_input.get_comparisons(criteria=criteria, user_id=single_user_id)

    per_user_scores = []
    for (user_id, user_comparisons) in comparisons_df.groupby("user_id"):
        user_scores = compute_individual_score(user_comparisons)
        if user_scores is not None:
            user_scores["user_id"] = user_id
            per_user_scores.append(user_scores.reset_index())

    if not per_user_scores:
        return pd.DataFrame(columns=output_columns)

    combined = pd.concat(per_user_scores, ignore_index=True, copy=False)
    return combined[output_columns]


In [None]:


def _run_mehestan_for_criterion(criteria: str, ml_input: MlInput):
    """
    Run the whole pipeline for one criterion: individual scores, scaling,
    then global scores for every score mode.

    Returns:
        dict with keys
            * `individual_scores`: output of `get_individual_scores`,
                with an added `criteria` column
            * `scaled_scores`, `scalings`: outputs of `compute_scaled_scores`
            * `global_scores`: dict mapping each score mode to the output of
                `get_global_scores`, with an added `criteria` column
    """
    indiv_scores = get_individual_scores(ml_input, criteria)
    scaled_scores, scalings = compute_scaled_scores(
        ml_input, individual_scores=indiv_scores
    )
    indiv_scores["criteria"] = criteria

    # The modes are listed explicitly so that the loop does not depend on
    # `ScoreMode` being iterable.
    global_scores_by_mode = {}
    for mode in (ScoreMode.DEFAULT, ScoreMode.ALL_EQUAL, ScoreMode.TRUSTED_ONLY):
        global_scores = get_global_scores(scaled_scores, score_mode=mode)
        global_scores["criteria"] = criteria
        global_scores_by_mode[mode] = global_scores

    # Results are returned instead of being discarded
    return {
        "individual_scores": indiv_scores,
        "scaled_scores": scaled_scores,
        "scalings": scalings,
        "global_scores": global_scores_by_mode,
    }

In [None]:
def run_mehestan(ml_input: MlInput):
    """Call `_run_mehestan_for_criterion` for each criterion listed below, in order."""
    all_criteria = (
        "largely_recommended",
        "reliability",
        "pedagogy",
        "importance",
        "layman_friendly",
        "entertaining_relaxing",
        "engaging",
        "diversity_inclusion",
        "better_habits",
        "backfire_risk",
    )
    for criterion in all_criteria:
        _run_mehestan_for_criterion(criterion, ml_input)

In [None]:
# Load the CSV export (path relative to the working directory) and run the
# pipeline for every criterion
ml_input=MlInputFromPublicDataset("tournesol_public_export.csv")
run_mehestan(ml_input)