In [1]:
import pandas as pd
from pathlib import Path

filepath = "eval_dataset_peer_review.csv"

df = pd.read_csv(Path(filepath), index_col=0)
df

Unnamed: 0_level_0,query,concluded,reviewer 1,reviewer 2,labeled_at,Column3
Column1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,síndrome de Cockett,True,K94,K73,10/15/23 11:19,
1,Dormencia,True,N05|N06,N05|N06|S01,10/15/23 11:21,
2,preferecia,True,,,10/15/23 11:21,
3,espasmo no olho,True,F16|F95,F16|N08,10/15/23 11:23,
4,E790,True,T99,T99,10/15/23 11:24,
...,...,...,...,...,...,...
7507,busca ativa,False,,,,
7508,inflamação,False,,,,
7509,tireoid,False,,,,
7510,dor,False,,,,


In [2]:
completed_queries = df[df["concluded"]==True].fillna('').drop(columns=["Column3"])
completed_queries["total_agreement"] = False
completed_queries["partial_agreement"] = False
completed_queries["no_agreement"] = False
completed_queries


Unnamed: 0_level_0,query,concluded,reviewer 1,reviewer 2,labeled_at,total_agreement,partial_agreement,no_agreement
Column1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,síndrome de Cockett,True,K94,K73,10/15/23 11:19,False,False,False
1,Dormencia,True,N05|N06,N05|N06|S01,10/15/23 11:21,False,False,False
2,preferecia,True,,,10/15/23 11:21,False,False,False
3,espasmo no olho,True,F16|F95,F16|N08,10/15/23 11:23,False,False,False
4,E790,True,T99,T99,10/15/23 11:24,False,False,False
...,...,...,...,...,...,...,...,...
432,sangramento,True,A10,A10,1/12/24 20:07,False,False,False
433,dor coluna,True,L03|L86|L02|L83|L84,L02|L03,1/12/24 20:12,False,False,False
434,conjutivite viral,True,F70,F70|F71,1/12/24 20:12,False,False,False
435,nao encontrado,True,,,1/12/24 20:12,False,False,False


In [3]:
completed_queries_records = completed_queries.to_dict('records')

records_with_updated_agreements = []

for completed_query in completed_queries_records:
    reviewer_1_codes = set([code for code in completed_query["reviewer 1"].split("|") if code != ''])
    reviewer_2_codes = set([code for code in completed_query["reviewer 2"].split("|") if code != ''])


    if reviewer_1_codes == reviewer_2_codes:
        completed_query["total_agreement"] = True

    elif len(reviewer_1_codes.intersection(reviewer_2_codes)) == 0:
        completed_query["partial_agreement"] = True

    else:
        completed_query["no_agreement"] = True

    records_with_updated_agreements.append(completed_query)

records_with_updated_agreements_df = pd.DataFrame.from_records(records_with_updated_agreements)
records_with_updated_agreements_df

Unnamed: 0,query,concluded,reviewer 1,reviewer 2,labeled_at,total_agreement,partial_agreement,no_agreement
0,síndrome de Cockett,True,K94,K73,10/15/23 11:19,False,True,False
1,Dormencia,True,N05|N06,N05|N06|S01,10/15/23 11:21,False,False,True
2,preferecia,True,,,10/15/23 11:21,True,False,False
3,espasmo no olho,True,F16|F95,F16|N08,10/15/23 11:23,False,False,True
4,E790,True,T99,T99,10/15/23 11:24,True,False,False
...,...,...,...,...,...,...,...,...
432,sangramento,True,A10,A10,1/12/24 20:07,True,False,False
433,dor coluna,True,L03|L86|L02|L83|L84,L02|L03,1/12/24 20:12,False,False,True
434,conjutivite viral,True,F70,F70|F71,1/12/24 20:12,False,False,True
435,nao encontrado,True,,,1/12/24 20:12,True,False,False


In [4]:
cols = ["total_agreement", "partial_agreement", "no_agreement"]
total = len(records_with_updated_agreements_df)

for col in cols:
    s = records_with_updated_agreements_df[col].sum()
    percentage = s/total
    print(f"""
Column:         {col}
Sum:            {s}
Total:          {total}
Percentage:     {percentage*100:.1f}""")


Column:         total_agreement
Sum:            230
Total:          437
Percentage:     52.6

Column:         partial_agreement
Sum:            93
Total:          437
Percentage:     21.3

Column:         no_agreement
Sum:            114
Total:          437
Percentage:     26.1


In [5]:
records_with_updated_agreements_df.to_csv('agreements.csv')

In [9]:
import numpy as np
import pandas as pd

def proportional_agreement(row_codes_1, row_codes_2):
    """
    Calculate the proportion of agreement for two sets of codes.

    Parameters:
        row_codes_1 (set): Codes from reviewer 1.
        row_codes_2 (set): Codes from reviewer 2.

    Returns:
        float: Proportion of agreement (0 to 1).
    """
    if not row_codes_1 and not row_codes_2:  # Both are empty
        return 1.0  # Perfect agreement when both are empty

    if not row_codes_1 or not row_codes_2:  # One is empty
        return 0.0  # No agreement if one is empty and the other is not

    # Calculate the proportion of agreement
    intersection = len(row_codes_1 & row_codes_2)
    union = len(row_codes_1 | row_codes_2)
    return intersection / union if union > 0 else 0

def calculate_kappa(data):
    """
    Calculate Cohen's Kappa statistic using proportional agreement.

    Parameters:
        data (pd.DataFrame): DataFrame containing reviewer codes in columns 'reviewer 1' and 'reviewer 2'.

    Returns:
        float: Cohen's Kappa statistic.
    """
    # Preprocess data: split reviewer codes into sets
    data['reviewer_1_codes'] = data['reviewer 1'].apply(lambda x: set(x.split('|')) if isinstance(x, str) else set())
    data['reviewer_2_codes'] = data['reviewer 2'].apply(lambda x: set(x.split('|')) if isinstance(x, str) else set())

    # Compute proportional agreement for all rows
    data['proportional_agreement'] = data.apply(
        lambda row: proportional_agreement(row['reviewer_1_codes'], row['reviewer_2_codes']), axis=1
    )

    # Calculate observed agreement as the mean of proportional agreements
    observed_agreement = data['proportional_agreement'].mean()

    # Calculate expected agreement (p_e)
    reviewer_1_counts = data['reviewer_1_codes'].explode().value_counts()
    reviewer_2_counts = data['reviewer_2_codes'].explode().value_counts()
    total_codes = reviewer_1_counts.sum()

    expected_agreement = sum(
        (reviewer_1_counts.get(code, 0) / total_codes) * (reviewer_2_counts.get(code, 0) / total_codes)
        for code in set(reviewer_1_counts.index).union(reviewer_2_counts.index)
    )

    # Calculate Cohen's Kappa
    kappa = (observed_agreement - expected_agreement) / (1 - expected_agreement) if (1 - expected_agreement) > 0 else 0

    return kappa


In [10]:
kappa = calculate_kappa(records_with_updated_agreements_df)
print(f"Cohen's Kappa: {kappa:.3f}")

Cohen's Kappa: 0.637
