## Imports

In [1]:
from google.colab import drive
drive.mount('/content/drive')

import numpy as np
from sklearn.metrics import cohen_kappa_score
import pandas as pd

SEED = 312
N_BOOTSTRAP = 5000

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
ROOT_PATH = ""
researcher1_PATH = ROOT_PATH + ""
researcher2_PATH = ROOT_PATH + ""

## Rules (FA2)

In [3]:
rules_cols = [
    "lt_cond_score", "lt_absorb_score", "lt_runoff_score",
    "et_cond_score", "et_absorb_score", "et_runoff_score",
    "gt_cond_score", "gt_absorb_score", "gt_runoff_score",
]

In [None]:
df_researcher1_fa2 = pd.read_csv(researcher1_PATH + "FA2_Rules.csv")
df_researcher1_fa2 = df_researcher1_fa2.dropna(subset=["XYZ ID"])

assert not df_researcher1_fa2[rules_cols].isna().any().any()

df_researcher1_fa2["score"] = df_researcher1_fa2[rules_cols].sum(axis=1)

df_researcher1_fa2.head()

In [None]:
df_researcher2_fa2 = pd.read_csv(researcher2_PATH + "FA2_Rules.csv")
df_researcher2_fa2 = df_researcher2_fa2.dropna(subset=["XYZ ID"])

assert not df_researcher2_fa2[rules_cols].isna().any().any()

df_researcher2_fa2["score"] = df_researcher2_fa2[rules_cols].sum(axis=1)

df_researcher2_fa2.head()

In [None]:
assert list(df_researcher1_fa2["XYZ ID"]) == list(df_researcher2_fa2["XYZ ID"])

rules_researcher1_scores, rules_researcher2_scores = df_researcher1_fa2["score"], df_researcher2_fa2["score"]

print(cohen_kappa_score(rules_researcher1_scores,rules_researcher2_scores,weights='quadratic'))

0.8663017982799062


## Debug (FA3)

In [7]:
debug_cols = [
    "incorrectly_set_absorption_limit_score",
    "rainfall_compared_to_absorption_score",
    "absorption_set_to_absorption_limit_score",
    "greater_than_nested_in_less_than_score",
    "absorption_limit_swapped_with_absorption_score"
]

In [None]:
df_researcher1_fa3 = pd.read_csv(researcher1_PATH + "FA3_Debugging.csv")
df_researcher1_fa3 = df_researcher1_fa3.dropna(subset=["XYZ ID"])

assert not df_researcher1_fa3[debug_cols].isna().any().any()

df_researcher1_fa3["score"] = df_researcher1_fa3[debug_cols].sum(axis=1)

df_researcher1_fa3.head()

In [None]:
df_researcher2_fa3 = pd.read_csv(researcher2_PATH + "FA3_Debugging.csv")
df_researcher2_fa3 = df_researcher2_fa3.dropna(subset=["XYZ ID"])

assert not df_researcher2_fa3[debug_cols].isna().any().any()

df_researcher2_fa3["score"] = df_researcher2_fa3[debug_cols].sum(axis=1)

df_researcher2_fa3.head()

In [None]:
assert list(df_researcher1_fa3["XYZ ID"]) == list(df_researcher2_fa3["XYZ ID"])

debug_researcher1_scores, debug_researcher2_scores = df_researcher1_fa3["score"], df_researcher2_fa3["score"]

print(cohen_kappa_score(debug_researcher1_scores,debug_researcher2_scores,weights='quadratic'))

0.9411764705882353


## Engineering (FA4)

In [None]:
df_researcher1_fa4 = pd.read_csv(researcher1_PATH + "FA4_Engineering.csv")
df_researcher1_fa4 = df_researcher1_fa4.dropna(subset=["XYZ ID"])
df_researcher1_fa4 = df_researcher1_fa4.rename(columns={"Score": "score"})

assert not df_researcher1_fa4["score"].isna().any()

df_researcher1_fa4.head()

In [None]:
df_researcher2_fa4 = pd.read_csv(researcher2_PATH + "FA4_Engineering.csv")
df_researcher2_fa4 = df_researcher2_fa4.dropna(subset=["XYZ ID"])
df_researcher2_fa4 = df_researcher2_fa4.rename(columns={"Score": "score"})

assert not df_researcher2_fa4["score"].isna().any()

df_researcher2_fa4.head()

In [None]:
assert list(df_researcher1_fa4["XYZ ID"]) == list(df_researcher2_fa4["XYZ ID"])

engineering_researcher1_scores, engineering_researcher2_scores = df_researcher1_fa4["score"], df_researcher2_fa4["score"]

print(cohen_kappa_score(engineering_researcher1_scores,engineering_researcher2_scores,weights='quadratic'))

0.9074410163339383


## CIs

In [14]:
import numpy as np
from sklearn.metrics import cohen_kappa_score

def bootstrap_qwk_ci(y1, y2, *, n_boot=N_BOOTSTRAP, ci=0.95, random_state=SEED):
    rng = np.random.default_rng(random_state)
    y1 = np.asarray(y1)
    y2 = np.asarray(y2)
    n = y1.size
    idx = np.arange(n)

    kappa = cohen_kappa_score(y1, y2, weights='quadratic')

    boots = np.empty(n_boot)
    for i in range(n_boot):
        sample = rng.choice(idx, size=n, replace=True)
        boots[i] = cohen_kappa_score(y1[sample], y2[sample], weights='quadratic')

    alpha = 1 - ci
    lower, upper = np.percentile(boots, [100*alpha/2, 100*(1 - alpha/2)])
    moe = (upper - lower) / 2
    return kappa, lower, upper, moe

In [None]:
fa_sets = {
    "FA2": (df_researcher1_fa2["score"], df_researcher2_fa2["score"]),
    "FA3": (df_researcher1_fa3["score"], df_researcher2_fa3["score"]),
    "FA4": (df_researcher1_fa4["score"], df_researcher2_fa4["score"]),
}

records = []

for fa, (researcher1, researcher2) in fa_sets.items():
    kappa, lo, hi, moe = bootstrap_qwk_ci(
        researcher1, researcher2,
    )
    records.append({"FA": fa,
                    "QWK": kappa,
                    "95% CI Lower": lo,
                    "95% CI Upper": hi,
                    "Margin of Error": moe})

summary = pd.DataFrame(records).set_index("FA")
summary_pct = summary.mul(100)
summary_pct_formatted = summary_pct.round(2).astype(str)

print(summary_pct_formatted)

       QWK 95% CI Lower 95% CI Upper Margin of Error
FA                                                  
FA2  86.63        52.32        100.0           23.84
FA3  94.12         81.9        100.0            9.05
FA4  90.74        76.19        96.68           10.24
