### Import libraries and preprocess manual annotation files


In [None]:
# Import libraries
import pandas as pd
from sklearn.metrics import cohen_kappa_score
import openpyxl

In [None]:
# Locations of the two annotation workbooks (HB first, UY second)
HB_PATH, UY_PATH = (
    r"C:\Users\seohy\nlplearnerdata\interrater_reliability\HB_annotation.xlsx",
    r"C:\Users\seohy\nlplearnerdata\interrater_reliability\UY_annotation.xlsx",
)

In [None]:
# Load the HB workbook; sheet_name=None asks pandas for every worksheet at once
HB_sheets = pd.read_excel(HB_PATH, sheet_name=None)

# Stack all worksheets under a fresh running index, keep only the annotation
# columns, and suffix each of them with the annotator tag "_HB"
HB_combined = (
    pd.concat(HB_sheets.values(), ignore_index=True)
    .loc[:, ["ID", "FORM", "POS", "HEAD", "DEPREL"]]
    .rename(
        columns={
            "ID": "ID_HB",
            "FORM": "FORM_HB",
            "POS": "POS_HB",
            "HEAD": "HEAD_HB",
            "DEPREL": "DEPREL_HB",
        }
    )
)

In [None]:
# Preview the first five rows to verify the HB preprocessing
HB_combined.head(n=5)

In [None]:
# Load the UY workbook; sheet_name=None asks pandas for every worksheet at once
UY_sheets = pd.read_excel(UY_PATH, sheet_name=None)

# Stack all worksheets under a fresh running index, keep only the annotation
# columns, and suffix each of them with the annotator tag "_UY"
UY_combined = (
    pd.concat(UY_sheets.values(), ignore_index=True)
    .loc[:, ["ID", "FORM", "POS", "HEAD", "DEPREL"]]
    .rename(
        columns={
            "ID": "ID_UY",
            "FORM": "FORM_UY",
            "POS": "POS_UY",
            "HEAD": "HEAD_UY",
            "DEPREL": "DEPREL_UY",
        }
    )
)

In [None]:
# Preview the first five rows to verify the UY preprocessing
UY_combined.head(n=5)

In [None]:
# Place the two processed frames side by side (rows are matched on their index)
df_combined = pd.concat([HB_combined, UY_combined], axis="columns")

# Reorder the columns so that each HB column sits next to its UY counterpart
paired_column_order = [
    "ID_HB", "ID_UY",
    "FORM_HB", "FORM_UY",
    "POS_HB", "POS_UY",
    "HEAD_HB", "HEAD_UY",
    "DEPREL_HB", "DEPREL_UY",
]
df_combined = df_combined.loc[:, paired_column_order]

In [None]:
# Preview the first five rows to verify the combined frame
df_combined.head(n=5)

### !!Sanity check!!


In [None]:
# Sanity check - the tokenization must be the same in both annotations.
# The result should be empty (no rows) if every FORM matches and the rows are aligned.
forms_differ = df_combined["FORM_HB"].ne(df_combined["FORM_UY"])
df_combined[forms_differ]

### Compute interrater reliability for POS and DP


In [None]:
# Agreement between the HB and UY POS columns, measured with Cohen's kappa
hb_pos_tags = df_combined["POS_HB"]
uy_pos_tags = df_combined["POS_UY"]
pos_kappa = cohen_kappa_score(hb_pos_tags, uy_pos_tags)
print(f"Cohen's kappa for POS annotation: {pos_kappa}")

In [None]:
# Total number of tokens: every row of the combined frame counts as one
tokens = len(df_combined)

# Row-wise agreement masks between the HB and UY columns
head_agrees = df_combined["HEAD_HB"] == df_combined["HEAD_UY"]
deprel_agrees = df_combined["DEPREL_HB"] == df_combined["DEPREL_UY"]

# UAS: percentage of rows where HEAD is the same
same_heads = head_agrees.sum()
UAS = same_heads / tokens * 100

# LAS: percentage of rows where both HEAD and DEPREL are the same
same_heads_and_relation = (head_agrees & deprel_agrees).sum()
LAS = same_heads_and_relation / tokens * 100

# Report both scores
print(f"UAS for DP: {UAS}%")
print(f"LAS for DP: {LAS}%")

### Compute interrater reliability for learner errors
