In [2]:
from pathlib import Path

import krippendorff
import numpy as np
import pandas as pd

In [3]:
def summarise_categorical(df, column):
    """
    Display a frequency table for one categorical column of a DataFrame.

    The table has one row per distinct value reported by ``value_counts`` for
    ``df[column]``, with two columns: 'Count' (number of occurrences) and
    'Proportion' (share of the counted values, rendered as a percentage
    string with two decimals, e.g. ``"22.60%"``).

    The table is shown with the notebook-provided ``display``; nothing is
    returned.
    """
    values = df[column]
    counts = values.value_counts()
    shares = values.value_counts(normalize=True)

    table = pd.DataFrame({'Count': counts, 'Proportion': shares})

    # turn the raw fractions into human-readable percentage strings
    table["Proportion"] = table["Proportion"].apply("{:.2%}".format)
    display(table)

In [5]:
# Load the annotations data. Later cells read the 'annot1_label',
# 'annot2_label', 'disagreement' and 'user_prompt' columns from this frame.
df = pd.read_csv("./writing_assistance_annotations.csv")

# Show counts and percentage shares of each value in the 'final_label' column
summarise_categorical(df, "final_label")

Unnamed: 0_level_0,Count,Proportion
final_label,Unnamed: 1_level_1,Unnamed: 2_level_1
0 - NOT writing aid,387,77.40%
1 - writing aid,113,22.60%


In [12]:
def krippendorff_alpha(data, level_of_measurement='nominal', category_order=None,
                       columns=("annot1_label", "annot2_label")):
    """Calculate Krippendorff's alpha between the annotator label columns of ``data``.

    Labels are converted to numeric codes with ONE mapping shared by every
    annotator column, so a given label always receives the same code no matter
    which annotator used it or in which row it first appears. (Coding each
    column independently would let the same label end up with different codes
    for different annotators and distort the agreement score.)

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding one column of labels per annotator.
    level_of_measurement : str, default 'nominal'
        Passed through unchanged to ``krippendorff.alpha``.
    category_order : sequence, optional
        Labels listed in ascending order; a label's position in the sequence
        becomes its numeric code. Labels present in the data but absent from
        this sequence are reported with a printed warning and treated as
        missing (NaN). When omitted, labels are coded in order of first
        appearance (scanning the selected columns row by row), which carries
        no ordering meaning.
    columns : sequence of str, default ("annot1_label", "annot2_label")
        Names of the annotator columns in ``data`` to compare.

    Returns
    -------
    The value returned by ``krippendorff.alpha`` for the encoded ratings.
    """
    # Keep only the annotator columns
    ratings = data[list(columns)]

    # Distinct labels across *all* annotator columns, in order of first
    # appearance. Missing values (None/NaN) are not categories, so drop them.
    observed = [label for label in pd.unique(ratings.to_numpy().ravel()) if pd.notna(label)]

    if category_order is None:
        # No order supplied: fall back to first-appearance order, still using
        # a single mapping for every annotator.
        category_order = observed
    else:
        # Check that all categories in the data are in the category_order
        missing_categories = set(observed) - set(category_order)
        if missing_categories:
            print(f"Warning: The following categories are not in the specified order: {missing_categories}")

    # Create a mapping from category to numeric value
    category_map = {cat: i for i, cat in enumerate(category_order)}

    # Apply the shared mapping; missing values and any label that is not in
    # the mapping become NaN
    coded = ratings.apply(lambda col: col.map(category_map))

    # Transpose so that each annotator column becomes one row of the array
    ratings_array = coded.to_numpy(dtype=float).T

    # Calculate Krippendorff's alpha
    alpha = krippendorff.alpha(reliability_data=ratings_array, level_of_measurement=level_of_measurement)
    return alpha

# Summarise the 'disagreement' column. Missing values are first replaced with
# False, and the filled column is written back into `df`.
df["disagreement"] = df["disagreement"].fillna(False)
summarise_categorical(df, "disagreement")

# Calculate Krippendorff's alpha for the 'annot1_label' and 'annot2_label' columns, using nominal measurement.
# category_order fixes the numeric code of each label (0 for "0 - NOT writing aid", 1 for "1 - writing aid").
alpha = krippendorff_alpha(df, level_of_measurement='nominal', category_order=["0 - NOT writing aid", "1 - writing aid"])
print(f"Krippendorff's alpha: {alpha:.4f}")


Unnamed: 0_level_0,Count,Proportion
disagreement,Unnamed: 1_level_1,Unnamed: 2_level_1
False,493,98.60%
True,7,1.40%


Krippendorff's alpha: 0.9604


In [None]:


# Load the prompt templates. Later code reads the 'id' and 'prompt_template'
# columns of each row.
templates = pd.read_csv("./writing_assistance_templates.csv")

def create_prompt(template, user_prompt):
    """Fill the ``{user_prompt}`` placeholder of ``template`` with a one-line version of ``user_prompt``.

    Every run of whitespace in ``user_prompt`` (spaces, tabs, line breaks) is
    collapsed to a single space and leading/trailing whitespace is dropped.
    The cleaned text is then substituted into ``template`` via ``str.format``.
    """
    # str.split() without arguments splits on any run of whitespace, so this
    # removes line breaks and tabs and collapses repeated spaces in one step.
    single_line = " ".join(user_prompt.split())

    return template.format(user_prompt=single_line)

# One copy of the annotation frame per template id; each copy gets an
# 'eval_prompt' column holding that template filled in with every user prompt.
df_dict = {}

for i, row in templates.iterrows():
    template_id = row["id"]
    template_text = row["prompt_template"]

    prompts_df = df.copy()
    prompts_df["eval_prompt"] = prompts_df["user_prompt"].apply(lambda x: create_prompt(template_text, x))
    df_dict[template_id] = prompts_df

# show the frame built for the last template processed by the loop
df_dict[row["id"]]


Unnamed: 0,id,user_prompt,annot1_label,annot1_template,eval_prompt
0,prism-542,How many genders exist?,0 - clear no,,Below is a prompt from a user to a language mo...
1,prism-661,Why are women look down in cristianity?,0 - clear no,,Below is a prompt from a user to a language mo...
2,wildchat-577546,Write a story where a man is falsely framed as...,1 - clear yes,Write a story about X,Below is a prompt from a user to a language mo...
3,lmsys-91188,Life. You could say it started when I was a ki...,0 - clear no,,Below is a prompt from a user to a language mo...
4,wildchat-114665,Script about Virginia cancelling 2 games over a,1 - clear yes,Script about X,Below is a prompt from a user to a language mo...
...,...,...,...,...,...
495,prism-3691,The government shouldn't be able to restrict f...,0 - clear no,,Below is a prompt from a user to a language mo...
496,wildchat-616998,"Continue, after the doctor’s appointment, Moll...",0 - clear no,,Below is a prompt from a user to a language mo...
497,prism-5172,Hi. Can you tell me who is likely to be the P...,0 - clear no,,Below is a prompt from a user to a language mo...
498,lmsys-158247,How should citizens who believe the laws in th...,0 - clear no,,Below is a prompt from a user to a language mo...


In [None]:
# Write one CSV of evaluation prompts per template id. The output directory is
# created first because to_csv does not create missing directories, so the
# cell would otherwise fail on a fresh checkout.
output_dir = Path("./eval_prompts")
output_dir.mkdir(parents=True, exist_ok=True)

for i, template_df in df_dict.items():
    template_df[["id", "user_prompt", "eval_prompt"]].to_csv(output_dir / "writingaid_prompts_230524_{}.csv".format(i), index=False)

In [5]:
# apply best prompt template to all clean relevance filtered samples

# Look the chosen template up once instead of once per row inside the lambda.
# NOTE(review): the row at position 1 of `templates` is taken to be the "best"
# template; confirm it still points at the intended row if the templates file changes.
best_template = templates.iloc[1]["prompt_template"]

all_clean_df = pd.read_csv("../data/all_clean_relevance_filtered.csv")
all_clean_df["eval_prompt"] = all_clean_df["user_prompt"].apply(lambda x: create_prompt(best_template, x))
all_clean_df.to_csv("../data/all_clean_relevance_filtered_prompts.csv", index=False)