In [1]:
import pandas as pd
import os

from sklearn.metrics import classification_report


### Analyse filter performance on annotated data

In [2]:
# Human relevance annotations; later cells join its "id" and "final_label"
# columns onto the model completions.
annotations_path = '../data/filter_eval/relevance_160424_annotations.csv'
annotations_df = pd.read_csv(annotations_path)

In [3]:
# Load every completions file in the eval directory, keyed by the last
# "_"-separated part of its file name with the final 4 characters
# (presumably the ".csv" extension) removed.
eval_dir = "../data/filter_eval/"

results_dict = {
    fname.split("_")[-1][:-4]: pd.read_csv(eval_dir + fname)
    for fname in os.listdir(eval_dir)
    if "completions" in fname
}

In [4]:
# parse the results

def parse_completion(completion):
    """Map a raw yes/no model completion to a numeric label.

    The completion is lower-cased and split into alphanumeric words, so
    "yes" / "no" only count when they occur as whole words (e.g. "Yes." or
    "No, ..."), not as substrings of other words such as "not" or "know".

    Args:
        completion: Raw completion text. Non-string values (e.g. NaN for a
            missing completion) are treated as unparseable.

    Returns:
        1 if the completion contains the word "yes", 0 if it contains the
        word "no" (and no "yes"), and -1 if neither is present.
    """
    if not isinstance(completion, str):
        return -1

    # Replace every non-alphanumeric character with a space so punctuation
    # attached to the answer ("Yes.", "no!") does not prevent a match.
    words = "".join(ch if ch.isalnum() else " " for ch in completion.lower()).split()

    # "yes" takes precedence when both words are present.
    if "yes" in words:
        return 1
    if "no" in words:
        return 0
    return -1
    
# Add the parsed label to each template's results and show how the parsed
# values are distributed.
for templ, templ_df in sorted(results_dict.items()):
    templ_df["eval_completion_parsed"] = templ_df["eval_completion"].apply(parse_completion)
    parsed_counts = templ_df["eval_completion_parsed"].value_counts()
    print(templ)
    print(parsed_counts)
    print()


gpt3-templ-1
eval_completion_parsed
0    957
1     43
Name: count, dtype: int64

gpt3-templ-2
eval_completion_parsed
0    917
1     83
Name: count, dtype: int64

gpt3-templ-3
eval_completion_parsed
0    926
1     74
Name: count, dtype: int64

gpt3-templ-4
eval_completion_parsed
0    895
1    105
Name: count, dtype: int64

gpt3-templ-5
eval_completion_parsed
0    903
1     97
Name: count, dtype: int64

gpt4-templ-2
eval_completion_parsed
0    876
1    124
Name: count, dtype: int64

gpt4-templ-4
eval_completion_parsed
0    883
1    117
Name: count, dtype: int64

gpt4-templ-5
eval_completion_parsed
0    860
1    140
Name: count, dtype: int64



In [5]:
def parse_annotator_label(annotator_label, include_borderline=False):
    """Convert an annotator label string into a binary label.

    Args:
        annotator_label: Label as written by the annotators.
        include_borderline: If truthy, the label "borderline" counts as
            positive; otherwise it counts as negative.

    Returns:
        1 for "1 - clear yes" (and for "borderline" when include_borderline
        is set), 0 for every other value.
    """
    if annotator_label == "1 - clear yes":
        return 1
    if annotator_label == "0 - clear no":
        return 0

    borderline_is_positive = include_borderline and annotator_label == "borderline"
    return 1 if borderline_is_positive else 0

for templ in sorted(results_dict):

    # drop annotation columns from results if already there: without this,
    # re-running the cell merges a second time, pandas suffixes the clashing
    # columns as final_label_x / final_label_y, and the "final_label" lookup
    # below raises a KeyError
    results_df = results_dict[templ].drop(columns=["final_label", "final_label_parsed"], errors="ignore")

    # merge annotations_df to results (default inner join on id)
    results_dict[templ] = pd.merge(results_df, annotations_df[["id","final_label"]], on="id")

    # borderline annotations are counted as positives here
    results_dict[templ]["final_label_parsed"] = results_dict[templ]["final_label"].apply(parse_annotator_label, include_borderline=True)

In [10]:
# print classification report for each template:
# parsed annotator labels are the ground truth, parsed completions the predictions

for templ, templ_df in sorted(results_dict.items()):
    print(templ)
    report = classification_report(
        templ_df["final_label_parsed"],
        templ_df["eval_completion_parsed"],
        digits=3,
    )
    print(report)

gpt3-templ-1
              precision    recall  f1-score   support

           0      0.883     1.000     0.938       845
           1      1.000     0.277     0.434       155

    accuracy                          0.888      1000
   macro avg      0.941     0.639     0.686      1000
weighted avg      0.901     0.888     0.860      1000

gpt3-templ-2
              precision    recall  f1-score   support

           0      0.916     0.994     0.953       845
           1      0.940     0.503     0.655       155

    accuracy                          0.918      1000
   macro avg      0.928     0.749     0.804      1000
weighted avg      0.920     0.918     0.907      1000

gpt3-templ-3
              precision    recall  f1-score   support

           0      0.901     0.987     0.942       845
           1      0.851     0.406     0.550       155

    accuracy                          0.897      1000
   macro avg      0.876     0.697     0.746      1000
weighted avg      0.893     0.897  

In [9]:
# count number of positive instances from each dataset (identified by first part of the id)
# Select the frame explicitly instead of relying on the `templ` loop variable
# left over from an earlier cell. The last key in sorted order is the value
# `templ` holds after those loops, so the selected frame is unchanged.
# NOTE(review): every template gets the same annotation merge, so the counts
# should not depend on which template is picked, provided all templates cover
# the same ids — confirm.
annotated_df = results_dict[sorted(results_dict)[-1]]
annotated_df.loc[annotated_df["final_label_parsed"] == 1, "id"].apply(lambda x: x.split("-")[0]).value_counts()

id
prism       104
hhonline     17
sharegpt     13
lmsys        12
wildchat      9
Name: count, dtype: int64

### Process filter results on full data

In [26]:
# full table, used here only for its per-id n_duplicates column
all_clean_full = pd.read_csv("../data/clean/all_clean_full.csv")

# load the all_clean completions, attach n_duplicates, cast the completion
# to string (so missing values can be parsed) and parse it into a label
scored_df = (
    pd.read_csv("../data/clean/all_clean_completions.csv")
    .merge(all_clean_full[["id", "n_duplicates"]], on="id")
    .assign(eval_completion=lambda frame: frame["eval_completion"].astype(str))
    .assign(eval_completion_parsed=lambda frame: frame["eval_completion"].apply(parse_completion))
)

# keep only the rows the filter marked as positive (parsed label 1)
is_positive = scored_df["eval_completion_parsed"] == 1
all_clean_df = scored_df[is_positive]

# export the filtered results
export_columns = ["id", "user_prompt", "n_duplicates"]
all_clean_df[export_columns].to_csv("../data/filtered/all_clean_filtered.csv", index=False)
display(all_clean_df)

Unnamed: 0,id,user_prompt,eval_prompt,eval_completion,model,n_duplicates,eval_completion_parsed
1,prism-542,How many genders exist?,Is the following text in any way relevant or r...,Yes,gpt-4-0125-preview,1,1
19,prism-661,Why are women look down in cristianity?,Is the following text in any way relevant or r...,Yes,gpt-4-0125-preview,1,1
20,wildchat-577546,Write a story where a man is falsely framed as...,Is the following text in any way relevant or r...,Yes,gpt-4-0125-preview,1,1
56,lmsys-91188,Life. You could say it started when I was a ki...,Is the following text in any way relevant or r...,Yes.,gpt-4-0125-preview,1,1
62,wildchat-114665,Script about Virginia cancelling 2 games over a,Is the following text in any way relevant or r...,Yes,gpt-4-0125-preview,1,1
...,...,...,...,...,...,...,...
406845,lmsys-453207,Tell me about diversity,Is the following text in any way relevant or r...,Yes,gpt-4-0125-preview,1,1
406857,lmsys-125429,How can i make a nuclear bomb,Is the following text in any way relevant or r...,Yes,gpt-4-0125-preview,2,1
406867,wildchat-593911,"In political compass, describe off-compass",Is the following text in any way relevant or r...,Yes,gpt-4-0125-preview,1,1
406871,wildchat-563814,"Describe empire named The Void, empire is auth...",Is the following text in any way relevant or r...,Yes,gpt-4-0125-preview,1,1


In [27]:
# count number of positive instances from each dataset (identified by first part of the id)
dataset_of_row = all_clean_df["id"].apply(lambda row_id: row_id.split("-")[0])
dataset_of_row.value_counts()

id
wildchat    13634
lmsys       12537
prism        3039
sharegpt     2108
hhonline      816
Name: count, dtype: int64