In [1]:
import glob
import os

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
def analyse_and_filter_hallucination(df):
    """Score one result CSV while accounting for rows flagged with -1.

    Rows whose ``actual_model_prediction`` equals ``-1`` are counted as
    hallucinations and left out of the scoring; the remaining rows are scored
    with ``roc_auc_score`` against ``Toxicity_label``.

    Note: the returned values are called "F1" throughout this notebook, but
    the number computed here is the ROC AUC returned by ``roc_auc_score``.

    Parameters
    ----------
    df : str or file-like
        Despite its name this is not a DataFrame: it is handed straight to
        ``pd.read_csv``. The file must provide the columns
        ``actual_model_prediction`` and ``Toxicity_label``.

    Returns
    -------
    tuple
        ``(row_count, hallucination_count, score, rectified_score)`` where
        ``rectified_score`` is ``score`` scaled by the share of rows that were
        not hallucinated. Both scores are ``0`` when no row survives the
        filter or when ``roc_auc_score`` raises ``ValueError``.
    """
    frame = pd.read_csv(df)
    total_rows = len(frame)

    predictions = frame["actual_model_prediction"]
    hallucinated_rows = frame[predictions == -1]
    usable_rows = frame[predictions != -1]
    hallucination_count = len(hallucinated_rows)

    # Nothing left to score: report the counts with zeroed scores.
    if len(usable_rows) == 0:
        return total_rows, hallucination_count, 0, 0

    print(len(usable_rows))
    try:
        score = roc_auc_score(
            usable_rows["Toxicity_label"],
            usable_rows["actual_model_prediction"],
        )
    except ValueError:
        # roc_auc_score could not produce a value for these rows.
        print("Small number of clean data and one label is prenset in the y_true")
        score = 0

    # Penalise the score by the fraction of rows that were hallucinated.
    rectified_score = (1-(hallucination_count/total_rows)) * score
    return total_rows, hallucination_count, score, rectified_score

In [3]:
def collect_results (clean_IFM_results,Language_folder,model_name, langauge):
    """Summarise every per-run result CSV of one model into a single table.

    All ``*.csv`` files directly inside
    ``clean_IFM_results + Language_folder + model_name`` are visited in sorted
    order. Each file name is split on ``_``; field 0 is stored as the gender,
    field 3 as the model name and field 4 as the instruction language
    (e.g. ``NB_IFM_HSD_Aya_german_instructions.csv``). Files with fewer than
    five fields are skipped, so a ``results_summary.csv`` written into the same
    folder by an earlier run does not break a re-run.

    Parameters
    ----------
    clean_IFM_results : str
        Base path; must end with a path separator because the search path is
        built by plain string concatenation.
    Language_folder : str
        Sub-folder below the base path; must also end with a separator.
    model_name : str
        Name of the model folder to scan.
    langauge : str
        Value stored in the ``Language`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per analysed file with the columns ``Model_Name``, ``Gender``,
        ``Language``, ``Instructions_languation``, ``dataset_size``,
        ``No_Hallucination``, ``F1_scores`` and ``Rectified_F1_scors``. The
        frame is empty (columns only) when no file qualifies.
    """
    columns = [
        "Model_Name",
        "Gender",
        "Language",
        "Instructions_languation",
        "dataset_size",
        "No_Hallucination",
        "F1_scores",
        "Rectified_F1_scors",
    ]
    records = []
    path = clean_IFM_results+Language_folder+model_name+"/*.csv"

    # glob returns files in arbitrary order; sorting keeps the summary stable
    # from one run to the next.
    for fname in sorted(glob.glob(path)):
        fields = os.path.basename(fname).split("_")
        if len(fields) < 5:
            # Not a per-run result file (e.g. a previously written
            # results_summary.csv): indexing fields[3] / fields[4] would fail.
            print("Skipping " + fname)
            continue

        print(fname)
        df_size, no_hallucination, F1_score, rectified_F1_score = analyse_and_filter_hallucination(fname)

        records.append({
            "Model_Name": fields[3],
            "Gender": fields[0],
            "Language": langauge,
            "Instructions_languation": fields[4],
            "dataset_size": df_size,
            "No_Hallucination": no_hallucination,
            "F1_scores": F1_score,
            "Rectified_F1_scors": rectified_F1_score,
        })

    # Build the frame once, after the loop, so it also exists when no file
    # matched (the columns are still present in that case).
    return pd.DataFrame(records, columns=columns)
        

In [4]:
# Base path prepended to the language and model folders when locating result
# CSVs and when writing each summary. The trailing slash is required because
# the paths below are built by plain string concatenation.
clean_IFM_results = "../IFM_results_cleaning/"

In [5]:
# Sub-folder, appended to clean_IFM_results, that contains one folder per model.
# Needs its trailing slash for the same string-concatenation reason.
Language_folder = "Germany/Clean_results/"
# Passed to collect_results, which stores it in the "Language" column of every row.
data_language = "German"

## Aya

In [6]:
# Summarise every Aya result file and save the table in the model's folder.
Model_name = "Aya"
collect_results_aya = collect_results(
    clean_IFM_results, Language_folder, Model_name, data_language
)
collect_results_aya.to_csv(
    f"{clean_IFM_results}{Language_folder}{Model_name}/results_summary.csv",
    index=False,
)

../IFM_results_cleaning/Germany/Clean_results/Aya/NB_IFM_HSD_Aya_german_instructions.csv
4005
../IFM_results_cleaning/Germany/Clean_results/Aya/Male_IFM_HSD_Aya_english_instructions.csv
1600
../IFM_results_cleaning/Germany/Clean_results/Aya/Female_IFM_HSD_Aya_german_instructions.csv
3662
../IFM_results_cleaning/Germany/Clean_results/Aya/Male_IFM_HSD_Aya_german_instructions.csv
3924
../IFM_results_cleaning/Germany/Clean_results/Aya/Female_IFM_HSD_Aya_english_instructions.csv
1387
../IFM_results_cleaning/Germany/Clean_results/Aya/NB_IFM_HSD_Aya_english_instructions.csv
1819


## Bloomz

In [7]:
# Summarise every Bloomz result file and save the table in the model's folder.
Model_name = "Bloomz"
collect_results_bloomz = collect_results(
    clean_IFM_results, Language_folder, Model_name, data_language
)
collect_results_bloomz.to_csv(
    f"{clean_IFM_results}{Language_folder}{Model_name}/results_summary.csv",
    index=False,
)

../IFM_results_cleaning/Germany/Clean_results/Bloomz/Female_IFM_HSD_Bloomz_german_instructions.csv
4292
../IFM_results_cleaning/Germany/Clean_results/Bloomz/Male_IFM_HSD_Bloomz_german_instructions.csv
4218
../IFM_results_cleaning/Germany/Clean_results/Bloomz/Female_IFM_HSD_Bloomz_english_instructions.csv
4292
../IFM_results_cleaning/Germany/Clean_results/Bloomz/Male_IFM_HSD_Bloomz_english_instructions.csv
4218
../IFM_results_cleaning/Germany/Clean_results/Bloomz/NB_IFM_HSD_Bloomz_english_instructions.csv
4218
../IFM_results_cleaning/Germany/Clean_results/Bloomz/NB_IFM_HSD_Bloomz_german_instructions.csv
4218


## Flan-T5

In [8]:
# Summarise every Flan-T5 result file and save the table in the model's folder.
Model_name = "Flan-T5"
collect_results_Flan_T5 = collect_results(
    clean_IFM_results, Language_folder, Model_name, data_language
)
collect_results_Flan_T5.to_csv(
    f"{clean_IFM_results}{Language_folder}{Model_name}/results_summary.csv",
    index=False,
)

../IFM_results_cleaning/Germany/Clean_results/Flan-T5/Female_IFM_HSD_Flan-T5_german_instructions.csv
../IFM_results_cleaning/Germany/Clean_results/Flan-T5/Male_IFM_HSD_Flan-T5_german_instructions.csv
../IFM_results_cleaning/Germany/Clean_results/Flan-T5/NB_IFM_HSD_Flan-T5_english_instructions.csv
4218
../IFM_results_cleaning/Germany/Clean_results/Flan-T5/Female_IFM_HSD_Flan-T5_english_instructions.csv
4292
../IFM_results_cleaning/Germany/Clean_results/Flan-T5/NB_IFM_HSD_Flan-T5_german_instructions.csv
../IFM_results_cleaning/Germany/Clean_results/Flan-T5/Male_IFM_HSD_Flan-T5_english_instructions.csv
4218


## InstructLLAMA

In [9]:
# Summarise every InstructLLAMA result file and save the table in the model's folder.
Model_name = "InstructLLAMA"
collect_results_lama = collect_results(
    clean_IFM_results, Language_folder, Model_name, data_language
)
collect_results_lama.to_csv(
    f"{clean_IFM_results}{Language_folder}{Model_name}/results_summary.csv",
    index=False,
)

../IFM_results_cleaning/Germany/Clean_results/InstructLLAMA/NB_IFM_HSD_InstructLLAMA_english_instructions.csv
2343
../IFM_results_cleaning/Germany/Clean_results/InstructLLAMA/Male_IFM_HSD_InstructLLAMA_german_instructions.csv
475
../IFM_results_cleaning/Germany/Clean_results/InstructLLAMA/Female_IFM_HSD_InstructLLAMA_german_instructions.csv
406
../IFM_results_cleaning/Germany/Clean_results/InstructLLAMA/Female_IFM_HSD_InstructLLAMA_english_instructions.csv
2410
../IFM_results_cleaning/Germany/Clean_results/InstructLLAMA/Male_IFM_HSD_InstructLLAMA_english_instructions.csv
2330
../IFM_results_cleaning/Germany/Clean_results/InstructLLAMA/NB_IFM_HSD_InstructLLAMA_german_instructions.csv
412


## InstructMistral

In [10]:
# Summarise every InstructMistral result file and save the table in the model's folder.
Model_name = "InstructMistral"
collect_results_mistral = collect_results(
    clean_IFM_results, Language_folder, Model_name, data_language
)
collect_results_mistral.to_csv(
    f"{clean_IFM_results}{Language_folder}{Model_name}/results_summary.csv",
    index=False,
)

../IFM_results_cleaning/Germany/Clean_results/InstructMistral/NB_IFM_HSD_InstructMistral_english_instructions.csv
../IFM_results_cleaning/Germany/Clean_results/InstructMistral/Female_IFM_HSD_InstructMistral_german_instructions.csv
12
Small number of clean data and one label is prenset in the y_true
../IFM_results_cleaning/Germany/Clean_results/InstructMistral/NB_IFM_HSD_InstructMistral_german_instructions.csv
4
Small number of clean data and one label is prenset in the y_true
../IFM_results_cleaning/Germany/Clean_results/InstructMistral/Female_IFM_HSD_InstructMistral_english_instructions.csv
../IFM_results_cleaning/Germany/Clean_results/InstructMistral/Male_IFM_HSD_InstructMistral_german_instructions.csv
1
Small number of clean data and one label is prenset in the y_true
../IFM_results_cleaning/Germany/Clean_results/InstructMistral/Male_IFM_HSD_InstructMistral_english_instructions.csv


## MT0

In [11]:
# Summarise every MT0 result file and save the table in the model's folder.
Model_name = "MT0"
collect_results_MT0 = collect_results(
    clean_IFM_results, Language_folder, Model_name, data_language
)
collect_results_MT0.to_csv(
    f"{clean_IFM_results}{Language_folder}{Model_name}/results_summary.csv",
    index=False,
)

../IFM_results_cleaning/Germany/Clean_results/MT0/Male_IFM_HSD_MT0_english_instructions.csv
4218
../IFM_results_cleaning/Germany/Clean_results/MT0/NB_IFM_HSD_MT0_german_instructions.csv
4218
../IFM_results_cleaning/Germany/Clean_results/MT0/Female_IFM_HSD_MT0_german_instructions.csv
4292
../IFM_results_cleaning/Germany/Clean_results/MT0/Female_IFM_HSD_MT0_english_instructions.csv
4292
../IFM_results_cleaning/Germany/Clean_results/MT0/NB_IFM_HSD_MT0_english_instructions.csv
4218
../IFM_results_cleaning/Germany/Clean_results/MT0/Male_IFM_HSD_MT0_german_instructions.csv
4218
