In [None]:
import pandas as pd 
from google.cloud import translate_v2 as translate

### Vanilla Harmful

In [None]:
# Directory holding the harmful-instruction datasets, and the JSON files read from it.
path = "datasets_eval/vanilla"
harmful_list = [
    "I-CoNa.json",
    "I-Controversial.json",
    "I-MaliciousInstructions.json",
    "I-PhysicalSafetyUnsafe.json",
]

In [None]:
# Draw a fixed-size, seeded sample from every harmful-instruction file and
# stack the samples into a single table of English prompts.
N_list = [60, 40, 50, 50]  # rows sampled per file, aligned with harmful_list
rs = 42                    # shared seed so the selection is reproducible
if len(N_list) != len(harmful_list):
    raise ValueError("N_list must provide one sample size per entry of harmful_list")

selected_data_list = []
type_list = []
# zip pairs each file with its sample size without binding a loop variable
# named `id`, which would shadow the builtin for the rest of the kernel session.
for file, N in zip(harmful_list, N_list):
    path_file = f"{path}/{file}"
    data = pd.read_json(path_file)
    selected_data = data.sample(n=N, replace=False, random_state=rs).reset_index(drop=True)
    # The file name without its extension serves as the prompt category.
    type_list.extend([file.replace(".json", "")] * N)
    selected_data_list.append(selected_data)

# ignore_index gives the stacked frame a unique 0..n-1 index instead of
# repeating every per-file index.
df_data = pd.concat(selected_data_list, ignore_index=True)
df = pd.DataFrame({
    'prompts': df_data['instructions'],
    'type': type_list,
    # Sized from the data rather than a literal, so N_list can be changed freely.
    'lg': ['en'] * len(df_data),
})

In [None]:
# Write the sampled English prompts to a CSV file inside `path`.
df.to_csv(path_or_buf=f"{path}/eval_vanilla_en.csv")

In [None]:
# Load the adversarial evaluation set and collect the distinct values of its
# 'language' column (set-based, so the resulting order is unspecified).
data_adv = pd.read_csv("datasets_eval/adv/data_eval_completed.csv")
list_lg = [*set(data_adv["language"])]

In [None]:
# Category label ("low" / "medium" / "high") for every language code
# (presumably the language's resource level -- confirm).
# The insertion order is kept unchanged because later cells iterate over this mapping.
dict_lg = {
    "sw": "low",
    "pt": "high",
    "ja": "high",
    "mk": "low",
    "lo": "low",
    "fr": "high",
    "pag": "low",
    "mt": "low",
    "en": "high",
    "tt": "low",
    "jv": "low",
    "da": "medium",
    "el": "medium",
    "de": "high",
    "bn": "medium",
    "es": "high",
    "ar": "high",
    "zh": "high",
}

In [None]:
# Translate the sampled harmful prompts into every language of dict_lg with the
# Google Cloud Translation client. The growing dataset is written to disk after
# each language, so an interrupted run keeps everything translated so far.
translated_datasets = []

# API google translate
translate_client = translate.Client()

# Plain lists put every per-language frame on the same 0..n-1 index; assigning
# the df["prompts"] Series directly would carry df's own index along for 'en' only.
source_prompts = list(df["prompts"])
prompt_categories = list(df["type"])

for language, category in dict_lg.items():
    if language != 'en':
        print(f"Translation in {language}")
        vanilla_translated = []
        for prompt in source_prompts:
            # format_="text" makes the API treat the prompt as plain text; with
            # the default HTML handling the result contains entities such as
            # &#39; in place of apostrophes and quotes.
            prompt_translated = translate_client.translate(
                prompt, target_language=language, format_="text"
            )["translatedText"]
            vanilla_translated.append(prompt_translated)
        print(len(vanilla_translated))
    else:
        # 'en' is kept as-is: the prompts are stored without calling the API.
        print(f"Do not translated for {language}")
        vanilla_translated = source_prompts

    # Scalars (language, category) are broadcast to the number of prompts.
    df_translated = pd.DataFrame({
        "vanilla": vanilla_translated,
        "language": language,
        "language_category": category,
        "prompt_category": prompt_categories,
    })
    translated_datasets.append(df_translated)
    # Checkpoint: everything translated up to and including this language.
    print(f"Save dataset until {language}")
    pd.concat(translated_datasets).to_csv(f"{path}/eval_vanilla_harmful_{language}.csv")

dataset = pd.concat(translated_datasets)
print(f"Save final dataset")
dataset.to_csv(f"{path}/eval_vanilla_harmful_all.csv")

### Vanilla Harmless

In [None]:
# Point `path` at the harmless split and load the XSTest prompt table from it.
path = "datasets_eval/vanilla/harmless"
data = pd.read_csv(filepath_or_buffer=f"{path}/xstest_prompts.csv")

In [None]:
# Keep only the rows whose 'label' column equals "safe".
data_safe = data.loc[data["label"].eq("safe")]

In [None]:
# Draw the same number of safe prompts from every value of the 'type' column.
N = 20   # prompts sampled per type
rs = 42  # seed applied to each type's sample
# Each group is sampled explicitly and the pieces are stacked in group order.
# This selects the same rows as groupby('type').apply(sample) but keeps the
# 'type' column on every pandas version: apply operating on the grouping column
# is deprecated since pandas 2.2, and later cells read data_safe_sample["type"].
data_safe_sample = pd.concat(
    [
        group.sample(n=N, replace=False, random_state=rs)
        for _, group in data_safe.groupby('type')
    ]
).reset_index(drop=True)

In [None]:
# Translate the sampled safe prompts into every language of dict_lg with the
# Google Cloud Translation client. The growing dataset is written to disk after
# each language, so an interrupted run keeps everything translated so far.
translated_datasets = []

# API google translate
# Created here so this section does not depend on a client left over from an
# earlier cell.
translate_client = translate.Client()

source_prompts = list(data_safe_sample["prompt"])

for language, category in dict_lg.items():
    if language != 'en':
        print(f"Translation in {language}")
        vanilla_translated = []
        for prompt in source_prompts:
            # The translation must be computed inside the loop: without this
            # assignment `prompt_translated` is either undefined or a stale
            # value from a previously executed cell, repeated for every row.
            # format_="text" avoids HTML entities (e.g. &#39;) in the result.
            prompt_translated = translate_client.translate(
                prompt, target_language=language, format_="text"
            )["translatedText"]
            vanilla_translated.append(prompt_translated)
        print(len(vanilla_translated))
    else:
        # 'en' is kept as-is: the prompts are stored without calling the API.
        print(f"Do not translated for {language}")
        vanilla_translated = source_prompts

    # Scalars (language, category) are broadcast to the number of prompts; the
    # remaining columns are copied from the sample in the same row order.
    df_translated = pd.DataFrame({
        "vanilla": vanilla_translated,
        "language": language,
        "language_category": category,
        "prompt_category": list(data_safe_sample["type"]),
        "focus": list(data_safe_sample["focus"]),
        "note": list(data_safe_sample["note"]),
    })
    translated_datasets.append(df_translated)
    # Checkpoint: everything translated up to and including this language.
    print(f"Save dataset until {language}")
    pd.concat(translated_datasets).to_csv(f"{path}/eval_vanilla_harmless_{language}.csv")

dataset = pd.concat(translated_datasets)
print(f"Save final dataset")
dataset.to_csv(f"{path}/eval_vanilla_harmless_all.csv")

In [None]:
# Per-language summary statistics of the assembled dataset (rendered as the cell output).
(
    dataset
    .groupby("language")
    .describe()
)

# Concatenate the vanilla and adversarial datasets

In [None]:
import pandas as pd 

In [None]:
# Reload the adversarial and vanilla evaluation sets from disk.
# NOTE(review): data_eval_vanilla_all.csv is not written by any cell visible in
# this notebook -- confirm how it is produced.
adv = pd.read_csv(filepath_or_buffer="datasets_eval/adv/data_eval_completed.csv")
vanilla = pd.read_csv(filepath_or_buffer="datasets_eval/vanilla/data_eval_vanilla_all.csv")

In [None]:
# Give both sets a common 'prompt' column, stack them and save the result.
# NOTE: `vanilla` and `adv` are reassigned, so this cell expects the freshly
# loaded frames; re-running it without reloading fails on the dropped columns.
vanilla = (
    vanilla
    .rename(columns={"prompt_category": "data_type", "vanilla": "prompt"})
    .drop(columns=["Unnamed: 0.1", "focus", "note"])
)
adv = adv.rename(columns={"adversarial": "prompt"})
data_eval = pd.concat([vanilla, adv])
data_eval.to_csv("datasets_eval/data_eval_all.csv")
# Kept as the last expression so the per-language summary is actually rendered;
# in the middle of the cell its result is silently discarded.
data_eval.groupby("language").describe()