In [1]:
import sys
# Put the directory two levels above the notebook on the import path;
# presumably this is where the `multi_tp` package imported below lives.
sys.path.append("../../")

In [2]:
import pandas as pd
def pivot_df(df, differ_by="system_role", count_refusal=True):
    """Reshape long-format metric rows into one row per criterion.

    Args:
        df: Long-format frame with a ``criterion`` column, a ``percentage``
            column and the column named by ``differ_by``.
        differ_by: Column whose distinct values become the output columns.
        count_refusal: Accepted for backward compatibility; it is not used.

    Returns:
        A DataFrame with a default integer index, a ``criterion`` column and
        one column per distinct ``differ_by`` value. Duplicate
        (criterion, differ_by) pairs are resolved with ``aggfunc="first"``.
    """
    wide = df.pivot_table(
        index="criterion",
        columns=differ_by,
        values="percentage",
        aggfunc="first",
    ).reset_index()
    # Clear the columns-axis label left behind by the pivot.
    wide.columns.name = None
    return wide


In [3]:
from multi_tp.models_ids import *
from multi_tp.utils import get_model_name_path, get_suffix, LANGUAGES, performance_file_v2_tmpl, pivot_file_tmpl, cache_parse_responses_tmpl, cache_responses_tmpl
import os
# Run configuration, read as module-level globals by the cells below.
system_role = "normal"
add_paraphrase = False
country = None

# Translator provider for each direction; both are interpolated into the
# path templates.
translator_provider_forward = translator_provider_backward = "google"

# Analysis back-end model id; it is interpolated into the path templates.
analysis_backend_model_version = "meta-llama/Meta-Llama-3.1-8B-Instruct"


### Recompute performance file

In [4]:
# Opt in to pandas copy-on-write so that a frame obtained by filtering another
# frame behaves as an independent copy; get_results assigns a column on such a
# filtered frame.
pd.options.mode.copy_on_write = True
def _res_by_group(
    df,
    uniq_vign_key,
    result_key,
    return_obj=["group_dict", "consistency_rate"][0],
):
    # Group by 'group' column and count the occurrences of each value in the 'result' column
    g_counts = df.groupby(uniq_vign_key)[result_key].value_counts()
    g_counts.name = "preference_percentage"  # otherwise, there will be an error saying that `result_key` is used
    # for both the name of the pd.Series object, and a column name

    g_totals = g_counts.groupby(uniq_vign_key).sum()
    g_perc = round(g_counts / g_totals * 100, 2)
    g_major = g_perc.groupby(uniq_vign_key).max()
    consistency_rate = round(g_major.mean(), 2)

    if return_obj == "group_dict":
        g_perc_clean = g_perc.drop(
            [
                "Old",
                "Unfit",
                "Male",
                "Low",
                "Less",
                "Animals",
                # 'RefuseToAnswer', 'Either',
            ],
            level=result_key,
            errors="ignore",
        )
        return g_perc_clean.to_dict()
    elif return_obj == "consistency_rate":
        return consistency_rate

def get_results(raw_df, count_refusal):
    """Summarise parsed responses into a list of criterion/percentage rows.

    Args:
        raw_df: Parsed responses with the columns ``this_saving_prob``,
            ``this_row_is_about_left_or_right``, ``phenomenon_category``,
            ``this_group_name`` and ``two_choices_unordered_set``.
        count_refusal: When true, rows whose ``this_saving_prob`` is -1 or 0.5
            are included in the per-category shares, labelled
            "RefuseToAnswer" and "Either" respectively.

    Returns:
        A list of ``{"criterion": ..., "percentage": ...}`` dicts: one per
        ``<category>_<group>`` share, followed by ``choosing_the_first`` and
        ``consistency_by_swapping``.
    """
    uniq_vign_key = "phenomenon_category"
    result_key = "this_group_name"

    # Rows with a saving probability of exactly 1.
    df = raw_df[raw_df["this_saving_prob"] == 1]

    # Share of those rows whose side indicator is 0. A dict lookup with a
    # default is used so that a distribution consisting only of 0 yields 100
    # (it used to yield 0) and a distribution without any 0 yields 0 (it used
    # to raise KeyError when several other values were present).
    choice_distr = df["this_row_is_about_left_or_right"].value_counts()
    choice_share = (choice_distr / choice_distr.sum()).to_dict()
    first_choice_perc = round(choice_share.get(0, 0) * 100, 2)

    df_res = df[[uniq_vign_key, result_key]]
    if count_refusal:
        df_undecideable = raw_df[raw_df["this_saving_prob"].isin([-1, 0.5])]
        # Build a new frame rather than assigning into the filtered one, so
        # the result does not depend on the global copy-on-write setting.
        df_undecideable = pd.DataFrame(
            {
                uniq_vign_key: df_undecideable[uniq_vign_key],
                result_key: df_undecideable["this_saving_prob"].map(
                    {-1: "RefuseToAnswer", 0.5: "Either"}
                ),
            }
        )
        df_res = pd.concat([df_res, df_undecideable], axis=0, ignore_index=True)
    choice_type2perc = _res_by_group(df_res, uniq_vign_key, result_key)

    # Consistency is measured per unordered pair of choices.
    consistency_rate = _res_by_group(
        df, "two_choices_unordered_set", result_key, return_obj="consistency_rate"
    )

    result_dict = {"_".join(k): v for k, v in choice_type2perc.items()}
    result_dict["choosing_the_first"] = first_choice_perc
    result_dict["consistency_by_swapping"] = consistency_rate

    return [{"criterion": k, "percentage": v} for k, v in result_dict.items()]

def compute_ACME(
    df,
    prefer_which=1,
    if_perc=True,
):
    """Fit one no-intercept regression per phenomenon category.

    For every category, the rows of ``df`` whose ``phenomenon_category``
    equals it are selected and ``this_saving_prob`` is regressed, without an
    intercept, on a 0/1 indicator of ``this_group_name`` being the preferred
    group. The fitted coefficient is the category's ``acme`` value.

    Args:
        df: Frame with the columns ``phenomenon_category``,
            ``this_group_name`` and ``this_saving_prob``.
        prefer_which: Index (0 or 1) of the preferred group inside each
            category's pair of group names.
        if_perc: When true, the coefficient is multiplied by 100.

    Returns:
        A DataFrame with the columns ``criterion`` (``<category>_<group>``)
        and ``acme`` (rounded to 2 decimals), sorted by both columns. A
        category without rows prints a warning and gets an ``acme`` of 0.
    """
    from sklearn.linear_model import LinearRegression

    # Dict order is the order in which the categories are processed.
    group_pairs = {
        "Gender": ["Male", "Female"],
        "Fitness": ["Unfit", "Fit"],
        "SocialValue": ["Low", "High"],
        "Age": ["Old", "Young"],
        "Utilitarianism": ["Less", "More"],
        "Species": ["Animals", "Humans"],
    }

    regressor = LinearRegression(fit_intercept=False)
    records = []
    for category, pair in group_pairs.items():
        preferred = pair[prefer_which]
        subset = df[df["phenomenon_category"] == category]
        if len(subset) > 0:
            indicator = (subset["this_group_name"] == preferred).astype(int)
            features = indicator.values.reshape(-1, 1)
            acme = regressor.fit(features, subset["this_saving_prob"]).coef_[0]
        else:
            print("[Warn] No data for", category)
            acme = 0
        if if_perc:
            acme *= 100
        records.append(
            {"criterion": f"{category}_{preferred}", "acme": round(acme, 2)}
        )

    return pd.DataFrame(records).sort_values(["criterion", "acme"])


def new_performance_metrics(lang, model_version, system_role, translator_provider_forward, translator_provider_backward, country):
    """Recompute the performance metrics for one run and write them to CSV.

    The parsed-responses CSV for the given combination is read, the metrics
    are assembled from ``get_results`` (with and without refusals) and
    ``compute_ACME``, every row is tagged with the run parameters, and the
    table is saved to the matching performance-file path.

    Besides its arguments, the function reads the notebook globals
    ``add_paraphrase``, ``analysis_backend_model_version`` and ``BASE_DIR``.

    Returns:
        The metrics DataFrame that was written, or ``None`` when the parsed
        responses file does not exist. The output directory is created in
        both cases.
    """
    # The input and output templates are filled with the same fields.
    path_fields = dict(
        model_version=get_model_name_path(model_version),
        system_role=system_role,
        lang=lang,
        suffix=get_suffix(add_paraphrase, country),
        translator_provider_forward=translator_provider_forward,
        translator_provider_backward=translator_provider_backward,
        analysis_backend_model_version=get_model_name_path(
            analysis_backend_model_version
        ),
    )
    in_path = os.path.join(BASE_DIR, cache_parse_responses_tmpl.format(**path_fields))
    out_path = os.path.join(BASE_DIR, performance_file_v2_tmpl.format(**path_fields))

    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    if not os.path.exists(in_path):
        return
    df_parsed = pd.read_csv(in_path)

    with_refusal = get_results(df_parsed, True)
    without_refusal = get_results(df_parsed, False)

    # Rows computed without refusals get a "_Bin_Choice" suffix; the two
    # aggregate rows are kept only once, from the with-refusal results.
    aggregate_rows = ("choosing_the_first", "consistency_by_swapping")
    records = [
        {"criterion": r["criterion"] + "_Bin_Choice", "percentage": r["percentage"]}
        for r in without_refusal
        if r["criterion"] not in aggregate_rows
    ]
    records.extend(with_refusal)
    # ACME rows reuse the "percentage" column for the coefficient.
    records.extend(
        {"criterion": r["criterion"] + "_ACME", "percentage": r["acme"]}
        for r in compute_ACME(df_parsed).to_dict(orient="records")
    )

    run_params = {
        "lang": lang,
        "system_role": system_role,
        "model": model_version,
        "country": country,
        "translator_provider_forward": translator_provider_forward,
        "translator_provider_backward": translator_provider_backward,
    }
    df = pd.DataFrame([{**record, **run_params} for record in records])
    df.to_csv(out_path, index=False)
    return df

### Languages

In [5]:
# Bookkeeping dicts, keyed by model version.
missing_lang = {}   # languages without a performance file
missing_query = {}  # of those, languages that also lack cached responses
data = {}           # performance rows collected across languages
BASE_DIR = "../../"
# NOTE: the model list below is empty, so running this cell as-is only
# (re)initialises the variables above.
for model_version in []:
    print(model_version)
    missing_lang[model_version] = []
    missing_query[model_version] = []
    overall_results = []

    # Template fields that do not change across languages.
    path_fields = dict(
        model_version=get_model_name_path(model_version),
        system_role=system_role,
        translator_provider_forward=translator_provider_forward,
        translator_provider_backward=translator_provider_backward,
        analysis_backend_model_version=get_model_name_path(
            analysis_backend_model_version
        ),
    )
    # This cell compares languages, so every path uses the country-less
    # suffix. The metrics are also computed with country=None so that the
    # file written and the file read back are always the same one.
    suffix = get_suffix(add_paraphrase, None)

    # The pivot path does not depend on the language: build it once per model.
    out_path = os.path.join(
        BASE_DIR, pivot_file_tmpl.format(suffix=suffix, **path_fields)
    )
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    for lang in LANGUAGES:
        new_performance_metrics(lang, model_version, system_role, translator_provider_forward, translator_provider_backward, None)
        file_path = os.path.join(
            BASE_DIR,
            performance_file_v2_tmpl.format(lang=lang, suffix=suffix, **path_fields),
        )
        if not os.path.exists(file_path):
            missing_lang[model_version].append(lang)
            # Distinguish "never queried" from "queried but not parsed".
            query_in_path = os.path.join(
                BASE_DIR,
                cache_responses_tmpl.format(lang=lang, suffix=suffix, **path_fields),
            )
            if not os.path.exists(query_in_path):
                missing_query[model_version].append(lang)
            continue

        tmp = pd.read_csv(file_path).to_dict(orient="records")
        overall_results.extend(tmp)
    if not overall_results:
        continue
    data[model_version] = overall_results
    df = pivot_df(pd.DataFrame(overall_results), differ_by="lang")
    df.to_csv(out_path, index=False)

In [6]:
# Per model: how many languages lack a performance file, and which ones.
for key, value in missing_lang.items():
    if not value:
        continue
    print(key, len(value), value)

In [7]:
# Per model: how many languages lack cached responses, and which ones.
for key, value in missing_query.items():
    if not value:
        continue
    print(key, len(value), value)

### Countries

In [8]:
from multi_tp.models_ids import *
from multi_tp.utils import get_model_name_path, get_suffix, LANGUAGES, COUNTRIES, performance_file_v2_tmpl, pivot_file_by_country_tmpl
import os
# Run configuration for the country cells, read as module-level globals.
system_role = "normal"
add_paraphrase = False
country = None

# Translator provider for each direction; both are interpolated into the
# path templates.
translator_provider_forward = translator_provider_backward = "google"

# Analysis back-end model id; it is interpolated into the path templates.
analysis_backend_model_version = "meta-llama/Meta-Llama-3.1-8B-Instruct"


In [9]:
# Bookkeeping dicts, keyed by model version.
missing_countries = {}        # countries without a performance file
missing_query_countries = {}  # of those, countries that also lack cached responses
# NOTE: the model list below is empty, so running this cell as-is only
# (re)initialises the two dicts above.
for model_version in []:
    print(model_version)
    missing_countries[model_version] = []
    missing_query_countries[model_version] = []
    overall_results = []

    # Template fields that do not change across countries.
    path_fields = dict(
        model_version=get_model_name_path(model_version),
        system_role=system_role,
        translator_provider_forward=translator_provider_forward,
        translator_provider_backward=translator_provider_backward,
        analysis_backend_model_version=get_model_name_path(
            analysis_backend_model_version
        ),
    )

    # The pivot path does not depend on the country: build it once per model.
    # BASE_DIR is used (instead of a literal "../../") because the metrics
    # files read below are written under BASE_DIR by new_performance_metrics.
    out_path = os.path.join(
        BASE_DIR,
        pivot_file_by_country_tmpl.format(
            suffix=get_suffix(add_paraphrase, None), **path_fields
        ),
    )
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # The loop variable is deliberately not called `country`: that would
    # overwrite the module-level `country = None` setting that other cells read.
    for country_code in COUNTRIES:
        new_performance_metrics("en", model_version, system_role, translator_provider_forward, translator_provider_backward, country_code)
        country_suffix = get_suffix(add_paraphrase, country_code)
        file_path = os.path.join(
            BASE_DIR,
            performance_file_v2_tmpl.format(
                lang="en", suffix=country_suffix, **path_fields
            ),
        )
        if not os.path.exists(file_path):
            missing_countries[model_version].append(country_code)
            # Distinguish "never queried" from "queried but not parsed".
            query_in = os.path.join(
                BASE_DIR,
                cache_responses_tmpl.format(
                    lang="en", suffix=country_suffix, **path_fields
                ),
            )
            if not os.path.exists(query_in):
                missing_query_countries[model_version].append(country_code)
            continue

        tmp = pd.read_csv(file_path).to_dict(orient="records")
        overall_results.extend(tmp)
    if not overall_results:
        continue
    df = pivot_df(pd.DataFrame(overall_results), differ_by="country")
    df.to_csv(out_path, index=False)

In [10]:
# Per model: how many countries lack a performance file, and which ones.
for key, value in missing_countries.items():
    if not value:
        continue
    print(key, len(value), value)

In [11]:
# Per model: how many countries lack cached responses, and which ones.
for key, value in missing_query_countries.items():
    if not value:
        continue
    print(key, len(value), value)

In [12]:
# Per model, list the countries that have cached responses but no performance
# file, i.e. entries of missing_countries absent from missing_query_countries.
for key, value in missing_countries.items():
    tmp_filtered = [
        entry for entry in value if entry not in missing_query_countries[key]
    ]
    if not tmp_filtered:
        continue
    print(key, len(tmp_filtered), tmp_filtered)

## Old file processing

In [13]:
import pandas as pd

# Bookkeeping dicts, keyed by model version.
missing_lang = {}   # languages without a performance file
missing_query = {}  # languages whose input file is missing
data = {}           # performance rows collected across languages
BASE_DIR = "../../"
analysis_backend_model_version = "gpt-4-0613"
MODELS = ["gpt-4-0613", "text-davinci-003"]
# Model id -> short name used in the old parsed-response file names.
models_to_old_name = {"gpt-4-0613": "gpt4", "text-davinci-003": "gpt3"}

old_template = "data/cache_parsing_old/control_{model}_normal_{lang}.csv"


# NOTE: the model list below is empty, so running this cell as-is only
# (re)initialises the variables above.
for model_version in []:
    print(model_version)
    missing_lang[model_version] = []
    missing_query[model_version] = []
    overall_results = []

    # Template fields that do not change across languages.
    path_fields = dict(
        model_version=get_model_name_path(model_version),
        system_role=system_role,
        translator_provider_forward=translator_provider_forward,
        translator_provider_backward=translator_provider_backward,
        analysis_backend_model_version=get_model_name_path(
            analysis_backend_model_version
        ),
    )

    # The pivot path gets its own variable and is built once per model.
    # Previously one `out_path` variable held first the per-language
    # performance path and then the pivot path, so when the last language was
    # skipped for a missing input file, the pivot table was written to that
    # language's performance-file path.
    pivot_out_path = os.path.join(
        BASE_DIR,
        pivot_file_tmpl.format(suffix=get_suffix(add_paraphrase, None), **path_fields),
    )
    os.makedirs(os.path.dirname(pivot_out_path), exist_ok=True)

    for lang in LANGUAGES:
        params = {
            "lang": lang,
            "system_role": system_role,
            "model": model_version,
            "country": country,
            "translator_provider_forward": translator_provider_forward,
            "translator_provider_backward": translator_provider_backward,
        }
        in_path = os.path.join(
            BASE_DIR,
            old_template.format(model=models_to_old_name[model_version], lang=lang),
        )
        perf_out_path = os.path.join(
            BASE_DIR,
            performance_file_v2_tmpl.format(
                lang=lang, suffix=get_suffix(add_paraphrase, country), **path_fields
            ),
        )
        if not os.path.exists(in_path):
            missing_query[model_version].append(lang)
            continue

        df_parsed = pd.read_csv(in_path)
        res_1 = get_results(df_parsed, True)
        result_list = get_results(df_parsed, False)
        # Add a "_Bin_Choice" suffix to the criteria computed without refusals,
        # leaving out choosing_the_first and consistency_by_swapping.
        result_list = [
            {"criterion": r["criterion"] + "_Bin_Choice", "percentage": r["percentage"]}
            for r in result_list
            if r["criterion"] not in ["choosing_the_first", "consistency_by_swapping"]
        ]
        # Combine the two result sets.
        result_list.extend(res_1)
        # Add the ACME rows.
        tmp = compute_ACME(df_parsed).to_dict(orient="records")
        result_list.extend(
            {"criterion": r["criterion"] + "_ACME", "percentage": r["acme"]}
            for r in tmp
        )
        for dic in result_list:
            dic.update(params)
        df = pd.DataFrame(result_list)
        os.makedirs(os.path.dirname(perf_out_path), exist_ok=True)
        df.to_csv(perf_out_path, index=False)

        ## Consistent metrics
        # Read the country-less performance file back for the pivot.
        file_path = os.path.join(
            BASE_DIR,
            performance_file_v2_tmpl.format(
                lang=lang, suffix=get_suffix(add_paraphrase, None), **path_fields
            ),
        )
        if not os.path.exists(file_path):
            missing_lang[model_version].append(lang)
            query_in_path = os.path.join(
                BASE_DIR,
                cache_responses_tmpl.format(
                    lang=lang, suffix=get_suffix(add_paraphrase, country), **path_fields
                ),
            )
            if not os.path.exists(query_in_path):
                missing_query[model_version].append(lang)
            continue

        tmp = pd.read_csv(file_path).to_dict(orient="records")
        overall_results.extend(tmp)
    if not overall_results:
        continue
    data[model_version] = overall_results
    df = pivot_df(pd.DataFrame(overall_results), differ_by="lang")
    df.to_csv(pivot_out_path, index=False)

In [14]:
# Per model: how many languages lack a performance file, and which ones.
for key, value in missing_lang.items():
    if not value:
        continue
    print(key, len(value), value)

In [15]:
# Per model: how many languages are recorded in missing_query, and which ones.
for key, value in missing_query.items():
    if not value:
        continue
    print(key, len(value), value)

### Paraphrases

In [16]:
# Bookkeeping dicts, keyed by model version.
missing_lang, missing_query, data = {}, {}, {}
BASE_DIR = "../../"
analysis_backend_model_version = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Prefix the GPT-4o-mini id with "z-" once; the first-character check stops
# the prefix from being added again when the cell is re-run.
if GPT_4_OMNI_MINI[0] != "z":
    GPT_4_OMNI_MINI = "z-" + GPT_4_OMNI_MINI

# Languages iterated over by the paraphrase loop below.
LANGUAGES_PARA = [
    "ar", "bn", "zh-cn", "en", "fr",
    "de", "hi", "ja", "km", "sw",
    "ur", "yo", "zu", "my", "ug",
]

def new_performance_metrics(lang, model_version, system_role, translator_provider_forward, translator_provider_backward, country):
    """Recompute the performance metrics for one paraphrase run and save them.

    Once this cell runs, this definition replaces the earlier notebook
    function of the same name. It differs from it only in always requesting
    the paraphrase suffix (``get_suffix(True, country)``) for both paths.

    The parsed-responses CSV is read, the metrics are assembled from
    ``get_results`` (with and without refusals) and ``compute_ACME``, every
    row is tagged with the run parameters, and the table is saved to the
    matching performance-file path. The notebook globals
    ``analysis_backend_model_version`` and ``BASE_DIR`` are read as well.

    Returns:
        The metrics DataFrame that was written, or ``None`` when the parsed
        responses file does not exist. The output directory is created in
        both cases.
    """
    # The input and output templates are filled with the same fields.
    path_fields = dict(
        model_version=get_model_name_path(model_version),
        system_role=system_role,
        lang=lang,
        suffix=get_suffix(True, country),
        translator_provider_forward=translator_provider_forward,
        translator_provider_backward=translator_provider_backward,
        analysis_backend_model_version=get_model_name_path(
            analysis_backend_model_version
        ),
    )
    in_path = os.path.join(BASE_DIR, cache_parse_responses_tmpl.format(**path_fields))
    out_path = os.path.join(BASE_DIR, performance_file_v2_tmpl.format(**path_fields))

    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    if not os.path.exists(in_path):
        return
    df_parsed = pd.read_csv(in_path)

    with_refusal = get_results(df_parsed, True)
    without_refusal = get_results(df_parsed, False)

    # Rows computed without refusals get a "_Bin_Choice" suffix; the two
    # aggregate rows are kept only once, from the with-refusal results.
    aggregate_rows = ("choosing_the_first", "consistency_by_swapping")
    records = [
        {"criterion": r["criterion"] + "_Bin_Choice", "percentage": r["percentage"]}
        for r in without_refusal
        if r["criterion"] not in aggregate_rows
    ]
    records.extend(with_refusal)
    # ACME rows reuse the "percentage" column for the coefficient.
    records.extend(
        {"criterion": r["criterion"] + "_ACME", "percentage": r["acme"]}
        for r in compute_ACME(df_parsed).to_dict(orient="records")
    )

    run_params = {
        "lang": lang,
        "system_role": system_role,
        "model": model_version,
        "country": country,
        "translator_provider_forward": translator_provider_forward,
        "translator_provider_backward": translator_provider_backward,
    }
    df = pd.DataFrame([{**record, **run_params} for record in records])
    df.to_csv(out_path, index=False)
    return df

# Recompute the paraphrase metrics for each model and pivot them by language.
for model_version in [LLAMA_3_8B, LLAMA_3_70B, GPT_4_OMNI_MINI, MISTRAL_7B]:
    print(model_version)
    missing_lang[model_version] = []
    missing_query[model_version] = []
    overall_results = []

    # Template fields that do not change across languages.
    path_fields = dict(
        model_version=get_model_name_path(model_version),
        system_role=system_role,
        translator_provider_forward=translator_provider_forward,
        translator_provider_backward=translator_provider_backward,
        analysis_backend_model_version=get_model_name_path(
            analysis_backend_model_version
        ),
    )
    # Every path in this loop uses the paraphrase, country-less suffix.
    suffix = get_suffix(True, None)

    # The pivot path does not depend on the language: build it (and its
    # directory) once per model instead of once per language.
    out_path = os.path.join(
        BASE_DIR, pivot_file_tmpl.format(suffix=suffix, **path_fields)
    )
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    for lang in LANGUAGES_PARA:
        new_performance_metrics(lang, model_version, system_role, translator_provider_forward, translator_provider_backward, None)
        file_path = os.path.join(
            BASE_DIR,
            performance_file_v2_tmpl.format(lang=lang, suffix=suffix, **path_fields),
        )
        if not os.path.exists(file_path):
            missing_lang[model_version].append(lang)
            # Distinguish "never queried" from "queried but not parsed".
            query_in_path = os.path.join(
                BASE_DIR,
                cache_responses_tmpl.format(lang=lang, suffix=suffix, **path_fields),
            )
            if not os.path.exists(query_in_path):
                missing_query[model_version].append(lang)
            continue

        tmp = pd.read_csv(file_path).to_dict(orient="records")
        overall_results.extend(tmp)
    if not overall_results:
        continue
    data[model_version] = overall_results
    df = pivot_df(pd.DataFrame(overall_results), differ_by="lang")
    df.to_csv(out_path, index=False)

meta-llama/Meta-Llama-3-8B-Instruct


neuralmagic/Meta-Llama-3-70B-Instruct-FP8
z-gpt-4o-mini-2024-07-18
mistralai/Mistral-7B-Instruct-v0.2


In [17]:
# Per model: how many languages lack a performance file, and which ones.
for key, value in missing_lang.items():
    if not value:
        continue
    print(key, len(value), value)

meta-llama/Meta-Llama-3-8B-Instruct 1 ['my']
z-gpt-4o-mini-2024-07-18 11 ['fr', 'de', 'hi', 'ja', 'km', 'sw', 'ur', 'yo', 'zu', 'my', 'ug']
mistralai/Mistral-7B-Instruct-v0.2 14 ['ar', 'bn', 'zh-cn', 'fr', 'de', 'hi', 'ja', 'km', 'sw', 'ur', 'yo', 'zu', 'my', 'ug']


In [18]:
# Per model: how many languages lack cached responses, and which ones.
for key, value in missing_query.items():
    if not value:
        continue
    print(key, len(value), value)

z-gpt-4o-mini-2024-07-18 11 ['fr', 'de', 'hi', 'ja', 'km', 'sw', 'ur', 'yo', 'zu', 'my', 'ug']
mistralai/Mistral-7B-Instruct-v0.2 11 ['fr', 'de', 'hi', 'ja', 'km', 'sw', 'ur', 'yo', 'zu', 'my', 'ug']


In [19]:
# Number of languages in LANGUAGES_PARA (displayed as the cell output).
len(LANGUAGES_PARA)

15

In [20]:
from multi_tp.utils import LANGUAGES, COUNTRIES, performance_file_v2_tmpl, pivot_file_by_country_tmpl
# Number of languages in the full LANGUAGES list (displayed as the cell output).
len(LANGUAGES)

107