# Analysis Notebook

In [3]:
import pandas as pd
from pathlib import Path

In [4]:
# Directory containing the leaderboard CSV files that the cells below load.
# The path is relative, so it is resolved against the kernel's working directory.
DATA_DIR = Path("../plots")

In [50]:
# Load the full leaderboard and rank it once by the "Avg_Multilingual" column;
# the ten highest-ranked rows supply both the model names and their model types.
leaderboard = pd.read_csv(DATA_DIR / "Aya_RM_Benchmarks - full.csv")
ranked_top = leaderboard.sort_values(by="Avg_Multilingual", ascending=False).head(10)
top_ten = ranked_top["Model"].to_list()
model_type = ranked_top["Model_Type"].to_list()

In [41]:
def sort_dataframe_by_order(df, column_name, order_list):
    """
    Keep the rows whose `column_name` value appears in `order_list`, arrange them
    in the order given by `order_list`, and rename `column_name` to "Model".

    The input DataFrame is not modified. Rows sharing the same value keep their
    original relative order (the sort is stable). If a value occurs more than once
    in `order_list`, its last position is the one used for ordering.

    Parameters:
    df (pd.DataFrame): Input DataFrame
    column_name (str): Name of the column to filter and sort by
    order_list (list): List of values defining the desired order

    Returns:
    pd.DataFrame: Filtered and sorted DataFrame with a fresh 0..n-1 index and
        `column_name` renamed to "Model"; all other columns are passed through.
    """
    # Position of each value in the requested order (a later duplicate overrides an earlier one).
    position = {value: rank for rank, value in enumerate(order_list)}

    selected = df[df[column_name].isin(order_list)]

    # Sort through a key function instead of a scratch column: a temporary
    # 'sort_order' column would overwrite, and then drop, any real column of
    # that name already present in `df`.
    ordered = selected.sort_values(
        by=column_name,
        key=lambda values: values.map(position),
        kind="stable",
    )
    return ordered.reset_index(drop=True).rename(columns={column_name: "Model"})

In [46]:
# CSV file (inside DATA_DIR) that holds the leaderboard for each category.
CATEGORY_FILES = {
    "chat": "chat_leaderboard.csv",
    "chat_hard": "chat-hard_leaderboard.csv",
    "safety": "safety_leaderboard.csv",
    "reasoning": "reasoning_leaderboard.csv",
}

# One frame per category, restricted to the top-ten models and arranged in the
# same order as `top_ten`; the "Unnamed: 0" column is the one matched against it.
results_category = {
    category: sort_dataframe_by_order(
        pd.read_csv(DATA_DIR / file_name),
        column_name="Unnamed: 0",
        order_list=top_ten,
    )
    for category, file_name in CATEGORY_FILES.items()
}

In [58]:
# Columns of the summary table: the top-ten models, their types, and for every
# category the difference between "Avg" and "eng_Latn" (each scaled by 100).
delta_df = {
    "Model": top_ten,
    "Model_Type": model_type,
}
for category, results_df in results_category.items():
    # Compute the delta without writing back into `results_df`. Scaling the
    # shared frames in place made this cell non-idempotent: every re-run
    # multiplied "Avg" and "eng_Latn" by 100 again.
    delta_by_model = results_df.assign(
        delta=results_df["Avg"] * 100 - results_df["eng_Latn"] * 100
    ).set_index("Model")["delta"]
    # Align on the model name rather than on row position, so a model that is
    # absent from a category file yields NaN instead of a length mismatch.
    delta_df[category] = delta_by_model.reindex(top_ten).to_list()

In [59]:
# Assemble the collected columns into one table (one row per top-ten model,
# one delta column per category) and display it as the cell output.
pd.DataFrame(delta_df)

Unnamed: 0,Model,Model_Type,chat,chat_hard,safety,reasoning
0,openai/gpt-4-turbo-2024-04-09,Generative RM,-1.546242,-3.545359,-3.218002,0.844814
1,openai/gpt-4o-2024-05-13,Generative RM,-2.760678,-5.991787,-4.148092,-2.834079
2,google/gemma-2-9b-it,Generative RM,-0.581029,-6.465797,-4.774244,-0.624124
3,LxzGordon/URM-LLaMa-3.1-8B,Sequence Classifier,,,,
4,meta-llama/Meta-Llama-3.1-70B-Instruct,Generative RM,-1.815071,-11.615303,-8.513508,-2.865601
5,meta-llama/Meta-Llama-3-70B-Instruct,Generative RM,-2.388924,-9.046439,2.899366,-2.099771
6,CIR-AMS/BTRM_Qwen2_7b_0613,Sequence Classifier,,,,
7,cohere/command-r-plus-08-2024,Generative RM,,,,
8,allenai/tulu-2-dpo-13b,DPO,-20.385063,-2.337985,-11.46082,1.036116
9,cohere/c4ai-aya-23-35b,Generative RM,,,,
