In [70]:
import pandas as pd

# Load the records-oriented JSON export into the notebook-wide DataFrame `df`.
# Later cells (including get_rank) read this global directly.
df = pd.read_json('data_4072.json', orient='records')

In [71]:
# Show the available columns; get_rank validates str_criteria column names against this index.
df.columns

Index(['eval_name', 'Precision', 'Type', 'T', 'Weight type', 'Architecture',
       'Model', 'fullname', 'Model sha', 'Average ⬆️', 'Hub License', 'Hub ❤️',
       '#Params (B)', 'Available on the hub', 'MoE', 'Flagged',
       'Chat Template', 'CO₂ cost (kg)', 'IFEval Raw', 'IFEval', 'BBH Raw',
       'BBH', 'MATH Lvl 5 Raw', 'MATH Lvl 5', 'GPQA Raw', 'GPQA', 'MUSR Raw',
       'MUSR', 'MMLU-PRO Raw', 'MMLU-PRO', 'Merged', 'Official Providers',
       'Upload To Hub Date', 'Submission Date', 'Generation', 'Base Model'],
      dtype='object')

In [134]:
# Distinct values of the "T" column (model-category icons).
# Bracket access is required here: `df.T` would return the transposed frame instead.
df["T"].unique()

array(['💬', '🟢', '🔶', '🤝', '🟩', '🌸', '❓'], dtype=object)

In [91]:
# Distinct values of the "Type" column (full model-type labels, each prefixed with an icon).
df["Type"].unique()

array(['💬 chat models (RLHF, DPO, IFT, ...)', '🟢 pretrained',
       '🔶 fine-tuned on domain-specific datasets',
       '🤝 base merges and moerges', '🟩 continuously pretrained',
       '🌸 multimodal', '❓ other'], dtype=object)

In [72]:
# TODO: unfinished note — decide how df["Hub License"] should be handled (the condition was never written down).

In [138]:
import math
def get_rank(num_criteria=None, confident=0.5, num_top_model=5, str_criteria=None, data=None):
    """
    Rank the models by a blend of user-selected criteria and the leaderboard average.

    For every model a criteria score is computed as

        (sum of the selected numeric columns
         + MAX_SCORE for every (column, value) pair in str_criteria that the model matches)
        / (len(num_criteria) + len(str_criteria))

    and the final score is

        criteria_score * confident + "Average ⬆️" * (1 - confident)

    When neither num_criteria nor str_criteria is given, the final score is simply "Average ⬆️".

    Note: a row holds a single value per column, so when several alternative values are
    listed for the same column (e.g. many architectures) a model can match at most one of
    them while the divisor still counts all of them; the bonus is diluted accordingly.

    @param num_criteria: list of numeric metric columns that matter most for the task. Each entry
                         must be one of the metrics in `matrices` below. None or [] means no numeric criteria.
    @param confident: number in the range 0-1 (int or float) stating how much the selected criteria
                      should count relative to the leaderboard average.
    @param num_top_model: the number of top models to return.
    @param str_criteria: list of (column, value) tuples. A model whose `column` equals `value`
                         receives a MAX_SCORE bonus for that tuple. None or [] means no such criteria.
    @param data: optional DataFrame to rank. Defaults to the notebook-level `df`.

    @return: pandas Series with the `eval_name` of the top models, best first
             (the index keeps the row labels of the input frame).
    @raise AssertionError: if any argument fails validation.
    """
    matrices = ["MMLU-PRO", "MUSR", "GPQA", "MATH Lvl 5", "BBH", "IFEval", "CO₂ cost (kg)", "Flagged", "MoE", "#Params (B)", "Hub ❤️"]
    MAX_SCORE = 100  # bonus added for every matched (column, value) pair

    # Build fresh lists per call instead of sharing mutable default arguments.
    num_criteria = [] if num_criteria is None else num_criteria
    str_criteria = [] if str_criteria is None else str_criteria
    # Fall back to the notebook-level frame only when the caller does not supply one.
    source_df = df if data is None else data

    assert isinstance(num_criteria, list), "criteria should be a list"
    for c in num_criteria:
        assert c in matrices, f"criteria {c} is not allowed. Note that it can only take one of the following values: {matrices}"
    # Accept ints as well so the documented endpoints 0 and 1 are usable; bool is excluded on purpose.
    assert isinstance(confident, (int, float)) and not isinstance(confident, bool), "confident should be a number (int or float)"
    assert 0 <= confident <= 1, "confident should be between 0 and 1"

    assert isinstance(str_criteria, list), "str_criteria should be a list"
    for c in str_criteria:
        assert isinstance(c, tuple), "Entry of the str_criteria should be a tuple"
        assert len(c) == 2, "Entry of the str_criteria should be a (column, value) pair"
        assert c[0] in source_df.columns, f"criteria {c[0]} is not allowed. Note that it can only take one of the following values: {source_df.columns}"
        assert c[1] in source_df[c[0]].unique(), f"criteria {c[1]} is not allowed for {c[0]}. Note that it can only take one of the following values: {source_df[c[0]].unique()}"

    ranked = source_df.copy()
    total_criteria = len(num_criteria) + len(str_criteria)

    if total_criteria == 0:
        # Nothing selected: rank purely by the leaderboard average.
        ranked["new_score"] = ranked["Average ⬆️"]
    else:
        # Start from a float column so the accumulated metrics keep a consistent dtype.
        ranked["new_score"] = 0.0
        for c in num_criteria:
            ranked["new_score"] += ranked[c]

        for column, value in str_criteria:
            ranked.loc[ranked[column] == value, "new_score"] += MAX_SCORE

        # Average over all criteria, then blend with the leaderboard average.
        ranked["new_score"] = ranked["new_score"] / total_criteria
        ranked["new_score"] = ranked["new_score"] * confident + ranked["Average ⬆️"] * (1 - confident)

    ranked = ranked.sort_values(by=["new_score"], ascending=False)

    # Positional slice: the index still carries the original row labels after sorting.
    return ranked["eval_name"].iloc[:num_top_model]





    


    

### Ranking for the tech industry

In [140]:
# Tech industry: score on parameter count plus a bonus for any of the listed architectures.
tech_cri = ["#Params (B)"]

# One ("Architecture", name) pair per architecture that earns the match bonus.
tech_industry_models = [
    ("Architecture", arch_name)
    for arch_name in (
        "GPTNeoXForCausalLM",
        "LlamaForCausalLM",
        "Qwen2ForCausalLM",
        "Qwen2MoeForCausalLM",
        "T5ForConditionalGeneration",
        "CohereForCausalLM",
        "GPTJForCausalLM",
    )
]
print(
    get_rank(
        tech_cri,
        confident=0.5,
        num_top_model=5,
        str_criteria=tech_industry_models,
    )
)

1040        MaziyarPanahi_calme-3.2-instruct-78b_bfloat16
1036        MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
2219              dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
1030             MaziyarPanahi_calme-2.4-rys-78b_bfloat16
2519    huihui-ai_Qwen2.5-72B-Instruct-abliterated_bfl...
Name: eval_name, dtype: object


### Ranking for the legal industry

In [141]:
# Legal industry: MMLU-PRO is the only selected metric, blended 50/50 with the leaderboard average.
legal_cri = ["MMLU-PRO"]
print(
    get_rank(
        legal_cri,
        confident=0.5,
        num_top_model=5,
    )
)

1040    MaziyarPanahi_calme-3.2-instruct-78b_bfloat16
1036    MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
2219          dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
1030         MaziyarPanahi_calme-2.4-rys-78b_bfloat16
3267         newsbang_Homer-v1.0-Qwen2.5-72B_bfloat16
Name: eval_name, dtype: object


### Ranking for the manufacturing industry

In [148]:
# Numeric metrics summed into the manufacturing score.
manufac_num_cri = ["MMLU-PRO", "#Params (B)"]

# One ("Architecture", name) pair per architecture that earns the match bonus.
best_architecture_for_manufacturing = [
    ("Architecture", arch_name)
    for arch_name in (
        "LlamaForCausalLM",
        "GPTJForCausalLM",
        "CohereForCausalLM",
        "T5ForConditionalGeneration",
        "RwkvForCausalLM",
    )
]
# Final string criteria: every architecture pair above plus the domain-specific fine-tuned model type.
find_tune = [("Type", "🔶 fine-tuned on domain-specific datasets")]
manu_str_cri = [*best_architecture_for_manufacturing, *find_tune]

print(
    get_rank(
        manufac_num_cri,
        confident=0.5,
        num_top_model=5,
        str_criteria=manu_str_cri,
    )
)

2250                   dnhkng_RYS-Llama3.1-Large_bfloat16
1696    VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instr...
1040        MaziyarPanahi_calme-3.2-instruct-78b_bfloat16
1426                   Sao10K_70B-L3.3-Cirrus-x1_bfloat16
218           Daemontatox_Llama3.3-70B-CogniLink_bfloat16
Name: eval_name, dtype: object


### Customer service

In [149]:
# Customer service: instruction following and MMLU-PRO are the selected metrics.
cs_num_cri = ["IFEval", "MMLU-PRO"]
# NOTE(review): "T" and "Type" appear to encode the same chat-model category (icon vs. full label),
# so a chat model would collect both bonuses — confirm this double weighting is intended.
cs_str_cri = [
    ("T", "💬"),
    ("Type", "💬 chat models (RLHF, DPO, IFT, ...)"),
]

print(
    get_rank(
        cs_num_cri,
        confident=0.5,
        num_top_model=5,
        str_criteria=cs_str_cri,
    )
)


1036    MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
2219          dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
1030         MaziyarPanahi_calme-2.4-rys-78b_bfloat16
1314               Qwen_Qwen2.5-72B-Instruct_bfloat16
1013     MaziyarPanahi_calme-2.1-qwen2.5-72b_bfloat16
Name: eval_name, dtype: object
