In [1]:
import pandas as pd

df = pd.read_json('data_4072.json', orient='records')

In [2]:
df.columns

Index(['eval_name', 'Precision', 'Type', 'T', 'Weight type', 'Architecture',
       'Model', 'fullname', 'Model sha', 'Average ⬆️', 'Hub License', 'Hub ❤️',
       '#Params (B)', 'Available on the hub', 'MoE', 'Flagged',
       'Chat Template', 'CO₂ cost (kg)', 'IFEval Raw', 'IFEval', 'BBH Raw',
       'BBH', 'MATH Lvl 5 Raw', 'MATH Lvl 5', 'GPQA Raw', 'GPQA', 'MUSR Raw',
       'MUSR', 'MMLU-PRO Raw', 'MMLU-PRO', 'Merged', 'Official Providers',
       'Upload To Hub Date', 'Submission Date', 'Generation', 'Base Model'],
      dtype='object')

In [3]:
df['Available on the hub']

0        True
1        True
2        True
3        True
4        True
        ...  
4067    False
4068    False
4069     True
4070     True
4071     True
Name: Available on the hub, Length: 4072, dtype: bool

In [4]:
df["T"].unique()

array(['💬', '🟢', '🔶', '🤝', '🟩', '🌸', '❓'], dtype=object)

In [5]:
df["Type"].unique()

array(['💬 chat models (RLHF, DPO, IFT, ...)', '🟢 pretrained',
       '🔶 fine-tuned on domain-specific datasets',
       '🤝 base merges and moerges', '🟩 continuously pretrained',
       '🌸 multimodal', '❓ other'], dtype=object)

In [6]:
#in df["Hub License"] if it is 

In [31]:

def get_rank(num_criteria = [], confident=0.5, num_top_model=5, str_criteria = [], range_matrices = {},rank_reversed = False):
    """
    The function will rank the models based on selected criteria. If the criteria is empty, we will take the average of all the metrics.
    @param criteria: The metrics that is most important for this specific tasks. If it is None, we will take the average of these matrics.
    @param confident: how much confident user think num_criteria and str_criteria are correct in range of 0-1.
    @param num_top_model: The number of top models we want to show.
    @param str_criteria: The criteria 
    @param range_matrices: The range of the criteria that we want to consider, where the key are matrices and 
    @param rank_reversed: If True, the rank will be reversed.

    @return: The rank of the models.
    """
    assert type(num_criteria) == list, "criteria should be a list"
    # matrices = ["MMLU-PRO", "MUSR", "GPQA", "MATH Lvl 5", "BBH", "IFEval", "CO₂ cost (kg)", "Flagged", "MoE", "#Params (B)", "Hub ❤️"]
    matrices = ["MMLU-PRO", "MUSR", "GPQA", "MATH Lvl 5", "BBH", "IFEval", "CO₂ cost (kg)", "Flagged", "MoE", "#Params (B)", "Hub ❤️"]
    for c in num_criteria:
        assert c in matrices, f"criteria {c} is not allowed. Note that it can only take one of the following values: ['MMLU-PRO', 'MUSR', 'GPQA', 'MATH Lvl 5', 'BBH', 'IFEval', 'CO₂ cost (kg)', 'Flagged', 'MoE', '#Params (B)', 'Hub ❤️']" 
    assert type(confident) == float, "confident should be a float"
    assert 0 <= confident <= 1, "confident should be between 0 and 1"
    assert isinstance(range_matrices, dict), "range_matrices should be a dict"
    assert type(rank_reversed) == bool, "rank_reversed should be a bool"
 
    assert type(str_criteria) == list, "str_criteria should be a list"
    for c in str_criteria:
        assert type(c) == tuple, "Entry of the str_criteria should be a tuple"
        assert c[0] in df.columns, f"criteria {c[0]} is not allowed. Note that it can only take one of the following values: {df.columns}"
        assert c[1] in df[c[0]].unique(), f"criteria {c[1]} is not allowed for {c[0]}. Note that it can only take one of the following values: {df[c[0]].unique()}"

    for matric, range in range_matrices.items():
        assert matric in matrices, f"matric {matric} is not allowed. Note that it can only take one of the following values: {matrices}"
        assert isinstance(range, tuple), "Entry of the range_matrices should be a tuple"
        assert len(range) == 2, "Entry of the range_matrices should be a tuple of length 2"
        assert (isinstance(range[0], float) or isinstance(range[0], int)) and (isinstance(range[1], float) or isinstance(range[1], int)), "Entry of the range_matrices should be a tuple of number"
        assert range[0] < range[1], "Entry of the range_matrices should be a tuple of floats where the first element is smaller than the second element"
    MAX_SCORE = 100
    

    this_df = df.copy()
    this_df["new_score"] = 0    

    #add more condition
    if len(num_criteria) == 0: 
        this_df["new_score"] = df["Average ⬆️"]
    #add a new column new_score that get the average of the selected criteria in df 
    else:
        
        for c in num_criteria:
            this_df["new_score"] += this_df[c]
        
        for s_tuple in str_criteria:
        #     if s_tuple[1] == this_df[s_tuple[0]]:
        #         this_df["str_score"] += MAX_SCORE
            this_df.loc[this_df[s_tuple[0]] == s_tuple[1], "new_score"] += MAX_SCORE
        this_df["new_score"] = this_df["new_score"] / (len(num_criteria) + len(str_criteria))
        this_df["new_score"] = this_df["new_score"] * confident + df["Average ⬆️"] * (1 - confident)
    
    for matric, range in range_matrices.items():
        this_df.loc[this_df[matric] < range[0], "new_score"] = 0
        this_df.loc[this_df[matric] > range[1], "new_score"] = 0


    if rank_reversed:
        this_df = this_df.sort_values(by=["new_score"], ascending=True)
    else:
        this_df = this_df.sort_values(by=["new_score"], ascending=False)
    
    

    
    return this_df["eval_name"][:num_top_model]





    


    

### Ranking of tech Industry

In [46]:
#tech industry
tech_cri = ["#Params (B)", "MUSR"]
tech_industry_models = [("Architecture", "GPTNeoXForCausalLM"), ("Architecture", "LlamaForCausalLM"), ("Architecture", "Qwen2ForCausalLM"), ("Architecture", "Qwen2MoeForCausalLM"), ("Architecture","T5ForConditionalGeneration"), ("Architecture", "CohereForCausalLM"), ("Architecture", "GPTJForCausalLM")]
print(get_rank(tech_cri, confident=0.5, num_top_model=3, str_criteria = tech_industry_models))

1040    MaziyarPanahi_calme-3.2-instruct-78b_bfloat16
1036    MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
2219          dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
Name: eval_name, dtype: object


## 20% confident - tech 

In [38]:
print(get_rank(tech_cri, confident=0.2, num_top_model=5, str_criteria = tech_industry_models))

1040        MaziyarPanahi_calme-3.2-instruct-78b_bfloat16
1036        MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
2219              dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
1030             MaziyarPanahi_calme-2.4-rys-78b_bfloat16
2519    huihui-ai_Qwen2.5-72B-Instruct-abliterated_bfl...
Name: eval_name, dtype: object


### Academic

In [44]:
academic_num_cri = ["MUSR", "MATH Lvl 5", "GPQA"]
print(get_rank(academic_num_cri, confident=0.5, num_top_model=3))

1040    MaziyarPanahi_calme-3.2-instruct-78b_bfloat16
2219          dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
1036    MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
Name: eval_name, dtype: object


### 20% confident academic

In [39]:
print(get_rank(academic_num_cri, confident=0.2, num_top_model=5))

1040        MaziyarPanahi_calme-3.2-instruct-78b_bfloat16
2219              dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
1036        MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
1030             MaziyarPanahi_calme-2.4-rys-78b_bfloat16
2519    huihui-ai_Qwen2.5-72B-Instruct-abliterated_bfl...
Name: eval_name, dtype: object


### Ranking of legal industry

In [62]:
#Legal industry
legal_cri = ["MMLU-PRO", "BBH"]
print(get_rank(legal_cri, confident=0.1, num_top_model=5))

1040    MaziyarPanahi_calme-3.2-instruct-78b_bfloat16
1036    MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
2219          dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
1030         MaziyarPanahi_calme-2.4-rys-78b_bfloat16
1314               Qwen_Qwen2.5-72B-Instruct_bfloat16
Name: eval_name, dtype: object


### 20% confident legal


In [63]:
print(get_rank(legal_cri, confident=0.9, num_top_model=5))

1040    MaziyarPanahi_calme-3.2-instruct-78b_bfloat16
1036    MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
1030         MaziyarPanahi_calme-2.4-rys-78b_bfloat16
2219          dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
3267         newsbang_Homer-v1.0-Qwen2.5-72B_bfloat16
Name: eval_name, dtype: object


### Manufacture

In [64]:
manufac_num_cri = ["MMLU-PRO", "#Params (B)", "BBH", "IFEval"]
best_architecture_for_manufacturing = [("Architecture", "LlamaForCausalLM"), ("Architecture", "GPTJForCausalLM"), ("Architecture", "CohereForCausalLM"), 
                                       ("Architecture", "T5ForConditionalGeneration"), ("Architecture", "RwkvForCausalLM")]
find_tune = [("Type", "🔶 fine-tuned on domain-specific datasets")]
manu_str_cri = best_architecture_for_manufacturing + find_tune

print(get_rank(manufac_num_cri, confident=0.5, num_top_model=3, str_criteria=manu_str_cri, range_matrices = {"CO₂ cost (kg)": (0, 8), "#Params (B)": (0, 10)}))

2282       ehristoforu_falcon3-ultraset_float16
3875    unsloth_phi-4-unsloth-bnb-4bit_bfloat16
3874            unsloth_phi-4-bnb-4bit_bfloat16
Name: eval_name, dtype: object


### 20% confident manufacture

In [56]:
print(get_rank(manufac_num_cri, confident=0.1, num_top_model=3, str_criteria=manu_str_cri))

1040    MaziyarPanahi_calme-3.2-instruct-78b_bfloat16
1036    MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
2219          dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
Name: eval_name, dtype: object


### Customer service

In [60]:
cs_num_cri = ["IFEval", "MMLU-PRO"]
cs_str_cri = [("T", "💬"), ("Type", "💬 chat models (RLHF, DPO, IFT, ...)")]

print(get_rank(cs_num_cri, confident=0.8, num_top_model=10, str_criteria=cs_str_cri))


1036    MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
2219          dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
1030         MaziyarPanahi_calme-2.4-rys-78b_bfloat16
1013     MaziyarPanahi_calme-2.1-qwen2.5-72b_bfloat16
1314               Qwen_Qwen2.5-72B-Instruct_bfloat16
1020     MaziyarPanahi_calme-2.2-qwen2.5-72b_bfloat16
3049       meta-llama_Llama-3.3-70B-Instruct_bfloat16
1310               Qwen_Qwen2.5-32B-Instruct_bfloat16
2343           fluently-lm_FluentlyLM-Prinum_bfloat16
3099    mistralai_Mistral-Large-Instruct-2411_float16
Name: eval_name, dtype: object


### customer service - 20% confident

In [61]:

print(get_rank(cs_num_cri, confident=0.2, num_top_model=10, str_criteria=cs_str_cri))

1036    MaziyarPanahi_calme-3.1-instruct-78b_bfloat16
2219          dfurman_CalmeRys-78B-Orpo-v0.1_bfloat16
1030         MaziyarPanahi_calme-2.4-rys-78b_bfloat16
1314               Qwen_Qwen2.5-72B-Instruct_bfloat16
1013     MaziyarPanahi_calme-2.1-qwen2.5-72b_bfloat16
1020     MaziyarPanahi_calme-2.2-qwen2.5-72b_bfloat16
2343           fluently-lm_FluentlyLM-Prinum_bfloat16
2296         ehristoforu_qwen2.5-test-32b-it_bfloat16
1310               Qwen_Qwen2.5-32B-Instruct_bfloat16
3099    mistralai_Mistral-Large-Instruct-2411_float16
Name: eval_name, dtype: object


### Rank by Co2

In [52]:
co2_cri = ["CO₂ cost (kg)"]
print("ranking for co2 with range(10, 10000)")
print(get_rank(co2_cri, confident=1.0, rank_reversed=True, num_top_model=3,range_matrices = {"CO₂ cost (kg)": (10, 10000)}))

print("normal co2 rank")
print(get_rank(co2_cri, confident=1.0, num_top_model=3, rank_reversed=True))


ranking for co2 with range(10, 10000)
0              0-hero_Matter-0.2-7B-DPO_bfloat16
2674    jaspionjader_Kosmos-EVAA-v12-8B_bfloat16
2675     jaspionjader_Kosmos-EVAA-v2-8B_bfloat16
Name: eval_name, dtype: object
normal co2 rank
2436                           gpt2_float16
2191     cpayne1303_llama-43m-beta_bfloat16
2190    cpayne1303_cp2024-instruct_bfloat16
Name: eval_name, dtype: object
