In [None]:
import glob
import pandas as pd
from util.evaluate_helper import get_experiments_md, extract_single_alphabet_answer

# Recompute the `pred` column for every CSV under results/ and write each
# file back to the same path (the files are overwritten in place).
csv_files = glob.glob("results/*.csv")

for file_path in csv_files:
    df = pd.read_csv(file_path)
    # axis=1 -> the helper is called once per row.
    df = df.assign(pred=df.apply(extract_single_alphabet_answer, axis=1))
    df.to_csv(file_path, index=False)
    print(f"Processed {file_path}")

## CLIcK

### Open source models

In [None]:
# CLIcK, open-source models: display name -> result CSV for that model.
dataset = "CLIcK"
csv_path_dict = {
    "Phi-3.5-MoE-instruct": f"results/[{dataset}] Phi-3-5-MoE-instruct.csv",
    "Phi-3.5-mini-instruct": f"results/[{dataset}] Phi-3-5-mini-instruct.csv",
    "Phi-3-mini-128k-instruct-June": f"results/[{dataset}] Phi-3-mini-128k-June.csv",
    "Llama-3.1-8B-Instruct": f"results/[{dataset}] llama-3-1-8b-instruct.csv",
}
# Presumably a markdown summary (going by the helper's name) — confirm in util.evaluate_helper.
report_md = get_experiments_md(dataset, csv_path_dict)
print(report_md)

### Proprietary models

In [None]:
# CLIcK, proprietary models: display name -> result CSV for that model.
dataset = "CLIcK"
csv_path_dict = {
    "GPT-4o": f"results/[{dataset}] gpt-4o-240513.csv",
    "GPT-4o-mini": f"results/[{dataset}] gpt-4o-mini-240718.csv",
    "GPT-4-turbo": f"results/[{dataset}] gpt-4-turbo-240409.csv",
    "GPT-3.5-turbo": f"results/[{dataset}] gpt-35-turbo-230613.csv",
}
# Presumably a markdown summary (going by the helper's name) — confirm in util.evaluate_helper.
report_md = get_experiments_md(dataset, csv_path_dict)
print(report_md)

## HAERAE 1.0

### Open source models

In [None]:
# HAERAE, open-source models: display name -> result CSV for that model.
dataset = "HAERAE"
csv_path_dict = {
    "Phi-3.5-MoE-instruct": f"results/[{dataset}] Phi-3-5-MoE-instruct.csv",
    "Phi-3.5-mini-instruct": f"results/[{dataset}] Phi-3-5-mini-instruct.csv",
    "Phi-3-mini-128k-instruct-June": f"results/[{dataset}] Phi-3-mini-128k-June.csv",
    "Llama-3.1-8B-Instruct": f"results/[{dataset}] llama-3-1-8b-instruct.csv",
}
# Presumably a markdown summary (going by the helper's name) — confirm in util.evaluate_helper.
report_md = get_experiments_md(dataset, csv_path_dict)
print(report_md)

### Proprietary models

In [None]:
# HAERAE, proprietary models: display name -> result CSV for that model.
dataset = "HAERAE"
csv_path_dict = {
    "GPT-4o": f"results/[{dataset}] gpt-4o-240513.csv",
    "GPT-4o-mini": f"results/[{dataset}] gpt-4o-mini-240718.csv",
    "GPT-4-turbo": f"results/[{dataset}] gpt-4-turbo-240409.csv",
    "GPT-3.5-turbo": f"results/[{dataset}] gpt-35-turbo-230613.csv",
}
# Presumably a markdown summary (going by the helper's name) — confirm in util.evaluate_helper.
report_md = get_experiments_md(dataset, csv_path_dict)
print(report_md)

## KMMLU

### Open source models

#### zero-shot

In [None]:
# KMMLU zero-shot, open-source models: display name -> result CSV for that model.
dataset = "KMMLU"
csv_path_dict = {
    "Phi-3.5-MoE-instruct": f"results/[{dataset}] Phi-3-5-MoE-instruct.csv",
    "Phi-3.5-mini-instruct": f"results/[{dataset}] Phi-3-5-mini-instruct.csv",
    "Phi-3-mini-128k-instruct-June": f"results/[{dataset}] Phi-3-mini-128k-June.csv",
    "Llama-3.1-8B-Instruct": f"results/[{dataset}] llama-3-1-8b-instruct.csv",
}
# Presumably a markdown summary (going by the helper's name) — confirm in util.evaluate_helper.
report_md = get_experiments_md(dataset, csv_path_dict)
print(report_md)

#### 5-shot

In [None]:
# KMMLU 5-shot, open-source models: same models as the zero-shot cell, but the
# result files carry the `-{postfix}` suffix before the extension.
dataset = "KMMLU"
postfix = "5shot"
csv_path_dict = {
    "Phi-3.5-MoE-instruct": f"results/[{dataset}] Phi-3-5-MoE-instruct-{postfix}.csv",
    "Phi-3.5-mini-instruct": f"results/[{dataset}] Phi-3-5-mini-instruct-{postfix}.csv",
    "Phi-3-mini-128k-instruct-June": f"results/[{dataset}] Phi-3-mini-128k-June-{postfix}.csv",
    "Llama-3.1-8B-Instruct": f"results/[{dataset}] llama-3-1-8b-instruct-{postfix}.csv",
}
# Presumably a markdown summary (going by the helper's name) — confirm in util.evaluate_helper.
report_md = get_experiments_md(dataset, csv_path_dict)
print(report_md)

### Proprietary models

#### zero-shot

In [None]:
# KMMLU zero-shot, proprietary models: display name -> result CSV for that model.
dataset = "KMMLU"
csv_path_dict = {
    "GPT-4o": f"results/[{dataset}] gpt-4o-240513.csv",
    "GPT-4o-mini": f"results/[{dataset}] gpt-4o-mini-240718.csv",
    "GPT-4-turbo": f"results/[{dataset}] gpt-4-turbo-240409.csv",
    "GPT-3.5-turbo": f"results/[{dataset}] gpt-35-turbo-230613.csv",
}
# Print the result like every other report cell does. A bare last expression
# is shown as the value's repr in Out[], which for a string means quoted text
# with escaped newlines rather than the formatted report.
# NOTE(review): assumes get_experiments_md returns a string — confirm in util.evaluate_helper.
print(get_experiments_md(dataset, csv_path_dict))

#### 5-shot

In [None]:
# KMMLU 5-shot, proprietary models: same models as the zero-shot cell, but the
# result files carry the `-{postfix}` suffix before the extension.
dataset = "KMMLU"
postfix = "5shot"
csv_path_dict = {
    "GPT-4o": f"results/[{dataset}] gpt-4o-240513-{postfix}.csv",
    "GPT-4o-mini": f"results/[{dataset}] gpt-4o-mini-240718-{postfix}.csv",
    "GPT-4-turbo": f"results/[{dataset}] gpt-4-turbo-240409-{postfix}.csv",
    "GPT-3.5-turbo": f"results/[{dataset}] gpt-35-turbo-230613-{postfix}.csv",
}
# Presumably a markdown summary (going by the helper's name) — confirm in util.evaluate_helper.
report_md = get_experiments_md(dataset, csv_path_dict)
print(report_md)

## KMMLU-HARD

### Open source models

#### zero-shot

In [None]:
# KMMLU-HARD zero-shot, open-source models: display name -> result CSV for that model.
dataset = "KMMLU-HARD"
csv_path_dict = {
    "Phi-3.5-MoE-instruct": f"results/[{dataset}] Phi-3-5-MoE-instruct.csv",
    "Phi-3.5-mini-instruct": f"results/[{dataset}] Phi-3-5-mini-instruct.csv",
    "Phi-3-mini-128k-instruct-June": f"results/[{dataset}] Phi-3-mini-128k-June.csv",
    "Llama-3.1-8B-Instruct": f"results/[{dataset}] llama-3-1-8b-instruct.csv",
}
# Presumably a markdown summary (going by the helper's name) — confirm in util.evaluate_helper.
report_md = get_experiments_md(dataset, csv_path_dict)
print(report_md)

#### 5-shot

In [None]:
# KMMLU-HARD 5-shot, open-source models: same models as the zero-shot cell, but
# the result files carry the `-{postfix}` suffix before the extension.
dataset = "KMMLU-HARD"
postfix = "5shot"
csv_path_dict = {
    "Phi-3.5-MoE-instruct": f"results/[{dataset}] Phi-3-5-MoE-instruct-{postfix}.csv",
    "Phi-3.5-mini-instruct": f"results/[{dataset}] Phi-3-5-mini-instruct-{postfix}.csv",
    "Phi-3-mini-128k-instruct-June": f"results/[{dataset}] Phi-3-mini-128k-June-{postfix}.csv",
    "Llama-3.1-8B-Instruct": f"results/[{dataset}] llama-3-1-8b-instruct-{postfix}.csv",
}
# Presumably a markdown summary (going by the helper's name) — confirm in util.evaluate_helper.
report_md = get_experiments_md(dataset, csv_path_dict)
print(report_md)

### Proprietary models

#### zero-shot

In [None]:
# KMMLU-HARD zero-shot, proprietary models: display name -> result CSV for that model.
dataset = "KMMLU-HARD"
csv_path_dict = {
    "GPT-4o": f"results/[{dataset}] gpt-4o-240513.csv",
    "GPT-4o-mini": f"results/[{dataset}] gpt-4o-mini-240718.csv",
    "GPT-4-turbo": f"results/[{dataset}] gpt-4-turbo-240409.csv",
    "GPT-3.5-turbo": f"results/[{dataset}] gpt-35-turbo-230613.csv",
}
# Presumably a markdown summary (going by the helper's name) — confirm in util.evaluate_helper.
report_md = get_experiments_md(dataset, csv_path_dict)
print(report_md)

#### 5-shot

In [None]:
# KMMLU-HARD 5-shot, proprietary models: same models as the zero-shot cell, but
# the result files carry the `-{postfix}` suffix before the extension.
dataset = "KMMLU-HARD"
postfix = "5shot"
csv_path_dict = {
    "GPT-4o": f"results/[{dataset}] gpt-4o-240513-{postfix}.csv",
    "GPT-4o-mini": f"results/[{dataset}] gpt-4o-mini-240718-{postfix}.csv",
    "GPT-4-turbo": f"results/[{dataset}] gpt-4-turbo-240409-{postfix}.csv",
    "GPT-3.5-turbo": f"results/[{dataset}] gpt-35-turbo-230613-{postfix}.csv",
}
# Presumably a markdown summary (going by the helper's name) — confirm in util.evaluate_helper.
report_md = get_experiments_md(dataset, csv_path_dict)
print(report_md)