# Analysis

## Postprocessing

### Setup combined table


In [None]:
# Experiment dimensions. These four lists are not referenced by the code
# visible in this section; presumably they document the benchmark setup.
engines = ["llamafile"]
models = [
    "mistral",
    "dolphin-2.6-mistral",
    "dolphin-2.6-phi-2",
    "mixtral",
    "llama3",
    "phi-3",
]
templates = [
    "LIT",
    "controlled",
    "controlled_md",
    "via_description",
    "via_description_1_shot",
]
datasets = [
    "codenet",
    "avatar",
    "evalplus",
    "basicbench",
    "bithacks",
]

# Column names of the postprocessing step flags selected from the combined
# table in the statistics below.
pp_steps = [
    "MARKDOWN_CODEBLOCKS",
    "NO_MARKDOWN",
    "MISSING_MD_START",
    "MISSING_MD_END",
    "CODE_HEURISTIC",
    "NATURAL_TEXT",
    "ESCAPED_UNDERSCORES",
    "MD_IN_CODE",
]
# Column names of the postprocessing error flags selected alongside the steps.
pp_errors = [
    "NESTED",
    "CODE_FENCE_IN_CODE",
]

In [None]:
import os
from pathlib import Path

def list_json_files(directory: str | Path):
    json_files = []
    for root, _, files in os.walk(directory):
        for filename in files:
            if filename.endswith(".json"):
                json_file = os.path.join(root, filename)
                json_files.append(json_file)
    return json_files


In [None]:
import os
import json
import pandas as pd
from pathlib import Path
from codetransbench.utils.config import load_config
# Path of the benchmark configuration file. Setting this to a falsy value
# ("" or None) makes load_config() run without an explicit path.
path_to_config = "../codetransbenchmark/config/config.yaml"
config = load_config(path_to_config) if path_to_config else load_config()

# Directory whose JSON postprocessing reports are analysed below.
pp_reports_dir = config.postprocessing_reports_dir

def build_combined_postprocessing_table(pp_reports_dir: str | Path):
    """Merge all postprocessing reports below *pp_reports_dir* into one table.

    Report files are expected at
    ``<pp_reports_dir>/<engine>_<model>_<template>_<attempt>/<...>_<dataset>.json``.
    Each report maps a file id of the form
    ``<prefix>_<source_pl>_<target_pl>_<filename>`` (the first token is not
    used) to a dict with the entries ``pp_steps`` and ``pp_errors``.

    Args:
        pp_reports_dir: Root directory searched recursively for ``.json``
            report files.

    Returns:
        A DataFrame with one row per file id and a fresh 0..n-1 index. The
        columns are ``file_id``, ``source_pl``, ``target_pl``, ``filename``,
        one column per step/error name that occurred in any report (1 where
        the file was flagged, missing otherwise) and the run metadata
        ``engine``, ``model``, ``template``, ``attempt`` and ``dataset``.

    Raises:
        ValueError: If no report with a valid run id was found.
    """
    per_report_frames = []

    for report_f in list_json_files(pp_reports_dir):
        report_path = Path(report_f)

        # postprocessing_reports/engine_model_template_attempt/jsonfilename
        run_info = report_path.parent.name.split("_")
        # Skip directories that are not complete run ids. A run id needs
        # engine, model, template and a numeric attempt suffix; checking the
        # suffix also covers multi-word templates without an attempt
        # (e.g. "..._controlled_md"), which would otherwise crash in int().
        if len(run_info) < 4 or not run_info[-1].isdecimal():
            continue
        engine, model = run_info[0], run_info[1]
        # Template names may themselves contain underscores.
        template = "_".join(run_info[2:-1])
        attempt = int(run_info[-1])
        # directory/pp_report_dataset.json
        dataset = report_path.stem.split("_")[-1]

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(report_f, "r", encoding="utf-8") as f:
            data = json.load(f)

        rows = []
        for file_id, report in data.items():
            info_list = file_id.split("_")
            row = {
                "file_id": file_id,
                "source_pl": info_list[1],
                "target_pl": info_list[2],
                "filename": "_".join(info_list[3:]),
            }
            # Flag every reported step and error with 1; names that were not
            # reported for this file stay absent and become missing values.
            for flag in (*report["pp_steps"], *report["pp_errors"]):
                row[flag] = 1
            rows.append(row)

        df = pd.json_normalize(rows)
        df["engine"] = engine
        df["model"] = model
        df["template"] = template
        df["attempt"] = attempt
        df["dataset"] = dataset
        per_report_frames.append(df)

    if not per_report_frames:
        # pd.concat would fail with an unspecific "No objects to concatenate".
        raise ValueError(
            f"No postprocessing reports with a valid run id found under {pp_reports_dir}"
        )

    return pd.concat(per_report_frames, ignore_index=True)

combined_table = build_combined_postprocessing_table(pp_reports_dir)
# Preview up to 10 random rows. Capping at the table length avoids the
# ValueError that DataFrame.sample raises when more rows are requested than
# the table contains.
ex = combined_table.sample(n=min(10, len(combined_table)))
ex


### General statistics

In [None]:
# Per model in the sample: number of rows flagged with each step/error
# (non-missing values) and the number of sampled files.
ex[["model", *pp_steps, *pp_errors, "filename"]].groupby(["model"]).count()

In [None]:
def create_count_table(grouping_features: str | list[str], filter_query: str | None = None) -> pd.DataFrame:
    """Count flagged postprocessing steps and errors per group.

    The combined table is rebuilt from ``pp_reports_dir`` on every call.

    Args:
        grouping_features: Column name or list of column names to group by.
        filter_query: Optional ``DataFrame.query`` expression applied to the
            combined table before grouping.

    Returns:
        A table indexed by the grouping features with one column per entry
        of ``pp_steps`` and ``pp_errors`` (number of flagged files) and a
        ``Total Files`` column (number of files in the group). The last row,
        labelled ``"Total"`` (padded with ``"-"`` for multi-level indexes),
        holds the column sums.
    """
    combined_table = build_combined_postprocessing_table(pp_reports_dir)
    if filter_query:
        combined_table = combined_table.query(filter_query)
    if not isinstance(grouping_features, list):
        grouping_features = [grouping_features]

    # A step/error that never occurred in any report has no column in the
    # combined table. Add it as an all-missing column so it is counted as 0
    # instead of raising a KeyError. Grouping columns are still selected
    # strictly, so a misspelled feature name keeps failing loudly.
    absent_flags = [col for col in pp_steps + pp_errors if col not in combined_table.columns]
    if absent_flags:
        combined_table = combined_table.reindex(columns=[*combined_table.columns, *absent_flags])

    selected_columns = grouping_features + pp_steps + pp_errors + ["filename"]
    count_table = combined_table[selected_columns].groupby(grouping_features).count()
    total = count_table.sum()
    if len(grouping_features) == 1:
        total_index = "Total"
    else:
        # A MultiIndex row label needs one entry per index level.
        total_index = tuple(["Total"] + ["-"] * (len(grouping_features) - 1))
    count_table.loc[total_index, count_table.columns] = total
    count_table.rename({"filename": "Total Files"}, axis="columns", inplace=True)
    # Appending the total row can change the column dtypes; convert back to
    # the best fitting (integer) dtypes.
    count_table = count_table.convert_dtypes()
    return count_table

def create_percentage_table(count_table: pd.DataFrame)-> pd.DataFrame:
    """Express the step/error counts as a percentage of each row's ``Total Files``.

    Only the ``pp_steps`` and ``pp_errors`` columns are kept in the result.
    """
    flag_columns = [*pp_steps, *pp_errors]
    shares = count_table[flag_columns].div(count_table["Total Files"], axis=0)
    return shares.mul(100)


In [None]:
# Counts per model, restricted to first attempts with the 'controlled_md'
# template and excluding the 'dolphincoder-starcoder2-15b' model.
count_per_model = create_count_table(
    "model",
    filter_query="attempt == 1 and template == 'controlled_md' and model != 'dolphincoder-starcoder2-15b'",
)
count_per_model

In [None]:
# not interesting
# count_per_model.plot(kind='bar')

In [None]:
from matplotlib.ticker import PercentFormatter
# Select matplotlib explicitly as the backend used by DataFrame.plot.
pd.options.plotting.backend = "matplotlib"
import matplotlib.pyplot as plt
# Style sheet applied to all following figures; the name is resolved by
# matplotlib -- presumably a .mplstyle file next to this notebook (TODO confirm).
plt.style.use("thesis_style_sheet_big.mplstyle")
#plt.style.use("default")

def make_bar_plot(graph_data: pd.DataFrame, percentage=True, title: str | None = None, xlabel=None, ylabel=None, bar_labels=False, xrot=0, ylim=100, label_col=5):
    """Plot *graph_data* as a bar chart with the legend placed above the axes.

    Args:
        graph_data: Table to plot via ``DataFrame.plot(kind='bar')``.
        percentage: If true, the y axis starts at 0, ends at *ylim* and uses
            percent tick labels.
        title, xlabel, ylabel: Passed through to ``DataFrame.plot``.
        bar_labels: If true, every bar is annotated with its value using two
            decimal places.
        xrot: Rotation of the x tick labels.
        ylim: Upper y limit used when *percentage* is true; ``None`` leaves
            the upper limit to matplotlib.
        label_col: Number of legend columns.
    """
    axes = graph_data.plot(kind='bar', title=title, xlabel=xlabel, ylabel=ylabel)

    # Legend strip anchored just above the plot area, expanded to its width.
    legend_options = dict(
        bbox_to_anchor=(0, 1.02, 1, 0.2),
        loc="lower left",
        mode="expand",
        borderaxespad=0,
        ncol=label_col,
    )
    axes.legend(**legend_options)
    axes.tick_params(axis='x', labelrotation=xrot)

    if percentage:
        axes.set_ylim(0, ylim)
        axes.yaxis.set_major_formatter(PercentFormatter())

    if not bar_labels:
        return
    for bar_group in axes.containers:
        axes.bar_label(bar_group, fmt='%.2f')

In [None]:
# Percentages per model with display names for the plot; mapping keys that
# do not occur in the index are ignored by rename.
percentage_per_model = create_percentage_table(count_per_model).rename(
    index={
        "codestral": "Codestral",
        "dolphin-2.6-mistral": "D-Mistral",
        "dolphin-2.6-phi-2": "D-Phi-2",
        "dolphin-2.7-mixtral": "D-Mixtral",
        "llama3-8b": "Llama 3",
        "phi3": "Phi-3",
        "mistral": "Mistral",
        "controlled_md": "MD",
        "via_description": "VT",
        "mixtral": "Mixtral",
        "controlled": "RM",
    }
)
# percentage_per_model.transpose().plot(kind="pie", subplots=True, figsize=(40, 20), legend=True)
# Plot every step/error except the two excluded columns.
make_bar_plot(
    percentage_per_model.filter(
        [col for col in pp_steps + pp_errors if col not in ['MARKDOWN_CODEBLOCKS', 'MISSING_MD_START']]
    ),
    xrot=90,
    ylim=None,
    label_col=3,
    xlabel="Model",
)

In [None]:
# Same percentages with steps/errors as rows and models as columns.
percentage_per_model.T

In [None]:
# Number of rows flagged with each step/error in the whole combined table.
combined_table[[*pp_steps, *pp_errors]].count()

In [None]:
# Step/error counts per prompt template over all rows (no filter).
count_per_template = create_count_table(grouping_features="template")
count_per_template

In [None]:
# The template counts as a percentage of each template's total files.
percentage_per_template = create_percentage_table(count_table=count_per_template)
percentage_per_template  # .plot(kind="pie", subplots=True, figsize=(40, 30))

In [None]:
# Per-template statistics restricted to first attempts. This rebinds
# count_per_template and percentage_per_template.
count_per_template = create_count_table(grouping_features="template", filter_query="attempt == 1")
percentage_per_template = create_percentage_table(count_table=count_per_template)
percentage_per_template  # .plot(kind="pie", subplots=True, figsize=(40, 30))

In [None]:
# Counts and percentages per target programming language over all rows;
# only the count table is displayed here.
count_per_target_pl = create_count_table(grouping_features="target_pl")
percentage_per_target_pl = create_percentage_table(count_table=count_per_target_pl)
count_per_target_pl

### Per model: per Template


In [None]:
# Counts and percentages per (model, template) pair, first attempts only.
count_model_template = create_count_table(
    grouping_features=["model", "template"],
    filter_query="attempt == 1",
)
percentage_model_template = create_percentage_table(count_table=count_model_template)
count_model_template

In [None]:
# Display the per-(model, template) percentages of first attempts.
percentage_model_template

### Per model: Per target language


In [None]:
# Counts and percentages per (model, target language) pair over all
# templates and attempts. create_count_table performs the grouped count
# itself, so no separate groupby on combined_table is evaluated here (its
# result would be neither stored nor displayed).
count_model_target_pl = create_count_table(["model", "target_pl"])
percentage_model_target_pl = create_percentage_table(count_model_target_pl)
count_model_target_pl

In [None]:
# Display the per-(model, target language) percentages.
percentage_model_target_pl


### Per template: per model 
This is the same information as in the section "Per model: per Template" above, so it is not repeated here.


### Per attempt

In [None]:
# Step/error counts per attempt number over all rows.
count_per_attempt = create_count_table(grouping_features="attempt")
count_per_attempt

In [None]:
# The attempt counts as a percentage of each attempt's total files.
percentage_per_attempt = create_percentage_table(count_table=count_per_attempt)
percentage_per_attempt

In [None]:
# Percentages per (model, attempt) pair over all templates.
create_percentage_table(
    count_table=create_count_table(grouping_features=["model", "attempt"])
)

In [None]:
import os
import json
from collections import Counter


# Print a plain-text summary (file, step and error totals plus per-name
# counts) for every postprocessing report.
pp_reports_dir = config.postprocessing_reports_dir

report_files = list_json_files(pp_reports_dir)
print(report_files)

for report_f in report_files:
    # postprocessing_reports/engine_model_template_attempt/jsonfilename
    run_id = os.path.basename(os.path.dirname(report_f))
    run_info = run_id.split("_")
    # Skip directories that are not complete run ids. A run id needs engine,
    # model, template and a numeric attempt suffix; checking the suffix also
    # covers multi-word templates without an attempt, which would otherwise
    # crash in int().
    if len(run_info) < 4 or not run_info[-1].isdecimal():
        continue
    engine = run_info[0]
    model = run_info[1]
    # Template names may themselves contain underscores.
    template = "_".join(run_info[2:-1])
    attempt = int(run_info[-1])
    # directory/pp_report_dataset.json
    dataset = os.path.splitext(os.path.basename(report_f))[0].split("_")[-1]

    print(engine, model, template, attempt, dataset)

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(report_f, 'r', encoding="utf-8") as f:
        data = json.load(f)

    num_files = len(data)
    # Counter keeps first-seen order, so the per-name output order matches
    # the order in which names occur in the report.
    pp_steps_statistic = Counter(
        step for report in data.values() for step in report['pp_steps']
    )
    pp_errors_statistic = Counter(
        error for report in data.values() for error in report['pp_errors']
    )
    num_errors = sum(pp_errors_statistic.values())
    total_steps = sum(pp_steps_statistic.values())

    print("Total files:", num_files)
    print("Total errors: ", num_errors)
    print("Total steps:", total_steps)

    for error, count in pp_errors_statistic.items():
        print(error, count)

    for step, count in pp_steps_statistic.items():
        print(step, count)