In [None]:
# MLflow experiment ids whose runs are fetched and tabulated below.
# The trailing comment on each entry names the model the experiment belongs to.
# The list order is the order in which the experiments are queried.
experiment_ids = [
    256,  # ETR
    231,  # Random Forest
    215,  # Elastic Net
    214,  # Ridge
    210,  # LASSO
    257,  # ANN
    258,  # CNN
    144,  # NGB
    140,  # PLS
    136,  # XGBoost
    134,  # SVR
    45,   # GBR
]

In [None]:
# Model identifier -> LaTeX text used for that model in the generated table.
# The keys are compared against the prefix of each MLflow run name (the part
# before the first underscore); the insertion order fixes the order in which
# the models appear in the table.
models = {
    "Ridge": "Ridge",
    "LASSO": r"\gls{lasso}",
    "ElasticNet": r"\gls{enet}",
    "PLS": r"\gls{pls}",
    "SVR": r"\gls{svr}",
    "RandomForest": r"\gls{rf}",
    "NGB": r"\gls{ngboost}",
    "GBR": r"\gls{gbr}",
    "XGB": r"\gls{xgboost}",
    "ExtraTrees": r"\gls{etr}",
    "ANN": r"\gls{ann}",
    "CNN": r"\gls{cnn}",
}

In [None]:
import mlflow
import numpy as np

from pathlib import Path
from lib.reproduction import major_oxides

In [None]:
client = mlflow.tracking.MlflowClient()

# Fetch the runs of every listed experiment, keyed by experiment id.
# Dict order follows the order of `experiment_ids`.
data = {exp_id: client.search_runs(exp_id) for exp_id in experiment_ids}

# Display the runs fetched for the first listed experiment.
data[experiment_ids[0]]

In [None]:
# Metrics read from every tabulated run; the same keys are reused in the
# per-target entries of `results` and in `model_means`.
metric_names = ("rmse", "rmse_cv", "std_dev", "std_dev_cv")

# results[model_name][target] -> {"latex_name": str, "rmse": float, ...}
results = {}

# Walk over every fetched run and collect its metrics per model and target.
for runs in data.values():
    for run in runs:
        params = run.data.params

        # Only runs that logged a "target" parameter are tabulated.
        if "target" not in params:
            continue

        # The model name is the part of the run name before the first underscore.
        model_name = run.data.tags["mlflow.runName"].split("_")[0]
        # Fall back to the raw model name when no LaTeX alias is registered.
        latex_name = models.get(model_name, model_name)
        target = params["target"]

        # A run lacking any of the metrics cannot fill a table row; report it
        # and move on instead of aborting the whole aggregation with KeyError.
        logged = run.data.metrics
        missing = [name for name in metric_names if name not in logged]
        if missing:
            print(f"Skipping {model_name} - {target}: missing metrics {missing}")
            continue

        rmse = logged["rmse"]
        rmse_cv = logged["rmse_cv"]
        std_dev = logged["std_dev"]
        std_dev_cv = logged["std_dev_cv"]

        print(f"{model_name} - {target}, RMSE: {rmse}, RMSE CV: {rmse_cv}, STD DEV: {std_dev}, STD DEV CV: {std_dev_cv}")

        # When several runs share a model/target pair, the entry stored last wins.
        results.setdefault(model_name, {})[target] = {
            "latex_name": latex_name,
            "rmse": rmse,
            "rmse_cv": rmse_cv,
            "std_dev": std_dev,
            "std_dev_cv": std_dev_cv,
        }

# Per-model mean of each metric over the targets kept in `results`.
# The means are derived from `results` (one entry per model/target) rather than
# from every run seen, so they always agree with the per-target values even
# when a model/target pair was logged by more than one run.
model_means = {
    model_name: {
        metric: np.mean([entry[metric] for entry in per_target.values()])
        for metric in metric_names
    }
    for model_name, per_target in results.items()
}

In [None]:
# Build the LaTeX table as a list of fragments that is joined once at the end.
# Models are laid out side by side in groups of three, matching the fixed
# column specification of the tabular environment (one label column plus
# three groups of four metric columns).
group_size = 3
metric_keys = ("rmse", "rmse_cv", "std_dev", "std_dev_cv")
row_end = " \\\\\n"
rule = "\\hline\n"
no_data_cells = " & - & - & - & -"
metric_header_cells = (
    " & \\multicolumn{1}{c}{RMSEP}"
    " & \\multicolumn{1}{c}{RMSECV}"
    " & \\multicolumn{1}{c}{Std. dev.}"
    " & \\multicolumn{1}{c}{Std. dev. CV}"
)

fragments = [
    "\\begin{table*}[]\n",
    "\\centering\n",
    "\\resizebox{1\\textwidth}{!}{%\n",
    "\\begin{tabular}{l|cccc|cccc|cccc}\n",
]

model_keys = list(models.keys())

for start in range(0, len(model_keys), group_size):
    chunk = model_keys[start:start + group_size]

    # Header row: one four-column heading per model in this group.
    heading_cells = "".join(
        f" & \\multicolumn{{4}}{{c}}{{{models[name]}}}" for name in chunk
    )
    fragments.append("Model" + heading_cells + row_end)

    # Metric row: the same four metric labels repeated for every model.
    fragments.append("Metric" + metric_header_cells * len(chunk) + row_end)
    fragments.append(rule)

    # Data rows: one row per oxide, four formatted values per model.
    for target in major_oxides:
        cells = [f"$\\ce{{{target}}}$"]

        for name in chunk:
            per_target = results.get(name)
            if per_target is None or target not in per_target:
                # No run was collected for this model/target pair.
                print(f"Missing data for {name} - {target}")
                cells.append(no_data_cells)
            else:
                values = per_target[target]
                cells.append("".join(f" & {values[key]:.4f}" for key in metric_keys))

        fragments.append("".join(cells) + row_end)

    fragments.append(rule)

    # Mean row: per-model means, or dashes when a model has no means.
    mean_cells = [
        "".join(f" & {model_means[name][key]:.4f}" for key in metric_keys)
        if name in model_means
        else no_data_cells
        for name in chunk
    ]
    fragments.append("Mean" + "".join(mean_cells) + row_end)
    fragments.append(rule)

fragments += [
    "\\end{tabular}%\n",
    "}\n",
    "\\caption{Initial results for the different models and metrics.}\n",
    "\\end{table*}\n",
]

latex_table = "".join(fragments)

# Write the finished table into the report sources.
path = Path("./../report_thesis/src/sections/results/init_results_table.tex")

with path.open("w") as file:
    file.write(latex_table)