In [17]:
# MLflow experiment IDs to pull runs from. Each ID is labelled with the model
# name that the original per-ID comments attached to it.
experiment_id_by_model = {
    "ETR": 256,
    "Random Forest": 231,
    "Elastic Net": 215,
    "Ridge": 214,
    "LASSO": 210,
    "ANN": 257,
    "CNN": 258,
    "NGB": 144,
    "PLS": 140,
    "XGBoost": 136,
    "SVR": 134,
    "GBR": 45,
}

# Dicts keep insertion order, so the list has the same order as before.
experiment_ids = list(experiment_id_by_model.values())

In [18]:
# Maps the model prefix of an MLflow run name to the text used for that model
# in the LaTeX results table. The order of the pairs is the order in which the
# models appear in the table. Most values are \gls{...} references (presumably
# glossary entries defined in the thesis - confirm); "Ridge" is used verbatim.
models = dict(
    [
        ("Ridge", "Ridge"),
        ("LASSO", "\\gls{lasso}"),
        ("ElasticNet", "\\gls{enet}"),
        ("PLS", "\\gls{pls}"),
        ("SVR", "\\gls{svr}"),
        ("RandomForest", "\\gls{rf}"),
        ("NGB", "\\gls{ngboost}"),
        ("GBR", "\\gls{gbr}"),
        ("XGB", "\\gls{xgboost}"),
        ("ExtraTrees", "\\gls{etr}"),
        ("ANN", "\\gls{ann}"),
        ("CNN", "\\gls{cnn}"),
    ]
)

In [19]:
import mlflow

from pathlib import Path
from lib.reproduction import major_oxides

In [20]:
# Tracking client created with default arguments.
client = mlflow.tracking.MlflowClient()

# experiment ID -> runs returned by searching that experiment.
data = {exp_id: client.search_runs(exp_id) for exp_id in experiment_ids}

# Display the runs of the first listed experiment as a quick sanity check.
data[experiment_ids[0]]

[<Run: data=<RunData: metrics={'rmse': 0.5906100342615692,
  'rmse_cv': 0.6424577351239987,
  'rmse_cv_1': 0.9116611135556014,
  'rmse_cv_2': 0.5921384021032136,
  'rmse_cv_3': 0.4801717587923786,
  'rmse_cv_4': 0.5858596660448013,
  'std_dev': 0.5402558765049962,
  'std_dev_cv': 0.6358257805769801,
  'std_dev_cv_1': 0.8899857936885336,
  'std_dev_cv_2': 0.5926943020924624,
  'std_dev_cv_3': 0.4771710417246671,
  'std_dev_cv_4': 0.5834519848022575}, params={'norm': '3', 'target': 'K2O'}, tags={'mlflow.runName': 'ExtraTrees_K2O',
  'mlflow.source.name': '/home/patrick/git/thesis-chemcam/baseline/venv/lib/python3.12/site-packages/ipykernel_launcher.py',
  'mlflow.source.type': 'LOCAL',
  'mlflow.user': 'patrick'}>, info=<RunInfo: artifact_uri='mlflow-artifacts:/256/61e0900a907044b19e47a1c5b8bbd7fa/artifacts', end_time=1717674250681, experiment_id='256', lifecycle_stage='active', run_id='61e0900a907044b19e47a1c5b8bbd7fa', run_name='ExtraTrees_K2O', run_uuid='61e0900a907044b19e47a1c5b8bbd7

In [21]:
# Flatten the MLflow runs into results[model_name][target] -> metrics dict.
# Runs are only included when they carry a "target" parameter and all of the
# metrics listed below; incomplete runs are reported and skipped instead of
# aborting the whole cell with a KeyError.
REQUIRED_METRICS = ("rmse", "rmse_cv", "std_dev", "std_dev_cv")

results = {}

for experiment_id, runs in data.items():
    for run in runs:
        params = run.data.params

        # Runs without a target parameter cannot be placed in the table.
        if "target" not in params:
            continue

        run_name = run.data.tags["mlflow.runName"]
        run_metrics = run.data.metrics

        missing_metrics = [name for name in REQUIRED_METRICS if name not in run_metrics]
        if missing_metrics:
            print(f"Skipping {run_name} (experiment {experiment_id}): missing metrics {missing_metrics}")
            continue

        # The model is the part of the run name before the first underscore,
        # e.g. "ExtraTrees_K2O" -> "ExtraTrees".
        model_name = run_name.split("_")[0]
        # Fall back to the raw model name when no LaTeX name is configured.
        latex_name = models.get(model_name, model_name)

        target = params["target"]
        rmse = run_metrics["rmse"]
        rmse_cv = run_metrics["rmse_cv"]
        std_dev = run_metrics["std_dev"]
        std_dev_cv = run_metrics["std_dev_cv"]

        print(f"{model_name} - {target}, RMSE: {rmse}, RMSE CV: {rmse_cv}, STD DEV: {std_dev}, STD DEV CV: {std_dev_cv}")

        model_results = results.setdefault(model_name, {})

        # The last run iterated for a model/target pair wins, exactly as before;
        # the overwrite is now reported instead of happening silently.
        # NOTE(review): MLflow documents start_time DESC as the default search
        # order, so the winning run may be the oldest one - confirm which run
        # should be reported.
        if target in model_results:
            print(f"Duplicate run for {model_name} - {target}: overwriting the earlier entry with {run_name} (experiment {experiment_id})")

        model_results[target] = {
            "latex_name": latex_name,
            "rmse": rmse,
            "rmse_cv": rmse_cv,
            "std_dev": std_dev,
            "std_dev_cv": std_dev_cv,
        }

ExtraTrees - K2O, RMSE: 0.5906100342615692, RMSE CV: 0.6424577351239987, STD DEV: 0.5402558765049962, STD DEV CV: 0.6358257805769801
ExtraTrees - Na2O, RMSE: 0.41130040120423883, RMSE CV: 1.0308820357981807, STD DEV: 0.4086259152643041, STD DEV CV: 1.0280842938230461
ExtraTrees - CaO, RMSE: 1.8368145740619075, RMSE CV: 1.5145670466352126, STD DEV: 1.8306876508727035, STD DEV CV: 1.5096592993972884
ExtraTrees - MgO, RMSE: 0.9059522847184235, RMSE CV: 1.755129260089178, STD DEV: 0.8954914393749801, STD DEV CV: 1.737829566205682
ExtraTrees - FeOT, RMSE: 2.143825234586502, RMSE CV: 3.299180678511288, STD DEV: 2.125893408481197, STD DEV CV: 3.2573773383417346
ExtraTrees - Al2O3, RMSE: 1.845407360318083, RMSE CV: 2.368166798412971, STD DEV: 1.8466817710335772, STD DEV CV: 2.3589079736900422
ExtraTrees - TiO2, RMSE: 0.3296626686499523, RMSE CV: 0.4391687192787556, STD DEV: 0.3207845505628321, STD DEV CV: 0.43815548539082017
ExtraTrees - SiO2, RMSE: 3.9945776000850604, RMSE CV: 5.2304305738697

In [22]:
# Build the LaTeX results table and write it into the thesis sources.
#
# The table is laid out in horizontal bands: each band shows MODELS_PER_BAND
# models side by side, with one row per entry of major_oxides and one column
# per entry of METRIC_COLUMNS for every model. All counts used below (column
# spec, \multicolumn span, placeholder cells) are derived from these two
# definitions so they cannot drift apart.
MODELS_PER_BAND = 3
METRIC_COLUMNS = [
    # (key in results[model][target], column header in the table)
    ("rmse", "RMSEP"),
    ("rmse_cv", "RMSECV"),
    ("std_dev", "Std. dev."),
    ("std_dev_cv", "Std. dev. CV"),
]
ROW_END = " \\\\\n"  # LaTeX row terminator followed by a newline

n_metrics = len(METRIC_COLUMNS)

# "l" for the row label, then one "|cccc"-style group per model in a band.
column_spec = "l" + ("|" + "c" * n_metrics) * MODELS_PER_BAND

# Metric header cells for a single model; repeated once per model in a band.
metric_headers = "".join(
    f" & \\multicolumn{{1}}{{c}}{{{header}}}" for _, header in METRIC_COLUMNS
)

# Placeholder cells used when a model has no result for a target.
missing_cells = " & -" * n_metrics

table_lines = [
    "\\begin{table*}[]\n",
    "\\centering\n",
    "\\resizebox{1\\textwidth}{!}{%\n",
    f"\\begin{{tabular}}{{{column_spec}}}\n",
]

model_keys = list(models.keys())

for start in range(0, len(model_keys), MODELS_PER_BAND):
    chunk = model_keys[start:start + MODELS_PER_BAND]

    # Header row: one cell per model, spanning that model's metric columns.
    header_row = "Model" + "".join(
        f" & \\multicolumn{{{n_metrics}}}{{c}}{{{models[model]}}}" for model in chunk
    )
    table_lines.append(header_row + ROW_END)

    # Metric row: the same metric headers under every model of the band.
    table_lines.append("Metric" + metric_headers * len(chunk) + ROW_END)
    table_lines.append("\\hline\n")

    # Data rows: one per target, formatted to four decimals.
    for target in major_oxides:
        row = f"$\\ce{{{target}}}$"

        for model in chunk:
            if model in results and target in results[model]:
                metrics = results[model][target]
                row += "".join(f" & {metrics[key]:.4f}" for key, _ in METRIC_COLUMNS)
            else:
                # Keep the table rectangular and make the gap visible.
                print(f"Missing data for {model} - {target}")
                row += missing_cells

        table_lines.append(row + ROW_END)

    table_lines.append("\\hline\n")

table_lines += [
    "\\end{tabular}%\n",
    "}\n",
    "\\caption{Initial results for the different models and metrics.}\n",
    "\\end{table*}\n",
]

latex_table = "".join(table_lines)

# Write the LaTeX table string to a file. The encoding is explicit so the
# output does not depend on the platform default.
path = Path("./../report_thesis/src/sections/results/init_results_table.tex")
path.write_text(latex_table, encoding="utf-8")

Missing data for CNN - MgO
Missing data for CNN - CaO
Missing data for ANN - Na2O
Missing data for CNN - Na2O
Missing data for ANN - K2O
Missing data for CNN - K2O
