# Convert a Subgroup Discovery Result Set from a pandas-compatible .csv File to LaTeX-table-compatible .csv and .tex Files

## Default Values for Papermill Parameters

In [1]:
# --- Inputs -----------------------------------------------------------------
# Both paths are relative here; they are resolved further down in the notebook.
PARAM_RESULT_SET_PATH = "../outputs/sorted_result_set_groupby_sd_result_set_average_ranking_loss.csv"
PARAM_DATA_IN_PATH = "../../data"

# Name of the result-set column that holds the pattern string.
PARAM_PATTERN_COLUMN = "pattern"

# --- Outputs ----------------------------------------------------------------
# File names only; they are written into the stage output directory.
PARAM_CSV_RESULT_FILENAME = "result_set_table.csv"
PARAM_LATEX_RESULT_FILENAME = "result_set_table.tex"

# --- Table layout -----------------------------------------------------------
# Mapping passed to DataFrame.rename to produce the table headers.
PARAM_COLUMN_NAME_MAP = {
    "interestingness": "Interestingness",
    "pattern": "Pattern",
    "average_ranking_loss": "ARL",
}
# Column alignment specification handed to DataFrame.to_latex.
PARAM_COLUMN_FORMAT = "rlr"
# Length inserted into the \addlinespace[...] prefix of every table row.
PARAM_LINE_SPACE = "5pt"

## Import and Set Parameters

In [2]:
from subroc.datasets.metadata import DatasetName
from subroc.datasets.reader import DatasetReader, DatasetStage
from subroc import pattern_to_latex
from subroc import util

import pandas as pd
import os

# Directory this stage writes its results to; taken from the environment when
# set, otherwise the relative default is used.
STAGE_OUTPUT_PATH = os.getenv("STAGE_OUTPUT_PATH", "../outputs")

# fill environment variables into params
PARAM_RESULT_SET_PATH = util.prepend_experiment_output_path(PARAM_RESULT_SET_PATH)
# NOTE(review): the data *input* path is resolved with the *output*-path helper
# as well -- confirm that this is intended.
PARAM_DATA_IN_PATH = util.prepend_experiment_output_path(PARAM_DATA_IN_PATH)

# Dataset
DATASET_NAME = DatasetName.OpenML_ADULT
DATASET_STAGE = DatasetStage.RAW
DATASET_READER = DatasetReader(PARAM_DATA_IN_PATH)

## Read the Data

In [3]:
# Read the dataset selected by DATASET_NAME and DATASET_STAGE through the reader
# built from PARAM_DATA_IN_PATH. `data` is later handed to
# util.restore_categorical when the patterns are rendered; `dataset_meta` is not
# used by the visible cells of this notebook.
data, dataset_meta = DATASET_READER.read_dataset(DATASET_NAME, DATASET_STAGE)

## Read the Result Set

In [None]:
# Load the pandas-compatible result set; the path is passed directly because
# wrapping it in an f-string adds nothing.
result_set = pd.read_csv(PARAM_RESULT_SET_PATH)

## Make LaTeX Representation of Patterns in the Result Set

In [None]:
# Remember the source column order; the LaTeX-ready table starts out empty with
# exactly these columns and is filled row by row afterwards.
original_columns = result_set.columns.tolist()
latex_compatible_result_set = pd.DataFrame(columns=original_columns)


def format_number(n):
    """Render a table cell value as text.

    Floats are formatted with exactly four decimal places; every other value
    is converted with ``str``.
    """
    return f"{n:.4f}" if isinstance(n, float) else str(n)


# Position of the pattern column inside each row. The pattern is looked up by
# position so that PARAM_PATTERN_COLUMN is honoured (the attribute name
# `pattern` used to be hard-coded) and so that column names that are not valid
# Python identifiers work as well. Raises ValueError if the column is missing.
pattern_column_index = original_columns.index(PARAM_PATTERN_COLUMN)

# Rows are collected in a list and turned into a DataFrame once at the end,
# instead of enlarging the DataFrame one `.loc` assignment at a time.
latex_rows = []
for result in result_set.itertuples(index=False):
    # Parse the stored pattern string and convert it to LaTeX.
    sel_conjunction = util.from_str_Conjunction(result[pattern_column_index])
    sel_conjunction = util.restore_categorical(sel_conjunction, data)
    result_pattern_latex = r"\makecell[l]{$" + pattern_to_latex.convert_Conjunction(sel_conjunction) + r"$}"

    # Every non-pattern cell gets one `\\\ ` (line break followed by a control
    # space) per line break found in the pattern cell, so it spans as many
    # lines as the pattern cell does.
    num_newlines = result_pattern_latex.count(r"\\")
    line_padding = r"\\\ " * num_newlines

    updated_column = [
        result_pattern_latex
        if original_column == PARAM_PATTERN_COLUMN
        else r"\makecell[r]{" + format_number(value) + line_padding + r"}"
        for original_column, value in zip(original_columns, result)
    ]
    # The vertical spacing command is attached to the first cell of each row.
    updated_column[0] = r"\addlinespace[" + PARAM_LINE_SPACE + r"] " + updated_column[0]
    print(updated_column)
    latex_rows.append(updated_column)

latex_compatible_result_set = pd.DataFrame(latex_rows, columns=original_columns)
latex_compatible_result_set.rename(columns=PARAM_COLUMN_NAME_MAP, inplace=True)

## Write the Converted Result Set to .csv

In [None]:
# Persist the LaTeX-ready table as CSV (without the index column) in the stage
# output directory.
csv_result_path = f"{STAGE_OUTPUT_PATH}/{PARAM_CSV_RESULT_FILENAME}"
latex_compatible_result_set.to_csv(csv_result_path, index=False)

## Write the Converted Result Set to .tex

In [None]:


def replace_text_underscores(s) -> str:
    """Escape underscores for LaTeX.

    Strings are returned with every ``_`` replaced by ``\\_``; values that are
    not strings are handed back unchanged.
    """
    return s.replace("_", "\\_") if isinstance(s, str) else s


# Escape underscores in the header first and then in every cell, so that the
# generated table does not contain bare `_` characters.
latex_compatible_result_set = (
    latex_compatible_result_set
    .rename(columns=replace_text_underscores, errors='raise')
    .map(replace_text_underscores)
)

# Write the table as LaTeX (without the index column) into the stage output
# directory.
tex_result_path = f"{STAGE_OUTPUT_PATH}/{PARAM_LATEX_RESULT_FILENAME}"
latex_compatible_result_set.to_latex(
    tex_result_path,
    index=False,
    column_format=PARAM_COLUMN_FORMAT,
    float_format="%.4f",
)