In [9]:
import pandas as pd
import json
import os

In [10]:
# Root directory that is searched recursively for result files.
RESULTS_DIR = "../../results/final_dataset_train"

# Collect every *.json file below RESULTS_DIR. Sorting makes the row order of
# `results` independent of the directory listing order of the file system.
result_paths = sorted(
    os.path.join(root, name)
    for root, _dirs, files in os.walk(RESULTS_DIR)
    for name in files
    if name.endswith(".json")
)
if not result_paths:
    # Name the directory that was searched instead of letting pd.concat fail
    # on an empty list further down.
    raise FileNotFoundError(
        "no .json result files found under {!r}".format(RESULTS_DIR)
    )

# Each file is read as line-delimited JSON (lines=True). "records" is the
# documented orient value; the former "record" spelling is not.
frames = [pd.read_json(path, orient="records", lines=True) for path in result_paths]

# One frame holding the rows of all result files; the per-file index is kept.
results = pd.concat(frames)
results.columns

Index(['task_id', 'input_src_path', 'max_vocab_size', 'problem_type',
       'window_size', 'step_size', 'encode_type', 'test_split_percentage',
       'train_dev_split_percentage', 'oversampling_enabled',
       'ratio_after_oversampling', 'undersampling_enabled',
       'ratio_after_undersampling', 'embedding_vecor_length', 'epochs',
       'batch_size', 'num_lstm_cells', 'dropout_emb_lstm',
       'dropout_lstm_dense', 'recall', 'precision', 'roc', 'f1',
       'train_recall', 'train_precision', 'train_roc', 'train_f1',
       'train_dev_recall', 'train_dev_precision', 'train_dev_roc',
       'train_dev_f1', 'cm', 'cm_normalized', 'n_trees_in_forest',
       'max_features', 'n_estimators', 'learning_rate', 'subsample',
       'class_weight'],
      dtype='object')

In [11]:
# Show every column of wide frames and round displayed floats to 4 digits.
# The full option name "display.max_columns" is used on purpose: the former
# "display.max.columns" only resolved because pandas treats the key as a
# regular-expression pattern (where "." happens to match "_").
pd.set_option("display.max_columns", None)
pd.set_option("display.precision", 4)


In [12]:
# Workaround carried over from the original note ("important fix"): keep only
# runs with max_vocab_size == 100000 whose input_src_path is "final_dataset".
# TODO: re-check whether this filter is still needed once all experiments
# have been rerun.
has_expected_vocab_size = results["max_vocab_size"] == 100000
uses_final_dataset = results["input_src_path"] == "final_dataset"
results = results[has_expected_vocab_size & uses_final_dataset]

In [13]:
# Columns used as join keys below. They are the only columns that keep their
# original name; every other column receives a per-selection suffix.
merge_columns = ['window_size', 'step_size', 'encode_type','oversampling_enabled', 'ratio_after_oversampling', 'undersampling_enabled', 'ratio_after_undersampling', 'n_trees_in_forest', 'max_features', 'class_weight']


def select_random_forest_runs(problem_type, suffix):
    """Return the rows of ``results`` for one problem type, with suffixed columns.

    A row is kept when its ``problem_type`` equals ``problem_type`` and its
    ``n_trees_in_forest`` value is not null. In the returned frame every
    column that is not listed in ``merge_columns`` has ``suffix`` appended to
    its name, so several selections can be merged side by side.

    Parameters
    ----------
    problem_type : str
        Value to match in the ``problem_type`` column.
    suffix : str
        Text appended to the name of every non-key column.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``results`` itself is not modified.
    """
    is_selected = (results["problem_type"] == problem_type) & results["n_trees_in_forest"].notnull()
    return results[is_selected].rename(
        columns=lambda column: column if column in merge_columns else column + suffix
    )


# FIXME(review): all three selections filter on "RETURN_NULL", exactly as the
# original cell did, so the *_CC and *_CCS columns are built from the same
# rows as the RN (*_RF) columns. This looks like a copy-paste slip. Replace the
# two marked values with the problem_type strings used for CC and CCS in the
# result files (see results["problem_type"].unique()).
random_forest_RN = select_random_forest_runs("RETURN_NULL", "_RF")
random_forest_CC = select_random_forest_runs("RETURN_NULL", "_CC")    # FIXME: CC problem type
random_forest_CCS = select_random_forest_runs("RETURN_NULL", "_CCS")  # FIXME: CCS problem type

# Inner-join the three selections on the shared key columns. The suffixes
# applied above already make all non-key names distinct, so the `suffixes`
# arguments are never needed by merge(); they are kept unchanged.
merged = (
    random_forest_RN
    .merge(random_forest_CC, on=merge_columns, suffixes=("_LEFT1", "_RIGHT1"))
    .merge(random_forest_CCS, on=merge_columns, suffixes=("_LEFT2", "_RIGHT2"))
)

merged.columns

Index(['task_id_RF', 'input_src_path_RF', 'max_vocab_size_RF',
       'problem_type_RF', 'window_size', 'step_size', 'encode_type',
       'test_split_percentage_RF', 'train_dev_split_percentage_RF',
       'oversampling_enabled', 'ratio_after_oversampling',
       'undersampling_enabled', 'ratio_after_undersampling',
       'embedding_vecor_length_RF', 'epochs_RF', 'batch_size_RF',
       'num_lstm_cells_RF', 'dropout_emb_lstm_RF', 'dropout_lstm_dense_RF',
       'recall_RF', 'precision_RF', 'roc_RF', 'f1_RF', 'train_recall_RF',
       'train_precision_RF', 'train_roc_RF', 'train_f1_RF',
       'train_dev_recall_RF', 'train_dev_precision_RF', 'train_dev_roc_RF',
       'train_dev_f1_RF', 'cm_RF', 'cm_normalized_RF', 'n_trees_in_forest',
       'max_features', 'n_estimators_RF', 'learning_rate_RF', 'subsample_RF',
       'class_weight', 'task_id_CC', 'input_src_path_CC', 'max_vocab_size_CC',
       'problem_type_CC', 'test_split_percentage_CC',
       'train_dev_split_percentage_CC', '

In [17]:
import os
import jinja2
from jinja2 import Template
# Jinja2 environment whose delimiters are changed from the defaults to
# LaTeX-style markers (\BLOCK{...}, \VAR{...}, \#{...}, and the %% / %# line
# prefixes) for use with .tex templates.
# The delimiter strings are raw literals: "\B", "\V" and "\#" are not valid
# Python escape sequences, so the non-raw spelling triggers an
# invalid-escape warning on newer Python versions. The runtime values are
# unchanged.
latex_jinja_env = jinja2.Environment(
    block_start_string=r'\BLOCK{',
    block_end_string='}',
    variable_start_string=r'\VAR{',
    variable_end_string='}',
    comment_start_string=r'\#{',
    comment_end_string='}',
    line_statement_prefix='%%',
    line_comment_prefix='%#',
    trim_blocks=True,
    autoescape=False)

In [21]:
# Read the table template; the file handle is closed before rendering starts.
with open("table_train_all.tex.jinja2", "r") as template_file:
    template_source = template_file.read()

# Compile the template with the LaTeX-delimiter environment, pass the merged
# frame to it as `data`, and print the resulting text.
t = latex_jinja_env.from_string(template_source)
rendered = t.render(data=merged)
print(rendered)

% Please add the following required packages to your document preamble:
% \usepackage{booktabs}
\begin{table}[]
\begin{tabular}{@{}llllllllllllll@{}}
\toprule
          & \multicolumn{4}{c}{Parameter}                                                                                                                                                                                                                                   & \multicolumn{3}{c}{RN}                                                                                                                                                                         & \multicolumn{3}{c}{CCS}                                                                                                                                                                           & \multicolumn{3}{c}{CC}                                                                                                                                                                