In [None]:
import os
import json
import re
import numpy as np
import pandas as pd
from typing import List, Tuple

from exp_spec_info import *

In [None]:
# Data dir paths
# The thesis-data root may be overridden through the THESIS_DATA_DIR
# environment variable so the notebook is not tied to a single machine; when
# the variable is unset (or empty) the original absolute location is used.
thesis_data_dir = (
    os.environ.get("THESIS_DATA_DIR")
    or "C:\\Users\\dosre\\dev\\thesis-data"
)
# Root directory under which the per-experiment solver JSON files are looked up
flattened_solve_group_dir = os.path.join(
    thesis_data_dir,
    "experimental-transfer",
    "experimentation",
    "output_data"
)
# File the extracted DataFrame is pickled to
extracted_data_path = os.path.join(thesis_data_dir, "extracted_data.pkl")

# Number of iterations for each convergence experiment
n_iterations = 3

##### Collect and Examine Data Population

In [None]:
# Enumerate every planned (setup, matrix, solver, inner_iter, repetition)
# combination and note whether its result file is present on disk.
population_columns = [
    "setup",
    "matrix",
    "solver",
    "inner_iter",
    "experiment_iter",
    "data_path",
    "populated"
]

population_records = []
for setup in RAW_SETUPS:
    setup_id = SETUP_TO_ID_MAPPING[setup]
    for matrix in SETUP_MATRIX_MAPPING[setup_id]:
        for solver in RAW_SOLVERS:
            for inner_iter in INNER_ITERS:
                setup_dir_name = f"{setup}_inner_iter_{inner_iter}"
                for experiment_iteration in range(n_iterations):

                    # Expected location:
                    # <root>/<setup>_inner_iter_<k>/<matrix>/<repetition>/<solver>.json
                    data_path = os.path.join(
                        flattened_solve_group_dir,
                        setup_dir_name,
                        matrix,
                        str(experiment_iteration),
                        solver+".json"
                    )

                    # Values are listed in the same order as population_columns
                    record_values = (
                        setup_id,
                        matrix,
                        SOLVER_TO_ID_MAPPING[solver],
                        inner_iter,
                        experiment_iteration,
                        data_path,
                        os.path.exists(data_path)
                    )
                    population_records.append(
                        dict(zip(population_columns, record_values))
                    )

data_population = pd.DataFrame(population_records, columns=population_columns)

##### Analyze Missing Data

In [None]:
# Rows whose expected result file was not found on disk
missing_data_population = data_population[~data_population["populated"]]

# Report the collected share as an actual percentage (the label says
# "Percent" but a raw fraction used to be printed), and guard the division so
# an empty population reports 0.00% instead of raising ZeroDivisionError.
n_expected = len(data_population)
n_collected = n_expected - len(missing_data_population)
collected_fraction = n_collected/n_expected if n_expected else 0.0
print(f"Percent of expected data collected: {collected_fraction:.2%}")
display(missing_data_population)

##### Filter Populated Data and Extract

In [None]:
def read_phase_change(json_data, key) -> int:
    """Fetch a phase-change entry from parsed solve data.

    Args:
        json_data: Container supporting ``in`` and item access by ``key``.
        key: Name of the phase-change entry to look up.

    Returns:
        ``json_data[key]`` when ``key`` is present, otherwise the sentinel -1.
    """
    if key not in json_data:
        return -1
    return json_data[key]

def translate_phase_change_outer_to_inner(outer_iter, inner_iters) -> int:
    """Convert an outer-iteration phase-change index to an inner-iteration one.

    Args:
        outer_iter: Outer iteration of the phase change, or -1 for "none".
        inner_iters: Sequence of inner-iteration counts that is sliced by
            ``outer_iter``.

    Returns:
        -1 when ``outer_iter`` is -1; otherwise one plus the sum of the first
        ``outer_iter - 1`` entries of ``inner_iters``, as an ``int``.
    """
    if outer_iter == -1:
        return -1

    # NOTE(review): for outer_iter == 0 the slice bound is -1, so every entry
    # except the last is summed -- presumably outer iterations are 1-based and
    # 0 never occurs; confirm.
    preceding_inner_counts = inner_iters[:outer_iter-1]
    return int(np.sum(preceding_inner_counts)+1)

def extract_lin_solve_data(row):
    """Load one solve-result file and derive the extra columns for its row.

    The file at ``row["data_path"]`` is read as text, every ``nan``/``inf``
    spelling (with or without a leading minus) is rewritten to ``NaN`` so that
    ``json.loads`` accepts it, and the parsed content is summarised.

    Args:
        row: Record supporting ``row["data_path"]`` (e.g. a DataFrame row).

    Returns:
        dict with, in this order:
            initiated, converged, terminated: True when the stored value
                equals the string "true".
            outer_iters: The stored "outer_iterations" value.
            inner_iters: Sum of the stored "inner_iterations" as ``int``.
            elapsed_time_ms: The stored "elapsed_time_ms" value.
            HS_trans_outer_iter, HS_trans_inner_iter: "hlf_sgl_cascade_change"
                entry (or -1 if absent) and its inner-iteration translation.
            SD_trans_outer_iter, SD_trans_inner_iter: Same for
                "sgl_dbl_cascade_change".
            outer_relres: "outer_res_norm_history" divided by its first entry.
            inner_relres: Concatenated "inner_res_norm_history" divided by the
                first outer residual norm.

    Raises:
        AssertionError: If either residual history does not hold exactly one
            more entry than the corresponding iteration count.
    """
    data_path = row["data_path"]

    # Use a context manager so the handle is closed deterministically; a bare
    # open(...).read() leaves closing to the garbage collector.
    with open(data_path, "r") as data_file:
        raw_text = data_file.read()

    # Lower-case nan/inf spellings are not accepted by json.loads; all of them
    # (either sign) are mapped to NaN before parsing.
    lin_solve_data = json.loads(
        re.sub(r"(-nan|nan|-inf|inf)", "NaN", raw_text)
    )

    inner_iterations = lin_solve_data["inner_iterations"]

    additional_row_data = {
        "initiated": lin_solve_data["initiated"] == "true",
        "converged": lin_solve_data["converged"] == "true",
        "terminated": lin_solve_data["terminated"] == "true",
        "outer_iters": lin_solve_data["outer_iterations"],
        "inner_iters": int(np.sum(inner_iterations)),
        "elapsed_time_ms": lin_solve_data["elapsed_time_ms"]
    }

    # Both phase changes are handled identically: record the outer iteration
    # (-1 when the key is absent) and its inner-iteration equivalent.
    phase_change_keys = (
        ("HS", "hlf_sgl_cascade_change"),
        ("SD", "sgl_dbl_cascade_change"),
    )
    for prefix, json_key in phase_change_keys:
        outer_change = read_phase_change(lin_solve_data, json_key)
        additional_row_data[f"{prefix}_trans_outer_iter"] = outer_change
        additional_row_data[f"{prefix}_trans_inner_iter"] = (
            translate_phase_change_outer_to_inner(outer_change, inner_iterations)
        )

    outer_res_norm_history = lin_solve_data["outer_res_norm_history"]
    inner_res_norm_history = lin_solve_data["inner_res_norm_history"]
    init_res_norm = outer_res_norm_history[0]

    outer_relres = np.array(outer_res_norm_history)/init_res_norm

    # Flatten the per-outer-iteration histories: the first one is kept whole,
    # later ones lose their first entry (presumably it repeats the previous
    # history's final residual -- TODO confirm).
    inner_res_norms = []
    for i, history in enumerate(inner_res_norm_history):
        inner_res_norms.extend(history if i == 0 else history[1:])
    inner_relres = np.array(inner_res_norms)/init_res_norm

    additional_row_data["outer_relres"] = outer_relres
    additional_row_data["inner_relres"] = inner_relres

    # Sanity checks: each history holds the initial residual plus one entry per
    # iteration. The messages name the offending file.
    assert additional_row_data["inner_iters"]+1 == inner_relres.size, (
        f"{data_path}: expected {additional_row_data['inner_iters']+1} "
        f"inner residual norms, found {inner_relres.size}"
    )
    assert additional_row_data["outer_iters"]+1 == outer_relres.size, (
        f"{data_path}: expected {additional_row_data['outer_iters']+1} "
        f"outer residual norms, found {outer_relres.size}"
    )

    return additional_row_data

In [None]:
# Keep only the rows whose result file exists, parse each file into extra
# columns, and drop the now-redundant file path.
populated_rows = data_population[data_population["populated"]]
extracted_columns = populated_rows.apply(
    extract_lin_solve_data,
    axis=1,
    result_type="expand"
)
data = populated_rows.join(extracted_columns).drop(columns="data_path")
display(data)

In [None]:
# Persist the extracted DataFrame as a pickle at extracted_data_path
pd.to_pickle(data, extracted_data_path)