In [1]:
import os
import json
import re
import numpy as np
import pandas as pd
from typing import List, Tuple

from exp_spec_info import *

In [2]:
# Input/output locations for this notebook (absolute, machine-specific Windows paths)
# - flattened_solve_group_dir: root directory that is searched for solver result JSON files
# - extracted_data_path: pickle file the extracted table is written to
flattened_solve_group_dir = (
    r"C:\Users\dosre\dev\thesis-data"
    r"\experimental-transfer\experimentation\output_data"
)
extracted_data_path = r"C:\Users\dosre\dev\thesis-data\extracted_data.pkl"

##### Collect and Examine Data Population

In [3]:
# Enumerate every scheduled (setup, matrix, solver, restart parameter, repetition)
# combination and record whether its result file is present on disk
data_population = []
for setup in RAW_SETUPS:
    # The id is used both to select the matrices of this setup and as the stored label
    setup_id = SETUP_TO_ID_MAPPING[setup]
    for matrix in SETUP_MATRIX_MAPPING[setup_id]:
        for solver in RAW_SOLVERS:
            for restart_param in RESTART_PARAMS:
                for experiment_iteration in range(N_EXPERIMENT_ITERATIONS):

                    # Expected layout:
                    # <root>/<setup>_inner_iter_<restart>/<matrix>/<repetition>/<solver>.json
                    data_path = os.path.join(
                        flattened_solve_group_dir,
                        f"{setup}_inner_iter_{restart_param}",
                        matrix,
                        str(experiment_iteration),
                        solver + ".json"
                    )

                    record = {
                        "setup": setup_id,
                        "matrix": matrix,
                        "solver": SOLVER_TO_ID_MAPPING[solver],
                        "restart_param": restart_param,
                        "experiment_iter": experiment_iteration,
                        "data_path": data_path,
                        "populated": os.path.exists(data_path),
                    }
                    data_population.append(record)

# Fix the column order explicitly so the frame has these columns even if no
# records were generated
population_columns = [
    "setup",
    "matrix",
    "solver",
    "restart_param",
    "experiment_iter",
    "data_path",
    "populated",
]
data_population = pd.DataFrame(data_population, columns=population_columns)

##### Analyze Missing Data

In [4]:
# Rows of the expected population whose result file was not found on disk
missing_data_population = data_population[~data_population["populated"]]

# Share of expected result files that exist; rendered with the % format so the
# printed number agrees with the "Percent" label (e.g. 60.12% rather than 0.6012...)
collected_fraction = 1 - len(missing_data_population)/len(data_population)
print(f"Percent of expected data collected: {collected_fraction:.2%}")
display(missing_data_population)

Percent of expected data collected: 0.601233552631579


Unnamed: 0,setup,matrix,solver,restart_param,experiment_iter,data_path,populated
0,ilu0,af23560,FP FP16,10,0,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
1,ilu0,af23560,FP FP16,10,1,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
2,ilu0,af23560,FP FP16,10,2,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
3,ilu0,af23560,FP FP16,20,0,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
4,ilu0,af23560,FP FP16,20,1,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
...,...,...,...,...,...,...,...
16555,ilutp1em4,Zhao2,PC HSD S2T,150,1,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
16556,ilutp1em4,Zhao2,PC HSD S2T,150,2,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
16557,ilutp1em4,Zhao2,PC HSD S2T,200,0,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
16558,ilutp1em4,Zhao2,PC HSD S2T,200,1,C:\Users\dosre\dev\thesis-data\experimental-tr...,False


##### Filter Populated Data and Extract

In [5]:
def read_phase_change(json_data, key) -> int:
    """Look up ``key`` in ``json_data``, using -1 as the "not present" sentinel.

    Args:
        json_data: Container supporting ``in`` and ``[]`` lookups (the parsed
            result-file dictionary at the call sites in this notebook).
        key: Name of the entry to read.

    Returns:
        ``json_data[key]`` when the key exists, otherwise -1.
    """
    return json_data[key] if key in json_data else -1

def translate_phase_change_outer_to_inner(outer_iter, inner_iters) -> int:
    """Convert an outer-iteration phase-change index to an inner-iteration index.

    Args:
        outer_iter: Outer iteration at which the phase change occurred, or -1
            when no phase change was recorded.
        inner_iters: Sequence of inner-iteration counts, one entry per outer
            iteration.

    Returns:
        -1 if ``outer_iter`` is -1; otherwise one plus the sum of the first
        ``outer_iter - 1`` entries of ``inner_iters``.

    Note:
        The slice bound ``outer_iter - 1`` treats ``outer_iter`` as 1-based.
        A value of 0 would give a negative bound (all entries but the last are
        summed) -- NOTE(review): confirm 0 never occurs in the input data.
    """
    # Sentinel passes straight through: no phase change was recorded
    if outer_iter == -1:
        return -1

    # Inner iterations completed in the outer iterations before the change
    completed_before_change = inner_iters[:(outer_iter - 1)]
    return int(np.sum(completed_before_change) + 1)

def extract_lin_solve_data(row):
    """Parse one solver result file and derive the summary columns for its row.

    Args:
        row: Mapping-like row of the data-population table. Only
            ``row["data_path"]`` (path of the result JSON file) is read.

    Returns:
        dict with, in this order, the keys ``initiated``, ``converged``,
        ``terminated`` (bools), ``outer_iters``, ``inner_iters``,
        ``elapsed_time_ms``, ``HS_trans_outer_iter``, ``HS_trans_inner_iter``,
        ``SD_trans_outer_iter``, ``SD_trans_inner_iter`` (-1 when the
        corresponding phase change is absent from the file), and
        ``outer_relres`` / ``inner_relres`` (numpy arrays of residual norms
        divided by the first outer residual norm, floored at ``REL_RES_TOL``).

    Raises:
        AssertionError: If a residual history does not contain exactly one
            more entry than the corresponding iteration count.
        KeyError: If an expected field is missing from the result file.
    """
    # Read inside a context manager so the file handle is closed right away;
    # this function runs once per result file
    with open(row["data_path"], "r") as data_file:
        raw_text = data_file.read()

    # json.loads rejects nan/-nan/inf/-inf tokens but accepts NaN, so every
    # such token (infinities included) is rewritten to NaN before parsing.
    # The substitution is purely textual and is not limited to numeric fields.
    lin_solve_data = json.loads(
        re.sub(r"(-nan|nan|-inf|inf)", "NaN", raw_text)
    )

    # Status flags are compared against the string "true", i.e. the file is
    # expected to store them as strings rather than JSON booleans
    additional_row_data = {
        "initiated": lin_solve_data["initiated"] == "true",
        "converged": lin_solve_data["converged"] == "true",
        "terminated": lin_solve_data["terminated"] == "true",
        "outer_iters": lin_solve_data["outer_iterations"],
        "inner_iters": int(np.sum(lin_solve_data["inner_iterations"])),
        "elapsed_time_ms": lin_solve_data["elapsed_time_ms"]
    }

    # Phase changes: outer index read from the file (-1 if absent), then
    # translated into the corresponding inner-iteration index
    additional_row_data["HS_trans_outer_iter"] = read_phase_change(
        lin_solve_data,
        "hlf_sgl_cascade_change"
    )
    additional_row_data["HS_trans_inner_iter"] = translate_phase_change_outer_to_inner(
        additional_row_data["HS_trans_outer_iter"],
        lin_solve_data["inner_iterations"]
    )
    additional_row_data["SD_trans_outer_iter"] = read_phase_change(
        lin_solve_data,
        "sgl_dbl_cascade_change"
    )
    additional_row_data["SD_trans_inner_iter"] = translate_phase_change_outer_to_inner(
        additional_row_data["SD_trans_outer_iter"],
        lin_solve_data["inner_iterations"]
    )

    outer_res_norm_history = lin_solve_data["outer_res_norm_history"]
    inner_res_norm_history = lin_solve_data["inner_res_norm_history"]
    init_res_norm = outer_res_norm_history[0]

    outer_relres = np.array(outer_res_norm_history)/init_res_norm

    # Concatenate the per-outer-iteration inner histories into one sequence.
    # The first entry of every history after the first is dropped --
    # presumably it repeats the last residual of the preceding history
    # (TODO confirm); the length assertion below relies on this.
    inner_relres = []
    for history_index, history in enumerate(inner_res_norm_history):
        inner_relres += history if history_index == 0 else history[1:]
    inner_relres = np.array(inner_relres)/init_res_norm

    # Establish floor of lowest relres as REL_RES_TOL
    outer_relres[outer_relres <= REL_RES_TOL] = REL_RES_TOL
    inner_relres[inner_relres <= REL_RES_TOL] = REL_RES_TOL

    additional_row_data["outer_relres"] = outer_relres
    additional_row_data["inner_relres"] = inner_relres

    # Each history holds the initial residual plus one entry per iteration
    assert additional_row_data["inner_iters"]+1 == additional_row_data["inner_relres"].size, (
        f"inner residual history length does not match inner iteration count in {row['data_path']}"
    )
    assert additional_row_data["outer_iters"]+1 == additional_row_data["outer_relres"].size, (
        f"outer residual history length does not match outer iteration count in {row['data_path']}"
    )

    return additional_row_data

In [6]:
# Keep only the rows whose result file exists, parse each file into extra
# columns, and drop the now-unneeded file path
populated_rows = data_population[data_population["populated"]]
extracted_columns = populated_rows.apply(
    extract_lin_solve_data,
    axis=1,
    result_type="expand"
)
data = populated_rows.join(extracted_columns).drop(columns="data_path")
display(data)

Unnamed: 0,setup,matrix,solver,restart_param,experiment_iter,populated,initiated,converged,terminated,outer_iters,inner_iters,elapsed_time_ms,HS_trans_outer_iter,HS_trans_inner_iter,SD_trans_outer_iter,SD_trans_inner_iter,outer_relres,inner_relres
4080,ilutp1em2,af23560,FP FP16,10,0,True,True,False,True,1500,15000,348242,-1,-1,-1,-1,"[1.0, 0.12240884161261857, 0.03422273561924297...","[1.0, 1.157900925472873, 0.7205927713142924, 0..."
4081,ilutp1em2,af23560,FP FP16,10,1,True,True,False,True,1500,15000,348189,-1,-1,-1,-1,"[1.0, 0.12200900834319824, 0.03265599746967163...","[1.0, 1.1281593633462907, 0.6993294403121761, ..."
4082,ilutp1em2,af23560,FP FP16,10,2,True,True,False,True,1500,15000,348163,-1,-1,-1,-1,"[1.0, 0.1211365949289795, 0.031721622859388454...","[1.0, 1.0871766587196103, 0.7007430796515004, ..."
4104,ilutp1em2,af23560,FP FP32,10,0,True,True,True,True,14,140,3234,-1,-1,-1,-1,"[1.0, 0.11734933791282731, 0.03215894871299636...","[1.0, 1.1575543836183488, 0.7210134584324849, ..."
4105,ilutp1em2,af23560,FP FP32,10,1,True,True,True,True,14,140,3246,-1,-1,-1,-1,"[1.0, 0.11869397039884688, 0.03146533932009303...","[1.0, 1.1296459986651255, 0.6990106966195974, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36475,unprecond,Zhao2,PC HSD S2T,150,1,True,True,True,True,27,3901,7718,16,2102,17,2252,"[1.0, 0.0035827563693714664, 0.001680961968812...","[1.0, 0.9798814359645774, 0.4454125543127461, ..."
36476,unprecond,Zhao2,PC HSD S2T,150,2,True,True,True,True,27,3901,7655,16,2102,17,2252,"[1.0, 0.0035542551630105045, 0.001682663103450...","[1.0, 0.9808629620107184, 0.4513029714477763, ..."
36477,unprecond,Zhao2,PC HSD S2T,200,0,True,True,True,True,20,4000,8345,13,2401,14,2601,"[1.0, 0.0033437792072247615, 0.000694396526344...","[1.0, 0.9766142994493332, 0.44873103183878926,..."
36478,unprecond,Zhao2,PC HSD S2T,200,1,True,True,True,True,19,3800,7930,12,2201,13,2401,"[1.0, 0.005431643250731864, 0.0008261741026476...","[1.0, 0.9779367541164282, 0.4471612329439548, ..."


In [7]:
# Persist the extracted table to the configured pickle path
data.to_pickle(path=extracted_data_path)