In [1]:
import os
import json
import re
import numpy as np
import pandas as pd
from typing import List, Tuple

from exp_spec_info import *

In [2]:
# Locations of the raw per-solve JSON outputs and of the pickle written at the end of the notebook.
# Raw string literals keep the Windows separators readable without doubled backslashes.
flattened_solve_group_dir = r"C:\Users\dosre\dev\thesis-data\experimental-transfer\experimentation\output_data"
extracted_data_path = r"C:\Users\dosre\dev\thesis-data\extracted_data.pkl"

##### Collect and Examine Data Population

In [3]:
# Walk every planned (setup, matrix, solver, restart parameter, repetition)
# combination and note whether the JSON file expected for it is on disk.
population_records = []
for raw_setup in RAW_SETUPS:
    setup_id = SETUP_TO_ID_MAPPING[raw_setup]
    for matrix_name in SETUP_MATRIX_MAPPING[setup_id]:
        for raw_solver in RAW_SOLVERS:
            for restart in RESTART_PARAMS:
                for repetition in range(N_EXPERIMENT_ITERATIONS):

                    # Layout: <root>/<setup>_inner_iter_<restart>/<matrix>/<repetition>/<solver>.json
                    expected_file = os.path.join(
                        flattened_solve_group_dir,
                        f"{raw_setup}_inner_iter_{restart}",
                        matrix_name,
                        str(repetition),
                        raw_solver + ".json",
                    )

                    record = {
                        "setup": setup_id,
                        "matrix": matrix_name,
                        "solver": SOLVER_TO_ID_MAPPING[raw_solver],
                        "restart_param": restart,
                        "experiment_iter": repetition,
                        "data_path": expected_file,
                        "populated": os.path.exists(expected_file),
                    }
                    population_records.append(record)

# One row per expected run; the column order is fixed explicitly.
population_columns = [
    "setup",
    "matrix",
    "solver",
    "restart_param",
    "experiment_iter",
    "data_path",
    "populated",
]
data_population = pd.DataFrame(population_records, columns=population_columns)

##### Analyze Missing Data

In [4]:
# Expected runs whose output file was not found on disk.
missing_data_population = data_population[~data_population["populated"]]

# Report coverage as an actual percentage (the label says "Percent", so a raw
# fraction such as 0.22 is misleading) and include the underlying counts.
n_expected = len(data_population)
n_collected = n_expected - len(missing_data_population)
# Guard the empty-population case instead of raising ZeroDivisionError.
collected_fraction = n_collected / n_expected if n_expected else 0.0
print(f"Percent of expected data collected: {collected_fraction:.2%} ({n_collected}/{n_expected})")
display(missing_data_population)

Percent of expected data collected: 0.22387609649122808


Unnamed: 0,setup,matrix,solver,restart_param,experiment_iter,data_path,populated
0,ilu0,af23560,FP FP16,10,0,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
1,ilu0,af23560,FP FP16,10,1,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
2,ilu0,af23560,FP FP16,10,2,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
3,ilu0,af23560,FP FP16,20,0,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
4,ilu0,af23560,FP FP16,20,1,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
...,...,...,...,...,...,...,...
36469,unprecond,Zhao2,PC HSD S2T,50,1,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
36470,unprecond,Zhao2,PC HSD S2T,50,2,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
36477,unprecond,Zhao2,PC HSD S2T,200,0,C:\Users\dosre\dev\thesis-data\experimental-tr...,False
36478,unprecond,Zhao2,PC HSD S2T,200,1,C:\Users\dosre\dev\thesis-data\experimental-tr...,False


##### Filter Populated Data and Extract

In [5]:
def read_phase_change(json_data, key) -> int:
    """Return ``json_data[key]``, or -1 when ``key`` is not present.

    The -1 sentinel marks a phase change that was not recorded in the data.
    """
    if key not in json_data:
        return -1
    return json_data[key]

def translate_phase_change_outer_to_inner(outer_iter, inner_iters) -> int:
    """Convert an outer-iteration phase-change index into an inner-iteration index.

    Returns -1 unchanged when ``outer_iter`` is the -1 "no change" sentinel.
    Otherwise returns one plus the sum of the first ``outer_iter - 1`` entries
    of ``inner_iters``.
    """
    if outer_iter != -1:
        # NOTE(review): for outer_iter == 0 the slice bound becomes -1, i.e. "all
        # but the last entry" -- presumably outer_iter is always >= 1; confirm.
        preceding_counts = inner_iters[:outer_iter - 1]
        return int(np.sum(preceding_counts) + 1)
    return -1

def extract_lin_solve_data(row):
    """Read one solve's JSON output file and derive its per-run summary fields.

    Parameters
    ----------
    row : mapping
        Must provide ``row["data_path"]``, the path of the JSON file to read.

    Returns
    -------
    dict
        Keys, in insertion order: ``initiated``, ``converged``, ``terminated``
        (bools, True when the stored value equals the string ``"true"``),
        ``outer_iters``, ``inner_iters`` (sum of the per-outer-iteration
        counts), ``elapsed_time_ms``, ``HS_trans_outer_iter``,
        ``HS_trans_inner_iter``, ``SD_trans_outer_iter``,
        ``SD_trans_inner_iter`` (-1 when the corresponding phase change is
        absent), and ``outer_relres`` / ``inner_relres`` (NumPy arrays of
        residual norms divided by the first outer residual norm).

    Raises
    ------
    AssertionError
        If a relative-residual history does not hold exactly one more entry
        than the matching iteration count.
    """
    # Read inside a context manager so the handle is closed immediately; this
    # function is applied to every populated row, and a bare open().read()
    # leaves closing each file to the garbage collector.
    with open(row["data_path"], "r") as data_file:
        raw_text = data_file.read()

    # Bare nan/inf tokens (either sign) are not valid JSON, so each one is
    # rewritten to NaN, which json.loads accepts.
    # NOTE(review): the pattern is not anchored to token boundaries, so it also
    # rewrites these letter sequences wherever else they appear in the text.
    lin_solve_data = json.loads(
        re.sub(r"(-nan|nan|-inf|inf)", "NaN", raw_text)
    )

    inner_iterations = lin_solve_data["inner_iterations"]

    additional_row_data = {
        "initiated": lin_solve_data["initiated"] == "true",
        "converged": lin_solve_data["converged"] == "true",
        "terminated": lin_solve_data["terminated"] == "true",
        "outer_iters": lin_solve_data["outer_iterations"],
        "inner_iters": int(np.sum(inner_iterations)),
        "elapsed_time_ms": lin_solve_data["elapsed_time_ms"],
    }

    # Phase-change points: first as stored (outer-iteration index), then
    # translated to the equivalent inner-iteration index.
    additional_row_data["HS_trans_outer_iter"] = read_phase_change(
        lin_solve_data,
        "hlf_sgl_cascade_change"
    )
    additional_row_data["HS_trans_inner_iter"] = translate_phase_change_outer_to_inner(
        additional_row_data["HS_trans_outer_iter"],
        inner_iterations
    )
    additional_row_data["SD_trans_outer_iter"] = read_phase_change(
        lin_solve_data,
        "sgl_dbl_cascade_change"
    )
    additional_row_data["SD_trans_inner_iter"] = translate_phase_change_outer_to_inner(
        additional_row_data["SD_trans_outer_iter"],
        inner_iterations
    )

    outer_res_norm_history = lin_solve_data["outer_res_norm_history"]
    inner_res_norm_history = lin_solve_data["inner_res_norm_history"]
    # Every residual is normalised by the first outer residual norm.
    init_res_norm = outer_res_norm_history[0]

    outer_relres = np.array(outer_res_norm_history) / init_res_norm

    # Flatten the per-outer-iteration inner histories into one sequence. The
    # first segment is kept whole; later segments drop their first entry
    # (presumably it repeats the previous segment's final residual -- confirm).
    flat_inner_res_norms = []
    for segment_index, segment in enumerate(inner_res_norm_history):
        flat_inner_res_norms += segment if segment_index == 0 else segment[1:]
    inner_relres = np.array(flat_inner_res_norms) / init_res_norm

    additional_row_data["outer_relres"] = outer_relres
    additional_row_data["inner_relres"] = inner_relres

    # Sanity checks: each history holds the initial residual plus one entry
    # per iteration.
    assert additional_row_data["inner_iters"] + 1 == inner_relres.size, (
        f"inner residual history length {inner_relres.size} does not match "
        f"{additional_row_data['inner_iters']} inner iterations in {row['data_path']}"
    )
    assert additional_row_data["outer_iters"] + 1 == outer_relres.size, (
        f"outer residual history length {outer_relres.size} does not match "
        f"{additional_row_data['outer_iters']} outer iterations in {row['data_path']}"
    )

    return additional_row_data

In [6]:
# Keep only the runs whose output file exists, extract the per-run fields from
# each file, and attach them as new columns aligned on the original row index.
populated_rows = data_population[data_population["populated"]]
extracted_columns = populated_rows.apply(
    extract_lin_solve_data, axis=1, result_type="expand"
)
# The file path is only needed for loading, so it is dropped from the result.
data = populated_rows.join(extracted_columns).drop(columns="data_path")
display(data)

Unnamed: 0,setup,matrix,solver,restart_param,experiment_iter,populated,initiated,converged,terminated,outer_iters,inner_iters,elapsed_time_ms,HS_trans_outer_iter,HS_trans_inner_iter,SD_trans_outer_iter,SD_trans_inner_iter,outer_relres,inner_relres
16560,unpreconddense,af23560,FP FP16,10,0,True,True,False,True,1500,15000,162396,-1,-1,-1,-1,"[1.0, 0.26191688735114216, 0.1433399679231576,...","[1.0, 0.7654300510340972, 0.6727755341655796, ..."
16561,unpreconddense,af23560,FP FP16,10,1,True,True,False,True,1500,15000,162822,-1,-1,-1,-1,"[1.0, 0.2685580552433235, 0.1453978649226777, ...","[1.0, 0.757513762983052, 0.6779277393820136, 0..."
16562,unpreconddense,af23560,FP FP16,10,2,True,True,False,True,1500,15000,163339,-1,-1,-1,-1,"[1.0, 0.27748500207574484, 0.14432843943042656...","[1.0, 0.7524090915924405, 0.6675266546952295, ..."
16584,unpreconddense,af23560,FP FP32,10,0,True,True,False,True,1500,15000,184934,-1,-1,-1,-1,"[1.0, 0.2619283153801638, 0.1433397260418014, ...","[1.0, 0.7654423525881157, 0.6728151381900123, ..."
16585,unpreconddense,af23560,FP FP32,10,1,True,True,False,True,1500,15000,184287,-1,-1,-1,-1,"[1.0, 0.26859348089169455, 0.1454388179350554,...","[1.0, 0.7575270843166467, 0.6779693673676339, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36472,unprecond,Zhao2,PC HSD S2T,100,1,True,True,True,True,39,3801,6639,20,1802,21,1902,"[1.0, 0.006864433302234795, 0.0028329247375971...","[1.0, 0.9796197736458981, 0.44424981435155075,..."
36473,unprecond,Zhao2,PC HSD S2T,100,2,True,True,True,True,38,3701,6499,21,1902,22,2002,"[1.0, 0.006342051508492427, 0.0026946036825350...","[1.0, 0.9768609991788817, 0.44734658205822686,..."
36474,unprecond,Zhao2,PC HSD S2T,150,0,True,True,True,True,27,3901,7705,16,2102,17,2252,"[1.0, 0.003663027921936122, 0.0018994116795691...","[1.0, 0.9791277363805698, 0.4488118616729598, ..."
36475,unprecond,Zhao2,PC HSD S2T,150,1,True,True,True,True,27,3901,7718,16,2102,17,2252,"[1.0, 0.0035827563693714664, 0.001680961968812...","[1.0, 0.9798814359645774, 0.4454125543127461, ..."


In [7]:
# Persist the extracted results frame to the pickle path configured near the top of the notebook.
data.to_pickle(extracted_data_path)