# Add plots for my implementation of adapt error model vs the actual error model vs the partials in order to confirm that the partials are informative, they just need to be combined with an error model; already working with the residual-based analysis

In [2]:
import warnings
warnings.simplefilter("ignore", category=DeprecationWarning)

import numpy as np
import pandas as pd
import plotly.express as px
import itertools
import subprocess
import os
import shutil
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from SALib.sample import latin

# need to use /usr/tce/bin/git binary because the default git version grabbed by this subprocess is too early for the --show-superproject-working-tree flag
REPO_ROOT = subprocess.check_output("/usr/tce/bin/git rev-parse --show-superproject-working-tree --show-toplevel | head -1", shell=True).strip().decode()

In [3]:
N_INPUT_FEATURES = 1
N_KERNELS = 9
KERNEL_NAMES = [
    "x_wxpy",
    "p_wxpy",
    "r_dot",
    "r_wxpy",
    "matvec",
    "a_dot",
    "A_in",
    "x_in",
    "b_in"
    ]

def HPCCG_adapt_analysis(path_to_A): 

    # compile if necessary
    if not os.path.exists("./test_HPCCG"):
        subprocess.check_call(f"source {REPO_ROOT}/scripts/activate_env.sh && make", shell=True)
        
    # run foo instrumented with ADAPT calls
    stdout = subprocess.check_output(f"./test_HPCCG {path_to_A}", shell=True).decode().split("\n")

    partials = []
    adapt_error_model = []
    truncation_errors = []
    for kernel_name in KERNEL_NAMES:
        pruned_lines = [line for line in stdout if kernel_name in line]
        
        assert(len(pruned_lines) == 1)
        
        line = pruned_lines[0]

        temp = float(line.split()[line.split().index("partials:") + 1])
        partials.append(temp)
            
        temp = float(line.split()[line.split().index("introduced:") + 1])
        adapt_error_model.append(temp)
            
        temp = float(line.split()[line.split().index("errors:") + 1])
        truncation_errors.append(temp)
            
    return partials, adapt_error_model, truncation_errors


def generate_matrices(dirname, condition_numbers, density=0.1, dim=2000, generation_method=2):

    # remove directory if it already exists
    if os.path.exists(f"{REPO_ROOT}/examples/HPCCG/test_matrices/{dirname}"):
        shutil.rmtree(f"{REPO_ROOT}/examples/HPCCG/test_matrices/{dirname}")
    
    # create directory for the matrices
    os.mkdir(f"{REPO_ROOT}/examples/HPCCG/test_matrices/{dirname}")

    # write out the desired condition numbers
    with open(f"{REPO_ROOT}/examples/HPCCG/test_matrices/{dirname}/cond_space.txt", "w") as f:
        for c in condition_numbers[:-1]:
            f.write(f"{c},")
        f.write("{}".format(condition_numbers[-1]))
    
    # alter matlab code to match the necessary parameters
    with open(f"{REPO_ROOT}/examples/HPCCG/test_matrices/generate_matrices.m", "r") as f:
        matrix_gen_matlab_src = f.readlines()
                
    with open(f"{REPO_ROOT}/examples/HPCCG/test_matrices/generate_matrices.m", "w") as f:
        for line in matrix_gen_matlab_src:
            if "dim = " in line:
                f.write(f"dim = {dim};\n")
            elif "density = " in line:
                f.write(f"density = {density};\n")
            elif "./" in line:
                f.write(line.replace("./", f"./{dirname}/"))
            elif "generation_method = " in line:
                f.write(f"generation_method = {generation_method};\n")
            else:
                f.write(line)
            
    # run matrix generation
    subprocess.check_call("./generate_matrices.sh", shell=True, cwd=f"{REPO_ROOT}/examples/HPCCG/test_matrices")
    
    # reset matlab code
    with open(f"{REPO_ROOT}/examples/HPCCG/test_matrices/generate_matrices.m", "w") as f:
        for line in matrix_gen_matlab_src:
            f.write(line)
    

class MyPreprocessor:
    def __init__(self, ref, lbs=[], ubs=[]):
        self.mean = np.mean(ref, axis=0)
        self.std = np.std(ref, axis=0)
        if len(lbs) == 0:
            self.min = np.min(ref, axis=0)
        else:
            self.min = lbs
        if len(ubs) == 0:
            self.max = np.max(ref, axis=0)
        else:
            self.max = ubs
                
    def normalize(self, ZZ):
        temp = (ZZ - self.min)/(self.max - self.min)        
        
        # impute a random constant between 0 and 1 for constant values
        try:
            temp[:, np.where(self.std < 1e-16)] = np.random.uniform()
        except IndexError:
            temp[np.where(self.std < 1e-16)] = np.random.uniform()

        return temp
    
    def reverse_normalize(self, ZZ):
        temp = ZZ * (self.max - self.min) + self.min
        
        try:
            temp[:, np.where(self.std < 1e-20)] = self.mean[np.where(self.std == 0)]
        except IndexError:
            temp[np.where(self.std < 1e-20)] = self.mean[np.where(self.std == 0)]
                
        return temp

def get_forest_partitioning(forest, XX_test=[], YY_test=[], depth=None, impurity=None, n_samples=None):
    assert(depth or impurity or n_samples)
    
    # get all the partitionings for the trees in the forest parameterized by depth, impurity, or
    # number of samples. Each of these partitionings is a dict where keys are the bounds of the partition
    # and the values are either the predicted sensitivities for that partition (in the case of depth or impurity
    # stopping values) or the impurity of that partition (in the case of an n_samples stopping value) 
    tree_partitionings = []
    R2_scores = []
    for tree in forest.estimators_:
        tree_partitionings.append(get_tree_partitioning(tree=tree, depth=depth, impurity=impurity, n_samples=n_samples))
        
        # if test data is provided, we include the score of the trees so that we can take a weighted average
        # of the partitionings using the R2 score of the individual trees
        if len(XX_test) != 0 and len(YY_test) != 0:
            R2_scores.append(tree.score(XX_test,YY_test))
            
    # collect all of the endpoints of all of the partitions from all of the trees so that we
    # can merge them into a single partitioning for the forest
    endpoints = [set()]*forest.n_features_
    for p in tree_partitionings:
        for partition_bounds in p.keys():
            for feature_no in range(forest.n_features_):
                endpoints[feature_no].add(partition_bounds[feature_no][0])
                endpoints[feature_no].add(partition_bounds[feature_no][1])
    intervals = [list(pairwise(sorted(x))) for x in endpoints]
    
    # given this new merged partitioning for the forest, inspect each forest partition and take an average of 
    # the values learned for that partition by each of the trees.
    # this average is optionally weighted by the R2 score of the tree from which the tree partition came
    forest_partitioning = {}
    for forest_partition_bounds in itertools.product(*intervals):

        partition_values = []
        partition_impurity = []
        for tree_no in range(len(tree_partitionings)):
            for tree_partition_bounds in tree_partitionings[tree_no].keys():    

                # check if the forest partition falls into the tree partition by checking
                # that each interval for each feature in the target partition is a subinterval of
                # the interval of the corresponding feature in this partition
                found = True
                for feature_no in range(forest.n_features_):
                    if not (forest_partition_bounds[feature_no][0] >= tree_partition_bounds[feature_no][0] and forest_partition_bounds[feature_no][1] <= tree_partition_bounds[feature_no][1]):
                        found = False
                        break

                # if it is the correct tree partition, add it's prediction (optionally weighed by it's R2 score)
                if found:
                    if R2_scores:
                        partition_values.append(abs(R2_scores[tree_no]) * tree_partitionings[tree_no][tree_partition_bounds][0])
                        partition_impurity.append(abs(R2_scores[tree_no]) * tree_partitionings[tree_no][tree_partition_bounds][1])
                    else:
                        partition_values.append(tree_partitionings[tree_no][tree_partition_bounds][0])
                        partition_impurity.append(tree_partitionings[tree_no][tree_partition_bounds][1])
                        
                    break
                else:
                    continue

        forest_partitioning[tuple([tuple(interval) for interval in forest_partition_bounds])] = (np.mean(partition_values, axis = 0).squeeze(), np.mean(partition_impurity, axis = 0).squeeze())
    return forest_partitioning
    
def get_tree_partitioning(tree, depth=None, impurity=None, n_samples=None):
    assert(depth or impurity or n_samples)
    return partitioning_helper(tree.tree_, 0, 0, [(0,1)]*tree.n_features_, depth, impurity, n_samples)
    
def partitioning_helper(tree, node_id, node_depth, node_thresholds, target_depth, target_impurity, target_n_sample):
    
    leaf_node = (tree.children_left[node_id] == -1)
    
    # if the stopping criteria is met, save the bounds of the partition, the predicted value
    # of the partition, and the impurity of the partition
    if ((target_depth and ((node_depth == target_depth) or leaf_node))
        or (target_impurity and ((tree.impurity[node_id] < target_impurity)))
           or (target_n_sample and ((tree.n_node_samples[node_id] <= target_n_sample)))
             or (leaf_node)):
        partition = tuple([(threshold[0], threshold[1]) for threshold in node_thresholds])
        return { partition : (tree.value[node_id], tree.impurity[node_id])}
    
    # otherwise, proceed recursively
    else:
        split_feature = tree.feature[node_id]
        split_threshold = tree.threshold[node_id]
        
        left_node_thresholds = [(x[0], split_threshold) if k_no == split_feature else x for k_no, x in enumerate(node_thresholds)]
        right_node_thresholds = [(split_threshold, x[1]) if k_no == split_feature else x for k_no, x in enumerate(node_thresholds)]
        
        left_partitioning = partitioning_helper(tree, tree.children_left[node_id], node_depth + 1, left_node_thresholds, target_depth, target_impurity, target_n_sample)
        right_partitioning = partitioning_helper(tree, tree.children_right[node_id], node_depth + 1, right_node_thresholds, target_depth, target_impurity, target_n_sample)
        
        return {**left_partitioning, **right_partitioning} 
    
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)    

In [4]:
TOTAL_SAMPLES = 2**5
ORIGINAL_BOUNDS = [
    [10**1, 10**3],
]
DENSITY = 0.1
DIM = 100

In [5]:
samples_per_iter = int(np.sqrt(TOTAL_SAMPLES))
min_samples_per_leaf_sequence = np.linspace(0.5, 2/TOTAL_SAMPLES, int(np.ceil(TOTAL_SAMPLES//samples_per_iter)))

XX3 = np.array([]).reshape(-1, N_INPUT_FEATURES)
SS3 = np.array([]).reshape(-1, N_KERNELS)
df3 = []

# define trivial partition with impurity 1.0
partitioning = {tuple([tuple(interval) for interval in ORIGINAL_BOUNDS]) : 1}

# iterate until we have the required number of samples
i = -1
while XX3.shape[0] < TOTAL_SAMPLES:
    i +=1
    
    XX_this_iter = np.array([]).reshape(-1, N_INPUT_FEATURES)
    SS_this_iter = np.array([]).reshape(-1, N_KERNELS)

    # for the first round of sampling, uniform
    if XX3.shape[0] == 0:
        feature_vals = []
        for feature_no in range(N_INPUT_FEATURES):
            feature_vals.append(np.linspace(*ORIGINAL_BOUNDS[feature_no], int(np.ceil(samples_per_iter**(1/N_INPUT_FEATURES)))))
            
        XX_this_iter = np.concatenate([XX_this_iter, np.array(list(itertools.product(*feature_vals))).reshape(-1, N_INPUT_FEATURES)])

    else:       
        
        # for each partition, draw a number of samples proportional to the impurity
        # and calculate their corresponding partials
        for proportion_of_total_impurity, partition in sorted([(np.abs(value)/np.sum(np.abs(list(partitioning.values()))), partition) for partition, value in partitioning.items()]):
            problem = {
                'num_vars': N_INPUT_FEATURES,
                'names': KERNEL_NAMES,
                'bounds' : partition,
            }
            n_samples_in_this_partition = min(samples_per_iter - XX_this_iter.shape[0], max(1,int(samples_per_iter * proportion_of_total_impurity)))
            XX_this_iter = np.concatenate([XX_this_iter, latin.sample(problem, n_samples_in_this_partition).reshape(-1,N_INPUT_FEATURES)])
            if XX_this_iter.shape[0] == samples_per_iter:
                break                   
    
    generate_matrices(dirname=i, condition_numbers=XX_this_iter.squeeze(), dim=DIM, density=DENSITY)
                
    for matrix_path in [f"{REPO_ROOT}/examples/HPCCG/test_matrices/{i}/{A}" for A in os.listdir(f"{REPO_ROOT}/examples/HPCCG/test_matrices/{i}/") if A.endswith(".dat")]:
        SS, EE, TT = HPCCG_adapt_analysis(matrix_path)
        
        for k_no, kernel_name in enumerate(KERNEL_NAMES):
            df3.append({
                'condition_number' : float(matrix_path[matrix_path.rfind("_") + 1 : matrix_path.rfind(".")]),
                'partial' : SS[k_no],
                'kernel_name' : kernel_name,
                'sample_strategy' : "guided",
                'adapt_error_model' : EE[k_no],
                'truncation_error' : TT[k_no],
                'my_implementation_of_adapt_error_model' : SS[k_no] * TT[k_no]
            })
        SS_this_iter = np.concatenate([SS_this_iter, np.reshape(SS, (1,N_KERNELS))])
            
    XX3 = np.concatenate([XX3, XX_this_iter])
    SS3 = np.concatenate([SS3, SS_this_iter])

    try:
        min_samples_per_leaf = max(2, int(min_samples_per_leaf_sequence[i] * XX3.shape[0]))
    except IndexError:
        min_samples_per_leaf = 2
    
    # train forest on all of data sampled thus far
    XX3_preprocessor = MyPreprocessor(XX3, lbs=np.array(ORIGINAL_BOUNDS)[:,0], ubs=np.array(ORIGINAL_BOUNDS)[:,1])
    XX3_pp = XX3_preprocessor.normalize(XX3)
    SS3_preprocessor = MyPreprocessor(SS3)
    SS3_pp = SS3_preprocessor.normalize(SS3)    
    tree = DecisionTreeRegressor(min_samples_split=2*min_samples_per_leaf)
    
    tree.fit(XX3_pp, SS3_pp)
    
    
    # get partitioning parameterized by n_samples; this returns a set partitions,
    # each of which contains a maximum of n_samples, with the corresponding impurity of that partition 
    normalized_partitioning = get_tree_partitioning(tree, ORIGINAL_BOUNDS, n_samples=min_samples_per_leaf)
                                                    
    # un-normalize the bounds of the partitions and keep only the impurity from the value pair
    partitioning = {}
    for normalized_partition, value_pair in normalized_partitioning.items():
        unnormalized_partition = XX3_preprocessor.reverse_normalize(np.array(normalized_partition).T).T
        partitioning[tuple([tuple(interval) for interval in unnormalized_partition])] = value_pair[1]
                                                    
    print(f"iteration number {i}")
    print(f"\t min samples per leaf: {min_samples_per_leaf}")
    print(f"\t number of partitions: {len(partitioning)}")
    print(f"\t new samples added: {XX_this_iter.shape[0]}")
    print(f"\t total_samples: {XX3.shape[0]}")
    
# shuffle
shuffled_indices = np.arange(XX3.shape[0])
np.random.shuffle(shuffled_indices)
XX3 = XX3[shuffled_indices]
SS3 = SS3[shuffled_indices]

# train/test split
split_proportion = 0.8
split_point = int(XX3.shape[0] * split_proportion)
XX3_train, XX3_test = XX3[:split_point], XX3[split_point:]
SS3_train, SS3_test = SS3[:split_point], SS3[split_point:]

# preprocess
XX3_preprocessor = MyPreprocessor(XX3_train, lbs=np.array(ORIGINAL_BOUNDS)[:,0], ubs=np.array(ORIGINAL_BOUNDS)[:,1])
XX3_train_pp = XX3_preprocessor.normalize(XX3_train)
XX3_test_pp = XX3_preprocessor.normalize(XX3_test)

SS3_preprocessor = MyPreprocessor(SS3_train)
SS3_train_pp = SS3_preprocessor.normalize(SS3_train)
SS3_test_pp = SS3_preprocessor.normalize(SS3_test)

rf3 = RandomForestRegressor(n_estimators=10,
                                    #criterion="mse",
                                    #max_features=int(round(X_train_pp.shape[1]/ 3)),
                                    #max_depth=8,
                                    #min_samples_split=2,
                                    #min_samples_leaf=max(1, int(round(np.sqrt(X_train_pp.shape[0]) / np.sqrt(1000)))),
                                    #min_weight_fraction_leaf=0,
                                    #max_leaf_nodes=None,
                                    #bootstrap=False,
                                    #oob_score=True,
                                    #random_state=1
                                  )

rf3.fit(XX3_train_pp, SS3_train_pp)
print(f"Regressor Score: {rf3.score(XX3_test_pp, SS3_test_pp)}")

ensemble_approx_regime3 = get_forest_partitioning(rf3, XX3_test_pp, SS3_test_pp, depth=1)
print("Number of partitions: {}".format(len(ensemble_approx_regime3)))



iteration number 0
	 min samples per leaf: 2
	 number of partitions: 3
	 new samples added: 5
	 total_samples: 5




iteration number 1
	 min samples per leaf: 4
	 number of partitions: 2
	 new samples added: 5
	 total_samples: 10




iteration number 2
	 min samples per leaf: 4
	 number of partitions: 3
	 new samples added: 4
	 total_samples: 14




iteration number 3
	 min samples per leaf: 4
	 number of partitions: 5
	 new samples added: 5
	 total_samples: 19




iteration number 4
	 min samples per leaf: 3
	 number of partitions: 8
	 new samples added: 5
	 total_samples: 24




iteration number 5
	 min samples per leaf: 2
	 number of partitions: 14
	 new samples added: 5
	 total_samples: 29
iteration number 6
	 min samples per leaf: 2
	 number of partitions: 16
	 new samples added: 5
	 total_samples: 34
Regressor Score: -0.17218516116993146
Number of partitions: 8




In [7]:
feature_vals = []
for feature_no in range(N_INPUT_FEATURES):
    feature_vals.append(np.linspace(*ORIGINAL_BOUNDS[feature_no], int(np.ceil(TOTAL_SAMPLES**(1/N_INPUT_FEATURES)))))

XX1 = np.array(list(itertools.product(*feature_vals))).reshape(-1, N_INPUT_FEATURES)

generate_matrices(dirname="uniform", condition_numbers=XX1.squeeze(), dim=DIM, density=DENSITY)

df1 = []
SS1 = []
for matrix_path in [f"{REPO_ROOT}/examples/HPCCG/test_matrices/uniform/{A}" for A in os.listdir(f"{REPO_ROOT}/examples/HPCCG/test_matrices/uniform/") if A.endswith(".dat")]:
    SS, EE, TT = HPCCG_adapt_analysis(matrix_path)
    SS1.append(SS)
    for k_no, kernel_name in enumerate(KERNEL_NAMES):
        df1.append({
            'condition_number' : float(matrix_path[matrix_path.rfind("_") + 1 : matrix_path.rfind(".")]),
            'partial' : SS[k_no],
            'kernel_name' : kernel_name,
            'sample_strategy' : "uniform",
            'adapt_error_model' : EE[k_no],
            'truncation_error' : TT[k_no],
            'my_implementation_of_adapt_error_model' : SS[k_no] * TT[k_no]
        })
SS1 = np.array(SS1)

# shuffle
shuffled_indices = np.arange(XX1.shape[0])
np.random.shuffle(shuffled_indices)
XX1 = XX1[shuffled_indices]
SS1 = SS1[shuffled_indices]

# train/test split
split_proportion = 0.8
split_point = int(XX1.shape[0] * split_proportion)
XX1_train, XX1_test = XX1[:split_point], XX1[split_point:]
SS1_train, SS1_test = SS1[:split_point], SS1[split_point:]

# preprocess
XX1_preprocessor = MyPreprocessor(XX1_train, lbs=np.array(ORIGINAL_BOUNDS)[:,0], ubs=np.array(ORIGINAL_BOUNDS)[:,1])
XX1_train_pp = XX1_preprocessor.normalize(XX1_train)
XX1_test_pp = XX1_preprocessor.normalize(XX1_test)

SS1_preprocessor = MyPreprocessor(SS1_train)
SS1_train_pp = SS1_preprocessor.normalize(SS1_train)
SS1_test_pp = SS1_preprocessor.normalize(SS1_test)

rf1 = RandomForestRegressor(n_estimators=10,
                                    #criterion="mse",
                                    #max_features=int(round(X_train_pp.shape[1]/ 3)),
                                    #max_depth=8,
                                    #min_samples_split=2,
                                    #min_samples_leaf=max(1, int(round(np.sqrt(X_train_pp.shape[0]) / np.sqrt(1000)))),
                                    #min_weight_fraction_leaf=0,
                                    #max_leaf_nodes=None,
                                    #bootstrap=False,
                                    #oob_score=True,
                                    #random_state=1
                                  )

rf1.fit(XX1_train_pp, SS1_train_pp)
print(f"Regressor Score: {rf1.score(XX1_test_pp, SS1_test_pp)}")

ensemble_approx_regime1 = get_forest_partitioning(rf1, XX1_test_pp, SS1_test_pp, depth=1)
print("Number of partitions: {}".format(len(ensemble_approx_regime1)))

Regressor Score: 0.3386657797930217
Number of partitions: 8




In [9]:
# compare sampling strategies visually
ddf = [pd.DataFrame.from_dict(df) for df in [df1]]#, df3]]
for df in ddf:
    sampling_strategy = df['sample_strategy'].unique()[0]
    fig = px.scatter(
        df,
        x='condition_number',
        y='partial',
        symbol='kernel_name',
        color='kernel_name',
        log_y=True,
        title=f'Observed Kernel Partial Derivatives wrt Output ({sampling_strategy})',
    )
    fig.update_layout(yaxis_tickformat=".2e", xaxis_tickformat=".2e")
    fig.show()
    
    fig = px.scatter(
        df,
        x='condition_number',
        y='my_implementation_of_adapt_error_model',
        symbol='kernel_name',
        color='kernel_name',
        log_y=True,
        title=f'(Max Kernel Truncation Error * Avg Kernel Partial) ({sampling_strategy})',
    )
    fig.update_layout(yaxis_tickformat=".2e", xaxis_tickformat=".2e")
    fig.show()

    fig = px.scatter(
        df,
        x='condition_number',
        y='adapt_error_model',
        symbol='kernel_name',
        color='kernel_name',
        log_y=True,
        title=f'ADAPT error model: max output error per kernel ({sampling_strategy})',
    )
    fig.update_layout(yaxis_tickformat=".2e", xaxis_tickformat=".2e")
    fig.show()

In [None]:
problem = {
    'num_vars': N_INPUT_FEATURES,
    'names': KERNEL_NAMES,
    'bounds' : ORIGINAL_BOUNDS,
}
n_eval_samples = 2*TOTAL_SAMPLES
XX_eval = latin.sample(problem, n_eval_samples)

df_plot = []
for X in XX_eval:
            
    SS_adapt = HPCCG_adapt_analysis(X)

    normalized_X1 = XX1_preprocessor.normalize([X])[0]
    normalized_X3 = XX3_preprocessor.normalize([X])[0]

    SS1_regressor = SS1_preprocessor.reverse_normalize(rf1.predict([normalized_X1]))[0]
    SS3_regressor = SS3_preprocessor.reverse_normalize(rf3.predict([normalized_X3]))[0]

    for k_no, kernel_name in enumerate(KERNEL_NAMES):
        df_plot.append({
            'x' : X[0],                 
            'partial' : SS1_regressor[k_no],
            'kernel_name' : kernel_name,
            'technique'   : 'RF Regressor Prediction (uniform)',
        })
        df_plot.append({
            'x' : X[0],                                
            'partial' : SS3_regressor[k_no],
            'kernel_name' : kernel_name,
            'technique'   : 'RF Regressor Prediction (guided)',
        })
        df_plot.append({
            'x' : X[0],                                   
            'partial' : SS_adapt[k_no],
            'kernel_name' : kernel_name,
            'technique'   : 'ADAPT GT',
        })

    for partition in ensemble_approx_regime1.keys():    
        
        # check if the input falls into this partition
        correct_partition = True
        for feature_no in range(rf1.n_features_):

            if (partition[feature_no][0] > normalized_X1[feature_no]) or (normalized_X1[feature_no] > partition[feature_no][1]):
                correct_partition = False
                break
            else:
                continue

        if correct_partition:
            for k_no, kernel_name in enumerate(KERNEL_NAMES):

                df_plot.append({
                    'x' : X[0],                 
                    'partial' : SS1_preprocessor.reverse_normalize([ensemble_approx_regime1[partition][0]])[0][k_no],
                    'kernel_name' : kernel_name,
                    'technique'   : 'Ensemble Approximation Regime (uniform)',
                })
            break
        else:
            continue

    for partition in ensemble_approx_regime3.keys():    
        
        # check if the input falls into this partition
        correct_partition = True
        for feature_no in range(rf3.n_features_):

            if (partition[feature_no][0] > normalized_X3[feature_no]) or (normalized_X3[feature_no] > partition[feature_no][1]):
                correct_partition = False
                break
            else:
                continue

        if correct_partition:
            for k_no, kernel_name in enumerate(KERNEL_NAMES):

                df_plot.append({
                    'x' : X[0],                 
                    'partial' : np.abs(SS3_preprocessor.reverse_normalize([ensemble_approx_regime3[partition][0]])[0][k_no]),
                    'kernel_name' : kernel_name,
                    'technique'   : 'Ensemble Approximation Regime (guided)',
                })
            break
        else:
            continue

In [None]:
df_plot = pd.DataFrame.from_dict(df_plot)
GT = np.array(df_plot[df_plot.technique == "ADAPT GT"]['partial'].tolist())
for technique in ["RF Regressor Prediction (uniform)", "RF Regressor Prediction (guided)", "Ensemble Approximation Regime (uniform)", "Ensemble Approximation Regime (guided)"]:
    predict = np.array(df_plot[df_plot.technique == technique]['partial'].tolist())
    mse = ((GT-predict)**2).mean()
    print(f"{technique} mse : {mse}")
    
for technique in ["ADAPT GT", "RF Regressor Prediction (uniform)", "RF Regressor Prediction (guided)", "Ensemble Approximation Regime (uniform)", "Ensemble Approximation Regime (guided)" ]:
    fig = px.scatter(
        df_plot[df_plot.technique == technique],
        x='x',
        y='partial',
        color='kernel_name',
        symbol='kernel_name',
        title="Kernel Partials wrt Input X ({})".format(technique),
    )
    fig.show()