In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import subprocess
import os
import sys
sys.path.append('/usr/workspace/vanover1/approx-llvm/approx')
from approx_modules import approx

from sklearn.ensemble import ExtraTreesRegressor

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from ._gradient_boosting import predict_stages
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from ._gradient_boosting import predict_stages


In [2]:
# Name prefixes of the instrumented kernels; profile regions are grouped by these prefixes.
KERNEL_NAMES = ["acc", "sum", "tmp"]
# Derived from the list so the count can never drift out of sync with the names.
N_KERNELS = len(KERNEL_NAMES)

In [3]:
def run_sum2pi_x(endpoint):
    """Build and run the HPAC-instrumented sum2pi_x binary, then read back its profile.

    Parameters
    ----------
    endpoint
        Value formatted into the command line as the single argument of
        ``./sum2pi_x-hpac``.

    Returns
    -------
    kernel_outputs : list
        One aggregated value per entry of ``KERNEL_NAMES`` (same order): the
        mean, over every region whose name starts with that kernel name, of
        the region's mean ``Y()`` value.
    Y
        First element of the ``'product'`` entry of the application output.

    Raises
    ------
    subprocess.CalledProcessError
        If any step of the shell command chain exits with a non-zero status.
    ValueError
        If no profiled region matches one of the kernel names.
    """
    # Remove any stale database, activate the HPAC environment, then compile and
    # run sum2pi_x instrumented with HPAC directives.
    # need to use /usr/tce/bin/git binary because the default git version grabbed by this subprocess is too early for the --show-superproject-working-tree flag
    # NOTE(review): `source` is a bash builtin, so this relies on the shell used by
    # subprocess (/bin/sh) being bash-compatible -- confirm on other machines.
    command = "rm -f test.h5 && source $(/usr/tce/bin/git rev-parse --show-superproject-working-tree)/scripts/activate_hpac_env.sh && make sum2pi_x-hpac && ./sum2pi_x-hpac {}".format(endpoint)
    subprocess.check_call(command, shell=True)

    # open database
    approxDataProfile = approx.approxApplication("./test.h5")

    # get output
    Y = approxDataProfile.getApplicationOutput()['product'][0]

    # get aggregated kernel outputs; the region list does not depend on the
    # kernel, so query it once instead of once per kernel
    region_names = list(approxDataProfile.getRegionNames())
    kernel_outputs = []
    for kernel_name in KERNEL_NAMES:
        region_means = [
            approxDataProfile[region_name].Y().mean()
            for region_name in region_names
            if region_name.startswith(kernel_name)
        ]
        if not region_means:
            # np.mean([]) would quietly produce NaN; fail here with a clear message instead
            raise ValueError(
                "no profiled region name starts with {!r}".format(kernel_name)
            )
        kernel_outputs.append(np.mean(region_means))

    return kernel_outputs, Y

In [5]:
# neighborhood of endpoints examined in the sum2pi_x analysis
center = 2000
n_sample = 2**6

# n_sample consecutive integer endpoints, from center - n_sample/2 up to
# (but not including) center + n_sample/2
XX = center + np.arange(-(n_sample // 2), n_sample // 2)

In [7]:
# run sum2pi_x once per endpoint, collecting the per-kernel aggregates and the
# application output of every run
kernel_outs = []
YY = []
for endpoint in XX:
    run_kernel_outputs, run_output = run_sum2pi_x(endpoint)
    kernel_outs.append(run_kernel_outputs)
    YY.append(run_output)

# one row per run: kernel_outs has one column per kernel, YY one value per run
kernel_outs = np.array(kernel_outs)
YY = np.array(YY)

In [9]:
# analyze kernel output sensitivity: fit the application output against the
# aggregated kernel outputs and report each kernel's feature importance
#
# The split criterion is left at the library default (mean squared error).
# Spelling it out as criterion="mse" only works on older scikit-learn: the
# value was renamed to "squared_error" in 1.0 and the old name later removed.
et = ExtraTreesRegressor(
    n_estimators=100,
    max_depth=8,
    min_samples_split=2,
    # sqrt(n_runs) / sqrt(1000), rounded to an integer, but never below 1
    min_samples_leaf=max(1, int(round(np.sqrt(kernel_outs.shape[0]) / np.sqrt(1000)))),
    min_weight_fraction_leaf=0,
    max_leaf_nodes=None,
    random_state=1,  # fixed seed so the importances are reproducible
)
et.fit(kernel_outs, YY)

# columns of kernel_outs follow KERNEL_NAMES order, so the importances pair up by position
for kernel_name, importance in zip(KERNEL_NAMES, et.feature_importances_):
    print("{} : {}".format(kernel_name, importance))

acc : 0.0
sum : 1.0
tmp : 0.0


## Compare to the ADAPT mixed-precision report:
```
=== BEGIN ADAPT REPORT ===
100001 total independent/intermediate variables
1 dependent variables
Mixed-precision recommendation:
  Replace variable tmp      max error introduced: 1.748455e-04  count: 48000       totalerr: 1.748455e-04
  Replace variable acc      max error introduced: 2.918679e-03  count: 50000       totalerr: 3.093524e-03
  DO NOT replace   sum      max error introduced: 1.385115e-01  count: 2001        totalerr: 1.416050e-01
=== END ADAPT REPORT ===
```