In [4]:
import numpy as np
import pandas as pd
import plotly.express as px
import subprocess
import os
import sys
sys.path.append('/usr/workspace/vanover1/approx-llvm/approx')
from approx_modules import approx

from sklearn.ensemble import ExtraTreesRegressor

In [5]:
# need to use /usr/tce/bin/git binary because the default git version grabbed by this subprocess is too early for the --show-superproject-working-tree flag
REPO_ROOT = subprocess.check_output("/usr/tce/bin/git rev-parse --show-superproject-working-tree --show-toplevel | head -1", shell=True).strip().decode()
N_KERNELS = 3
KERNEL_NAMES = ["result", "s1", "x"]

In [6]:
def run_simpsons(endpoint):
    
    # compile and run simpsons instrumented with HPAC directives
    subprocess.check_call(f"rm -f test.h5 && source {REPO_ROOT}/scripts/activate_env.sh && make simpsons-hpac && ./simpsons-hpac {endpoint}", shell=True)

    # open database
    approxDataProfile = approx.approxApplication("./test.h5")
    
    # get output
    Y = approxDataProfile.getApplicationOutput()['area'][0]

    # get aggregated kernel outputs
    kernel_outputs = []
    for kernel_name in KERNEL_NAMES:
        kernel_outputs.append(np.mean([approxDataProfile[region_name].Y().mean() for region_name in approxDataProfile.getRegionNames() if region_name.startswith(kernel_name)]))
    
    return kernel_outputs, Y

In [7]:
# define neighborhood for simpsons analysis
center = 1.0
eps = 0.05
n_sample = 2**8

# uniform sample of n_sample endpoints from the neighborhood
XX = np.linspace(center-eps, center+eps, n_sample)

In [8]:
try:
    with open(f"XX_{n_sample}n.npy", "rb") as f:
        kernel_outs = np.load(f)
    with open(f"YY_{n_sample}n.npy", "rb") as f:
        YY = np.load(f)
        
except FileNotFoundError:

    # run simpsons
    kernel_outs = []
    YY = []
    for X in XX:
        temp = run_simpsons(X)
        kernel_outs.append(temp[0])
        YY.append(temp[1])
    kernel_outs = np.array(kernel_outs)
    YY = np.array(YY)

    with open(f"XX_{n_sample}n.npy", "wb") as f:
        np.save(f, kernel_outs)
    with open(f"YY_{n_sample}n.npy", "wb") as f:
        np.save(f, YY)

In [9]:
# train/test split
split_proportion = 0.8
split_point = int(n_sample * split_proportion)
XX_train, XX_test = kernel_outs[:split_point], kernel_outs[split_point:]
YY_train, YY_test = YY[:split_point], YY[split_point:]

print(f"Training with {split_point} samples.")

# analyze kernel output sensitivity
et = ExtraTreesRegressor(n_estimators=100,
                                    criterion="mse",
                                    # max_features=int(round(XX.shape[1] / 3)),
                                    max_depth=8,
                                    min_samples_split=2,
                                    min_samples_leaf=max(1, int(round(np.sqrt(XX_train.shape[0]) / np.sqrt(1000)))),
                                    min_weight_fraction_leaf=0,
                                    max_leaf_nodes=None,
                                    #bootstrap=True,
                                    #oob_score=True,
                        )#random_state=1)

et.fit(XX_train, YY_train)
print(f"Score: {et.score(XX_test, YY_test)}")

Si = [(KERNEL_NAMES[k_no], et.feature_importances_[k_no]) for k_no in range(N_KERNELS)]
Si.sort(key = lambda x : x[1])
print(f"Sensitivities: {Si}")

Training with 204 samples.
Score: -1.0391481440985193
Sensitivities: [('result', 0.23971582735453612), ('x', 0.26471934533388686), ('s1', 0.4955648273115771)]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype=np.int)


## Compare to:
```
=== BEGIN ADAPT REPORT ===
6000009 total independent/intermediate variables
1 dependent variables
Mixed-precision recommendation:
  Replace variable b           max error introduced: 0.000000e+00  count: 1           totalerr: 0.000000e+00
  Replace variable a           max error introduced: 0.000000e+00  count: 1           totalerr: 0.000000e+00
  Replace variable h           max error introduced: 4.152677e-15  count: 1           totalerr: 4.152677e-15
  Replace variable pi          max error introduced: 9.154282e-14  count: 1           totalerr: 9.569550e-14
  Replace variable result      max error introduced: 2.967209e-11  count: 2000002     totalerr: 2.976779e-11
  DO NOT replace   x           max error introduced: 2.397519e-07  count: 2000001     totalerr: 2.397817e-07
  DO NOT replace   s1          max error introduced: 8.160601e-05  count: 2000002     totalerr: 8.184579e-05
=== END ADAPT REPORT ===
```

In [10]:
# prepare data for shaff analysis
rows = []
for n in range(n_sample):
    row = {}
    for i, kernel_name in enumerate(KERNEL_NAMES):
        row[kernel_name] = kernel_outs[n][i]
    row["Y"] = YY[n]
    rows.append(row)
df = pd.DataFrame.from_dict(rows)
df = df[["Y"] + KERNEL_NAMES]
df.to_csv(f"df_{split_point}n.csv", index=False)

### From 5 SHAFF analysis runs with 2**8 samples:

```
[1] "result" "s1"     "x"     
[1] 0.3197349 0.3602852 0.3197289
[1] 0.3003396 0.3699853 0.3294306
[1] 0.3185540 0.3746283 0.3065407
[1] 0.3088673 0.3690897 0.3217917
[1] 0.3083466 0.3881378 0.3032808
```