Add/result plots (#1)
* running out of memory need the BIG GUNS
* add support for compact
* added results files
Signed-off-by: vsoch <vsoch@users.noreply.github.com>
vsoch committed Mar 13, 2022
1 parent 281364d commit 287f219
Showing 683 changed files with 675,559 additions and 1,316,570 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
docs/_site
119 changes: 107 additions & 12 deletions analysis.py
@@ -32,9 +32,12 @@ def get_parser():
# Helper Functions


def write_json(obj, filename):
def write_json(obj, filename, compact=False):
with open(filename, "w") as fd:
fd.write(json.dumps(obj, indent=4))
if compact:
fd.write(json.dumps(obj))
else:
fd.write(json.dumps(obj, indent=4))


def read_json(filename):
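The new compact flag is what lets the large results-list.json files later in this commit collapse onto a single line, which is the point of the "running out of memory" and "add support for compact" notes in the commit message. A minimal sketch of the size difference it buys, using made-up data that is not part of this commit:

import json

# Hypothetical payload, roughly the shape of a results list in this repo
results = {
    "failed": [
        {"experiment": "example-experiment", "splice": "example-dep",
         "package": "example-pkg", "result": "package-install-failed"}
    ] * 1000
}

pretty = json.dumps(results, indent=4)  # what write_json produced before
compact = json.dumps(results)           # what write_json(..., compact=True) writes now
print(len(pretty), len(compact))        # the indented form is noticeably larger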
@@ -316,23 +319,24 @@ def visualize_package(pkg_dir, experiment, outdir, log_dir):
)

# These logs don't have associated results
no_results = {}
has_no_results = {}
for log in found_logs:
exp_id, log_type = log.rsplit(".", 1)
if exp_id not in no_results:
no_results[exp_id] = {}
no_results[exp_id][log_type] = os.path.join("logs", log)
if exp_id not in has_no_results:
has_no_results[exp_id] = {}
has_no_results[exp_id][log_type] = os.path.join("logs", log)

summary["no-results-generated"] = len(no_results)
summary["no-results-generated"] = len(has_no_results)
summary["results-generated"] = len(logs)
summary["total-runs"] = len(logs) + len(no_results)
summary["total-runs"] = len(logs) + len(has_no_results)
if testers:
print("Found %s testers: %s" % (len(testers), " ".join(testers)))
else:
print("Found 0 testers")
print(summary)

# Create top level data frame
# This data frame is the high level "did it work" df
df = pandas.DataFrame(0, index=rows, columns=cols)

# Assign each outcome a number
@@ -360,13 +364,18 @@ def visualize_package(pkg_dir, experiment, outdir, log_dir):
)

# Save the json to file
write_json(results, os.path.join(result_dir, "results-list.json"))
write_json(results, os.path.join(result_dir, "results-list.json"), compact=True)
df.to_json(os.path.join(result_dir, "results-table.json"))
write_json(outcomes, os.path.join(result_dir, "outcomes.json"))
if logs:
write_json(logs, os.path.join(result_dir, "logs-existing.json"))
if no_results:
write_json(no_results, os.path.join(result_dir, "logs-missing.json"))
logs = True
if has_no_results:
write_json(has_no_results, os.path.join(result_dir, "logs-missing.json"))
has_no_results = True

# Clean up
del results

# These get killed
skips = ["py-libensemble", "heffte"]
@@ -391,20 +400,106 @@ def visualize_package(pkg_dir, experiment, outdir, log_dir):
save_to = os.path.join(result_dir, "%s-%s.%s" % (experiment, package, ext))
fig = plot_heatmap(df, save_to)

del df

# These next dataframes are the "what do the predictions say" data frames
# IMPORTANT this only represents what we can ACTUALLY PREDICT
# failures to splice or spack choking cannot be here
predicts = {}

# Get rows and cols for each predictor
for _, exps in experiments.items():
for exp in exps:

# We only have predictions on successful splice
# to be clear, this experiment is not about predicting splices
# we ASSUME correct splicing and then predict ABI compatibility
for tester_name, preds in exp["predictions"].items():

# here we are collapsing a set of predictions (across libs and binaries) into one set
for pred in preds:
if tester_name not in predicts:
predicts[tester_name] = {}
if exp["package"] not in predicts[tester_name]:
predicts[tester_name][exp["package"]] = {}
if exp["splice"] not in predicts[tester_name][exp["package"]]:
predicts[tester_name][exp["package"]][exp["splice"]] = {
"total": 0,
"predict_work": 0,
"predict_fail": 0,
}
predicts[tester_name][exp["package"]][exp["splice"]]["total"] += 1
if pred["prediction"] == True:
predicts[tester_name][exp["package"]][exp["splice"]][
"predict_work"
] += 1
else:
predicts[tester_name][exp["package"]][exp["splice"]][
"predict_fail"
] += 1

dfs = {}
for tester_name, preds in predicts.items():
rows = set()
cols = set()
for pkg, deps in preds.items():
rows.add(pkg)
[cols.add(x) for x in deps.keys()]
dfs[tester_name] = pandas.DataFrame(0, index=list(rows), columns=list(cols))
for pkg, deps in preds.items():
for dep, counts in deps.items():
dfs[tester_name].loc[pkg, dep] = (
counts["predict_work"] / counts["total"]
)

# Clean up
del predicts
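To make the collapse from raw predictions into per-tester scores concrete, here is a minimal sketch with made-up tester, package, and splice names (not taken from any real run):

import pandas

# Hypothetical counts in the same nested shape as `predicts` above
predicts = {
    "example-tester": {
        "example-pkg": {
            "example-dep": {"total": 4, "predict_work": 3, "predict_fail": 1}
        }
    }
}

preds = predicts["example-tester"]
df = pandas.DataFrame(0.0, index=list(preds), columns=["example-dep"])
for pkg, deps in preds.items():
    for dep, counts in deps.items():
        df.loc[pkg, dep] = counts["predict_work"] / counts["total"]

# df now holds 0.75 at (example-pkg, example-dep): the fraction of predictions,
# across that package's binaries and libraries, that said the splice would work.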

# Save each matrix to file
no_results = False
for tester_name, df in dfs.items():
if df.shape[1] == 0:
print("Warning - empty data frame! No results to show for %s" % pkg_dir)
no_results = True

elif df.shape[1] > 1 and df.shape[0] > 1:
for ext in ["pdf", "png", "svg"]:
save_to = os.path.join(
result_dir, "%s-%s-%s.%s" % (tester_name, experiment, package, ext)
)
fig = plot_clustermap(df, save_to)
else:
for ext in ["pdf", "png", "svg"]:
save_to = os.path.join(
result_dir, "%s-%s-%s.%s" % (tester_name, experiment, package, ext)
)
fig = plot_heatmap(df, save_to)
del df

# Save the filenames for images
listing = ""
if not no_results:
if package not in skips:
for ext in ["pdf", "png", "svg"]:
listing += "%s: %s-%s.%s\n" % (ext, experiment, package, ext)
for tester_name, _ in dfs.items():
for ext in ["pdf", "png", "svg"]:
listing += "%s-%s: %s-%s-%s.%s\n" % (
tester_name,
ext,
tester_name,
experiment,
package,
ext,
)

# And the entry for the results
listing += "results: results-list.json\n"
listing += "outcomes: %s\n" % outcomes
listing += "summary: %s\n" % summary
if logs:
listing += "logs_existing: logs-existing.json\n"
if no_results:
if has_no_results:
listing += "logs_missing: logs-missing.json"

# Generate a markdown for each
6 changes: 3 additions & 3 deletions docs/_data/run1-summary.json
@@ -1,10 +1,10 @@
{
"splice-success": 4946,
"splice-concretization-failed": 122,
"package-install-failed": 2006,
"rewiring-failed": 6280,
"splice-success": 4946,
"splice-install-failed": 2105,
"package-install-failed": 2006,
"splice-failed": 751,
"splice-concretization-failed": 122,
"success-no-prediction": 290,
"predictions": {
"spack-test": 4627,
37 changes: 31 additions & 6 deletions docs/_layouts/result.html
@@ -100,8 +100,11 @@ <h3>{{page.title}}</h3>
{{ content }}

<br>
<p>This plot shows the top level splice outcomes, meaning across predictors. For example, if a package or splice didn't concretize, we will see it here. Note that not every label will necessarily show up in the table; e.g., the value of 0 for "combination-not-run" is added to all results.</p>
<h2>Outcomes</h2>
<p>Welcome to the results page! This page has build or splice results, predictions for the things we could successfully build or splice, and a table of full results and logs. Logs are important because some jobs might never have produced an output file, and this is because HPC is terrible. For this first round, we are able to represent the results in a single data structure (small enough for GitHub); however, with more testers we (Vanessa) will need to refactor the interface again to break the results apart into smaller pieces.</p>

<h2>Plot of Outcomes</h2>

<p>This plot shows the top-level splice and build outcomes, meaning before we get to any kind of prediction. For example, if a package or splice didn't concretize, we will see it here. The labels in the first table below represent the values you'll find in the plot. Note that not every label will necessarily show up in the table; e.g., the value of 0 for "combination-not-run" is added to all results.</p>
<p>
<table class="table table-bordered table-hover dt-responsive display dataTable no-footer dtr-inline">
<thead>
@@ -117,9 +120,8 @@ <h2>Outcomes</h2>
</tr>
{% for outcome in page.outcomes %}<tr><td>{{ outcome[0] }}</td><td>{{ outcome[1]}}</td></tr>{% endfor %}
</tbody>
</table>
</table><br>

<h2>Plot of Outcomes</h2>
<p>Across an experiment, we have multiple testers. However, sometimes there is a failure unrelated to the tester. For example, a concretization fails, spack can't install
the package, and then we can't be sure if the splice failed (or otherwise had a bad outcome) or if it's spack. This is a visualization of the outcomes above, colored by the
values indicated. E.g., if you only see one color, this means there is only one outcome. The package name (experiment) is in the column, and versions are in rows.<span style="font-weight:600">This plot does not hold any prediction information, it only shows statuses of being able to build, splice, etc.</span></p>
@@ -128,9 +130,32 @@ <h2>Plot of Outcomes</h2>
{% if page.png %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.png }}">Download PNG</a> |{% endif %}
{% if page.svg %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.svg }}">Download SVG</a>{% endif %}
{% if page.svg %}<img src="{{ site.baseurl }}{{ page.url }}{{ page.svg }}"/>{% endif %}


<h2>Predictions</h2>

<p>Remember that the predictions are already a biased set because they can ONLY include finished binaries/libraries from which we can derive predictions. E.g., a spack failure to build or splice, or some transient error that makes the job exit, produces no binaries or libraries and cannot be represented here. We can only make predictions on successful splices, and we can only make predictions given that a package can output some combination of binaries and/or libraries needed by a predictor. A prediction is not a prediction of a successful build or splice, but rather: GIVEN a successful build or splice, do we think ABI compatibility will hold? Thus, if there are no successful builds or splices, we have no predictions.</p>
<p><span style="font-weight:600">Important!</span> Since we have many libraries and binaries within each package and splice, each score here is a value between 0 and 1: the total predicted to work divided by the total predictions. E.g., a value of "1" says "out of all the predictions made across binaries and libraries, we think they will ALL work." A value of 0.5 says we predict about half of them to work. We have to do this because there is a variable number of predictions for any given package and dependency splice. In the case of the "spack-test" predictor, this is the actual result of running spack test for the package, so it's less a prediction and more a ground truth. And since there is only one spack test run per package and dependency, this value is derived from that one run.</p>
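As a quick worked example of that score (hypothetical counts): if a tester makes 10 predictions across a package's binaries and libraries and 5 of them say the spliced result will work, the value shown in that cell is 5 / 10 = 0.5.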

{% if page.libabigail-pdf %}<h4>Libabigail</h4>{% else %}<h4>Sorry we couldn't make any predictions for libabigail</h4>{% endif %}
{% if page.libabigail-pdf %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.libabigail-pdf }}">Download Libabigail PDF</a> |{% endif %}
{% if page.libabigail-png %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.libabigail-png }}">Download Libabigail PNG</a> |{% endif %}
{% if page.libabigail-svg %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.libabigail-svg }}">Download Libabigail SVG</a> |{% endif %}
{% if page.libabigail-svg %}<img src="{{ site.baseurl }}{{ page.url }}{{ page.libabigail-svg }}"/>{% endif %}

{% if page.spack-test-pdf %}<h4>Spack Test</h4>{% else %}<h4>Sorry we couldn't run spack test without a successful install</h4>{% endif %}
{% if page.spack-test-pdf %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.spack-test-pdf }}">Download Spack Test PDF</a> |{% endif %}
{% if page.spack-test-png %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.spack-test-png }}">Download Spack Test PNG</a> |{% endif %}
{% if page.spack-test-svg %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.spack-test-svg }}">Download Spack Test SVG</a> |{% endif %}
{% if page.spack-test-svg %}<img src="{{ site.baseurl }}{{ page.url }}{{ page.spack-test-svg }}"/>{% endif %}

{% if page.symbolator-pdf %}<h4>Symbolator</h4>{% else %}<h4>Sorry we couldn't make any predictions for Symbolator</h4>{% endif %}
{% if page.symbolator-pdf %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.symbolator-pdf }}">Download Symbolator PDF</a> |{% endif %}
{% if page.symbolator-png %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.symbolator-png }}">Download Symbolator PNG</a> |{% endif %}
{% if page.symbolator-svg %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.symbolator-svg }}">Download Symbolator SVG</a> |{% endif %}
{% if page.symbolator-svg %}<img src="{{ site.baseurl }}{{ page.url }}{{ page.symbolator-svg }}"/>{% endif %}

{% if page.results %}<h2>Results</h2>
<p>Since results (and outputs) can vary across testers, we provide an interactive table here for you to explore instead of trying to shove them into a visualization. The raw data is provided alongside each results directory, so you are free to grab it to generate a custom visualization to your liking.</p>
<p>Since results (and outputs) can vary across testers, we provide an interactive table here for you to explore instead of trying to shove them into a visualization. For each row, you can click the "+" to expand it and see more data. Try searching for a result type or predictor of choice in the search box at the top right. The raw data is provided alongside each results directory, so you are free to grab it to generate a custom visualization to your liking.</p>

<table id="results-table" class="table table-hover table-bordered display" cellspacing="0" width="100%">
<thead>
8 changes: 4 additions & 4 deletions docs/_results/run1/aml/logs-existing.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"aml-master-numactl-experiment": {
"out": "logs/aml-master-numactl-experiment.out",
"err": "logs/aml-master-numactl-experiment.err"
},
"aml-0.1.0-numactl-experiment": {
"out": "logs/aml-0.1.0-numactl-experiment.out",
"err": "logs/aml-0.1.0-numactl-experiment.err"
},
"aml-master-numactl-experiment": {
"out": "logs/aml-master-numactl-experiment.out",
"err": "logs/aml-master-numactl-experiment.err"
}
}
17 changes: 1 addition & 16 deletions docs/_results/run1/aml/results-list.json
@@ -1,16 +1 @@
{
"failed": [
{
"experiment": "aml-master-numactl-experiment",
"splice": "numactl",
"package": "aml@master",
"result": "package-install-failed"
},
{
"experiment": "aml-0.1.0-numactl-experiment",
"splice": "numactl",
"package": "aml@0.1.0",
"result": "package-install-failed"
}
]
}
{"failed": [{"experiment": "aml-0.1.0-numactl-experiment", "splice": "numactl", "package": "aml@0.1.0", "result": "package-install-failed"}, {"experiment": "aml-master-numactl-experiment", "splice": "numactl", "package": "aml@master", "result": "package-install-failed"}]}
Binary file modified docs/_results/run1/aml/run1-aml.pdf
Binary file not shown.
Binary file modified docs/_results/run1/aml/run1-aml.png
