Add/result plots (#1)
* running out of memory need the BIG GUNS
* add support for compact
* added results files
Signed-off-by: vsoch <vsoch@users.noreply.github.com>
vsoch committed Mar 13, 2022
1 parent 281364d commit 287f219
Showing 683 changed files with 675,559 additions and 1,316,570 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
docs/_site
119 changes: 107 additions & 12 deletions analysis.py
@@ -32,9 +32,12 @@ def get_parser():
# Helper Functions


def write_json(obj, filename):
def write_json(obj, filename, compact=False):
with open(filename, "w") as fd:
fd.write(json.dumps(obj, indent=4))
if compact:
fd.write(json.dumps(obj))
else:
fd.write(json.dumps(obj, indent=4))


def read_json(filename):
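The new compact flag is what lets the large results-list.json files later in this commit collapse onto a single line, which is the point of the "running out of memory" and "add support for compact" notes in the commit message. A minimal sketch of the size difference it buys, using made-up data that is not part of this commit:

import json

# Hypothetical payload, roughly the shape of a results list in this repo
results = {
    "failed": [
        {"experiment": "example-experiment", "splice": "example-dep",
         "package": "example-pkg", "result": "package-install-failed"}
    ] * 1000
}

pretty = json.dumps(results, indent=4)  # what write_json produced before
compact = json.dumps(results)           # what write_json(..., compact=True) writes now
print(len(pretty), len(compact))        # the indented form is noticeably larger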
@@ -316,23 +319,24 @@ def visualize_package(pkg_dir, experiment, outdir, log_dir):
)

# These logs don't have associated results
no_results = {}
has_no_results = {}
for log in found_logs:
exp_id, log_type = log.rsplit(".", 1)
if exp_id not in no_results:
no_results[exp_id] = {}
no_results[exp_id][log_type] = os.path.join("logs", log)
if exp_id not in has_no_results:
has_no_results[exp_id] = {}
has_no_results[exp_id][log_type] = os.path.join("logs", log)

summary["no-results-generated"] = len(no_results)
summary["no-results-generated"] = len(has_no_results)
summary["results-generated"] = len(logs)
summary["total-runs"] = len(logs) + len(no_results)
summary["total-runs"] = len(logs) + len(has_no_results)
if testers:
print("Found %s testers: %s" % (len(testers), " ".join(testers)))
else:
print("Found 0 testers")
print(summary)

# Create top level data frame
# This data frame is the high level "did it work" df
df = pandas.DataFrame(0, index=rows, columns=cols)

# Assign each outcome a number
@@ -360,13 +364,18 @@ def visualize_package(pkg_dir, experiment, outdir, log_dir):
)

# Save the json to file
write_json(results, os.path.join(result_dir, "results-list.json"))
write_json(results, os.path.join(result_dir, "results-list.json"), compact=True)
df.to_json(os.path.join(result_dir, "results-table.json"))
write_json(outcomes, os.path.join(result_dir, "outcomes.json"))
if logs:
write_json(logs, os.path.join(result_dir, "logs-existing.json"))
if no_results:
write_json(no_results, os.path.join(result_dir, "logs-missing.json"))
logs = True
if has_no_results:
write_json(has_no_results, os.path.join(result_dir, "logs-missing.json"))
has_no_results = True

# Clean up
del results

# These get killed
skips = ["py-libensemble", "heffte"]
@@ -391,20 +400,106 @@ def visualize_package(pkg_dir, experiment, outdir, log_dir):
save_to = os.path.join(result_dir, "%s-%s.%s" % (experiment, package, ext))
fig = plot_heatmap(df, save_to)

del df

# These next dataframes are the "what do the predictions say" data frames
# IMPORTANT this only represents what we can ACTUALLY PREDICT
# failures to splice or spack choking cannot be here
predicts = {}

# Get rows and cols for each predictor
for _, exps in experiments.items():
for exp in exps:

# We only have predictions on successful splice
# to be clear, this experiment is not about predicting splices
# we ASSUME correct splicing and then predict ABI compatibility
for tester_name, preds in exp["predictions"].items():

# here we are collapsing a set of predictions (across libs and binaries) into one set
for pred in preds:
if tester_name not in predicts:
predicts[tester_name] = {}
if exp["package"] not in predicts[tester_name]:
predicts[tester_name][exp["package"]] = {}
if exp["splice"] not in predicts[tester_name][exp["package"]]:
predicts[tester_name][exp["package"]][exp["splice"]] = {
"total": 0,
"predict_work": 0,
"predict_fail": 0,
}
predicts[tester_name][exp["package"]][exp["splice"]]["total"] += 1
if pred["prediction"] == True:
predicts[tester_name][exp["package"]][exp["splice"]][
"predict_work"
] += 1
else:
predicts[tester_name][exp["package"]][exp["splice"]][
"predict_fail"
] += 1

dfs = {}
for tester_name, preds in predicts.items():
rows = set()
cols = set()
for pkg, deps in preds.items():
rows.add(pkg)
[cols.add(x) for x in deps.keys()]
dfs[tester_name] = pandas.DataFrame(0, index=list(rows), columns=list(cols))
for pkg, deps in preds.items():
for dep, counts in deps.items():
dfs[tester_name].loc[pkg, dep] = (
counts["predict_work"] / counts["total"]
)

# Clean up
del predicts
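To make the collapse from raw predictions into per-tester scores concrete, here is a minimal sketch with made-up tester, package, and splice names (not taken from any real run):

import pandas

# Hypothetical counts in the same nested shape as `predicts` above
predicts = {
    "example-tester": {
        "example-pkg": {
            "example-dep": {"total": 4, "predict_work": 3, "predict_fail": 1}
        }
    }
}

preds = predicts["example-tester"]
df = pandas.DataFrame(0.0, index=list(preds), columns=["example-dep"])
for pkg, deps in preds.items():
    for dep, counts in deps.items():
        df.loc[pkg, dep] = counts["predict_work"] / counts["total"]

# df now holds 0.75 at (example-pkg, example-dep): the fraction of predictions,
# across that package's binaries and libraries, that said the splice would work.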

# Save each matrix to file
no_results = False
for tester_name, df in dfs.items():
if df.shape[1] == 0:
print("Warning - empty data frame! No results to show for %s" % pkg_dir)
no_results = True

elif df.shape[1] > 1 and df.shape[0] > 1:
for ext in ["pdf", "png", "svg"]:
save_to = os.path.join(
result_dir, "%s-%s-%s.%s" % (tester_name, experiment, package, ext)
)
fig = plot_clustermap(df, save_to)
else:
for ext in ["pdf", "png", "svg"]:
save_to = os.path.join(
result_dir, "%s-%s-%s.%s" % (tester_name, experiment, package, ext)
)
fig = plot_heatmap(df, save_to)
del df

# Save the filenames for images
listing = ""
if not no_results:
if package not in skips:
for ext in ["pdf", "png", "svg"]:
listing += "%s: %s-%s.%s\n" % (ext, experiment, package, ext)
for tester_name, _ in dfs.items():
for ext in ["pdf", "png", "svg"]:
listing += "%s-%s: %s-%s-%s.%s\n" % (
tester_name,
ext,
tester_name,
experiment,
package,
ext,
)

# And the entry for the results
listing += "results: results-list.json\n"
listing += "outcomes: %s\n" % outcomes
listing += "summary: %s\n" % summary
if logs:
listing += "logs_existing: logs-existing.json\n"
if no_results:
if has_no_results:
listing += "logs_missing: logs-missing.json"

# Generate a markdown for each
6 changes: 3 additions & 3 deletions docs/_data/run1-summary.json
@@ -1,10 +1,10 @@
{
"splice-success": 4946,
"splice-concretization-failed": 122,
"package-install-failed": 2006,
"rewiring-failed": 6280,
"splice-success": 4946,
"splice-install-failed": 2105,
"package-install-failed": 2006,
"splice-failed": 751,
"splice-concretization-failed": 122,
"success-no-prediction": 290,
"predictions": {
"spack-test": 4627,
37 changes: 31 additions & 6 deletions docs/_layouts/result.html
@@ -100,8 +100,11 @@ <h3>{{page.title}}</h3>
{{ content }}

<br>
<p>This plot shows the top level splice outcomes, meaning across predictors. For example, if a package or splice didn't concretize, we will see it here. Note that not every label will necessarily show up in the table; e.g., the value of 0 for "combination-not-run" is added to all results.</p>
<h2>Outcomes</h2>
<p>Welcome to the results page! This page has build or splice results, predictions for the things we could successfully build or splice, and a table of full results and logs. Logs are important because some jobs might never have produced an output file, and this is because HPC is terrible. For this first round, we are able to represent the results in a single data structure (small enough for GitHub); however, with more testers we (Vanessa) will need to refactor the interface again to break the results apart into smaller pieces.</p>

<h2>Plot of Outcomes</h2>

<p>This plot shows the top-level splice and build outcomes, meaning before we get to any kind of prediction. For example, if a package or splice didn't concretize, we will see it here. The labels in the first table below represent the values you'll find in the plot. Note that not every label will necessarily show up in the table; e.g., the value of 0 for "combination-not-run" is added to all results.</p>
<p>
<table class="table table-bordered table-hover dt-responsive display dataTable no-footer dtr-inline">
<thead>
@@ -117,9 +120,8 @@ <h2>Outcomes</h2>
</tr>
{% for outcome in page.outcomes %}<tr><td>{{ outcome[0] }}</td><td>{{ outcome[1]}}</td></tr>{% endfor %}
</tbody>
</table>
</table><br>

<h2>Plot of Outcomes</h2>
<p>Across an experiment, we have multiple testers. However, sometimes there is a failure unrelated to the tester. For example, a concretization fails, spack can't install
the package, and then we can't be sure if the splice failed (or otherwise had a bad outcome) or if it's spack. This is a visualization of the outcomes above, colored by the
values indicated. E.g., if you only see one color, this means there is only one outcome. The package name (experiment) is in the column, and versions are in rows.<span style="font-weight:600">This plot does not hold any prediction information, it only shows statuses of being able to build, splice, etc.</span></p>
@@ -128,9 +130,32 @@ <h2>Plot of Outcomes</h2>
{% if page.png %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.png }}">Download PNG</a> |{% endif %}
{% if page.svg %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.svg }}">Download SVG</a>{% endif %}
{% if page.svg %}<img src="{{ site.baseurl }}{{ page.url }}{{ page.svg }}"/>{% endif %}


<h2>Predictions</h2>

<p>Remember that the predictions are already a biased set because they can ONLY include finished binaries/libraries from which we can derive predictions. E.g., a spack failure to build or splice, or some transient error that makes the job exit, produces no binaries or libraries and cannot be represented here. We can only make predictions on successful splices, and we can only make predictions given that a package can output some combination of binaries and/or libraries needed by a predictor. A prediction is not a prediction of a successful build or splice, but rather: GIVEN a successful build or splice, do we think ABI compatibility will hold? Thus, if there are no successful builds or splices, we have no predictions.</p>
<p><span style="font-weight:600">Important!</span> Since we have many libraries and binaries within each package and splice, each score here is a value between 0 and 1: the total predicted to work divided by the total predictions. E.g., a value of "1" says "out of all the predictions made across binaries and libraries, we think they will ALL work." A value of 0.5 says we predict about half of them to work. We have to do this because there is a variable number of predictions for any given package and dependency splice. In the case of the "spack-test" predictor, this is the actual result of running spack test for the package, so it's less a prediction and more a ground truth. And since there is only one spack test run per package and dependency, this value is derived from that one run.</p>
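As a quick worked example of that score (hypothetical counts): if a tester makes 10 predictions across a package's binaries and libraries and 5 of them say the spliced result will work, the value shown in that cell is 5 / 10 = 0.5.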

{% if page.libabigail-pdf %}<h4>Libabigail</h4>{% else %}<h4>Sorry we couldn't make any predictions for libabigail</h4>{% endif %}
{% if page.libabigail-pdf %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.libabigail-pdf }}">Download Libabigail PDF</a> |{% endif %}
{% if page.libabigail-png %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.libabigail-png }}">Download Libabigail PNG</a> |{% endif %}
{% if page.libabigail-svg %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.libabigail-svg }}">Download Libabigail SVG</a> |{% endif %}
{% if page.libabigail-svg %}<img src="{{ site.baseurl }}{{ page.url }}{{ page.libabigail-svg }}"/>{% endif %}

{% if page.spack-test-pdf %}<h4>Spack Test</h4>{% else %}<h4>Sorry we couldn't run spack test without a successful install</h4>{% endif %}
{% if page.spack-test-pdf %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.spack-test-pdf }}">Download Spack Test PDF</a> |{% endif %}
{% if page.spack-test-png %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.spack-test-png }}">Download Spack Test PNG</a> |{% endif %}
{% if page.spack-test-svg %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.spack-test-svg }}">Download Spack Test SVG</a> |{% endif %}
{% if page.spack-test-svg %}<img src="{{ site.baseurl }}{{ page.url }}{{ page.spack-test-svg }}"/>{% endif %}

{% if page.symbolator-pdf %}<h4>Symbolator</h4>{% else %}<h4>Sorry we couldn't make any predictions for Symbolator</h4>{% endif %}
{% if page.symbolator-pdf %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.symbolator-pdf }}">Download Symbolator PDF</a> |{% endif %}
{% if page.symbolator-png %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.symbolator-png }}">Download Symbolator PNG</a> |{% endif %}
{% if page.symbolator-svg %}<a href="{{ site.baseurl }}{{ page.url }}/{{ page.symbolator-svg }}">Download Symbolator SVG</a> |{% endif %}
{% if page.symbolator-svg %}<img src="{{ site.baseurl }}{{ page.url }}{{ page.symbolator-svg }}"/>{% endif %}

{% if page.results %}<h2>Results</h2>
<p>Since results (and outputs) can vary across testers, we provide an interactive table here for you to explore instead of trying to shove them into a visualization. The raw data is provided alongside each results directory, so you are free to grab it to generate a custom visualization to your liking.</p>
<p>Since results (and outputs) can vary across testers, we provide an interactive table here for you to explore instead of trying to shove them into a visualization. For each row, you can click the "+" to expand it and see more data. Try searching for a result type or predictor of choice in the search box at the top right. The raw data is provided alongside each results directory, so you are free to grab it to generate a custom visualization to your liking.</p>

<table id="results-table" class="table table-hover table-bordered display" cellspacing="0" width="100%">
<thead>
8 changes: 4 additions & 4 deletions docs/_results/run1/aml/logs-existing.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"aml-master-numactl-experiment": {
"out": "logs/aml-master-numactl-experiment.out",
"err": "logs/aml-master-numactl-experiment.err"
},
"aml-0.1.0-numactl-experiment": {
"out": "logs/aml-0.1.0-numactl-experiment.out",
"err": "logs/aml-0.1.0-numactl-experiment.err"
},
"aml-master-numactl-experiment": {
"out": "logs/aml-master-numactl-experiment.out",
"err": "logs/aml-master-numactl-experiment.err"
}
}
17 changes: 1 addition & 16 deletions docs/_results/run1/aml/results-list.json
@@ -1,16 +1 @@
{
"failed": [
{
"experiment": "aml-master-numactl-experiment",
"splice": "numactl",
"package": "aml@master",
"result": "package-install-failed"
},
{
"experiment": "aml-0.1.0-numactl-experiment",
"splice": "numactl",
"package": "aml@0.1.0",
"result": "package-install-failed"
}
]
}
{"failed": [{"experiment": "aml-0.1.0-numactl-experiment", "splice": "numactl", "package": "aml@0.1.0", "result": "package-install-failed"}, {"experiment": "aml-master-numactl-experiment", "splice": "numactl", "package": "aml@master", "result": "package-install-failed"}]}
Binary file modified docs/_results/run1/aml/run1-aml.pdf
Binary file not shown.
Binary file modified docs/_results/run1/aml/run1-aml.png
