add lammps runs

vsoch · vsoch · commit a2d6259471be · 2023-09-23T19:55:04.000-06:00
Signed-off-by: vsoch <vsoch@users.noreply.github.com>
diff --git a/google/kubecon/lammps/run1/README.md b/google/kubecon/lammps/run1/README.md
@@ -318,9 +318,17 @@ Let's plot results! Make sure you have seaborn / matplotlib / pandas installed.
 
 ```bash
 mkdir -p ./img/osu-benchmarks
-python plot-times.py --results ./data/osu-benchmarks --out ./img/osu-benchmarks
+python plot-osu-benchmarks.py --results ./data/osu-benchmarks --out ./img/osu-benchmarks
 ```
 
+### LAMMPS
+
+```bash
+mkdir -p ./img/lammps
+python plot-lammps.py --results ./data/lammps --out ./img/lammps
+```
+
+
 ### Singularity for HPCToolkit
 
 We will need to run this as a post analysis to get the data locally, and across nodes.
diff --git a/google/kubecon/lammps/run1/img/lammps/lammps-times.csv b/google/kubecon/lammps/run1/img/lammps/lammps-times.csv
@@ -0,0 +1,26 @@
+,ranks,pods,time
+0,704,8,5.51242
+1,704,8,5.89967
+2,704,8,6.04301
+3,704,8,6.02985
+4,704,8,5.36226
+5,352,4,3.63868
+6,352,4,3.68496
+7,352,4,3.87733
+8,352,4,3.74458
+9,352,4,3.57988
+10,1408,16,6.50231
+11,1408,16,6.83839
+12,1408,16,7.26995
+13,1408,16,7.07252
+14,1408,16,6.49528
+15,88,1,1.73022
+16,88,1,1.81762
+17,88,1,1.7879
+18,88,1,1.75843
+19,88,1,1.72793
+20,176,2,3.04352
+21,176,2,3.22597
+22,176,2,2.90511
+23,176,2,3.02252
+24,176,2,3.09605
diff --git a/google/kubecon/lammps/run1/img/lammps/lammps_lammps.png b/google/kubecon/lammps/run1/img/lammps/lammps_lammps.png
diff --git a/google/kubecon/lammps/run1/plot-lammps.py b/google/kubecon/lammps/run1/plot-lammps.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+
+import argparse
+import collections
+import fnmatch
+import os
+
+import matplotlib.pyplot as plt
+import metricsoperator.utils as utils
+import pandas
+import seaborn as sns
+from metricsoperator.metrics import get_metric
+
+# Use the "bmh" matplotlib style for every figure made by this script
+plt.style.use("bmh")
+# Directory holding this script, the base for the default --results / --out paths
+here = os.path.dirname(os.path.abspath(__file__))
+
+
+def get_parser():
+    """
+    Build the command line parser for the LAMMPS plotting script.
+
+    Both options default to directories that sit next to this file.
+    """
+    results_default = os.path.join(here, "data", "lammps")
+    out_default = os.path.join(here, "img", "lammps")
+
+    cli = argparse.ArgumentParser(
+        description="Plot LAMMPS",
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+    cli.add_argument(
+        "--results",
+        default=results_default,
+        help="directory with raw results data",
+    )
+    cli.add_argument(
+        "--out",
+        default=out_default,
+        help="directory to save parsed results",
+    )
+    return cli
+
+
+def recursive_find(base, pattern="*.*"):
+    """
+    Walk base and yield the path of every file whose name matches pattern.
+
+    The pattern is a glob matched against the file name only (not the
+    directory part), and every directory level under base is visited.
+    """
+    for dirpath, _dirnames, names in os.walk(base):
+        matched = fnmatch.filter(names, pattern)
+        yield from (os.path.join(dirpath, name) for name in matched)
+
+
+def find_json_inputs(input_dir):
+    """
+    Collect the json results files found anywhere under input_dir.
+
+    A path is kept only when it contains "-small-" and does not contain
+    "cache". Both checks look at the full path, not just the file name.
+    """
+    # We only have data for small
+    return [
+        path
+        for path in recursive_find(input_dir, pattern="*.json")
+        if "-small-" in path and "cache" not in path
+    ]
+
+
+def main():
+    """
+    Run the main plotting operation!
+
+    Finds the json results under --results, parses them into a data frame,
+    saves that frame as lammps-times.csv in --out, and writes the plot to
+    the same directory.
+
+    Raises ValueError if no input files are found.
+    """
+    parser = get_parser()
+    # parse_known_args tolerates extra arguments this script does not define
+    args, _ = parser.parse_known_args()
+
+    # Output images and data
+    outdir = os.path.abspath(args.out)
+    indir = os.path.abspath(args.results)
+    # exist_ok avoids the check-then-create race of a separate exists() test
+    os.makedirs(outdir, exist_ok=True)
+
+    # Find input files (only "-small-" results, skipping anything with cache)
+    files = find_json_inputs(indir)
+    if not files:
+        raise ValueError(f"There are no input files in {indir}")
+
+    # This does the actual parsing of the raw logs into a data frame
+    # with columns ranks, pods, and time
+    df = parse_data(files)
+    df.to_csv(os.path.join(outdir, "lammps-times.csv"))
+    plot_results(df, outdir)
+
+
+def plot_results(df, outdir):
+    """
+    Plot lammps results
+
+    Makes one plot of time against MPI ranks, with one color for each
+    unique value of df.ranks, and hands it to make_plot to save in outdir.
+    """
+    types = list(df.ranks.unique())
+
+    # Keep the 8 color "hls" palette (so existing plots keep their colors)
+    # and only grow it when there are more unique rank counts than that,
+    # otherwise we would run out of colors to hand out.
+    hexcolors = sns.color_palette("hls", max(8, len(types))).as_hex()
+
+    # Colors are assigned in the order the rank counts first appear in df.
+    # ALWAYS double check this ordering, this
+    # is almost always wrong and the colors are messed up
+    palette = collections.OrderedDict(zip(types, hexcolors))
+
+    make_plot(
+        df,
+        title="LAMMPS Times (2x2x2)",
+        tag="lammps",
+        ydimension="time",
+        xdimension="ranks",
+        palette=palette,
+        outdir=outdir,
+        ext="png",
+        plotname="lammps",
+        hue="ranks",
+        plot_type="bar",
+        xlabel="MPI Ranks",
+        ylabel="Time (seconds)",
+    )
+
+
+def parse_data(files):
+    """
+    Given a listing of files, parse into results data frame
+
+    Each file is read as a json list of items. The "data" lines of an item
+    are joined into one raw log and handed to the app-lammps metric parser.
+
+    Returns a data frame with columns ranks, pods, and time (the loop time),
+    with one row for each datum the parser returns.
+    """
+    columns = ["ranks", "pods", "time"]
+    m = get_metric("app-lammps")()
+
+    # Collect plain rows and build the frame once at the end. Growing a data
+    # frame one row at a time gets slower with every insert, and leaves
+    # every column as a generic object column.
+    rows = []
+    for filename in files:
+        # This is a list, each a json result
+        items = utils.read_json(filename)
+        for item in items:
+            # Parse the data into a result, including times
+            # The parser expects a raw log (not by lines)
+            data = "\n".join(item["data"])
+            result = m.parse_log(data)
+
+            # These are used for identifiers across the data
+            pods = result["metadata"]["pods"]
+            rows.extend(
+                [datum["ranks"], pods, datum["loop_time"]]
+                for datum in result["data"]
+            )
+    return pandas.DataFrame(rows, columns=columns)
+
+
+def make_plot(
+    df,
+    title,
+    tag,
+    ydimension,
+    xdimension,
+    palette,
+    xlabel,
+    ylabel,
+    ext="pdf",
+    plotname="lammps",
+    plot_type="violin",
+    hue="ranks",
+    outdir="img",
+):
+    """
+    Helper function to make common plots.
+
+    Draws a violin plot when plot_type is "violin" and a box plot for any
+    other value (including "bar"), then saves the figure to
+    <outdir>/<tag>_<plotname>.<ext>. A leading "." on ext is allowed.
+    """
+    plot_kwargs = {
+        "x": xdimension,
+        "y": ydimension,
+        "hue": hue,
+        "data": df,
+        "palette": palette,
+    }
+    if plot_type == "violin":
+        plotfunc = sns.violinplot
+    else:
+        plotfunc = sns.boxplot
+        # Whiskers at the 5th / 95th percentile are a box plot option,
+        # so they are not passed along to the violin plot
+        plot_kwargs["whis"] = [5, 95]
+
+    ext = ext.strip(".")
+    fig = plt.figure(figsize=(12, 12))
+    sns.set_style("dark")
+    ax = plotfunc(**plot_kwargs)
+    plt.title(title)
+    ax.set_xlabel(xlabel, fontsize=16)
+    ax.set_ylabel(ylabel, fontsize=16)
+    # Resize the tick labels in place. Setting the labels by hand freezes
+    # their text without fixing the tick positions they belong to.
+    ax.tick_params(axis="both", labelsize=14)
+    plt.savefig(os.path.join(outdir, f"{tag}_{plotname}.{ext}"))
+    # Close (not just clear) the figure so it does not stay open in memory
+    plt.close(fig)
+
+
+# Only run the plotting when this file is executed as a script
+if __name__ == "__main__":
+    main()
diff --git a/google/kubecon/lammps/run1/plot-osu-benchmarks.py b/google/kubecon/lammps/run1/plot-osu-benchmarks.py
@@ -1,14 +1,15 @@
 import argparse
-import pandas
-from metricsoperator.metrics import get_metric
-from metricsoperator import utils as utils
-import matplotlib.pyplot as plt
-import seaborn as sns
 import fnmatch
 import json
 import os
 import re
 
+import matplotlib.pyplot as plt
+import pandas
+import seaborn as sns
+from metricsoperator import utils as utils
+from metricsoperator.metrics import get_metric
+
 plt.style.use("bmh")
 here = os.path.dirname(os.path.abspath(__file__))
 
@@ -21,19 +22,16 @@ def get_parser():
     parser.add_argument(
         "--results",
         help="directory with raw results data",
-        default=os.path.join(here, "data"),
+        default=os.path.join(here, "data", "osu-benchmarks"),
     )
     parser.add_argument(
         "--out",
         help="directory to save parsed results",
-        default=os.path.join(here, "img"),
+        default=os.path.join(here, "img", "osu-benchmarks"),
     )
     return parser
 
 
-axes_default = {"x": "Size"}
-
-
 def recursive_find(base, pattern="*.*"):
     """
     Recursively find and yield files matching a glob pattern.
diff --git a/google/kubecon/lammps/run1/run-lammps.py b/google/kubecon/lammps/run1/run-lammps.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 
 import argparse
-import os
 import json
+import os
 import time
-from metricsoperator import MetricsOperator
+
 import metricsoperator.utils as utils
+from metricsoperator import MetricsOperator
 
 here = os.path.abspath(os.path.dirname(__file__))
 
diff --git a/google/kubecon/lammps/run1/run-osu-benchmarks.py b/google/kubecon/lammps/run1/run-osu-benchmarks.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 
 import argparse
-import os
 import json
+import os
 import time
-from metricsoperator import MetricsOperator
+
 import metricsoperator.utils as utils
+from metricsoperator import MetricsOperator
 
 here = os.path.abspath(os.path.dirname(__file__))