From da975052d9204fc5cbfed1c744dd0cb9113d6fbf Mon Sep 17 00:00:00 2001
From: Joel Lamy-Poirier
Date: Thu, 16 Feb 2023 17:08:35 -0500
Subject: [PATCH 1/6] Add metrics and update base image

---
 Dockerfile                |  2 +-
 src/pipelines/pipeline.py |  3 ++-
 src/utils/benchmark.py    | 27 ++++++++++++++++++---------
 transformers              |  2 +-
 4 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 58c34e2..5249004 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvcr.io/nvidia/pytorch:22.11-py3
+FROM nvcr.io/nvidia/pytorch:23.01-py3
 
 ARG USER=1000
 ARG USERNAME=user
diff --git a/src/pipelines/pipeline.py b/src/pipelines/pipeline.py
index 0e212c4..79b83c4 100644
--- a/src/pipelines/pipeline.py
+++ b/src/pipelines/pipeline.py
@@ -216,7 +216,8 @@ def aggregate_and_format_metrics(self, metrics: List[Dict[str, Any]]):
             "Latency (decode)": format_ms(mean_metrics[DECODE_TIME]),
             "Latency (max)": format_ms(max(all_metrics[END_TO_END_TIME])),
             "Latency (min)": format_ms(min(all_metrics[END_TO_END_TIME])),
-            "Tokens generated": f"{mean_metrics[NUM_GENERATED_TOKENS]:.0f}",
+            "Tokens generated (average)": f"{mean_metrics[NUM_GENERATED_TOKENS]:.0f}",
+            "Tokens generated (total)": f"{np.sum(all_metrics[NUM_GENERATED_TOKENS]).item():.0f}",
             "Throughput (model)": f"{model_throughput:.2f} tokens/s",
             "Throughput (end to end)": f"{throughput:.2f} tokens/s",
             "Token time (end to end)": f"{format_ms(throughput ** -1)}/token",
diff --git a/src/utils/benchmark.py b/src/utils/benchmark.py
index 7882f7f..76c2ab0 100644
--- a/src/utils/benchmark.py
+++ b/src/utils/benchmark.py
@@ -1,6 +1,7 @@
 import contextlib
 import gc
 import logging
+import time
 from typing import List, Union
 
 import torch
@@ -91,8 +92,21 @@ def benchmark_end_to_end(
     else:
         profiler = contextlib.nullcontext()
 
+    benchmark_stats = {
+        "Model parameters": pipeline.get_num_parameters(),
+        "Batch size": len(inputs),
+        **generate_kwargs,
+        **pipeline.get_initialization_metrics(),
+        "Warmup cycles": skip + warmup,
+        "Benchmark cycles": cycles,
+        "Total cycles": skip + warmup + cycles,
+    }
+    t0 = time.perf_counter()
     with profiler as p:
         for step in range(skip + warmup + cycles):
+            if step == skip + warmup:
+                t1 = time.perf_counter()
+                benchmark_stats["Warmup time"] = format_ms(t1 - t0)
             generated_text, metrics = pipeline(inputs, **generate_kwargs)
             if profile:
                 p.step()
@@ -108,18 +122,13 @@ def benchmark_end_to_end(
     torch.cuda.synchronize()
     gc.collect()
     torch.cuda.empty_cache()
+    t2 = time.perf_counter()
+    benchmark_stats["Benchmark time"] = format_ms(t2 - t1)
+    benchmark_stats["Total time"] = format_ms(t2 - t0)
 
     if len(all_metrics) > 0:
         log_rank_n("*** Performance metrics:", logger.info)
         log_dict(pipeline.aggregate_and_format_metrics(all_metrics), logger.info)
 
     log_rank_n("*** Benchmarking stats:", logger.info)
-    log_dict(
-        {
-            "Model parameters": pipeline.get_num_parameters(),
-            "Batch size": len(inputs),
-            **generate_kwargs,
-            **pipeline.get_initialization_metrics(),
-        },
-        logger.info,
-    )
+    log_dict(benchmark_stats, logger.info)
diff --git a/transformers b/transformers
index 03716fa..39030da 160000
--- a/transformers
+++ b/transformers
@@ -1 +1 @@
-Subproject commit 03716fae4a240724e79cf4de53451f94107ad90b
+Subproject commit 39030da8479e3d09b64bed6f8917fe67ea105706

From 75cdb257476c9ca33dd966009ab2b1cfd6de1e45 Mon Sep 17 00:00:00 2001
From: Joel Lamy-Poirier
Date: Fri, 17 Feb 2023 19:19:43 -0500
Subject: [PATCH 2/6] Add trust_remote_code support and stricter config-arg
 parsing

---
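Notes: --trust_remote_code is threaded from the CLI through the pipeline into
AutoModelForCausalLM/AutoConfig (needed for models that ship custom modeling
code, e.g. bigcode/santacoder), and parse_config_args is hardened. A minimal
sketch of the parsing rules in the src/utils/arguments.py hunk below; the keys
and values here are illustrative, not taken from the diff:

    from src.utils.arguments import parse_config_args

    parse_config_args(["n_head=16", "use_cache=true", "note=a=b"])
    # -> {"n_head": 16, "use_cache": True, "note": "a=b"}  (splits on the first "=" only)
    parse_config_args(["1bad=x"])      # ValueError: key is not a python identifier
    parse_config_args(["k=1", "k=2"])  # ValueError: duplicate argument
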
 Makefile                  |  4 ++++
 src/main.py               |  1 +
 src/pipelines/pipeline.py |  9 ++++++++-
 src/utils/arguments.py    | 15 ++++++++++-----
 transformers              |  2 +-
 5 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index 9915de8..9c3394d 100644
--- a/Makefile
+++ b/Makefile
@@ -55,6 +55,10 @@ gpt-bigcode-mqa1:
 gpt-bigcode-mqa2:
 	${RUN_HF} ${BIGCODE_ARGS} attention_type=3
 
+.PHONY: santacoder-original
+santacoder-original:
+	${RUN_HF} --pretrained_model=bigcode/santacoder --tokenizer=bigcode/santacoder --trust_remote_code ${EXP_ARGS}
+
 .PHONY: santacoder
 santacoder:
 	${RUN_HF} --pretrained_model=bigcode/santacoder-fast-inference --tokenizer=bigcode/santacoder ${EXP_ARGS}
diff --git a/src/main.py b/src/main.py
index 8d7ea80..1756ada 100644
--- a/src/main.py
+++ b/src/main.py
@@ -19,6 +19,7 @@ def main(argv: Optional[List[str]] = None) -> None:
         device=args.device,
         dtype=args.dtype,
         fast_init=args.fast_init,
+        trust_remote_code=args.trust_remote_code
     )
 
     benchmark_end_to_end(
diff --git a/src/pipelines/pipeline.py b/src/pipelines/pipeline.py
index 79b83c4..0d367d8 100644
--- a/src/pipelines/pipeline.py
+++ b/src/pipelines/pipeline.py
@@ -47,6 +47,7 @@ def __init__(
         device: torch.device,
         dtype: torch.dtype,
         fast_init: bool = True,
+        trust_remote_code: bool=False,
     ):
         self.initialization_metrics = {}
         log_rank_n("*** Setting up tokenizer", logger.info)
@@ -60,6 +61,7 @@ def __init__(
         self.dtype = dtype
         self.is_int8 = self.dtype == torch.int8
         self.fast_init = fast_init
+        self.trust_remote_code=trust_remote_code
 
         if self.is_int8 and self.device != torch.device("cuda"):
             raise ValueError(f"Model quantization not supported on device {self.device}")
@@ -121,6 +123,7 @@ def _load_pretrained(self, pretrained_model: str) -> PreTrainedModel:
             model = AutoModelForCausalLM.from_pretrained(
                 pretrained_model,
                 config=self.config,
+                trust_remote_code=self.trust_remote_code,
                 **kwargs,
             )
             t1 = time.perf_counter()
@@ -163,7 +166,11 @@ def _get_config(
             )
             config, unused = config_class.from_dict({}, **config_args)
         else:
-            config, unused = config_class.from_pretrained(pretrained_model, **config_args)
+            config, unused = config_class.from_pretrained(
+                pretrained_model,
+                trust_remote_code=self.trust_remote_code,
+                **config_args
+            )
 
         if unused:
             raise ValueError(f"There were unused configuration parameters: {tuple(unused)}")
diff --git a/src/utils/arguments.py b/src/utils/arguments.py
index ccc5684..95fd470 100644
--- a/src/utils/arguments.py
+++ b/src/utils/arguments.py
@@ -13,6 +13,7 @@ def get_arg_parser() -> ArgumentParser:
     parser.add_argument("--model_type")
     parser.add_argument("--pretrained_model")
     parser.add_argument("--tokenizer", default="gpt2")
+    parser.add_argument("--trust_remote_code", action="store_true")
     parser.add_argument("config_args", nargs="*")
 
     # Runtime
@@ -47,10 +48,14 @@ def get_arg_parser() -> ArgumentParser:
 def parse_config_args(config_args: List[str]) -> typing.Dict[str, Any]:
     parsed_config_args = {}
     for config_arg in config_args:
-        try:
-            key, value = [x.strip() for x in config_arg.split("=")]
-        except ValueError:
-            raise ValueError(f"Cannot parse argument: {config_arg}")
+        split_arg=[x.strip() for x in config_arg.split("=", 1)]
+        if len(split_arg)!=2:
+            raise ValueError(f"Cannot parse argument (not in 'key=value' format): {config_arg}")
+        key, value =split_arg
+        if not key.isidentifier():
+            raise ValueError(f"Invalid argument (not a python identifier): {key}")
+        if key in parsed_config_args:
+            raise ValueError(f"Duplicate argument: {key}")
         if value.lower() == "true":
             value = True
         elif value.lower() == "false":
@@ -65,7 +70,7 @@ def parse_config_args(config_args: List[str]) -> typing.Dict[str, Any]:
             value = False
         else:
             try:
                 value = int(value)
             except ValueError:
                 try:
                     value = float(value)
                 except ValueError:
                     pass
-        parsed_config_args[key.strip()] = value
+        parsed_config_args[key] = value
 
     return parsed_config_args
diff --git a/transformers b/transformers
index 39030da..15f94f5 160000
--- a/transformers
+++ b/transformers
@@ -1 +1 @@
-Subproject commit 39030da8479e3d09b64bed6f8917fe67ea105706
+Subproject commit 15f94f57c5c55a7e8be709e1aee3f00ee0eb883e

From 944fe862dd7436f775b7c8ed359e88c14541faac Mon Sep 17 00:00:00 2001
From: Joel Lamy-Poirier
Date: Thu, 23 Feb 2023 17:26:22 -0500
Subject: [PATCH 3/6] pretrained config, revision, optimized-santacoder

---
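Notes: the configuration can now come from --pretrained_config independently
of --pretrained_model (falling back to the model name), and both accept an
optional Hugging Face revision as a "name:revision" suffix, split off by the
new parse_revision helper in src/utils/utils.py. A sketch of the syntax; the
model names are illustrative:

    from src.utils.utils import parse_revision

    parse_revision("bigcode/santacoder")       # -> ("bigcode/santacoder", None)
    parse_revision("bigcode/santacoder:main")  # -> ("bigcode/santacoder", "main")
    parse_revision(None)                       # -> (None, None)
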
 Makefile                  |  4 ++++
 requirements.txt          |  1 +
 src/main.py               |  3 ++-
 src/pipelines/pipeline.py | 27 ++++++++++++++++-----------
 src/utils/arguments.py    |  7 ++++---
 src/utils/utils.py        | 11 ++++++++++-
 transformers              |  2 +-
 7 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/Makefile b/Makefile
index 9c3394d..b1b280a 100644
--- a/Makefile
+++ b/Makefile
@@ -62,3 +62,7 @@ santacoder:
 .PHONY: santacoder
 santacoder:
 	${RUN_HF} --pretrained_model=bigcode/santacoder-fast-inference --tokenizer=bigcode/santacoder ${EXP_ARGS}
+
+.PHONY: optimized-santacoder
+optimized-santacoder:
+	${RUN_HF} --pretrained_model=olivierdehaene/optimized-santacoder --tokenizer=bigcode/santacoder --trust_remote_code ${EXP_ARGS}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 2b8ca55..6e91e92 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 accelerate==0.15.0
 bitsandbytes
+safetensors
 deepspeed==0.7.7
 
 -e ./transformers
diff --git a/src/main.py b/src/main.py
index 1756ada..900fa31 100644
--- a/src/main.py
+++ b/src/main.py
@@ -14,12 +14,13 @@ def main(argv: Optional[List[str]] = None) -> None:
     pipeline = pipeline_class(
         model_type=args.model_type,
         pretrained_model=args.pretrained_model,
+        pretrained_config=args.pretrained_config,
         config_args=args.config_args,
         tokenizer=args.tokenizer,
         device=args.device,
         dtype=args.dtype,
         fast_init=args.fast_init,
-        trust_remote_code=args.trust_remote_code
+        trust_remote_code=args.trust_remote_code,
     )
 
     benchmark_end_to_end(
diff --git a/src/pipelines/pipeline.py b/src/pipelines/pipeline.py
index 0d367d8..7a94701 100644
--- a/src/pipelines/pipeline.py
+++ b/src/pipelines/pipeline.py
@@ -17,7 +17,7 @@
     PretrainedConfig,
     PreTrainedModel,
 )
-
+from src.utils.utils import parse_revision
 logger = logging.getLogger(__name__)
 
@@ -41,13 +41,14 @@ def __init__(
         self,
         *,
         model_type: Optional[str] = None,
+        pretrained_config: Optional[str] = None,
         pretrained_model: Optional[str] = None,
         config_args: Dict[str, Any],
         tokenizer: str,
         device: torch.device,
         dtype: torch.dtype,
         fast_init: bool = True,
-        trust_remote_code: bool=False,
+        trust_remote_code: bool = False,
     ):
         self.initialization_metrics = {}
         log_rank_n("*** Setting up tokenizer", logger.info)
@@ -61,11 +62,11 @@ def __init__(
         self.dtype = dtype
         self.is_int8 = self.dtype == torch.int8
         self.fast_init = fast_init
-        self.trust_remote_code=trust_remote_code
+        self.trust_remote_code = trust_remote_code
 
         if self.is_int8 and self.device != torch.device("cuda"):
             raise ValueError(f"Model quantization not supported on device {self.device}")
-        self.config = self._get_config(model_type, pretrained_model, config_args)
+        self.config = self._get_config(model_type, pretrained_config or pretrained_model, config_args)
 
         t2 = time.perf_counter()
         logger.info(f"Model configuration: {self.config}")
@@ -88,7 +89,9 @@ def _create_model(self) -> PreTrainedModel:
         log_rank_n("*** Creating model", logger.info)
         with fast_init(self.device) if self.fast_init else contextlib.nullcontext():
             torch_dtype = torch.float16 if self.is_int8 else self.dtype
-            model = AutoModelForCausalLM.from_config(config=self.config, torch_dtype=torch_dtype)
+            model = AutoModelForCausalLM.from_config(
+                config=self.config, torch_dtype=torch_dtype, trust_remote_code=self.trust_remote_code
+            )
         t1 = time.perf_counter()
         log_rank_n("*** Moving to device", logger.info)
         model.to(self.device)
@@ -120,8 +123,10 @@ def _load_pretrained(self, pretrained_model: str) -> PreTrainedModel:
         log_rank_n(f"*** Loading model from {pretrained_model}", logger.info)
         kwargs = {"load_in_8bit": True, "device_map": "auto"} if self.is_int8 else {"torch_dtype": self.dtype}
         with fast_init(self.device) if self.fast_init else contextlib.nullcontext():
+            pretrained_model, revision = parse_revision(pretrained_model)
             model = AutoModelForCausalLM.from_pretrained(
                 pretrained_model,
+                revision=revision,
                 config=self.config,
                 trust_remote_code=self.trust_remote_code,
                 **kwargs,
@@ -141,7 +146,7 @@
     def _get_config(
         self,
         model_type: Optional[str],
-        pretrained_model: Optional[str],
+        pretrained_config: Optional[str],
         config_args: Dict[str, Any],
     ) -> PretrainedConfig:
         config_args = {
@@ -149,15 +154,16 @@ def _get_config(
         }
 
         if model_type is None:
-            if pretrained_model is None:
+            if pretrained_config is None:
                 raise ValueError("You need to provide either --model_type or --pretrained_model")
             config_class = AutoConfig
         elif model_type not in CONFIG_MAPPING:
             raise ValueError(f"Unknown model type: {model_type}")
         else:
             config_class = CONFIG_MAPPING[model_type]
+            config_args["model_type"] = model_type
 
-        if pretrained_model is None:
+        if pretrained_config is None:
             config_args.update(
                 {
                     "bos_token_id": self.tokenizer.bos_token_id,
@@ -166,10 +172,9 @@ def _get_config(
             )
             config, unused = config_class.from_dict({}, **config_args)
         else:
+            pretrained_config, revision = parse_revision(pretrained_config)
             config, unused = config_class.from_pretrained(
-                pretrained_model,
-                trust_remote_code=self.trust_remote_code,
-                **config_args
+                pretrained_config, revision=revision, trust_remote_code=self.trust_remote_code, **config_args
             )
 
         if unused:
diff --git a/src/utils/arguments.py b/src/utils/arguments.py
index 95fd470..d5b4d84 100644
--- a/src/utils/arguments.py
+++ b/src/utils/arguments.py
@@ -11,6 +11,7 @@ def get_arg_parser() -> ArgumentParser:
 
     # Model
     parser.add_argument("--model_type")
+    parser.add_argument("--pretrained_config")
     parser.add_argument("--pretrained_model")
     parser.add_argument("--tokenizer", default="gpt2")
     parser.add_argument("--trust_remote_code", action="store_true")
@@ -48,10 +49,10 @@ def get_arg_parser() -> ArgumentParser:
 
 def parse_config_args(config_args: List[str]) -> typing.Dict[str, Any]:
     parsed_config_args = {}
     for config_arg in config_args:
-        split_arg=[x.strip() for x in config_arg.split("=", 1)]
-        if len(split_arg)!=2:
+        split_arg = [x.strip() for x in config_arg.split("=", 1)]
+        if len(split_arg) != 2:
             raise ValueError(f"Cannot parse argument (not in 'key=value' format): {config_arg}")
-        key, value =split_arg
+        key, value = split_arg
         if not key.isidentifier():
             raise ValueError(f"Invalid argument (not a python identifier): {key}")
diff --git a/src/utils/utils.py b/src/utils/utils.py
index d678fe3..7b7a532 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -1,6 +1,6 @@
 import time
 from functools import partial
-from typing import Any, List, Tuple, Union
+from typing import Any, List, Tuple, Union, Optional
 
 
 def run_and_log_time(execs: Union[List[partial], partial]) -> Tuple[Union[List[Any], Any], float]:
@@ -16,3 +16,12 @@ def run_and_log_time(execs: Union[List[partial], partial]) -> Tuple[Union[List[Any], Any], float]:
     time_elapsed = time.perf_counter() - start_time
 
     return results, time_elapsed
+
+
+def parse_revision(pretrained_model: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
+    revision = None
+    if pretrained_model is not None:
+        pretrained_split = pretrained_model.split(":", 1)
+        if len(pretrained_split) == 2:
+            pretrained_model, revision = pretrained_split
+    return pretrained_model, revision
diff --git a/transformers b/transformers
index 15f94f5..7c258b8 160000
--- a/transformers
+++ b/transformers
@@ -1 +1 @@
-Subproject commit 15f94f57c5c55a7e8be709e1aee3f00ee0eb883e
+Subproject commit 7c258b8735ee10f9f5a8c380be92efceaaa051ef

From c04d23fc365efe8da78d734a464e385236360e11 Mon Sep 17 00:00:00 2001
From: Joel Lamy-Poirier
Date: Fri, 24 Feb 2023 19:07:19 -0500
Subject: [PATCH 4/6] Memory usage

---
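Notes: on CUDA devices, allocated/reserved memory is recorded into
benchmark_stats before the loop (with the peak counters reset via
torch.cuda.reset_peak_memory_stats()) and again after it, so the "Max memory"
entries cover only the measured cycles. Readings are formatted by the new
format_mib helper; a sketch of its behavior, with a made-up value:

    def format_mib(m: float):  # mirrors the helper added to src/utils/logging.py below
        return f"{m/2**20:.0f} MiB"

    format_mib(3 * 2**30)  # -> "3072 MiB"
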
 src/pipelines/pipeline.py |  1 +
 src/utils/benchmark.py    | 14 +++++++++++++-
 src/utils/logging.py      |  4 ++++
 transformers              |  2 +-
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/pipelines/pipeline.py b/src/pipelines/pipeline.py
index 7a94701..60806c1 100644
--- a/src/pipelines/pipeline.py
+++ b/src/pipelines/pipeline.py
@@ -103,6 +103,7 @@ def _create_model(self) -> PreTrainedModel:
         self.initialization_metrics["model initialization"] = t1 - t0
         self.initialization_metrics["move to device"] = t2 - t1
         self.initialization_metrics["initialize weights"] = t3 - t2
+
         return model
 
     def _reload_model(self):
diff --git a/src/utils/benchmark.py b/src/utils/benchmark.py
index 76c2ab0..c9b077c 100644
--- a/src/utils/benchmark.py
+++ b/src/utils/benchmark.py
@@ -7,7 +7,7 @@
 import torch
 
 from src.pipelines.pipeline import Pipeline
-from src.utils.logging import format_ms, log_dict, log_rank_n
+from src.utils.logging import format_ms, log_dict, log_rank_n, format_mib
 
 logger = logging.getLogger(__name__)
@@ -101,6 +101,12 @@ def benchmark_end_to_end(
         "Benchmark cycles": cycles,
         "Total cycles": skip + warmup + cycles,
     }
+
+    if pipeline.device.type == "cuda":
+        benchmark_stats["Initial memory used"] = format_mib(torch.cuda.memory_allocated())
+        benchmark_stats["Initial memory reserved"] = format_mib(torch.cuda.memory_reserved())
+        torch.cuda.reset_peak_memory_stats()
+
     t0 = time.perf_counter()
     with profiler as p:
         for step in range(skip + warmup + cycles):
@@ -122,6 +128,12 @@ def benchmark_end_to_end(
     torch.cuda.synchronize()
     gc.collect()
     torch.cuda.empty_cache()
+    if pipeline.device.type == "cuda":
+        benchmark_stats["Memory used"] = format_mib(torch.cuda.memory_allocated())
+        benchmark_stats["Memory reserved"] = format_mib(torch.cuda.memory_reserved())
+        benchmark_stats["Max memory used"] = format_mib(torch.cuda.max_memory_allocated())
+        benchmark_stats["Max memory reserved"] = format_mib(torch.cuda.max_memory_reserved())
+
     t2 = time.perf_counter()
     benchmark_stats["Benchmark time"] = format_ms(t2 - t1)
     benchmark_stats["Total time"] = format_ms(t2 - t0)
diff --git a/src/utils/logging.py b/src/utils/logging.py
index 4ec8a39..9c28276 100644
--- a/src/utils/logging.py
+++ b/src/utils/logging.py
@@ -43,3 +43,7 @@ def log_dict(data: dict, logger: Callable = logging.info, rank: int = 0):
 
 def format_ms(t: float):
     return f"{1000 * t:.2f} ms"
+
+
+def format_mib(m: float):
+    return f"{m/2**20:.0f} MiB"
diff --git a/transformers b/transformers
index 7c258b8..03716fa 160000
--- a/transformers
+++ b/transformers
@@ -1 +1 @@
-Subproject commit 7c258b8735ee10f9f5a8c380be92efceaaa051ef
+Subproject commit 03716fae4a240724e79cf4de53451f94107ad90b

From 322e4f418d21876cd9b079449b87ab230c476239 Mon Sep 17 00:00:00 2001
From: Joel Lamy-Poirier
Date: Fri, 24 Feb 2023 19:44:13 -0500
Subject: [PATCH 5/6] Merge dicts

---
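Notes: the performance metrics are now merged into benchmark_stats instead of
being logged as a separate block, so a run prints a single
"*** Benchmark results:" dictionary; the parse_revision import also moves into
the grouped import block. The net effect, as in the benchmark.py hunk below:

    benchmark_stats.update(pipeline.aggregate_and_format_metrics(all_metrics))
    log_dict(benchmark_stats, logger.info)
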
 src/pipelines/pipeline.py | 3 ++-
 src/utils/benchmark.py    | 7 +++----
 src/utils/utils.py        | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/pipelines/pipeline.py b/src/pipelines/pipeline.py
index 60806c1..86a495c 100644
--- a/src/pipelines/pipeline.py
+++ b/src/pipelines/pipeline.py
@@ -9,6 +9,7 @@
 
 from src.utils.fast_init import fast_init
 from src.utils.logging import format_ms, log_rank_n
+from src.utils.utils import parse_revision
 from transformers import (
     CONFIG_MAPPING,
     AutoConfig,
@@ -17,7 +18,7 @@
     PretrainedConfig,
     PreTrainedModel,
 )
-from src.utils.utils import parse_revision
+
 logger = logging.getLogger(__name__)
 
 
diff --git a/src/utils/benchmark.py b/src/utils/benchmark.py
index c9b077c..ef67dd3 100644
--- a/src/utils/benchmark.py
+++ b/src/utils/benchmark.py
@@ -7,7 +7,7 @@
 import torch
 
 from src.pipelines.pipeline import Pipeline
-from src.utils.logging import format_ms, log_dict, log_rank_n, format_mib
+from src.utils.logging import format_mib, format_ms, log_dict, log_rank_n
 
 logger = logging.getLogger(__name__)
@@ -139,8 +139,7 @@ def benchmark_end_to_end(
     benchmark_stats["Total time"] = format_ms(t2 - t0)
 
     if len(all_metrics) > 0:
-        log_rank_n("*** Performance metrics:", logger.info)
-        log_dict(pipeline.aggregate_and_format_metrics(all_metrics), logger.info)
+        benchmark_stats.update(pipeline.aggregate_and_format_metrics(all_metrics))
 
-    log_rank_n("*** Benchmarking stats:", logger.info)
+    log_rank_n("*** Benchmark results:", logger.info)
     log_dict(benchmark_stats, logger.info)
diff --git a/src/utils/utils.py b/src/utils/utils.py
index 7b7a532..f18079f 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -1,6 +1,6 @@
 import time
 from functools import partial
-from typing import Any, List, Tuple, Union, Optional
+from typing import Any, List, Optional, Tuple, Union
 
 
 def run_and_log_time(execs: Union[List[partial], partial]) -> Tuple[Union[List[Any], Any], float]:

From a638a547eb55b23f057008d745320c3ddedbfc67 Mon Sep 17 00:00:00 2001
From: Joel Lamy-Poirier
Date: Fri, 24 Feb 2023 19:52:21 -0500
Subject: [PATCH 6/6] style

---
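Notes: restores the trailing newline that PATCH 3/6 left off the
optimized-santacoder recipe (the "\ No newline at end of file" marker below
applies to the removed side only).
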
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b1b280a..84e7ba5 100644
--- a/Makefile
+++ b/Makefile
@@ -65,4 +65,4 @@ santacoder:
 
 .PHONY: optimized-santacoder
 optimized-santacoder:
-	${RUN_HF} --pretrained_model=olivierdehaene/optimized-santacoder --tokenizer=bigcode/santacoder --trust_remote_code ${EXP_ARGS}
\ No newline at end of file
+	${RUN_HF} --pretrained_model=olivierdehaene/optimized-santacoder --tokenizer=bigcode/santacoder --trust_remote_code ${EXP_ARGS}