In [None]:
from datasets import load_dataset

In [None]:
# Model identifier passed to both experiment runners further down.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Prompts: the "text" field of the first 10 rows of the "test" split of the
# "arxiv" configuration of lighteval/pile_helm.
ds = load_dataset("lighteval/pile_helm", "arxiv")["test"].select(range(10))
prompts = [row["text"] for row in ds]

# Settings forwarded to run_experiment by text_gen_experiment.
use_optimum = False

backend = "pytorch"


# PyTorch


In [None]:
from pytorth_experiment_fns import *
from accelerate import notebook_launcher

def text_gen_experiment():
    """Run the text-generation experiment using the notebook-level settings.

    Reads the globals ``model_name``, ``prompts``, ``backend`` and
    ``use_optimum`` and forwards them to ``run_experiment`` together with
    ``run_gen_inference_with_metrics`` as the inference function and the
    fixed token limits / batch size listed below.

    Takes no arguments and returns ``None``; the value returned by
    ``run_experiment`` is not propagated.
    """
    experiment_kwargs = {
        "model_name": model_name,
        "prompts": prompts,
        "inference_fn": run_gen_inference_with_metrics,
        "task_type": "text_generation",
        "backend": backend,
        "use_optimum": use_optimum,
        "max_input_tokens": 512,
        "max_output_tokens": 50,
        "batch_size": 8,
    }
    run_experiment(**experiment_kwargs)

# Hand text_gen_experiment to accelerate's notebook_launcher, requesting 4 processes.
# NOTE(review): presumably one process per GPU — confirm the machine exposes 4 devices.
notebook_launcher(text_gen_experiment, num_processes=4)


# vLLM

**NB:** the vLLM experiment below runs into memory problems.

In [None]:
from vllm_experiment import *

In [None]:
# Run the text-generation workload through run_experiment_vllm, using the same
# model, prompts, token limits and batch size that text_gen_experiment passes
# to run_experiment, and keep whatever the helper returns.
results = run_experiment_vllm(
    model_name=model_name,
    prompts=prompts,
    task_type="text_generation",
    max_input_tokens=512,
    max_output_tokens=50,
    batch_size=8
)

# Show the returned value as plain text.
print(results)

In [None]:
from optimum_benchmark import Benchmark, BenchmarkConfig, TorchrunConfig, InferenceConfig, PyTorchConfig
from optimum_benchmark.logging_utils import setup_logging

# Route optimum-benchmark's logging to the console at INFO level.
setup_logging(level="INFO", handlers=["console"])

# NOTE(review): the guard is kept from the original cell; presumably it prevents
# the benchmark from being re-launched when the launcher starts worker
# processes — confirm against the optimum-benchmark documentation.
if __name__ == "__main__":
    # Hub repository used by every push/load below, defined once so the call
    # sites cannot drift apart.
    # NOTE(review): this namespace looks like another user's account — confirm
    # you have write access, or point this at your own repository.
    HUB_REPO_ID = "IlyasMoutawwakil/pytorch_gpt2"

    # Benchmark definition: torchrun launcher with 2 processes per node,
    # inference scenario tracking latency and memory, PyTorch backend running
    # gpt2 on CUDA devices 0 and 1 with no_weights=True.
    launcher_config = TorchrunConfig(nproc_per_node=2)
    scenario_config = InferenceConfig(latency=True, memory=True)
    backend_config = PyTorchConfig(model="gpt2", device="cuda", device_ids="0,1", no_weights=True)
    benchmark_config = BenchmarkConfig(
        name="pytorch_gpt2",
        scenario=scenario_config,
        launcher=launcher_config,
        backend=backend_config,
    )
    benchmark_report = Benchmark.launch(benchmark_config)

    # convert artifacts to a dictionary (or to a dataframe with
    # benchmark_config.to_dataframe()); the result is kept rather than discarded
    benchmark_config_dict = benchmark_config.to_dict()

    # save artifacts to disk as json or csv files
    benchmark_report.save_csv("benchmark_report.csv")  # or benchmark_report.save_json("benchmark_report.json")

    # push artifacts to the hub
    benchmark_config.push_to_hub(HUB_REPO_ID)

    # or merge them into a single artifact
    benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
    benchmark.save_json("benchmark.json")  # or benchmark.save_csv("benchmark.csv")
    benchmark.push_to_hub(HUB_REPO_ID)

    # load artifacts from the hub
    benchmark = Benchmark.from_hub(HUB_REPO_ID)

    # or load them from disk
    benchmark = Benchmark.load_json("benchmark.json")  # or Benchmark.load_csv("benchmark_report.csv")