In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from huggingface_hub import login

# Authenticate this session with the Hugging Face Hub; in a notebook the call
# renders an interactive login widget (see the cell output).
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
# DATASET CONFIG
# Hub dataset the benchmark texts come from, and the model whose tokenizer is
# loaded and whose repository is deployed.
DATASET = "tyqiangz/multilingual-sentiments"
MODEL = "lxyuan/distilbert-base-multilingual-cased-sentiments-student"

# Token-length bounds and sample count handed to sample_dataset.
MIN_TOKENS = 50
MAX_TOKENS = 512
SAMPLES = 10_000

# Upper bound on k6 virtual users handed to optimal_vus.
MAX_VUS = 2000

# Dataset column that holds the raw text.
TEXT_COLUMN = "text"

# Where the sampled dataset is written and later read by the load test.
DATASET_PATH = "data/dataset.json"

# Path to the k6 executable.
K6_BIN = "/usr/bin/k6"



In [4]:
from datasets import load_dataset
from transformers import AutoTokenizer
from src.process_dataset import tokenize_and_filter, sample_dataset, save_dataset

# Load the train split of the 'all' configuration.
dataset = load_dataset(DATASET, 'all', split='train')

# Map numbers to text labels
label_mapping = {0: 'positive', 1: 'neutral', 2: 'negative'}
dataset = dataset.map(lambda example: {'label_text': label_mapping[example['label']]})

# Tokenizer of the model under test.
tokenizer = AutoTokenizer.from_pretrained(MODEL)

# Use TEXT_COLUMN from the config cell everywhere instead of a hard-coded
# 'text', so changing the constant actually takes effect.
dataset = tokenize_and_filter(dataset, tokenizer, text_column=TEXT_COLUMN, num_proc=8)
dataset = sample_dataset(dataset, n_samples=SAMPLES, min_tokens=MIN_TOKENS, max_tokens=MAX_TOKENS, seed=42)

# Persist only the text and its human-readable label.
save_dataset(dataset.select_columns([TEXT_COLUMN, 'label_text']), DATASET_PATH)

[32m2025-01-24 09:41:20.090[0m | [32m[1mSUCCESS [0m | [36msrc.process_dataset[0m:[36msample_dataset[0m:[36m29[0m - [32m[1mSampled dataset down to 10000 samples[0m
[32m2025-01-24 09:41:20.227[0m | [32m[1mSUCCESS [0m | [36msrc.process_dataset[0m:[36msave_dataset[0m:[36m42[0m - [32m[1mSaved dataset to data/dataset.json[0m


In [5]:
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, Optional

@dataclass
class InstanceConfig:
    """Settings for one inference-endpoint deployment, as passed to deploy_endpoint.

    Attributes:
        repository: Model repository to deploy (the notebook passes MODEL).
        accelerator: Accelerator kind, e.g. 'gpu'.
        instance_size: Instance size identifier, e.g. 'x1'.
        instance_type: Instance type identifier, e.g. 'nvidia-t4'.
        custom_image: Custom container settings in the shape produced by
            asdict(ImageConfig()) (string fields plus a nested 'env' dict),
            or None when no custom image has been assigned yet.
    """
    repository: str
    accelerator: str
    instance_size: str
    instance_type: str
    # Values are not all strings ('env' is itself a dict), and the notebook
    # builds instances with None before filling this in, hence Optional/Any.
    custom_image: Optional[Dict[str, Any]] = None

def _default_infinity_env() -> Dict[str, str]:
    """Build a fresh dict of the default container environment variables."""
    return {
        "INFINITY_PORT": "80",
        "INFINITY_ENGINE": "torch",
        "INFINITY_BATCH_SIZE": "16",
        "INFINITY_DTYPE": "auto",
        "INFINITY_EMBEDDING_DTYPE": "float32",
        "INFINITY_POOLING_METHOD": "auto",
        "INFINITY_COMPILE": "true",
        "INFINITY_BETTERTRANSFORMER": "true",
        "INFINITY_MODEL_ID": "/repository",
    }


@dataclass
class ImageConfig:
    """Default custom-image settings: health route, image URL and env vars.

    Each instance gets its own env dict, so editing one instance's
    environment does not affect another.
    """
    health_route: str = "/health"
    url: str = "michaelf34/infinity:0.0.75"
    env: Dict[str, str] = field(default_factory=_default_infinity_env)

In [6]:
def set_batch_size(batch_size: int) -> Dict[str, Any]:
    """Return the default image config as a dict with the batch size overridden.

    Args:
        batch_size: Value to store under env["INFINITY_BATCH_SIZE"]; must be >= 1.

    Returns:
        The dict form of a fresh ImageConfig (keys 'health_route', 'url', 'env'),
        where env["INFINITY_BATCH_SIZE"] is str(batch_size). Every call builds a
        new dict, so callers may mutate the result freely.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    # Fail before an endpoint is deployed with a nonsensical batch size.
    if int(batch_size) < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    config = asdict(ImageConfig())
    # Environment variable values are strings, so stringify the number.
    config['env']["INFINITY_BATCH_SIZE"] = str(batch_size)
    return config

# Experiments

In [7]:
# Hardware variants to benchmark; the entries differ only in instance_type.
instance_config_experiment_dicts = [
    dict(accelerator='gpu', instance_size='x1', instance_type='nvidia-t4'),
    dict(accelerator='gpu', instance_size='x1', instance_type='nvidia-l4'),
]

# One InstanceConfig per hardware variant, all serving MODEL. custom_image
# starts as None and is assigned per batch size in the benchmark loop.
instance_config_experiments = [
    InstanceConfig(repository=MODEL, custom_image=None, **hardware)
    for hardware in instance_config_experiment_dicts
]

In [8]:
# Sanity check: preview the image config produced for a batch size of 24.
set_batch_size(24)

{'health_route': '/health',
 'url': 'michaelf34/infinity:0.0.75',
 'env': {'INFINITY_PORT': '80',
  'INFINITY_ENGINE': 'torch',
  'INFINITY_BATCH_SIZE': '24',
  'INFINITY_DTYPE': 'auto',
  'INFINITY_EMBEDDING_DTYPE': 'float32',
  'INFINITY_POOLING_METHOD': 'auto',
  'INFINITY_COMPILE': 'true',
  'INFINITY_BETTERTRANSFORMER': 'true',
  'INFINITY_MODEL_ID': '/repository'}}

In [9]:
from src.deployment import deploy_endpoint
?deploy_endpoint

[0;31mSignature:[0m [0mdeploy_endpoint[0m[0;34m([0m[0minstance_config[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Deploys or updates a Hugging Face inference endpoint.

This function checks if an existing inference endpoint with the specified name exists.
- If found, it updates the endpoint with the provided instance configuration.
- If not found, it creates a new inference endpoint with the given parameters.

Once the endpoint is updated or created, it waits until the endpoint is fully ready.

Args:
    instance_config (InstanceConfig): A dataclass containing instance configuration details,
                                      such as accelerator type, vendor, region, and instance size.

Returns:
    InferenceEndpoint: The deployed Hugging Face inference endpoint object.

Raises:
    Exception: If the endpoint creation process fails.
[0;31mFile:[0m      /data/src/deployment.py
[0;31mType:[0m      function

In [11]:
from src.k6 import call_k6, optimal_vus
from pathlib import Path
import copy
from time import sleep

# k6 script template and the path the rendered script is written to.
template_file = "classification-analysis.js.j2"
output_file = Path("./generated").resolve()/"classification-analysis.js"

# Batch sizes to sweep for every hardware variant.
batch_sizes = [16, 32, 64, 128, 256, 512, 1024]

# For each (hardware, batch size) pair: deploy, load-test, tear down.
# NOTE(review): the recorded run of this cell ended with KeyError: 'BATCH_SIZE'
# after "Waiting for endpoint to be ready..."; the traceback origin is not
# visible here. Presumably a helper looks up a 'BATCH_SIZE' key while the env
# only defines 'INFINITY_BATCH_SIZE'. Confirm in src.deployment / src.k6.
for og_instance_config_experiment in instance_config_experiments:
    for batch_size in batch_sizes:
        # Work on a copy so the base hardware config is never mutated.
        instance_config_experiment = copy.deepcopy(og_instance_config_experiment)
        instance_config_experiment.custom_image = set_batch_size(batch_size)
        endpoint = deploy_endpoint(instance_config_experiment)

        try:
            args_dict = dict(
                endpoint=endpoint,
                total_requests=10_000,
                template_file=template_file,
                output_file=output_file,
                dataset_path=DATASET_PATH,
                k6_bin=K6_BIN
            )

            optimal_vus(max_vus=MAX_VUS, args_dict=args_dict, start_vus=16)
        finally:
            # Always tear the endpoint down, even when the load test raises;
            # otherwise a failed run leaves a deployed endpoint behind.
            endpoint.delete()

        # Short pause before the next deployment.
        sleep(5)

[32m2025-01-24 09:41:27.840[0m | [1mINFO    [0m | [36msrc.deployment[0m:[36mdeploy_endpoint[0m:[36m83[0m - [1mCreating inference endpoint...[0m
[32m2025-01-24 09:41:27.954[0m | [1mINFO    [0m | [36msrc.deployment[0m:[36mdeploy_endpoint[0m:[36m104[0m - [1mWaiting for endpoint to be ready...[0m


KeyError: 'BATCH_SIZE'