In [2]:
import sys
import os
from accelerate import notebook_launcher
from datasets import load_dataset
import logging

# Logging setup
# Step 1: empty the root logger's handler list. basicConfig() is a no-op when
#         the root logger already has handlers, which is the usual state when
#         this cell is re-run in a live kernel.
while logging.root.handlers:
    logging.root.removeHandler(logging.root.handlers[0])
# Step 2: with the root logger clean, restrict codecarbon to ERROR and above,
#         then install the default root configuration at INFO.
logging.getLogger("codecarbon").setLevel(logging.ERROR)
logging.basicConfig(level=logging.INFO)

# Put the notebook's working directory on sys.path (at most once) so the
# project-local `classes` package imported just below can be resolved.
project_root = os.getcwd()
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)
from classes.experiment_config import ExperimentConfig
from classes.experiment_runner import ExperimentRunner

#import torch.multiprocessing as mp
#mp.set_start_method('spawn', force=True)

In [None]:
# Experiment definition. All values are passed as keyword arguments to
# ExperimentConfig; how each one is interpreted is defined in
# classes/experiment_config.py, not in this notebook.
experiment_config = ExperimentConfig(
    model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    is_encoder_decoder=False,
    task_type="text_generation",
    inference_type="pure_generative",
    max_input_tokens=128,  # 2048 is Llama's limit
    max_output_tokens=128,
    num_input_prompts=100,
    save_outputs=True,
    decode_token_to_text=True,
    gpu_list=[0, 1, 2, 3],
    # This value is also what the notebook_launcher call at the end of the
    # cell uses as its process count.
    num_processes=4,
    batching_options={
        "batch_size_[fixed_batching]": 16,
        "max_batch_size_[adaptive_batching]": 100,
        "adaptive_batching": True,
        "adaptive_max_tokens": 3000,
    },
    sharding_config={
        "fsdp_config": {
            "use_orig_params": True,
            "cpu_offload": True,
        },
        "sharding_strategy": "NO_SHARD",
    },
    query_rate=1,
    decoder_temperature=1.0,  # NB: needs to be a float
    fp_precision="float16",
    backend="pytorch",
)

# Prompt source: the "test" split of the 'arxiv' configuration of the
# lighteval/pile_helm dataset.
ds = load_dataset("lighteval/pile_helm", "arxiv")["test"]
# Keep only the "text" field of every record; these strings are the prompts.
prompts = [row["text"] for row in ds]

def run_experiment_workflow():
    """Run the whole experiment workflow inside one launched process.

    Builds an ExperimentRunner from the notebook-level ``experiment_config``
    and ``prompts`` and calls ``run_torch()`` on it. Afterwards only the
    process for which ``runner.accelerator.is_main_process`` is true goes on
    to aggregate results, save them and call ``teardown()``; every other
    process returns straight after ``run_torch()``.

    Returns:
        None.
    """
    runner = ExperimentRunner(experiment_config, prompts)
    runner.run_torch()

    # NOTE(review): non-main processes leave without calling teardown();
    # confirm that this is intended.
    if runner.accelerator.is_main_process:
        # Main process only: aggregate, then persist the results.
        runner.aggregate_results()
        runner.save_experiment_results()

        # A step that deletes the JSON files could go here if storage
        # becomes an issue.

        runner.teardown()

    return None

# Start the workflow in `experiment_config.num_processes` worker processes.
# (When going through accelerate.launch.launch() instead, pass
# terminate_on_error=True.)
notebook_launcher(
    run_experiment_workflow,
    num_processes=experiment_config.num_processes,
)

Launching training on 4 GPUs.
Accelerator set up
Unique experiment id: 0210
TinyLlama/TinyLlama-1.1B-Chat-v1.0 loaded using pytorch, with precision float16
Original generate method saved.
Model and tokenizer prepared
Entering wait barrier: after model preparation
wait_for_everyone completed within 10 seconds for after model preparation.
Exiting wait barrier: after model preparation

INFO:classes.experiment_runner:[Process 2695592] Model is on device: cuda:1





INFO:classes.experiment_runner:[Process 2695591] Model is on device: cuda:0
INFO:classes.experiment_runner:[Process 2695593] Model is on device: cuda:2
INFO:classes.experiment_runner:[Process 2695594] Model is on device: cuda:3


Entering wait barrier: after logging device info
wait_for_everyone completed within 10 seconds for after logging device info.
Exiting wait barrier: after logging device info
Original generate method reassigned


INFO:classes.experiment_runner:[Process 2695591] Dummy forward pass complete


Entering wait barrier: after dummy forward pass


INFO:classes.experiment_runner:[Process 2695594] Dummy forward pass complete
INFO:classes.experiment_runner:[Process 2695592] Dummy forward pass complete
INFO:classes.experiment_runner:[Process 2695593] Dummy forward pass complete


wait_for_everyone completed within 10 seconds for after dummy forward pass.
Exiting wait barrier: after dummy forward pass
Prompts processed: 100 prompts.


Token indices sequence length is longer than the specified maximum sequence length for this model (3625 > 2048). Running this sequence through the model will result in indexing errors


Energy tracking started

Token indices sequence length is longer than the specified maximum sequence length for this model (3625 > 2048). Running this sequence through the model will result in indexing errors



Task type: pure_generative


Token indices sequence length is longer than the specified maximum sequence length for this model (3625 > 2048). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (3625 > 2048). Running this sequence through the model will result in indexing errors
W0325 19:47:37.008000 2695400 torch/distributed/elastic/agent/server/api.py:704] Received Signals.SIGINT death signal, shutting down workers
W0325 19:47:37.011000 2695400 torch/distributed/elastic/multiprocessing/api.py:766] Closing process 2695591 via signal SIGINT
W0325 19:47:37.012000 2695400 torch/distributed/elastic/multiprocessing/api.py:766] Closing process 2695592 via signal SIGINT
W0325 19:47:37.013000 2695400 torch/distributed/elastic/multiprocessing/api.py:766] Closing process 2695593 via signal SIGINT
W0325 19:47:37.014000 2695400 torch/distributed/elastic/multiprocessing/api.py:766] Closing process 2695594 via s

Error during inference: Process 2695591 got signal: 2


SignalException: Process 2695400 got signal: 2