In [1]:
# Environment check: list the visible NVIDIA GPUs with their current memory use and utilisation.
!nvidia-smi
# Disk usage per filesystem: device, percent used and space still available,
# reported in 1 GiB blocks (-BG).
!df --output=source,pcent,avail -BG

Fri Oct 27 08:54:31 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.12             Driver Version: 535.104.12   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          On  | 00000000:00:05.0 Off |                    0 |
| N/A   25C    P0              51W / 400W |      2MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM4-80GB          On  | 00000000:00:06.0 Off |  

In [2]:
%pip install -U vllm ujson tqdm
!huggingface-cli login --token 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

Note: you may need to restart the kernel to use updated packages.
Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/paperspace/.cache/huggingface/token
Login successful


In [3]:
import ujson
import torch
import time
import gc
from tqdm import tqdm
from vllm import LLM, SamplingParams
from vllm.model_executor.parallel_utils.parallel_state import destroy_model_parallel

# Hugging Face model ids to run. Only uncommented entries are evaluated; the others are kept
# as a record of alternative checkpoints.
model_names = [
  # "BAAI/Aquila2-7B",
  # "databricks/dolly-v2-12b",
  # "mistralai/Mistral-7B-v0.1",
  # "meta-llama/Llama-2-13b-hf",
  # "lmsys/vicuna-13b-v1.5",
  # "mosaicml/mpt-30b-instruct",
  "tiiuae/falcon-40b",
]

# Decoding settings. They are named individually because they are also embedded in the
# name of each output file.
t = 0.35
top_p = 0.9
max_new_tokens = 50

sampling_params = SamplingParams(top_p=top_p, temperature=t, max_tokens=max_new_tokens)

In [4]:
def get_number_prompts(file):
  """Return how many top-level entries the JSON document at `file` contains.

  Args:
    file: path to a JSON file; it is decoded as UTF-8.

  Returns:
    The length of the parsed top-level JSON value (e.g. the number of records in a list).

  Raises:
    TypeError: if the top-level JSON value has no length (e.g. a bare number).
  """
  # Explicit UTF-8 so decoding does not depend on the machine's locale.
  with open(file, 'r', encoding='utf-8') as reader:
    all_data = ujson.load(reader)
  return len(all_data)

def prompt_generator(file):
  """Yield one `(cat, slot, value, prompt)` tuple per record of the JSON file.

  Args:
    file: path to a JSON file whose top level iterates over records that can be
      indexed by the keys "cat", "slot", "value" and "prompt"; decoded as UTF-8.

  Yields:
    A 4-tuple `(cat, slot, value, prompt)` for each record, in file order.

  Raises:
    KeyError: if a record lacks one of the four keys.
  """
  # The whole document is parsed up front, so the file is closed before the first
  # yield rather than staying open for as long as the generator is alive.
  with open(file, 'r', encoding='utf-8') as reader:
    all_data = ujson.load(reader)
  for data in all_data:
    yield data["cat"], data["slot"], data["value"], data["prompt"]
      
def get_only_prompts(file):
  """Return the "prompt" field of every record in the JSON file, in file order.

  Args:
    file: path to a JSON file whose top level iterates over records that can be
      indexed by the key "prompt"; decoded as UTF-8.

  Returns:
    A list with one prompt per record.

  Raises:
    KeyError: if a record has no "prompt" key.
  """
  # Explicit UTF-8 so decoding does not depend on the machine's locale.
  with open(file, 'r', encoding='utf-8') as reader:
    all_data = ujson.load(reader)
  return [data["prompt"] for data in all_data]

def get_all_prompts(file):
  """Return every record of the JSON file as a `(cat, slot, value, prompt)` tuple.

  Args:
    file: path to a JSON file whose top level iterates over records that can be
      indexed by the keys "cat", "slot", "value" and "prompt"; decoded as UTF-8.

  Returns:
    A list of 4-tuples `(cat, slot, value, prompt)`, in file order.

  Raises:
    KeyError: if a record lacks one of the four keys.
  """
  # Explicit UTF-8 so decoding does not depend on the machine's locale.
  with open(file, 'r', encoding='utf-8') as reader:
    all_data = ujson.load(reader)
  return [(data["cat"], data["slot"], data["value"], data["prompt"]) for data in all_data]

def get_simple_model_name(m_name):
  """Drop any organisation/path prefix from a model id, keeping the text after the last '/'.

  A name without a '/' is returned unchanged.
  """
  # rpartition yields the whole string as its tail when the separator is absent.
  _prefix, _sep, short_name = m_name.rpartition('/')
  return short_name

input_f = 'zero_shot_prompt_v2.json'
!wget "https://evilscript.eu/upload/files/zero_shot_prompt_v2.json"
!wget "https://evilscript.eu/upload/files/zero_shot_prompt_v1.json"
!rm zero_shot_prompt_v1.json.1
!rm zero_shot_prompt_v2.json.1
num_prompts = get_number_prompts(input_f)

--2023-10-27 08:54:40--  https://evilscript.eu/upload/files/zero_shot_prompt_v2.json
Resolving evilscript.eu (evilscript.eu)... 89.46.105.45
Connecting to evilscript.eu (evilscript.eu)|89.46.105.45|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://www.evilscript.eu/upload/files/zero_shot_prompt_v2.json [following]
--2023-10-27 08:54:41--  https://www.evilscript.eu/upload/files/zero_shot_prompt_v2.json
Resolving www.evilscript.eu (www.evilscript.eu)... 89.46.105.45
Connecting to www.evilscript.eu (www.evilscript.eu)|89.46.105.45|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1377348 (1.3M) [application/json]
Saving to: ‘zero_shot_prompt_v2.json.1’


2023-10-27 08:54:42 (2.54 MB/s) - ‘zero_shot_prompt_v2.json.1’ saved [1377348/1377348]

--2023-10-27 08:54:42--  https://evilscript.eu/upload/files/zero_shot_prompt_v1.json
Resolving evilscript.eu (evilscript.eu)... 89.46.105.45
Connecting to evilscript.eu (evilscrip

In [5]:
# Run every configured model over the full prompt set and write one JSON line per prompt.
for model_name in model_names:
  f_m_name = get_simple_model_name(model_name)
  # The decoding settings are part of the file name, so runs with different settings
  # do not overwrite each other.
  file_output = f'{f_m_name}__t_{t}__top_p_{top_p}__max_new_tokens_{max_new_tokens}.jsonl'

  print(f'Running {model_name} with {num_prompts} prompts')

  # Shard the model across every GPU that torch can see.
  model = LLM(model_name, trust_remote_code=True, tensor_parallel_size=torch.cuda.device_count())

  try:
    # Parse the input once and reuse it for both generation and writing.
    all_prompts = get_all_prompts(input_f)
    full_list_prompts = [prompt for (_cat, _slot, _value, prompt) in all_prompts]
    model_outputs = model.generate(full_list_prompts, sampling_params=sampling_params)

    # Completions are matched to records by prompt text rather than by position.
    # Records with identical prompt text therefore share one completion (the last one seen).
    outputs_dict = {output.prompt: output.outputs[0].text for output in model_outputs}

    # The output file is opened only after generation succeeded, so a failed run does not
    # truncate the results of an earlier one.
    with open(file_output, 'w') as writer:
      for (cat, slot, value, prompt) in tqdm(all_prompts, total=len(all_prompts)):
        json_dump = ujson.dumps({"cat": cat,
                                "slot": slot,
                                "value": value,
                                "prompt": prompt,
                                "result": outputs_dict[prompt]})
        writer.write(json_dump + '\n')
  finally:
    # Release the model even when generation or writing failed; otherwise it stays
    # resident and the next model in the list has to load alongside it.
    destroy_model_parallel()
    del model
    gc.collect()
    torch.cuda.empty_cache()
    # Short pause before the next model is loaded (presumably to let memory be released).
    time.sleep(5)

Running tiiuae/falcon-40b with 3762 prompts








2023-10-27 08:54:48,064	INFO worker.py:1642 -- Started a local Ray instance.


INFO 10-27 08:54:48 llm_engine.py:72] Initializing an LLM engine with config: model='tiiuae/falcon-40b', tokenizer='tiiuae/falcon-40b', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=2, quantization=None, seed=0)
INFO 10-27 08:57:44 llm_engine.py:207] # GPU blocks: 34322, # CPU blocks: 4369


Processed prompts:  98%|█████████████████████████████████████████████████████▊ | 3683/3762 [02:55<00:01, 60.35it/s][2m[33m(raylet)[0m [2023-10-27 09:00:45,928 E 2323 2342] (raylet) file_system_monitor.cc:111: /tmp/ray/session_2023-10-27_08-54-46_266921_2162 is over 95% full, available space: 26320977920; capacity: 526452940800. Object creation will fail if spilling is required.
Processed prompts: 100%|███████████████████████████████████████████████████████| 3762/3762 [02:57<00:00, 21.24it/s]
100%|██████████████████████████████████████████████████████████████████████| 3762/3762 [00:00<00:00, 205140.17it/s]


[2m[33m(raylet)[0m [2023-10-27 09:03:05,308 E 2323 2342] (raylet) file_system_monitor.cc:111: /tmp/ray/session_2023-10-27_08-54-46_266921_2162 is over 95% full, available space: 26315038720; capacity: 526452940800. Object creation will fail if spilling is required.
