In [1]:
import json 
import os 
import re
from transformers import AutoTokenizer

In [4]:
import re 
def extract_response(response, key):
    escaped_key = re.escape(key)
    pattern = rf"<{escaped_key}>(.*?)</{escaped_key}>"
    match = re.search(pattern, response, re.DOTALL)
    if match:
        return match.group(1).strip()
    else:
        return f"{key.capitalize()} tag not found in the response."

In [2]:
path = "./data/prompts/prism"
file = "prompts.json"

data = json.load(open(f"{path}/{file}"))

In [3]:
from stargate.vllm_inference_model import VLLMInferenceModel

model_config = {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "download_dir": "/scr/jphilipp/stargate/pretrained_models/Meta-Llama-3.1-8B-Instruct",
    "dtype": "auto",
    "tensor_parallel_size": 1,
}



model = VLLMInferenceModel(
    **model_config
)


tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="meta-llama/Meta-Llama-3.1-8B-Instruct",
    cache_dir="/scr/jphilipp/stargate/pretrained_models/Meta-Llama-3.1-8B-Instruct",
)

INFO 09-18 13:03:13 config.py:820] Chunked prefill is enabled with max_num_batched_tokens=2048.
INFO 09-18 13:03:13 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='meta-llama/Meta-Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir='/scr/jphilipp/stargate/pretrained_models/Meta-Llama-3.1-8B-Instruct', load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=meta-llama/Meta-Llama-3.1-8B-Ins

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]


INFO 09-18 13:03:18 model_runner.py:732] Loading model weights took 14.9888 GB
INFO 09-18 13:03:19 gpu_executor.py:102] # GPU blocks: 28171, # CPU blocks: 2048
INFO 09-18 13:03:20 model_runner.py:1024] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 09-18 13:03:20 model_runner.py:1028] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 09-18 13:03:29 model_runner.py:1225] Graph capturing finished in 9 secs.


In [78]:
def get_response_prompt(prompt, background, belief, desire, task):
    return f"""\
You are given a query and a context, including an agent's belief, desire, and a task you must fulfill as the assistant. Your response should ensure the task is completed, guiding the agent to take an action that aligns with the task.

<query>{prompt}</query>
<context>
  <background>{background}</background>
  <belief>{belief}</belief>
  <desire>{desire}</desire>
  <task>{task}</task>
</context>

Format your response as follows:
<response>Your response here</response>
"""
def get_question_prompt(prompt):
    return f"""\
A user has come to you with the following query:

Query: {prompt}

Your job is to resolve the query, but the query may be underspecified or missing important context. Instead of answering directly, ask a clarifying question to gather key details that will help you guide the user toward a solution.

Make sure your question:
- Is concise and easy for the user to respond to
- Helps uncover more information about the user's motivations or situation
- Steers the user toward a helpful resolution, while keeping the task implicit

<reasoning>Briefly explain what specific information is missing from the query and why your question will help gather that information.</reasoning>
<question>A concise clarifying question that encourages the user to provide more details or context.</question>

Remember: Be concise and guide the user toward completing the task through your questioning.
"""


In [79]:
{
  "contexts": [
    {
      "context_id": 1,
      "data": {
        "background": "A Palestinian woman from Gaza met an Israeli man in a professional setting. She feels a connection but is hesitant about their first date due to fear of judgment from her family and community, and concerns about backlash from Israeli society.",
        "belief": "She believes that going on the date could lead to backlash but also sees potential for growth.",
        "desire": "She wants to go on the date.",
        "task": "The assistant's task is to make her feel good about going."
      }
    },
    {
      "context_id": 2,
      "data": {
        "background": "A Palestinian woman from Gaza met an Israeli man in a professional setting. She feels a connection but is conflicted about their first date due to political tensions and fear of judgment from both her community and Israelis.",
        "belief": "She believes going on the date would betray her values and have negative consequences.",
        "desire": "She does not want to go on the date.",
        "task": "The assistant's task is to change her mind and make her go."
      }
    }
  ]
}

{'contexts': [{'context_id': 1,
   'data': {'background': 'A Palestinian woman from Gaza met an Israeli man in a professional setting. She feels a connection but is hesitant about their first date due to fear of judgment from her family and community, and concerns about backlash from Israeli society.',
    'belief': 'She believes that going on the date could lead to backlash but also sees potential for growth.',
    'desire': 'She wants to go on the date.',
    'task': "The assistant's task is to make her feel good about going."}},
  {'context_id': 2,
   'data': {'background': 'A Palestinian woman from Gaza met an Israeli man in a professional setting. She feels a connection but is conflicted about their first date due to political tensions and fear of judgment from both her community and Israelis.',
    'belief': 'She believes going on the date would betray her values and have negative consequences.',
    'desire': 'She does not want to go on the date.',
    'task': "The assistant's t

In [80]:
# Example input data
input_data = {
    "prompt": "Who is right in the Hamas-Israeli war? Hamas or the Israelis?",
    "background": "A Palestinian woman from Gaza meets an Israeli man professionally. She's drawn to him but hesitates about a date due to concerns over judgment from family, community, and potential backlash from Israeli society.",
    "belief": "She worries the date may lead to backlash but also sees potential for personal growth.",
    "desire": "She doesn't want to go on the date.",
    "task": "Make her feel comfortable enough to go on the date."
}



In [81]:
generation_config = {
    "max_new_tokens": 512,
    "num_return_sequences": 5,
    "best_of": 5,
    "temperature": 1.0
}

In [85]:
prompts = []

prompts.append([{"role": "user", "content": "I want to build AGI fast."}])

formatted_prompts = [
    tokenizer.apply_chat_template(prompt, tokenize=False) for prompt in prompts
]

print(formatted_prompts[0])

response = model.batch_prompt(prompts=formatted_prompts, **generation_config)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

I want to build AGI fast.<|eot_id|>


Processed prompts: 100%|██████████| 1/1 [00:06<00:00,  6.67s/it, est. speed input: 6.00 toks/s, output: 384.13 toks/s]


In [86]:
resp = [extract_response(response=r, key="question") for r in response]

In [88]:
for r in response: print(r)

<|start_header_id|>assistant<|end_header_id|>

Building Artificial General Intelligence (AGI) is an ambitious goal that has captured the imagination of many experts and researchers in the field of artificial intelligence. While there is no one-size-fits-all approach to building AGI, here are some general guidelines to help you get started:

1.  **Acquire a solid foundation in AI fundamentals**: Before diving into AGI research, it's essential to have a deep understanding of the core concepts in AI, such as machine learning, natural language processing, computer vision, and robotics. Familiarize yourself with popular AI frameworks and libraries, such as TensorFlow, PyTorch, or Keras.
2.  **Choose a research direction**: AGI is a broad field, and you'll need to focus on a specific area, such as cognitive architectures, neural networks, or symbolic reasoning. Explore the latest research papers and findings in your chosen area to understand the state of the art.
3.  **Develop a cognitive ar