In [1]:
from jinja2 import Template
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
import json

In [2]:
# Run-level settings, kept together so they are easy to tune.
# NOTE(review): NUM_PERSONAS is not referenced in the cells visible here;
# presumably it is the target number of personas for a later full run - confirm.
NUM_PERSONAS = 100
# Passed to vLLM as tensor_parallel_size when the engine is created.
NUM_GPUS = 1

In [3]:
# Read the raw persona prompt template from disk.
# The encoding is pinned so the text decodes the same way on every platform
# instead of following the locale default (assumes the file is saved as UTF-8).
with open("new_persona.jinja2", encoding="utf-8") as f:
    template_str = f.read()

In [4]:
# Build a jinja2 Template from the template text so it can be rendered with the example personas.
template = Template(template_str)

In [5]:
# Load the example ("few-shot") personas that get embedded in the system prompt.
# JSON interchange files are UTF-8, so the encoding is set explicitly rather than
# relying on the platform's locale default.
with open("personas_shots.json", encoding="utf-8") as f:
    personas = json.load(f)

In [6]:
# Serialize every example persona to its own 2-space-indented JSON string,
# ready to be dropped into the prompt template.
personas_str = list(map(lambda example: json.dumps(example, indent=2), personas))

In [7]:
# Render the system prompt; the serialized example personas are exposed to the
# template under the name persona_jsons.
system_prompt = template.render(persona_jsons=personas_str)

In [8]:
# Print (rather than echo the repr) so the multi-line prompt is shown with real line breaks.
print(system_prompt)

# Instructions

Construct detailed imaginary personas for characters in imaginary dialogue. We want these persona descriptions to contain enough details so that anyone can identify one persona apart from another. 

These personas should portray a complex character. Real people have good behaviors as well as aspects which others may not always appreciate. Their personal and professional social networks don't fully represent their identity. Create a character that has depth of complexity beneath the surface

# Output format

Output in JSON format as follows

{
  "identity": "string",
  "personalLife": "string",
  "communication": "string",
  "background": "string",
  "dailyLife": "string",
  "coping": "string",
  "interests": "string",
  "relationships": "string",
  "values": "string",
  "aspirations": "string",
  "dialogue": "string"
}
 
# Examples

{
  "identity": "Victor Patel is a 47-year-old cryptocurrency trading savant and angel investor who made his first million by age 25 throug

In [9]:
# Model identifier shared by the tokenizer and the vLLM engine, so the chat
# template used for prompting belongs to the same model that generates.
model_id = "meta-llama/Llama-3.1-8B-Instruct"

In [10]:
# Tokenizer for model_id. In the cells shown here it is only used to format chat
# messages into a prompt string (apply_chat_template with tokenize=False).
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [11]:
# Start the vLLM engine for model_id.
# - max_model_len bounds prompt + completion length per request.
# - chunked prefill is enabled, with at most max_num_batched_tokens tokens
#   scheduled per step.
llm = LLM(
    model=model_id,
    tensor_parallel_size=NUM_GPUS,
    max_model_len=8192,
    enable_chunked_prefill=True,
    max_num_batched_tokens=2048,
)

INFO 12-23 13:13:09 config.py:350] This model supports multiple tasks: {'embedding', 'generate'}. Defaulting to 'generate'.
INFO 12-23 13:13:09 config.py:1136] Chunked prefill is enabled with max_num_batched_tokens=2048.
INFO 12-23 13:13:09 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post1) with config: model='meta-llama/Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_mode

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]


INFO 12-23 13:13:13 model_runner.py:1077] Loading model weights took 14.9888 GB
INFO 12-23 13:13:14 worker.py:232] Memory profiling results: total_gpu_memory=23.69GiB initial_memory_usage=16.00GiB peak_torch_memory=16.18GiB memory_usage_post_profile=16.04GiB non_torch_memory=1.04GiB kv_cache_size=4.10GiB gpu_memory_utilization=0.90
INFO 12-23 13:13:14 gpu_executor.py:113] # GPU blocks: 2100, # CPU blocks: 2048
INFO 12-23 13:13:14 gpu_executor.py:117] Maximum concurrency for 8192 tokens per request: 4.10x
INFO 12-23 13:13:16 model_runner.py:1400] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 12-23 13:13:16 model_runner.py:1404] If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO

In [12]:
def _parse_persona_json(text):
    """Parse ``text`` as JSON, tolerating wrappers around a JSON object.

    The text is first parsed as-is. If that fails, the span from the first
    ``{`` to the last ``}`` is parsed instead, which recovers objects wrapped
    in Markdown code fences or surrounded by extra prose.

    Raises:
        json.JSONDecodeError: if neither attempt yields valid JSON.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            # No candidate object to fall back to; surface the original error.
            raise
        return json.loads(text[start:end + 1])


def generate_personas(n, temperature=0.25, top_p=0.9, max_tokens=4096):
    """Sample ``n`` completions of the persona prompt and return the parsed ones.

    The same chat prompt (the module-level ``system_prompt`` plus a fixed user
    request) is submitted ``n`` times to the module-level ``llm``. Each
    completion is parsed as JSON; completions that cannot be parsed are printed
    and skipped, so the result may contain fewer than ``n`` items.

    Args:
        n: Number of completions to request.
        temperature: Sampling temperature forwarded to ``SamplingParams``.
        top_p: Nucleus-sampling threshold forwarded to ``SamplingParams``.
        max_tokens: Per-completion token limit forwarded to ``SamplingParams``.

    Returns:
        list: The successfully parsed personas, in output order.
    """
    messages = [{
        "role": "system",
        "content": system_prompt
    }, {
        "role": "user",
        "content": "Construct a new persona"
    }]

    # tokenize=False returns the formatted prompt string; vLLM tokenizes it itself.
    chat_input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    prompts = [chat_input] * n

    sampling_params = SamplingParams(temperature=temperature, top_p=top_p, max_tokens=max_tokens)
    outputs = llm.generate(prompts, sampling_params)
    print(len(outputs))

    new_personas = []
    for output in outputs:
        text = output.outputs[0].text.strip()
        try:
            persona = _parse_persona_json(text)
        except json.JSONDecodeError:
            # Only malformed JSON is skipped; other exceptions (including a
            # KeyboardInterrupt from stopping the cell) propagate normally.
            print("JSON decode error")
            print(text)
            continue
        new_personas.append(persona)

    print(f"n={n}, len(new_personas)={len(new_personas)}")

    return new_personas

In [13]:
# Small trial run: request 4 personas; the returned list is shown as the cell output.
generate_personas(4)

Processed prompts: 100%|█████████████████████████| 4/4 [00:32<00:00,  8.13s/it, est. speed input: 446.73 toks/s, output: 142.16 toks/s]

4
JSON decode error
{
  "identity": "Ava Moreno is a 29-year-old queer, non-binary artist and art therapist living in a vibrant, rapidly gentrifying neighborhood in Brooklyn. Born to a family of Mexican immigrants, Ava's early life was marked by the constant struggle to balance cultural identity with the harsh realities of poverty. Their art often explores themes of identity, belonging, and the search for a sense of home. Despite their success as an artist, Ava still feels like an outsider in their own community, caught between the expectations of their family and the demands of their creative vision.",
  "personalLife": "Ava shares a spacious loft with two roommates, a fellow artist and a writer, in a building that's rapidly becoming a hub for creative types. Their living space is a testament to their eclectic style, filled with vibrant textiles, found objects, and an ever-changing array of art projects. Ava's daily routine is a delicate balance of studio time, teaching art classes at




[{'identity': "Aurora 'Rory' Thompson is a 29-year-old freelance journalist and podcast host known for her sharp, incisive reporting on social justice issues and her quick wit on live radio. Born to a family of traveling performers, Rory has spent her life on the road, never staying in one place long enough to form lasting connections. Her parents' constant reinvention and reinvestment in new projects have instilled in her a restless spirit and a talent for reinventing herself, but also left her with a deep-seated fear of commitment and a tendency to sabotage relationships.",
  'personalLife': "Living in a small studio apartment in Brooklyn, Rory's home is a shrine to her eclectic tastes - vintage cameras, vinyl records, and a collection of antique typewriters. She's been in a string of short-term relationships with fellow creatives, each ending in a blaze of passion and mutual destruction. Her closest friend is her editor, Rachel, who's been a constant in her life despite Rory's nomad