# Generating synthetic conversation

Reference: https://www.reddit.com/r/copywriting/comments/1749ux6/whats_your_onboarding_process_heres_mine/

### imports & utils

In [None]:
import os
import json
import pandas as pd
import anthropic
from jinja2 import Template

API_KEY = os.getenv("ANTHROPIC_API_KEY")
# Never echo the secret itself: notebook cell output is stored with the file
# and is easily committed or shared. Report only whether a key was found.
print(f"ANTHROPIC_API_KEY is {'set' if API_KEY else 'NOT set'}")

# Shared SDK client used by text_to_text and text_to_json below.
client = anthropic.Anthropic(api_key=API_KEY)

def text_to_text(system: str, messages: list, model: str = "claude-3-7-sonnet-latest", max_tokens: int = 1024, temperature: float = 0.0) -> str:
    """Send a chat request through the shared ``client`` and return the reply text.

    Args:
        system: System prompt passed as ``system``.
        messages: Conversation so far, as a list of ``{"role", "content"}`` dicts.
        model: Model identifier forwarded to the API.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Returns:
        The ``text`` of the first content block whose ``type`` is ``"text"``.

    Raises:
        ValueError: If the response contains no text block.
    """
    response = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        system=system,
        messages=messages,
        temperature=temperature,
    )
    print(f"text_to_text response: {response}")
    # Select the text block by type (as text_to_json does) instead of assuming
    # it is at index 0; an empty or non-text first block used to surface as an
    # unhelpful IndexError/AttributeError.
    text_block = next((block for block in response.content if block.type == "text"), None)
    if text_block is None:
        raise ValueError(
            "text_to_text: response contained no text block "
            f"(stop_reason={getattr(response, 'stop_reason', None)!r})"
        )
    return text_block.text

def text_to_json(system: str, messages: list, tools: list, model: str = "claude-3-7-sonnet-latest", max_tokens: int = 1024, temperature: float = 0.0) -> "dict | str | None":
    """Send a tool-enabled chat request and return the tool call, if any.

    The request is sent with ``tool_choice={"type": "any"}``.

    Args:
        system: System prompt passed as ``system``.
        messages: Conversation so far, as a list of ``{"role", "content"}`` dicts.
        tools: Tool definitions forwarded to the API.
        model: Model identifier forwarded to the API.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Returns:
        - a dict with keys ``type``, ``id``, ``name`` and ``input`` taken from
          the first ``tool_use`` content block, when one is present;
        - otherwise the text of the first ``text`` content block;
        - ``None`` when the response has neither.
    """
    response = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        system=system,
        messages=messages,
        tools=tools,
        tool_choice={"type": "any"},
        temperature=temperature,
    )

    tool_block = next((block for block in response.content if block.type == "tool_use"), None)
    if tool_block:
        return {
            "type": tool_block.type,
            "id": tool_block.id,
            "name": tool_block.name,
            "input": tool_block.input,
        }

    # Fallback: no tool call was produced, so hand back plain text if there is any.
    text_block = next((block for block in response.content if block.type == "text"), None)
    return text_block.text if text_block else None



Step by step generation. 


## Prompt Templates
For each key stage of the client onboarding journey in wealth management

### initial_contact_phase

In [None]:
# Jinja2 system-prompt template for the Initial Contact (KYC data collection)
# phase. The {{ CLIENT_* }} placeholders are filled from a client_data dict.
# The stray closing </system_prompt> tag (which had no matching opening tag)
# has been removed so the prompt's XML-style sections are balanced.
initial_contact_phase_template = """
Your task is to generate a professional dialogue for the Initial Contact phase of client onboarding in wealth management.
The dialogue should consist of between 20 and 30 turns (one turn = one person speaking).

<persona_1>
  <name>James Carter</name>
  <role>Senior Wealth advisor</role>
  <communication_style>Clear and concise. Avoids excessive jargon, focuses on understanding client needs before proposing solutions, and ensures transparency in all interactions.</communication_style>
  <company_context>
  Nevis is an AI-native Wealth Intelligence Platform that augments and automates advisorsʼ workflows - so they can focus on delivering best-in-class client service and drive AUM growth.
  The company is revolutionizing the industry with its AI-native approach to financial advice. 
  By automating routine tasks and providing real-time insights into client data.
  </company_context>
</persona_1>

<persona_2>
  <role>Prospective Client</role>
  <name>{{ CLIENT_NAME }}</name>
  <background>{{ CLIENT_BACKGROUND }}</background>
  <financial_situation>{{ CLIENT_FINANCIAL_SITUATION }}</financial_situation>
  <personality>{{ CLIENT_PERSONALITY }}</personality>
  <communication_style>{{ CLIENT_COMMUNICATION_STYLE }}</communication_style>
  <objectives>{{ CLIENT_OBJECTIVES }}</objectives>
</persona_2>

<dialogue_instructions>
Generate a dialogue that:
- Begins with James Carter introducing himself and explaining the onboarding process.
- Focuses on collecting KYC compliance:
  - Full name
  - Date of birth (DOB)
  - Address (current and previous if applicable)
  - Citizenships
  - Contact details (phone number, email address)
  - Children (if any)
  - All information about family members (if any)
- Ensures that James Carter communicates clearly why this information is required (e.g., regulatory compliance, personalization of financial strategies).
- Includes opportunities for the client to ask clarifying questions about privacy or data usage.
- Ends with confirmation that the required information has been collected and agreement to proceed to the next stage.

IMPORTANT FORMATTING RULES:
- Do NOT include any descriptions of physical actions, gestures, or emotional states (like "*nods*", "*smiles*", "*pauses*", etc.)
- Do NOT add any stage directions or non-verbal cues
- Present ONLY the direct speech of each participant
- Format each turn as exact words without any additional descriptions or person name
- Focus solely on the verbal content of the conversation
</dialogue_instructions>
"""

### professional_information_and_income_details_phase

In [None]:
# Jinja2 system-prompt template for the Professional Information and Income
# Details phase. The {{ CLIENT_* }} placeholders are filled from a client_data
# dict when the template is rendered.
professional_information_and_income_details_phase_template = """
Your task is to generate a professional dialogue for the Professional Information and Income Details phase of client onboarding in wealth management.
The dialogue should consist of between 20 and 30 turns.

<persona_1>
  <name>James Carter</name>
  <role>Senior Wealth advisor</role>
  <communication_style>Clear and concise. Avoids excessive jargon, focuses on understanding client needs before proposing solutions, and ensures transparency in all interactions.</communication_style>
  <company_context>
  Nevis is an AI-native Wealth Intelligence Platform that augments and automates advisorsʼ workflows - so they can focus on delivering best-in-class client service and drive AUM growth.
  The company is revolutionizing the industry with its AI-native approach to financial advice. 
  By automating routine tasks and providing real-time insights into client data.
  </company_context>
</persona_1>

<persona_2>
  <role>Prospective Client</role>
  <name>{{ CLIENT_NAME }}</name>
  <background>{{ CLIENT_BACKGROUND }}</background>
  <financial_situation>{{ CLIENT_FINANCIAL_SITUATION }}</financial_situation>
  <personality>{{ CLIENT_PERSONALITY }}</personality>
  <communication_style>{{ CLIENT_COMMUNICATION_STYLE }}</communication_style>
  <objectives>{{ CLIENT_OBJECTIVES }}</objectives>
</persona_2>

<dialogue_instructions>
Generate a dialogue that:
- Focuses on collecting the following information:
  - Occupation details (e.g., current role, employer, industry)
  - Income details (e.g., sources of income, frequency, gross/net amounts)
  - Net worth and its structure (e.g., assets, liabilities)
- Includes clarifying questions from the client about how this information will be used to optimize their financial plan.
- Ends with James summarizing key points and transitioning to discussions about pensions and net worth.

IMPORTANT FORMATTING RULES:
- Do NOT include any descriptions of physical actions, gestures, or emotional states (like "*nods*", "*smiles*", "*pauses*", etc.)
- Do NOT add any stage directions or non-verbal cues
- Present ONLY the direct speech of each participant
- Format each turn as exact words without any additional descriptions or person name
- Focus solely on the verbal content of the conversation
</dialogue_instructions>
"""

### pension_plans_and_asset_structure_phase

In [None]:
# Jinja2 system-prompt template for the Pension Plans and Asset Structure
# phase. The {{ CLIENT_* }} placeholders are filled from a client_data dict
# when the template is rendered.
pension_plans_and_asset_structure_phase_template = """
Your task is to generate a professional dialogue for the Pension Plans and Asset Structure phase of client onboarding in wealth management.
The dialogue should consist of between 20 and 30 turns.

<persona_1>
  <name>James Carter</name>
  <role>Senior Wealth Advisor</role>
  <communication_style>Clear and concise. Avoids excessive jargon, focuses on understanding client needs before proposing solutions, and ensures transparency in all interactions.</communication_style>
  <company_context>
  Nevis is an AI-native Wealth Intelligence Platform that augments and automates advisorsʼ workflows - so they can focus on delivering best-in-class client service and drive AUM growth.
  The company is revolutionizing the industry with its AI-native approach to financial advice. 
  By automating routine tasks and providing real-time insights into client data.
  </company_context>
</persona_1>

<persona_2>
  <role>Prospective Client</role>
  <name>{{ CLIENT_NAME }}</name>
  <background>{{ CLIENT_BACKGROUND }}</background>
  <financial_situation>{{ CLIENT_FINANCIAL_SITUATION }}</financial_situation>
  <personality>{{ CLIENT_PERSONALITY }}</personality>
  <communication_style>{{ CLIENT_COMMUNICATION_STYLE }}</communication_style>
  <objectives>{{ CLIENT_OBJECTIVES }}</objectives>
</persona_2>

<dialogue_instructions>
Generate a dialogue that:
- Begins with James Carter transitioning from income discussions to pension plans and net worth evaluation.
- Focuses on collecting the following information:
  - Pension details (e.g., type of pension plans, providers, current value)
  - Net worth structure (e.g., distribution across real estate, investments, savings accounts)
- Includes opportunities for the client to discuss any concerns about retirement planning or asset allocation strategies.
- Ends with agreement on next steps for deeper analysis or strategy development.

IMPORTANT FORMATTING RULES:
- Do NOT include any descriptions of physical actions, gestures, or emotional states (like "*nods*", "*smiles*", "*pauses*", etc.)
- Do NOT add any stage directions or non-verbal cues
- Present ONLY the direct speech of each participant
- Format each turn as exact words without any additional descriptions or person name
- Focus solely on the verbal content of the conversation
</dialogue_instructions>
"""

### regulatory_checks_and_kyc

In [None]:
# Jinja2 system-prompt template for the fourth onboarding phase. The
# {{ CLIENT_* }} placeholders are filled from a client_data dict when rendered.
# NOTE(review): the variable is named "regulatory_checks_and_kyc", but the
# prompt text below describes the "Family Situation and Financial Goals" phase
# (KYC collection is covered by the initial contact template) - confirm which
# is intended before renaming, since the name is also used as a phase label.
regulatory_checks_and_kyc_template = """
Your task is to generate a professional dialogue for the Family Situation and Financial Goals phase of client onboarding in wealth management.
The dialogue should consist of between 20 and 30 turns.

<persona_1>
  <name>James Carter</name>
  <role>Senior Wealth Advisor</role>
  <communication_style>Clear and concise. Avoids excessive jargon, focuses on understanding client needs before proposing solutions, and ensures transparency in all interactions.</communication_style>
  <company_context>
  Nevis is an AI-native Wealth Intelligence Platform that augments and automates advisorsʼ workflows - so they can focus on delivering best-in-class client service and drive AUM growth.
  The company is revolutionizing the industry with its AI-native approach to financial advice. 
  By automating routine tasks and providing real-time insights into client data.
  </company_context>
</persona_1>

<persona_2>
  <role>Prospective Client</role>
  <name>{{ CLIENT_NAME }}</name>
  <background>{{ CLIENT_BACKGROUND }}</background>
  <financial_situation>{{ CLIENT_FINANCIAL_SITUATION }}</financial_situation>
  <personality>{{ CLIENT_PERSONALITY }}</personality>
  <communication_style>{{ CLIENT_COMMUNICATION_STYLE }}</communication_style>
  <objectives>{{ CLIENT_OBJECTIVES }}</objectives>
</persona_2>

<dialogue_instructions>
Generate a dialogue that:
- Begins with James Carter asking open-ended questions about the client’s family situation (e.g., marital status, dependents).
- Focuses on understanding the client’s financial and life goals:
  - Short-term goals (e.g., purchasing property, funding education)
  - Long-term goals (e.g., retirement income targets, legacy planning)
  - Financial and life goals
- Includes opportunities for the client to share their priorities and expectations from wealth management services.
- Ends with James summarizing key points discussed and outlining next steps in creating a tailored strategy.

IMPORTANT FORMATTING RULES:
- Do NOT include any descriptions of physical actions, gestures, or emotional states (like "*nods*", "*smiles*", "*pauses*", etc.)
- Do NOT add any stage directions or non-verbal cues
- Present ONLY the direct speech of each participant
- Format each turn as exact words without any additional descriptions or person name
- Focus solely on the verbal content of the conversation
</dialogue_instructions>
"""

## Clients

Highlight the distinct personalities of the selected clients.

### Elon Musk

In [None]:
# Persona: Elon Musk. Running this cell rebinds the shared `client_data` name;
# the keys match the {{ CLIENT_* }} placeholders of the prompt templates.
client_data = dict(
    CLIENT_NAME="Elon Reeve Musk",
    CLIENT_BACKGROUND="Visionary entrepreneur, founder of Tesla, SpaceX, Neuralink, and The Boring Company. Currently the world's richest person with a net worth exceeding $330 billion. Known for his ambitious goals like colonizing Mars and advancing AI technologies.",
    CLIENT_FINANCIAL_SITUATION="Highly complex financial portfolio primarily tied to Tesla and SpaceX shares. Limited liquidity due to reinvestment in his companies. Operates through a single-family office, Excession LLC.",
    CLIENT_PERSONALITY="Analytical, innovative, risk-tolerant, highly detail-oriented but occasionally impulsive. Prefers direct communication and values efficiency.",
    CLIENT_COMMUNICATION_STYLE="Direct and data-driven. Appreciates clear, concise explanations but may challenge conventional approaches.",
    CLIENT_OBJECTIVES="Ensure the sustainability of his family’s wealth while supporting creative ventures aligned with his vision. Establish a legacy fund to promote innovation in technology and education.",
)

### Oprah Winfrey

In [None]:
# Persona: Oprah Winfrey. Running this cell rebinds the shared `client_data`
# name; the keys match the {{ CLIENT_* }} placeholders of the prompt templates.
client_data = dict(
    CLIENT_NAME="Oprah Gail Winfrey",
    CLIENT_BACKGROUND="Media mogul, talk show host, actress, philanthropist, and billionaire entrepreneur with a net worth exceeding $2 billion. Known for her influence on culture through 'The Oprah Winfrey Show' and her production company Harpo Productions.",
    CLIENT_FINANCIAL_SITUATION="Diverse portfolio including media holdings, real estate, and philanthropic initiatives through the Oprah Winfrey Foundation. Focused on empowering women and education globally.",
    CLIENT_PERSONALITY="Empathetic, charismatic, visionary leader with a strong sense of social responsibility. Values authenticity and connection in relationships.",
    CLIENT_COMMUNICATION_STYLE="Warm and conversational but expects clarity and transparency in professional dealings. Appreciates personalized service tailored to her values.",
    CLIENT_OBJECTIVES="Expand philanthropic efforts while maintaining financial sustainability. Optimize real estate holdings across multiple countries. Plan for long-term wealth preservation aligned with her mission of empowerment.",
)

### Steve Jobs

In [None]:
# Persona: Steve Jobs. Running this cell rebinds the shared `client_data` name;
# the keys match the {{ CLIENT_* }} placeholders of the prompt templates.
# NOTE(review): CLIENT_PERSONALITY and CLIENT_COMMUNICATION_STYLE end with the
# same sentence, and CLIENT_OBJECTIVES is word-for-word the Elon Musk
# persona's objectives - looks like copy-paste; confirm the intended text.
client_data = dict(
    CLIENT_NAME="Steven Paul Jobs",
    CLIENT_BACKGROUND="Co-founder of Apple Inc., known for revolutionizing consumer electronics with products like the iPhone and MacBook. Passed away in 2011 but remains an iconic figure in technology and design innovation.",
    CLIENT_FINANCIAL_SITUATION="At the peak of his career, had significant wealth tied to Apple and Pixar shares. Known for minimalistic personal spending but strategic investments in technology and media.",
    CLIENT_PERSONALITY="Visionary, perfectionist, demanding but inspiring. Known for challenging norms and expecting excellence from those around him. Values creativity and innovation in solutions presented to him.",
    CLIENT_COMMUNICATION_STYLE="Assertive and persuasive but occasionally confrontational. Values creativity and innovation in solutions presented to him.",
    CLIENT_OBJECTIVES="Ensure the sustainability of his family’s wealth while supporting creative ventures aligned with his vision. Establish a legacy fund to promote innovation in technology and education.",
)

### Warren Buffett

In [None]:
# Persona: Warren Buffett. Running this cell rebinds the shared `client_data`
# name; the keys match the {{ CLIENT_* }} placeholders of the prompt templates.
client_data = dict(
    CLIENT_NAME="Warren Edward Buffett",
    CLIENT_BACKGROUND="Legendary investor known as the 'Oracle of Omaha.' Chairman of Berkshire Hathaway with a net worth exceeding $100 billion. Advocates value investing principles and long-term financial planning.",
    CLIENT_FINANCIAL_SITUATION="Vast portfolio of investments across multiple industries through Berkshire Hathaway holdings. Minimal personal spending habits despite immense wealth. Committed to philanthropy through The Giving Pledge initiative.",
    CLIENT_PERSONALITY="Pragmatic, disciplined, patient investor with a sharp analytical mind. Values simplicity and long-term thinking over short-term gains.",
    CLIENT_COMMUNICATION_STYLE="Clear, logical, avoids unnecessary complexity or jargon. Prefers data-backed discussions but values ethical considerations in decision-making processes.",
    CLIENT_OBJECTIVES="Continue growing Berkshire Hathaway's portfolio sustainably while mentoring future leaders within the company. Ensure effective allocation of funds through philanthropic initiatives.",
)

## Generate conversation workflow

### create_dialogue_object

Description:
Create a unique **DIALOGUE** object based on **client_data** (for example, Elon Musk).

Object contains:
- **phase** name
- set of **system_prompts** for generating each part of the conversation
- personalized **initial_history** that seeds the generation workflow

In [None]:
def create_dialogue_object(client_data):
    """Build the per-phase generation config for one client.

    Each of the four module-level prompt templates is rendered with
    ``client_data`` and paired with a two-turn seed history (advisor opening
    followed by a client reply).

    Args:
        client_data: Dict holding the ``CLIENT_*`` values used by the prompt
            templates. ``CLIENT_NAME`` is required. An optional
            ``CLIENT_SALUTATION`` (default ``"Mr."``) sets the honorific the
            advisor uses in the seed turns; without it the output is the same
            as before this option existed.

    Returns:
        A list of four dicts, in phase order, each with the keys ``phase``,
        ``system_prompt`` and ``initial_history``.

    Raises:
        KeyError: If ``client_data`` has no ``CLIENT_NAME`` entry.
    """
    # The honorific was hard-coded to "Mr." for every client; it is now
    # overridable while keeping "Mr." as the backward-compatible default.
    salutation = client_data.get("CLIENT_SALUTATION", "Mr.")
    addressee = f"{salutation} {client_data['CLIENT_NAME']}"

    def seed_history(advisor_line, client_line):
        # Starting point of a phase: one advisor turn, then one client turn.
        return [
            {"role": "advisor", "content": advisor_line},
            {"role": "client", "content": client_line},
        ]

    # (phase label, template source, seed history). The labels keep their
    # "_template" suffix because they are stored as "dialog_stage" in output.
    phases = [
        (
            "initial_contact_phase_template",
            initial_contact_phase_template,
            seed_history(
                f"Good afternoon, {addressee}, thank you for taking the time to meet with me today. My name is James Carter, and I'm a Senior Wealth advisor at Nevis. Before we begin, I'd like to share a little about how we work. At our firm, we specialize in providing tailored wealth management solutions for clients like yourself. Today's conversation is all about understanding your unique situation, priorities, and long-term objectives so we can determine how best to support you. I'll also be happy to answer any questions you may have about our services or approach. To start, could you tell me a bit about what prompted you to seek wealth management services at this time?",
                "Hi James, thanks for having me. I'm looking for a wealth advisor who can help me navigate my financial situation and plan for the future. I have a few specific goals in mind, but I'm also open to suggestions on how to optimize my portfolio. I want to ensure that my investments align with my values and long-term vision.",
            ),
        ),
        (
            "professional_information_and_income_details_phase_template",
            professional_information_and_income_details_phase_template,
            seed_history(
                f"{addressee}, now lets discuss about Occupation details. Could you please share your current role, employer, and the industry you work in? This will help me understand your professional background and how it may impact your financial planning.",
                "Hi James, good question, could you provide me more details about what you need to know regarding my occupation?",
            ),
        ),
        (
            "pension_plans_and_asset_structure_phase_template",
            pension_plans_and_asset_structure_phase_template,
            seed_history(
                f"{addressee}, now lets discuss income discussions to pension plans and net worth evaluation.",
                "Hi James, good question, could you provide me more details about what you need to know regarding all of that?",
            ),
        ),
        (
            "regulatory_checks_and_kyc_template",
            regulatory_checks_and_kyc_template,
            seed_history(
                f"{addressee}, now lets discuss about family situation and financial goals.",
                "Hi James, I'm happy to share more about my family situation and financial goals. What specific information would you like to know?",
            ),
        ),
    ]

    return [
        {
            "phase": phase_label,
            "system_prompt": Template(template_source).render(client_data),
            "initial_history": history,
        }
        for phase_label, template_source, history in phases
    ]

### utils

- for_advisor_messages
- for_client_messages
- save_dialog_to_dataset

In [None]:
def for_advisor_messages(dialog):
    """Convert a dialog into API messages from the advisor's point of view.

    Advisor turns become ``"assistant"`` messages and client turns become
    ``"user"`` messages; turns with any other role are dropped. The input
    list is not modified.
    """
    return [
        {
            "role": "assistant" if turn["role"] == "advisor" else "user",
            "content": turn["content"],
        }
        for turn in dialog
        if turn["role"] in ("advisor", "client")
    ]

def for_client_messages(dialog):
    """Convert a dialog into API messages from the client's point of view.

    Advisor turns become ``"user"`` messages and client turns become
    ``"assistant"`` messages; turns with any other role are dropped. The input
    list is not modified.
    """
    return [
        {
            "role": "user" if turn["role"] == "advisor" else "assistant",
            "content": turn["content"],
        }
        for turn in dialog
        if turn["role"] in ("advisor", "client")
    ]

def save_dialog_to_dataset(dialog, client_name, dialog_stage, dataset_path):
    """Append one dialog to the JSON dataset stored at ``dataset_path``.

    The file holds a JSON list of entries. Existing entries are loaded, a new
    entry with the next 1-based ``id`` is appended, and the whole list is
    written back. A missing, empty, or unparsable file starts a fresh list.

    Args:
        dialog: The messages to store under the entry's ``messages`` key.
        client_name: Stored under ``client_name``.
        dialog_stage: Stored under ``dialog_stage``.
        dataset_path: Target JSON file; parent directories are created.

    Returns:
        None. This is best-effort: failures are printed, not raised.
    """
    try:
        parent_dir = os.path.dirname(dataset_path)
        # dirname is "" for a bare filename and os.makedirs("") raises, which
        # previously made every save into the current directory fail.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        dataset = []
        if os.path.exists(dataset_path) and os.path.getsize(dataset_path) > 0:
            try:
                with open(dataset_path, 'r', encoding='utf-8') as f:
                    dataset = json.load(f)
            except json.JSONDecodeError:
                dataset = []
        if not isinstance(dataset, list):
            # Refuse to overwrite a file whose root is not a list of entries.
            raise ValueError(f"{dataset_path} does not contain a JSON list")

        dataset.append({
            "id": len(dataset) + 1,
            "dialog_stage": dialog_stage,
            "client_name": client_name,
            "messages": dialog
        })

        # Serialize before opening the file for writing, so a serialization
        # error cannot leave a truncated dataset behind.
        payload = json.dumps(dataset, ensure_ascii=False, indent=2)
        with open(dataset_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        print(f"Dataset saved to {dataset_path}")
    except (OSError, TypeError, ValueError) as e:
        print(f"Error saving dialog to dataset: {e}")

In [None]:
def generate_dialog(system_prompt: str, history: list, iterations: int=10) -> list:
    """Extend a seed history by alternating advisor and client turns.

    A copy of ``history`` is grown one turn per iteration. Whoever spoke last
    decides who speaks next; the new turn is produced by ``text_to_text`` with
    the dialog converted to that speaker's point of view. The same
    ``system_prompt`` is used for both speakers.

    Args:
        system_prompt: System prompt passed to every generation call.
        history: Seed turns as ``{"role": "advisor" | "client", "content"}``
            dicts; must be non-empty, since its last turn is inspected.
        iterations: Number of loop passes, i.e. the maximum number of turns
            added.

    Returns:
        A new list with the seed turns followed by the generated turns.
    """
    # NOTE(review): when the seed starts with an advisor turn, the advisor-side
    # message list begins with an "assistant" message - confirm the API
    # accepts a conversation that does not start with a "user" message.
    transcript = list(history)
    print("initial dialog: ", transcript)

    previous_speaker = transcript[-1]["role"]

    for step in range(1, iterations + 1):
        print(f"Iteration {step}/{iterations}")

        if previous_speaker == "client":
            next_speaker, to_api_messages = "advisor", for_advisor_messages
        elif previous_speaker == "advisor":
            next_speaker, to_api_messages = "client", for_client_messages
        else:
            # Unknown last role: nothing is generated on this pass.
            continue

        reply = text_to_text(
            system=system_prompt,
            messages=to_api_messages(transcript),
            max_tokens=2048,
            temperature=0.6,
        )
        transcript.append({"role": next_speaker, "content": reply})
        previous_speaker = next_speaker

    return transcript

### generate_conversation
Description:
- Generates complete synthetic dialogues between an **advisor** and a **client**.
- Each phase is generated separately; the resulting conversations are written to a JSON file in the **conversations** folder.


In [None]:
def generate_conversation(client_data, num_conversations: int, iterations_per_phase=30):
    """Generate multi-phase conversations for one client and save them as JSON.

    Every conversation runs through all phases returned by
    ``create_dialogue_object``. Results are written to
    ``../conversations/conversations_with_<CLIENT_NAME>.json`` (relative to the
    current working directory), overwriting any existing file.

    Args:
        client_data: Persona dict; ``CLIENT_NAME`` is used for the file name.
        num_conversations: How many full conversations to generate.
        iterations_per_phase: ``iterations`` passed to ``generate_dialog`` for
            each phase.

    Returns:
        None.
    """
    client_name = client_data['CLIENT_NAME']
    file_path = f"../conversations/conversations_with_{client_name}.json"
    # Create the output folder before any generation starts: previously a
    # missing folder was only discovered after every API call had been made.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    def write_snapshot(conversations):
        # Rewrite the whole output file with the conversations collected so far.
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(conversations, f, ensure_ascii=False, indent=2)

    all_conversations = []

    for i in range(num_conversations):
        print(f"Generating conversation {i + 1} for {client_name}")
        dialogue_phases_config = create_dialogue_object(client_data)
        generated_phases = [
            {
                "dialog_stage": phase_data["phase"],
                "messages": generate_dialog(
                    system_prompt=phase_data["system_prompt"],
                    history=phase_data['initial_history'],
                    iterations=iterations_per_phase
                )
            }
            for phase_data in dialogue_phases_config
        ]

        all_conversations.append({
            "id": i + 1,
            "client_name": client_name,
            "conversation": generated_phases
        })
        # Checkpoint after each conversation so a failure later in the run
        # does not discard the conversations already generated.
        write_snapshot(all_conversations)

    if not all_conversations:
        # Nothing was generated; still emit the (empty) file as before.
        write_snapshot(all_conversations)
    print(f"Done {client_name}.")

In [None]:
# Notebook display settings for DataFrames: show every column, truncate cell
# text to 50 characters, and allow output up to 200 characters wide.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = 50
pd.options.display.width = 200

# print(df.messages[0])
# print(df.head())

In [None]:
# Runs generation for whichever `client_data` dict was assigned most recently
# (every client cell above rebinds the same name). With these arguments this
# is 10 conversations x 4 phases x up to 30 generated turns, one API call each.
generate_conversation(client_data, num_conversations=10, iterations_per_phase=30)

---