In [1]:
import json
import uuid
from sqlalchemy import create_engine
from utils import reset_db, get_session, model_to_dict
from data.models import udahub

# Udahub Application

## Core Database

**Init DB**

In [2]:
udahub_db = "data/core/udahub.db"
reset_db(udahub_db)
engine = create_engine(f"sqlite:///{udahub_db}", echo=False)
udahub.Base.metadata.create_all(bind=engine)

✅ Removed existing data/core/udahub.db
2025-12-05 23:45:25,441 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-12-05 23:45:25,442 INFO sqlalchemy.engine.Engine COMMIT
✅ Recreated data/core/udahub.db with fresh schema


**Account**

In [3]:
account_id = "cultpass"
account_name = "CultPass Card"

with get_session(engine) as session:
    account = udahub.Account(
        account_id=account_id,
        account_name=account_name,
    )
    session.add(account)

---

## Integrations

**Use LLM to Generate Sample Data**

In [16]:
import os
import pandas as pd
from dotenv import load_dotenv
from typing import List
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any, Literal, TypedDict, Annotated
from langgraph.prebuilt import create_react_agent
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import SystemMessage

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
llm_base_url = "https://openai.vocareum.com/v1"

llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0.0,
    base_url="https://openai.vocareum.com/v1",
    api_key=OPENAI_API_KEY,
)

**Data Generation Parameters**

In [None]:
num_articles = 15
num_account = 5
num_user = 15
num_ticket = 5
num_msg = num_ticket
num_expected_tag_msg = round(num_msg * 0.8)
num_unexpected_tag_msg = num_msg - num_expected_tag_msg

**Sampling Schema**

For LLM to produce the data samples

In [54]:
class ArticlesSchema(BaseModel):
    title: Annotated[str, Field(description="Title of the article.")]
    content: Annotated[str, Field(description="Content of the article.")]
    tags: Annotated[str, Field(description="Tags of the article")]

class ArticlesListSchema(BaseModel):
    samples: Annotated[List[ArticlesSchema], Field(description="List of CultpassArticles")]

class AccountSamples(BaseModel):
    """Structured response for account table"""
    company_name: List[str] = Field(description="List of first name and last name")

class UsersSchema(BaseModel):
    name: Annotated[str, Field(description="User's name with first and last name.")]
    email: Annotated[str, Field(description="User's email address.")]
    is_blocked: Annotated[bool, Field(description="User's blocking status.")]
    account_id: Annotated[str, Field(description="Company account ID")]

class UsersListSchema(BaseModel):
    samples: Annotated[List[UsersSchema], Field(description="List of users")]

class SingleTicket(BaseModel):
    """Structured response for a ticket"""
    status: Annotated[Literal['open','pending','closed'], Field(description="Status of the ticket")]
    content: Annotated[str, Field(description="The ticket body message content.")]
    owner_id: Annotated[str, Field(description="User ID")]
    owner_name: Annotated[str, Field(description="User's name")]
    channel: Annotated[Literal['email','chat','web','sms','social','phone'], Field(description="Ticket priority")]
    tags: Annotated[str, Field(description="List of tags used for creating this message")]
    account_id: Annotated[str, Field(description="Company account ID")]

class TicketSamples(BaseModel):
    """Structured response of ticket list"""
    samples: List[SingleTicket]

**Loading Default Datae**

* Knowledge Base
* Users Data

In [8]:
# Load Sample Articles
cultpass_articles = []
with open('data/external/cultpass_articles.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        cultpass_articles.append(json.loads(line))

# Extract Tags from Samples
sample_tags = [ article['tags'].split(', ') for article in cultpass_articles ]
sample_tags = set([ item for subtags in sample_tags for item in subtags ])
sample_tags_str = ", ".join(sample_tags)

# Define Tags with Expected Knowledge and Tags without Expected Knowledge
# (This is for sampling ticket questions "with vs without knowledge to answer" using LLM)
expected_tags = sample_tags
unexpected_tags = ['weather', 'stock price', 'dietary', 'health advice']

In [9]:
# Load Users Data
cultpass_users = []
with open('data/external/cultpass_users.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        cultpass_users.append(json.loads(line))

Generate Knowledge Articles

In [12]:
# Create Additional Knowledge Articles
sample_articles_template = """
    You are an helpful expert creating sample of python sample article data based on given examples.  Do not provide any additional description or explanation.
    There is a mock-up project, it has the following description:

    You’ve joined a fast-growing AI startup building the next frontier in customer support automation. Your team is responsible for building UDA-Hub, 
    a Universal Decision Agent designed to plug into existing customer support systems (Zendesk, Intercom, Freshdesk, internal CRMs) and intelligently 
    resolve tickets. You need to generate sample knowledge articles in area including: {sample_tags}.  
    
    The knowledge areas given above will be used as tags for the generated articles.
    
    The first UDA-Hub customer Cultpass has the following sample articles to be used as knowledge to be used by the system:
    {samples}

    Provide {num_articles} more example like above and double check if all articles uses tags from {sample_tags}, if not try again.
    """

sample_articles_prompt = PromptTemplate(
    template = sample_articles_template,
    input_variables = ["samples","num_articles","sample_tags"],
    ).invoke({"samples": cultpass_articles,
              "num_articles": num_articles,
              "sample_tags": sample_tags}).to_messages()

new_cultpass_articles = llm.with_structured_output(ArticlesListSchema).invoke(sample_articles_prompt).model_dump()['samples']
cultpass_articles_ls = cultpass_articles + new_cultpass_articles

# Extracting Sample Tags for Each Sample
# new_sample_tags = [ article['tags'].split(', ') for article in cultpass_articles_ls ]
# new_sample_tags = set([ item for subtags in new_sample_tags for item in subtags ])
# new_sample_tags_str = ", ".join(new_sample_tags)

if len(cultpass_articles_ls) < 14:
    raise AssertionError("You should load the articles with at least 14 records")

# cultpass_articles_ls

In [20]:
# Create Company List
account_prompt_template = """You are a company naming expert. Please come up with {num_account} company names."""

account_prompt = PromptTemplate(
    template = account_prompt_template,
    input_variables = ["num_account"],
    ).invoke({"num_account": num_account}).to_messages()

account = llm.with_structured_output(AccountSamples).invoke(account_prompt).model_dump()['company_name']
account_table = pd.DataFrame({"company_name": account})
# account_table['id'] = account_table.index
account_table['account_id'] = [ str(uuid.uuid4()) for i in range(num_account) ]
account_table_ls = account_table.to_dict(orient='records')
# account_table

In [44]:
# Create Additional Users
num_cultpass_users = len(cultpass_users)
num_new_users = num_user - num_cultpass_users
user_prompt_template = """
    You are a people naming expert. Please come up with {num_user} names with both first and last name.
    Randomly choose a company for each user from the account list and based on the name of the company,
    create an email address for this person.  Some companies should have more users than others.
    All users from the same company must have the same domain name on their email address.

    The `is_blocked` status should be randomly choosen as 'True' or 'False'

    The account ID is the ID in the provided table, the ID and account name must match.

    List of account (companies): {account_table}"""

user_prompt = PromptTemplate(
    template = user_prompt_template,
    input_variables = ["num_user","account_table"],
    ).invoke({"num_user": num_new_users, "account_table": account_table}).to_messages()

user_ls = llm.with_structured_output(UsersListSchema).invoke(user_prompt).model_dump()['samples']
user_table = pd.DataFrame(user_ls)
user_table['id'] = [ str(uuid.uuid4().hex[:6]) for i in range(num_new_users) ]
user_table = user_table[['id', 'name', 'email', 'is_blocked','account_id']]
cultpass_users_table = pd.DataFrame(cultpass_users)
cultpass_users_table['account_id'] = [ str(uuid.uuid4()) for i in range(num_cultpass_users) ]
user_table = pd.concat([user_table, cultpass_users_table])

In [46]:
user_table

Unnamed: 0,id,name,email,is_blocked,account_id
0,ab33a0,Liam Anderson,liam.anderson@innovatechsolutions.com,False,e7ccf85e-e5d9-4a6d-9b8c-78f1fe8537ae
1,a77179,Sophia Martinez,sophia.martinez@ecospherenterprises.com,True,1c58e9ce-b6fe-4fb8-a76d-4024deebdeaf
2,757e8d,Ethan Johnson,ethan.johnson@quantumleapdynamics.com,False,ef24d2a6-b4b3-483c-bf94-3e1ef74fd8f8
3,677ee9,Olivia Brown,olivia.brown@innovatechsolutions.com,True,e7ccf85e-e5d9-4a6d-9b8c-78f1fe8537ae
0,a4ab87,Alice Kingsley,alice.kingsley@wonderland.com,True,b25fb49c-9e7e-4613-9790-0879829ac73b
1,f556c0,Bob Stone,bob.stone@granite.com,False,61d16af6-fd5b-4e10-842b-e971488c8dee
2,88382b,Cathy Bloom,cathy.bloom@florals.org,False,7eec0b4d-ee22-4a8c-9fd4-781e4abdd611
3,888fb2,David Noir,david.noir@shadowmail.com,True,6be19f2f-9db6-4443-a1dc-ae7bbd59669c
4,f1f10d,Eva Green,eva.green@ecosoul.net,False,7f4a043d-28ec-48f9-a4dd-c8dee193483a
5,e6376d,Frank Ocean,frank.ocean@seawaves.io,False,218b4530-f94e-4c91-a7a4-56d88b271f30


In [61]:
# Create Tickets
ticket_prompt_template = """
    You are a IT ticket generation emulator focusing on generating the actual message of users.
    
    Step 1: Generate text message `content` with each message from 20 to 200 words, and make occasional minor typos in the message:
        Generate {num_expected_tag_msg} messages body based on the selection of one to multiple of the following topics {expected_tags}.
        Generate {num_unexpected_tag_msg} messages with at least one topic from {unexpected_tags} as main theme and zero to multiple topics from {expected_tags}.  

    Step 2: For the `owner_id` (use `id` column in the table), `owner_name` (use `name` column in the table), account_id (`account_id`):
    {user_table}

    Step 3: Randomly selected a `channel` from ['email','chat','web','sms','social','phone'] for each ticket.
    """

ticket_prompt = PromptTemplate(
    template = ticket_prompt_template,
    input_variables = ["num_expected_tag_msg","num_unexpected_tag_msg","expected_tags","unexpected_tags","ticket_table"],
    ).invoke({"num_expected_tag_msg": num_expected_tag_msg, 
              "num_unexpected_tag_msg": num_unexpected_tag_msg, 
              "expected_tags": expected_tags, 
              "unexpected_tags": unexpected_tags, 
              "user_table": user_table,
            }).to_messages()

ticket_ls = llm.with_structured_output(TicketSamples).invoke(ticket_prompt).model_dump()['samples']
ticket_table = pd.DataFrame(ticket_ls)
# ticket_table['id'] = msg_table.index
ticket_table['role'] = "user"
ticket_table['status'] = "open"
# ticket_table

In [62]:
ticket_table

Unnamed: 0,status,content,owner_id,owner_name,channel,tags,account_id,role
0,open,"Hi there, I am having trouble logging into my ...",ab33a0,Liam Anderson,email,"password, login",e7ccf85e-e5d9-4a6d-9b8c-78f1fe8537ae,user
1,open,"Hello, I would like to cancel my current subsc...",a77179,Sophia Martinez,web,"cancelation, subscription, billing",1c58e9ce-b6fe-4fb8-a76d-4024deebdeaf,user
2,open,"Hi, I need to escalate an issue regarding my r...",757e8d,Ethan Johnson,chat,"escalation, booking, reservation, events",ef24d2a6-b4b3-483c-bf94-3e1ef74fd8f8,user
3,open,"Good day, I am writing to inquire about the be...",677ee9,Olivia Brown,phone,"benefits, subscription, access",e7ccf85e-e5d9-4a6d-9b8c-78f1fe8537ae,user
4,open,"Hi, I am concerned about the recent weather ch...",a4ab87,Alice Kingsley,social,"weather, health advice, dietary, pause, subscr...",b25fb49c-9e7e-4613-9790-0879829ac73b,user


## Load Samples into Database

In [None]:
with get_session(engine) as session:
    kb = []
    for article in cultpass_articles:
        knowledge = udahub.Knowledge(
            article_id=str(uuid.uuid4()),
            account_id=account_id,
            title=article["title"],
            content=article["content"],
            tags=article["tags"]
        )
        kb.append(knowledge)
    session.add_all(kb) 
    

**Ticket**

In [64]:
def add_ticket(ticket_info):
    """
    Adding a single ticket to the database
    """
    with get_session(engine) as session:
        user = session.query(udahub.User).filter_by(
            account_id=account_id,
            external_user_id=ticket_info["owner_id"],
        ).first()

        if not user:
            user = udahub.User(
                user_id=str(uuid.uuid4()),
                account_id=account_id,
                external_user_id=ticket_info["owner_id"],
                user_name=ticket_info["owner_name"],
            )
        
        ticket = udahub.Ticket(
            ticket_id=str(uuid.uuid4()),
            account_id=account_id,
            user_id=user.user_id,
            channel=ticket_info["channel"],
        )
        metadata = udahub.TicketMetadata(
            ticket_id=ticket.ticket_id,
            status=ticket_info["status"],
            main_issue_type=None,
            tags=ticket_info["tags"],
        )

        first_message = udahub.TicketMessage(
            message_id=str(uuid.uuid4()),
            ticket_id=ticket.ticket_id,
            role=ticket_info["role"],
            content=ticket_info["content"],
        )

        session.add_all([user, ticket, metadata, first_message])


In [65]:
for index, row in ticket_table.iterrows():
    add_ticket(row)

# Tests

In [66]:
with get_session(engine) as session:
    account = session.query(udahub.Account).filter_by(
        account_id=account_id
    ).first()
    print(account)

<Account(account_id='cultpass', account_name='CultPass Card')>


In [67]:
with get_session(engine) as session:
    account = session.query(udahub.Account).filter_by(
        account_id=account_id
    ).first()
    for article in account.knowledge_articles:
        print(article)

In [68]:
with get_session(engine) as session:
    users = session.query(udahub.User).all()
    for user in users:
        print(user)

<User(user_id='7e35d47a-2a89-4ac1-a90a-a868167641b3', user_name='Liam Anderson', external_user_id='ab33a0')>
<User(user_id='0c5609c5-8bbf-4742-8157-feb473af9377', user_name='Sophia Martinez', external_user_id='a77179')>
<User(user_id='91c3abf6-22fd-4def-8664-ea757d2ff92e', user_name='Ethan Johnson', external_user_id='757e8d')>
<User(user_id='ef25a834-b0dc-47e2-a147-bd7c87261afd', user_name='Olivia Brown', external_user_id='677ee9')>
<User(user_id='85e10baf-8d7e-4d4d-aca1-15290bbfb5ea', user_name='Alice Kingsley', external_user_id='a4ab87')>


In [70]:
with get_session(engine) as session:
    user = session.query(udahub.User).filter_by(
        account_id=account_id,
        external_user_id=ticket_table.iloc[2].to_dict()["owner_id"],
    ).first()
    
    ticket:udahub.Ticket = user.tickets[0]
    for message in ticket.messages:
        print(message)

<TicketMessage(message_id='a49dc9e8-2de8-41a5-8fa2-117bc477d52a', role='user', content='Hi, I need to escalate an issu...')>
