In [22]:
import os

os.environ["GOOGLE_API_KEY"] = "AIzaSyDueqOJ_1Clm8a6_kC-7E2IcCeSGB0H7Xk"

In [23]:
import json
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document


with open('dataset.json', 'r', encoding='utf-8') as f:
    dataset = json.load(f)

embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004"
)

documents = []
for item in dataset:
    page_content = (
    f"Feature Name: {item['feature_name']}\n"
    f"Description: {item['description']}\n"
    f"Product Category: {item['product_category']}\n"
    f"Keywords: {', '.join(item['keywords'])}\n"
    f"Pain Points Solved: {', '.join(item['pain_points_solved'])}\n"
    f"Feature Type: {item['feature_type']}\n"
    f"Solution Phase: {item['solution_phase']}\n"
    f"Outputs: {', '.join(item['outputs_data_type'])}\n"
    f"Consumes: {', '.join(item['consumes_data_type'])}\n"
    f"Complements: {', '.join(item['complements_features'])}\n"
    f"Synergy: {item['synergy_description']}"
)

    metadata = {
        "feature_id": item["feature_id"],
        "feature_name": item["feature_name"]
    }

    documents.append(Document(
        page_content=page_content,
        metadata=metadata
    ))

vector_store = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    persist_directory="./chroma_db"
)

retriever = vector_store.as_retriever(
    search_kwargs={"k": 3}
)

print(f"Vector store created successfully with {len(documents)} documents.")

Vector store created successfully with 11 documents.


In [24]:
retriever.invoke("How to know if a feature is useful?")

[Document(id='dacb91aa-11da-4c0f-9499-3bf4a7853fbd', metadata={'feature_name': 'VoC - Surveys', 'feature_id': 'VOC_SURVEYS'}, page_content="Feature Name: VoC - Surveys\nDescription: Design and deploy surveys across Web, Mobile, Zalo, SMS, Email, QR, POS to collect customer feedback.\nProduct Category: Voice of Customer (VoC)\nKeywords: collect feedback, survey, customer opinion, NPS, CSAT, post-purchase\nPain Points Solved: struggling to collect feedback, low survey response rate, don't know what customers think after purchase, manual feedback collection\nFeature Type: Data_Collection\nSolution Phase: Collect\nOutputs: customer_feedback_raw, satisfaction_score\nConsumes: customer_transaction_event, customer_segment\nComplements: INSIGHTS_EXPERIENCE, CUSTOMER_360_CUSTOMERS\nSynergy: Provides the raw data for 'Insights - Experience' to analyze, and can be targeted to specific segments from 'Customer 360'."),
 Document(id='a83eea54-aaa7-44f9-955d-b5021ee18038', metadata={'feature_name': '

In [None]:
import json
from typing import Dict, List, Any
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.retrievers import BaseRetriever
from langchain_core.documents import Document

def deconstruct_pain_point(pain_point_text: str) -> Dict[str, List[str]]:
    """
    Deconstructs pain point text into current problems and desired outcomes using an LLM.
    
    Args:
        pain_point_text: User's description of their pain points
    
    Returns:
        Dictionary with 'current_problems' and 'desired_outcomes' lists
    """
    # Initialize the LLM with safety settings and JSON output enforcement
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash-latest",
        temperature=0.1,
        convert_system_message_to_human=True
    )
    
    # Define the structured output parser
    parser = JsonOutputParser(pydantic_object=None)
    
    # Create the prompt template
    prompt_template = ChatPromptTemplate.from_template(
        "Analyze the following customer pain point description and extract:\n"
        "1. CURRENT problems the user is experiencing (list of strings)\n"
        "2. DESIRED outcomes they want to achieve (list of strings)\n\n"
        "Format output as JSON with exactly these keys: 'current_problems', 'desired_outcomes'\n\n"
        "Pain point description: {pain_point}\n\n"
        "Guidelines:\n"
        "- Return empty lists if no relevant items found\n"
        "- Never add explanations or additional text\n"
        "- Keep items concise (3-5 words each)\n"
        "- Extract verbatim phrases when possible\n"
    )
    
    # Create and execute the processing chain
    chain = prompt_template | llm | parser
    try:
        result = chain.invoke({"pain_point": pain_point_text})
        # Validate the result structure
        if not all(key in result for key in ["current_problems", "desired_outcomes"]):
            raise ValueError("Invalid output structure from LLM")
        return result
    except Exception as e:
        # Fallback to empty structure on error
        return {"current_problems": [], "desired_outcomes": []}

def match_features_to_pain_points(
    deconstructed: Dict[str, List[str]], 
    retriever: BaseRetriever
) -> Dict[str, List[Dict[str, str]]]:
    """
    Matches problems and outcomes to product features using semantic search
    
    Args:
        deconstructed: Output from deconstruct_pain_point
        retriever: Vector store retriever instance
        
    Returns:
        Dictionary where keys are problems/outcomes and values are feature matches
    """
    # Combine all problems and outcomes
    all_terms = deconstructed["current_problems"] + deconstructed["desired_outcomes"][:1]
    results = {}
    
    for term in all_terms:
        try:
            # Retrieve relevant documents
            docs = retriever.get_relevant_documents(term)
            matches = []
            for doc in docs:
                matches.append({
                    "feature_name": doc.metadata["feature_name"],
                    "feature_id": doc.metadata["feature_id"],
                    "score_context": doc.page_content  # The full document content
                })
            results[term] = matches
        except Exception as e:
            # Continue processing other terms on error
            results[term] = []
            continue
            
    return results

In [26]:
from typing import Dict, List, Optional
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate

def select_best_feature(
    pain_point_text: str,
    deconstructed: Dict[str, List[str]],
    matches: Dict[str, List[Dict[str, str]]],
    max_candidates: int = 10
) -> Optional[Dict[str, str]]:
    """
    Selects the best feature that addresses the overall pain point.
    
    Args:
        pain_point_text: Original pain point description
        deconstructed: Output from deconstruct_pain_point
        matches: Output from match_features_to_pain_points
        max_candidates: Maximum features to consider
        
    Returns:
        Dictionary with best feature details and reasoning, or None if no matches
    """
    # Collect all unique features from matches
    seen_features = set()
    candidate_features = []
    
    for term, feature_list in matches.items():
        for feature in feature_list:
            if feature["feature_id"] not in seen_features:
                seen_features.add(feature["feature_id"])
                candidate_features.append({
                    "feature_id": feature["feature_id"],
                    "feature_name": feature["feature_name"],
                    "context": feature["score_context"]
                })
                # Limit number of candidates
                if len(candidate_features) >= max_candidates:
                    break
    
    if not candidate_features:
        return None
    
    # Prepare LLM for decision making
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash-latest",
        temperature=0.2,
        convert_system_message_to_human=True
    )
    
    # Format candidate features for prompt
    candidates_str = "\n\n".join([
        f"Feature ID: {c['feature_id']}\n"
        f"Name: {c['feature_name']}\n"
        f"Details:\n{c['context']}"
        for c in candidate_features
    ])
    
    # Create decision-making prompt
    prompt = ChatPromptTemplate.from_template(
        "As a product recommendation expert, select the single best feature that addresses "
        "the customer's overall pain point. Consider both current problems and desired outcomes.\n\n"
        "**Customer Pain Point**:\n{pain_point}\n\n"
        "**Key Elements**:\n"
        "- Current Problems: {current_problems}\n"
        "- Desired Outcomes: {desired_outcomes}\n\n"
        "**Candidate Features**:\n{candidates}\n\n"
        "**Instructions**:\n"
        "1. Choose ONE feature that best solves the overall situation\n"
        "2. Consider comprehensiveness, relevance, and impact\n"
        "3. Output JSON with: 'feature_id', 'feature_name', and 'reason'\n"
        "4. 'reason' should be a concise 1-2 sentence explanation\n"
        "5. If no good match exists, set feature_id to null\n\n"
        "Output ONLY valid JSON. Example:\n"
        "{{\"feature_id\": \"F123\", \"feature_name\": \"Smart Routing\", \"reason\": \"Explanation...\"}}"
    )
    
    # Create and execute the chain
    chain = prompt | llm | JsonOutputParser()
    
    try:
        result = chain.invoke({
            "pain_point": pain_point_text,
            "current_problems": ", ".join(deconstructed.get("current_problems", []) or "None identified"),
            "desired_outcomes": ", ".join(deconstructed.get("desired_outcomes", []) or "No specific goals"),
            "candidates": candidates_str
        })
        # Handle null selection
        if not result.get("feature_id"):
            return None
        return result
    except Exception as e:
        # Log error in production (placeholder)
        print(f"Feature selection error: {str(e)}")
        return None

In [None]:
def evaluate_outcomes(
    current_problems: List[str],
    desired_outcomes: List[str],
    best_feature: Dict[str, Any],
    retriever: BaseRetriever,
    find_additional_features: bool = True  # Thêm parameter mới
) -> Dict[str, Any]:
    """
    Evaluates whether the recommended feature resolves current problems and achieves desired outcomes.
    Also finds additional features for unresolved issues.
    
    Args:
        current_problems: List of current problems from pain point analysis
        desired_outcomes: List of desired outcomes from pain point analysis
        best_feature: Dictionary containing selected feature details (from select_best_feature)
        retriever: Vector store retriever for feature context
        find_additional_features: Whether to find additional features for unresolved issues
        
    Returns:
        Evaluation results with resolution status and additional features for unresolved issues
    """
    # Handle case where no feature was recommended
    if best_feature is None:
        return {
            "overall_status": "no_feature_recommended",
            "problem_resolution": [{"problem": p, "status": "not_resolved"} for p in current_problems],
            "outcome_achievement": [{"outcome": o, "status": "not_achieved"} for o in desired_outcomes],
            "resolved_items": {
                "problems": [],
                "outcomes": [],
                "unresolved_items": [
                    {"item": p, "type": "problem", "status": "not_resolved"} for p in current_problems
                ] + [
                    {"item": o, "type": "outcome", "status": "not_achieved"} for o in desired_outcomes
                ]
            },
            "additional_features": {}
        }

    # Get full feature context
    feature_id = best_feature["feature_id"]
    try:
        docs = retriever.get_relevant_documents(f"Feature ID: {feature_id}")
        feature_context = docs[0].page_content if docs else "No context available"
    except Exception:
        feature_context = "No context available"

    # Prepare LLM for evaluation
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash-latest",
        temperature=0,
        convert_system_message_to_human=True
    )
    
    # Create evaluation prompt
    prompt_template = ChatPromptTemplate.from_template(
        "EVALUATE how well the recommended feature addresses the user's problems and desired outcomes.\n\n"
        "**Recommended Feature**\n"
        "ID: {feature_id}\n"
        "Name: {feature_name}\n"
        "Details:\n{feature_context}\n\n"
        "**User's Current Problems**\n{problems}\n\n"
        "**User's Desired Outcomes**\n{outcomes}\n\n"
        "**Evaluation Tasks**\n"
        "1. For EACH current problem:\n"
        "   - Determine if feature Does or DOES NOT resolve it\n"
        "   - Provide 1-sentence technical justification\n"
        "2. For EACH desired outcome:\n"
        "   - Determine if feature DOES or DOES NOT achieve it\n"
        "   - Provide 1-sentence technical justification\n"
        "3. Give overall resolution status\n\n"
        "**Output Format**\n"
        "{{"
        "\"overall_status\": \"resolved\"  | \"not_resolved\","
        "\"problem_resolution\": ["
        "   {{\"problem\": \"text\", \"status\": \"resolved\"  | \"not_resolved\", \"reason\": \"explanation\"}}"
        "],"
        "\"outcome_achievement\": ["
        "   {{\"outcome\": \"text\", \"status\": \"achieved\"  | \"not_achieved\", \"reason\": \"explanation\"}}"
        "]}}"
    )
    
    # Build and execute chain
    chain = prompt_template | llm | JsonOutputParser()
    
    try:
        result = chain.invoke({
            "feature_id": feature_id,
            "feature_name": best_feature["feature_name"],
            "feature_context": feature_context,
            "problems": "\n- ".join(current_problems),
            "outcomes": "\n- ".join(desired_outcomes)
        })
        
        # Thêm logic đánh dấu resolved items
        resolved_items = {
            "problems": [],
            "outcomes": [],
            "unresolved_items": []
        }
        
        # Đánh dấu problems
        if "problem_resolution" in result:
            for problem_item in result["problem_resolution"]:
                if problem_item.get("status") == "resolved":
                    resolved_items["problems"].append({
                        "item": problem_item["problem"],
                        "status": "resolved",
                        "reason": problem_item.get("reason", "Successfully addressed")
                    })
                else:
                    resolved_items["unresolved_items"].append({
                        "item": problem_item["problem"],
                        "type": "problem",
                        "status": problem_item.get("status", "not_resolved"),
                        "reason": problem_item.get("reason", "Not addressed")
                    })
        
        # Đánh dấu outcomes
        if "outcome_achievement" in result:
            for outcome_item in result["outcome_achievement"]:
                if outcome_item.get("status") == "achieved":
                    resolved_items["outcomes"].append({
                        "item": outcome_item["outcome"],
                        "status": "achieved",
                        "reason": outcome_item.get("reason", "Successfully achieved")
                    })
                else:
                    resolved_items["unresolved_items"].append({
                        "item": outcome_item["outcome"],
                        "type": "outcome",
                        "status": outcome_item.get("status", "not_achieved"),
                        "reason": outcome_item.get("reason", "Not achieved")
                    })
        
        # Tìm additional features cho unresolved items nếu được yêu cầu
        additional_features = {}
        if find_additional_features and resolved_items["unresolved_items"]:
            for unresolved_item in resolved_items["unresolved_items"]:
                item_text = unresolved_item["item"]
                try:
                    docs = retriever.get_relevant_documents(item_text)
                    matches = []
                    for doc in docs:
                        matches.append({
                            "feature_id": doc.metadata["feature_id"],
                            "feature_name": doc.metadata["feature_name"],
                            "relevance_score": "high" if item_text.lower() in doc.page_content.lower() else "medium",
                            "context": doc.page_content
                        })
                    additional_features[f"{unresolved_item['type']}_{item_text}"] = matches
                except Exception as e:
                    additional_features[f"{unresolved_item['type']}_{item_text}"] = []
        
        # Thêm thông tin mới vào result
        result["resolved_items"] = resolved_items
        result["additional_features"] = additional_features
        result["summary"] = {
            "total_unresolved": len(resolved_items["unresolved_items"]),
            "problems_resolved": len(resolved_items["problems"]),
            "outcomes_achieved": len(resolved_items["outcomes"]),
            "problems_unresolved": len([item for item in resolved_items["unresolved_items"] if item["type"] == "problem"]),
            "outcomes_unachieved": len([item for item in resolved_items["unresolved_items"] if item["type"] == "outcome"])
        }
        
        return result
        
    except Exception as e:
        # Fallback evaluation
        return {
            "overall_status": "evaluation_failed",
            "problem_resolution": [{"problem": p, "status": "unknown"} for p in current_problems],
            "outcome_achievement": [{"outcome": o, "status": "unknown"} for o in desired_outcomes],
            "resolved_items": {
                "problems": [],
                "outcomes": [],
                "unresolved_items": [
                    {"item": p, "type": "problem", "status": "unknown"} for p in current_problems
                ] + [
                    {"item": o, "type": "outcome", "status": "unknown"} for o in desired_outcomes
                ]
            },
            "additional_features": {},
            "summary": {
                "total_unresolved": len(current_problems) + len(desired_outcomes),
                "problems_resolved": 0,
                "outcomes_achieved": 0,
                "problems_unresolved": len(current_problems),
                "outcomes_unachieved": len(desired_outcomes)
            }
        }

In [28]:
# Full workflow example
pain_point = "Our support agents are overwhelmed by the high volume of repetitive questions and i want to automate responses to common queries."
# Stage 1: Deconstruct pain point
deconstructed = deconstruct_pain_point(pain_point)
deconstructed



{'current_problems': ['overwhelmed support agents',
  'high volume questions',
  'repetitive questions'],
 'desired_outcomes': ['automate responses', 'automate common queries']}

In [29]:
print(f"Deconstructed pain point: {deconstructed}")
# Stage 2: Match features to pain point components
matches = match_features_to_pain_points(deconstructed, retriever)
matches
# Stage 3: Select best overall feature
best_feature = select_best_feature(
    pain_point_text=pain_point,
    deconstructed=deconstructed,
    matches=matches
)
best_feature


Deconstructed pain point: {'current_problems': ['overwhelmed support agents', 'high volume questions', 'repetitive questions'], 'desired_outcomes': ['automate responses', 'automate common queries']}




{'feature_id': 'SERVICE_AI_INBOX',
 'feature_name': 'AI Customer Service - AI Inbox',
 'reason': 'This feature directly addresses the high volume of repetitive questions by enabling AI-assisted responses, thus alleviating the burden on support agents and improving response times.  Its omnichannel capabilities further enhance efficiency.'}

In [34]:
# Stage 4: Evaluate outcomes
evaluation = evaluate_outcomes(
    current_problems=deconstructed["current_problems"],
    desired_outcomes=deconstructed["desired_outcomes"],
    best_feature=best_feature,
    retriever=retriever
)

evaluation



{'overall_status': 'partially_resolved',
 'problem_resolution': [{'problem': 'overwhelmed support agents - high volume questions',
   'status': 'partially_resolved',
   'reason': "The AI agent can handle some of the high volume questions, reducing the load on human agents, but it doesn't eliminate the high volume entirely."},
  {'problem': 'overwhelmed support agents - repetitive questions',
   'status': 'resolved',
   'reason': 'The AI agent can be trained to automatically answer repetitive questions, freeing up human agents to focus on more complex issues.'}],
 'outcome_achievement': [{'outcome': 'automate responses - automate common queries',
   'status': 'achieved',
   'reason': 'The AI agent within the omnichannel inbox is designed to automate responses to common queries, as described in the feature details.'}]}