### Experimentation to take list of messages and group similarity


In [74]:
import os

from haystack import Pipeline, component
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack_integrations.components.generators.ollama import OllamaGenerator
from haystack.dataclasses import ChatMessage
from typing import Optional, List, Dict
from pydantic import BaseModel, ConfigDict
from pprint import pprint
import json

In [75]:
messages = [
    "How do I reset my password?",
    "I forgot my password, how can I recover it?",
    "What is the process to change my password?",
    "Can you help me with my account recovery?",
    "How do I update my profile information?",
    "I need to change my email address on my account.",
    "What are the store hours for the weekend?",
    "Is the store open on holidays?",
    "Can I return an item without a receipt?",
    "What is the return policy for online purchases?",
    "How do I track my order?",
    "My order hasn't arrived yet, what should I do?",
    "Can I change the shipping address for my order?",
    "How do I apply for a job at your company?",
    "Are there any job openings in the marketing department?",
    "What benefits do you offer to employees?",
    "How do I schedule an appointment?",
    "Can I reschedule my appointment online?",
    "What documents do I need to bring to my appointment?",
    "How do I cancel my subscription?",
    "What are the subscription plans available?",
    "Can I upgrade my subscription plan?",
    "How do I contact customer support?",
    "Is there a live chat option for customer support?",
    "What is the phone number for customer support?",
    "How do I download the mobile app?",
    "Is the mobile app available for both iOS and Android?",
    "How do I report a bug in the mobile app?",
    "Can I use the mobile app to make payments?",
    "What payment methods are accepted?",
    "How do I add a new payment method?",
    "Can I set up automatic payments?",
    "How do I delete my account?",
    "What happens to my data if I delete my account?",
    "Can I reactivate my account after deleting it?",
    "How do I change my notification settings?",
    "Can I turn off email notifications?",
    "How do I enable push notifications?",
    "What is the privacy policy of your company?",
    "How do you handle customer data?",
    "What security measures are in place to protect my information?"
]

In [76]:
# Load environment variable
my_env_var = os.getenv('GROQ_API')
print(my_env_var)

None


In [77]:
class Answer(BaseModel):
    groups: List[List[str]]
    model_config = ConfigDict(extra='forbid')

json_schema = Answer.model_json_schema()
pprint(json_schema)

@component
class OutputValidator:

    def __init__(self, pydantic_model: BaseModel):
        self.pydantic_model = pydantic_model
    
    @component.output_types(valid_replies = List[str], invalid_replies = Optional[List[str]], error_msg = Optional[str])
    def run(self, replies: List[str]):
        try:
            output_dict = json.loads(replies[0])
            print('replies = ', output_dict)
            self.pydantic_model.model_validate(output_dict)
            print('[OK] valid')
            return {'valid_replies': replies}
        except ValueError as e:
            print('[not OK] wrong format')
            print(replies)
            return {'invalid_replies': replies, 'error_msg': str(e)}

def extract_valid_replies(res):
    return json.loads(res['json_validator']['valid_replies'][0])

{'additionalProperties': False,
 'properties': {'groups': {'items': {'items': {'type': 'string'},
                                     'type': 'array'},
                           'title': 'Groups',
                           'type': 'array'}},
 'required': ['groups'],
 'title': 'Answer',
 'type': 'object'}


In [None]:
# template = '''
# Given the following messages: {{messages}}
# for each message, classifiy them into groupid where each groupid is a list of similar messages.
# Must strictly follow this JSON schema, only return the actual instances without any additional schema definition : {{schema}}
# Only use the original messages, do not add any extra characters like newline 

# {% if invalid_replies and error_message %}
#   This is your previous answer: {{invalid_replies}}
#   Fix the error Python exception: {{error_message}}
#   Return the corrected output without any extra explanations.
# {% endif %}
# '''

template = '''
  Given the following messages:
  {{messages}}
  Rephrase these messages into 1 message that clarify the question and keep it consise. Just the final message with no explanation and extra information

'''
MODEL = 'llama3.2:3b'
MAX_RUN = 10

#component
prompt_template = PromptBuilder(template=template)
llm = OllamaGenerator(model = MODEL, url="http://localhost:11434") 

#build pipeline
pipe = Pipeline(max_runs_per_component=MAX_RUN)
pipe.add_component('prompt', prompt_template)
pipe.add_component('llm', llm)

#make connection
pipe.connect('prompt', 'llm')

#run pipeline

res = pipe.run({'prompt': {'messages': messages[:10]}})

print(res['llm']['replies'])



['"How can I request assistance with resetting or recovering my account information, such as password, profile, email address, or returns?"']


['"How can I recover my account or access my profile/account-related information?"']
