In [None]:
%pip install -qU elasticsearch

In [5]:
from elasticsearch import Elasticsearch, helpers, exceptions
import json
import time
import os
from dotenv import load_dotenv
load_dotenv()

True

In [7]:
# Build the Elasticsearch client from the Elastic Cloud credentials held in
# environment variables, then print the cluster info as a connectivity check.
client = Elasticsearch(
    api_key=os.getenv("ELASTIC_SEARCH_API_KEY"),
    cloud_id=os.getenv("ELASTIC_SEARCH_CLOUD_ID"),
)

cluster_info = client.info()
print(cluster_info)

{'name': 'instance-0000000000', 'cluster_name': '69f730990be14babb963969a8a27e740', 'cluster_uuid': 'KXh92WjYTomcj9zolDs4Vw', 'version': {'number': '8.15.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179', 'build_date': '2024-08-05T10:05:34.233336849Z', 'build_snapshot': False, 'lucene_version': '9.11.1', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [9]:
# Remove any existing ELSER model configuration so the cell always starts from
# a clean slate; a missing model is expected on the first run and not an error.
try:
    client.ml.delete_trained_model(model_id=".elser_model_2", force=True)
except exceptions.NotFoundError:
    print("Model doesn't exist, but We will proceed with creating one")
else:
    print("Model deleted successfully, We will proceed with creating one")

# Register the ELSER model configuration (the model is downloaded automatically
# when it is not present yet). Kept as the cell's last expression so the API
# response is displayed.
client.ml.put_trained_model(
    model_id=".elser_model_2",
    input={"field_names": ["text_field"]},
)

Model deleted successfully, We will proceed with creating one


ObjectApiResponse({'model_id': '.elser_model_2', 'model_type': 'pytorch', 'model_package': {'packaged_model_id': 'elser_model_2', 'model_repository': 'https://ml-models.elastic.co', 'minimum_version': '11.0.0', 'size': 438123914, 'sha256': '2e0450a1c598221a919917cbb05d8672aed6c613c028008fedcd696462c81af0', 'metadata': {}, 'tags': [], 'vocabulary_file': 'elser_model_2.vocab.json'}, 'created_by': 'api_user', 'version': '12.0.0', 'create_time': 1724010127309, 'model_size_bytes': 0, 'estimated_operations': 0, 'license_level': 'platinum', 'description': 'Elastic Learned Sparse EncodeR v2', 'tags': ['elastic'], 'metadata': {}, 'input': {'field_names': ['text_field']}, 'inference_config': {'text_expansion': {'vocabulary': {'index': '.ml-inference-native-000002'}, 'tokenization': {'bert': {'do_lower_case': True, 'with_special_tokens': True, 'max_sequence_length': 512, 'truncate': 'first', 'span': -1}}}}, 'location': {'index': {'name': '.ml-inference-native-000002'}}})

In [10]:
# Poll the model definition status every 5 seconds until Elasticsearch reports
# the model as fully defined.
while True:
    model_info = client.ml.get_trained_models(
        model_id=".elser_model_2",
        include="definition_status",
    )
    fully_defined = model_info["trained_model_configs"][0]["fully_defined"]

    if not fully_defined:
        print("ELSER Model is downloaded but not ready to be deployed.")
        time.sleep(5)
        continue

    print("ELSER Model is downloaded and ready to be deployed.")
    break

ELSER Model is downloaded but not ready to be deployed.
ELSER Model is downloaded but not ready to be deployed.
ELSER Model is downloaded but not ready to be deployed.
ELSER Model is downloaded but not ready to be deployed.
ELSER Model is downloaded but not ready to be deployed.
ELSER Model is downloaded but not ready to be deployed.
ELSER Model is downloaded and ready to be deployed.


In [11]:
# Start the trained model deployment only if the model has no deployment yet,
# so re-running this cell does not fail on an already existing deployment.
existing_stats = client.ml.get_trained_models_stats(model_id=".elser_model_2")
if not existing_stats["trained_model_stats"][0].get("deployment_stats"):
    client.ml.start_trained_model_deployment(
        model_id=".elser_model_2", number_of_allocations=1, wait_for="starting"
    )

# Poll the deployment state every 5 seconds until it is "started".
while True:
    status = client.ml.get_trained_models_stats(
        model_id=".elser_model_2",
    )
    deployment = status["trained_model_stats"][0].get("deployment_stats", {})
    state = deployment.get("state")

    if state == "started":
        print("ELSER Model has been successfully deployed.")
        break
    if state == "failed":
        # A failed deployment never becomes "started"; stop instead of
        # polling forever.
        raise RuntimeError(
            "ELSER Model deployment failed: {0}".format(deployment.get("reason"))
        )

    print("ELSER Model is currently being deployed.")
    time.sleep(5)

ELSER Model is currently being deployed.
ELSER Model is currently being deployed.
ELSER Model has been successfully deployed.


In [12]:
# Inference processor: feeds `complaint_what_happened` to the ELSER model and
# writes the model output to `plot_embedding`.
elser_inference_processor = {
    "inference": {
        "model_id": ".elser_model_2",
        "input_output": [
            {
                "input_field": "complaint_what_happened",
                "output_field": "plot_embedding",
            }
        ],
    }
}

# Create (or overwrite) the ingest pipeline; kept as the last expression so the
# acknowledgement is displayed.
client.ingest.put_pipeline(
    id="elser-ingest-pipeline",
    description="Ingest pipeline for ELSER",
    processors=[elser_inference_processor],
)

ObjectApiResponse({'acknowledged': True})

In [13]:
# Drop the index if it exists so the cell can be re-run from scratch.
client.indices.delete(index="complaints", ignore_unavailable=True)

# Every document written to the index goes through the ELSER ingest pipeline.
complaints_settings = {"index": {"default_pipeline": "elser-ingest-pipeline"}}

# Raw complaint text plus the sparse-vector field filled by the pipeline.
complaints_mappings = {
    "properties": {
        "complaint_what_happened": {"type": "text"},
        "plot_embedding": {"type": "sparse_vector"},
    }
}

client.indices.create(
    index="complaints",
    settings=complaints_settings,
    mappings=complaints_mappings,
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'complaints'})

In [14]:
import psycopg2
from config import config
import json

In [15]:
# Copy every row of the Postgres `complaints` table into the `complaints`
# Elasticsearch index, using the bulk helper instead of one request per row.
BULK_CHUNK_SIZE = 50  # small batches: each document runs through ELSER inference
BULK_REQUEST_TIMEOUT_S = 120  # inference makes bulk requests slow; allow extra time

connection = None
try:
    params = config()
    print("Connecting to database")
    connection = psycopg2.connect(**params)

    # The context manager closes the cursor even when indexing fails.
    with connection.cursor() as crsr:
        crsr.execute("SELECT * FROM complaints")

        # Column 0 is used as the document id and column 4 as the document body.
        # NOTE(review): assumes column 4 holds a JSON object (dict) containing
        # `complaint_what_happened` -- confirm against the table schema.
        actions = (
            {"_index": "complaints", "_id": row[0], "_source": row[4]}
            for row in crsr
        )
        indexed_count, failed_count = helpers.bulk(
            client.options(request_timeout=BULK_REQUEST_TIMEOUT_S),
            actions,
            chunk_size=BULK_CHUNK_SIZE,
            stats_only=True,
        )
    print(f"Indexed {indexed_count} complaints ({failed_count} failed)")
except Exception as error:
    # Best effort, as before: report the problem and still close the connection.
    print(error)
finally:
    if connection is not None:
        connection.close()
        print("Database conn terminated")

Connecting to database


In [45]:
# Free-text complaint used as the query text for the ELSER search and as the
# complaint the LLM is asked to categorize.
user_complaint = """My business is based in California, but I reside in Arizona. I planned a get-together and business meeting, and a family friend recommended a local pizzeria that offers such services. The owner requested the full amount upfront, plus a credit card on file. Since it was a referral, I didn’t think much of it. The deposit was supposed to be around XXXX. However, the problems began shortly after. A few days later, I noticed that I was charged approximately XXXX. When I contacted the owner via text message, he said he would look into it and get back to me, but the issue remained unresolved.

Frustrated, I asked to cancel the service altogether. XXXX or XXXX asked for a week to process the refund, and I agreed. Two weeks passed, and I ended up filing a dispute with my bank, providing them with the text messages that included information about the incorrect charge and the owner’s promise to refund me. However, the bank came back to me stating that I had actually been there and used the services.

I explained to the bank that I was in a different state at the time, but they sent me documents showing a receipt with a signature they claimed was mine. I informed them that the signature was not mine and that my signature on record with the bank is different. To this day, the issue remains unresolved and is still under investigation.

I have also filed a complaint with XXXX XXXX Office of Consumer Protection Agency, but I have yet to receive a response.

Your case number: XXXX

This issue is with Chase Bank XXXX

Please advise on what steps I can take next."""

# Semantic query: expand the complaint text with the ELSER model and match it
# against the `plot_embedding` field.
elser_query = {
    "text_expansion": {
        "plot_embedding": {
            "model_id": ".elser_model_2",
            "model_text": user_complaint,
        }
    }
}

# Retrieve the three best-matching complaints.
response = client.search(index="complaints", size=3, query=elser_query)

# Document ids of the hits, used afterwards to look the records up in Postgres.
related_ids = [hit["_id"] for hit in response["hits"]["hits"]]

  response = client.search(


In [46]:
# Load the full database rows for the complaints returned by the search and
# serialize their JSON payload (column 4), one JSON document per line.
related_records = []
connection = None

if related_ids:
    try:
        params = config()
        print("Connecting to database")
        connection = psycopg2.connect(**params)

        # The context manager closes the cursor even if the query fails.
        with connection.cursor() as cursor:
            cursor.execute(
                "SELECT * FROM complaints WHERE _id IN %s", (tuple(related_ids),)
            )
            related_records = cursor.fetchall()
    except Exception as error:
        # Report the actual cause instead of hiding it behind a generic message.
        print(f"Could not fetch related complaints: {error}")
    finally:
        if connection is not None:
            connection.close()
            print("Database conn terminated")
else:
    # An empty tuple would render as `IN ()`, which is invalid SQL.
    print("No related complaints found; skipping database lookup")

related_records_str = "".join(
    json.dumps(record[4]) + "\n" for record in related_records
)

print(related_records_str)


Connecting to database
Database conn terminated
{"product": "Credit card", "complaint_what_happened": "On XX/XX/year>, I booked a hotel in XXXX through XXXX XXXX XXXX using my Chase Freedom Flex card. The transaction amounted to {$98.00} and was recorded under Trip ID XXXX. \n\nDue to a severe flight delay, I arrived in XXXX at XXXX XXXX  on XX/XX/year>. Given the significant disruption to my travel plans, I contacted XXXX customer service to inquire about canceling my hotel reservation and obtaining a refund. During this call, the customer service representative assured me that a refund was possible and initiated Case number XXXX to process my request. The representative informed me that I did not need to take any further action and that the case would be resolved within 48 hours. \n\nBased on this assurance, I did not check into the hotel and relied on the promise of a refund. However, after waiting beyond the promised timeframe without receiving any communication or updates, I conta

In [None]:
%pip install groq
from groq import Groq
groqClient = Groq(
    # This is the default and can be omitted
    api_key="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",
)

In [49]:
# System prompt for the categorisation step. The key names listed in the first
# paragraph match the keys of the required JSON output ('sub_product' and
# 'sub_issue'), and the template is well-formed apart from the type placeholders.
system_prompt = """Given a user's complaint as a regular text and a couple of stored similar complaints, categorize the
current user's complaint using the following keys in the similar complaints: 'product', 'issue', 'sub_product', and 'sub_issue'.
Try as much as possible to use one of the given values of each of these keys only when they are related, otherwise you can form the best value for it.

Return specifically in the JSON format below:
{
    "product": str,
    "complaint_what_happened": (The user's complaint),
    "issue": str,
    "sub_product": str,
    "sub_issue": str
}
"""

# User message: the new complaint followed by the similar stored complaints
# retrieved from the database.
user_prompt = 'Categorize this complaint: {0}\nUsing these similar complaints: {1}'.format(
    user_complaint, related_records_str
)

chat_completion = groqClient.chat.completions.create(
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ],
    model="llama3-8b-8192",
    # Ask the API for a JSON object so the answer can be parsed downstream.
    response_format={"type": "json_object"},
)

print(chat_completion.choices[0].message.content)

{
    "product": "Credit card",
    "complaint_what_happened": "My business is based in California, but I reside in Arizona. I planned a get-together and business meeting, and a family friend recommended a local pizzeria that offers such services. The owner requested the full amount upfront, plus a credit card on file. Since it was a referral, I didn’t think much of it. However, the problems began shortly after. A few days later, I noticed that I was charged approximately XXXX. When I contacted the owner via text message, he said he would look into it and get back to me, but the issue remained unresolved.\n\nFrustrated, I asked to cancel the service altogether. XXXX or XXXX asked for a week to process the refund, and I agreed. Two weeks passed, and I ended up filing a dispute with my bank, providing them with the text messages that included information about the incorrect charge and the owner’s promise to refund me. However, the bank came back to me stating that I had actually been the