In [84]:
from elasticsearch import Elasticsearch

In [85]:
es = Elasticsearch(
    "https://localhost:9200",
    basic_auth=("elastic","3TgCUfRLY3o7BXKYpWFs"),
    ca_certs="/home/isurika/downloads/elasticsearch-8.12.0/config/certs/http_ca.crt"
)
es.ping()

True

### Prepare the data

In [86]:
import pandas as pd

df = pd.read_csv("/mnt/d/University/repos/aws-chatbot/dataset/consolidated_data.csv").loc[:1500]
df.head()

Unnamed: 0,id,pattern,response
0,1,How can I run a web server on AWS?,"To run a web server on AWS, you can use Amazon..."
1,2,What AWS service should I use to host a scalab...,"For hosting scalable applications, consider us..."
2,3,How do I deploy a custom application on the cl...,"To deploy a custom application, use Amazon EC2..."
3,4,I need a dedicated environment for my applicat...,If you need dedicated resources for your appli...
4,5,What's the best way to get started with virtua...,Start with Amazon EC2. It offers a wide range ...


#### Check NA values


In [87]:
df.isna().value_counts()

id     pattern  response
False  False    False       1501
Name: count, dtype: int64

In [88]:
df.fillna("None", inplace=True)

### Convert the relevant field to Vector using BERT model

In [89]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-mpnet-base-v2')

In [90]:
df["ResponseVector"] = df["response"].apply(lambda x: model.encode(x))

In [91]:
df.head()

Unnamed: 0,id,pattern,response,ResponseVector
0,1,How can I run a web server on AWS?,"To run a web server on AWS, you can use Amazon...","[-0.00054931047, -0.05692641, -0.0006814774, 0..."
1,2,What AWS service should I use to host a scalab...,"For hosting scalable applications, consider us...","[-0.015269826, -0.012294186, -0.01100238, -0.0..."
2,3,How do I deploy a custom application on the cl...,"To deploy a custom application, use Amazon EC2...","[-0.012933653, -0.045452587, -0.0068273027, -0..."
3,4,I need a dedicated environment for my applicat...,If you need dedicated resources for your appli...,"[-0.009388072, -0.018510194, 0.0007667323, 0.0..."
4,5,What's the best way to get started with virtua...,Start with Amazon EC2. It offers a wide range ...,"[-0.059143938, -0.017358141, -0.0022847152, -0..."


In [92]:
es.ping()

True

### Create new index in ElasticSearch

In [93]:
from indexMapping import indexMapping

try:
    es.indices.create(index="all_patterns_v1", mappings=indexMapping) 
except Exception as e:
    pass

### Ingest the data into index

In [94]:
record_list = df.to_dict("records")

In [95]:
for record in record_list:
    try:
        es.index(index="all_patterns_1500", document=record, id=record["id"])
    except Exception as e:
        print(e)

In [96]:
es.count(index="all_patterns_1500")

ObjectApiResponse({'count': 1501, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

### Search the data

In [97]:
input_keyword = " Billing of Amazon EC2 systems begin and end?"
vector_of_input_keyword = model.encode(input_keyword)

query = {
    "field": "ResponseVector",
    "query_vector": vector_of_input_keyword,
    "k": 1,  # Set k to 1 to get only the top result
    "num_candidates": 1500,
}

res = es.knn_search(index="all_patterns_1500", knn=query, source=["pattern", "response"])
hits = res["hits"]["hits"]


if hits:
    best_match = hits[0]
    print("Best Matching Result:")
    print("Pattern:", best_match["_source"]["pattern"])
    print("Response:", best_match["_source"]["response"])
else:
    print("No matching results found.")


Best Matching Result:
Pattern: When does billing of my Amazon EC2 systems begin and end?
Response: Billing commences when Amazon EC2 initiates the boot sequence of an AMI instance. Billing ends when the instance terminates, which could occur through a web services command, by running "shutdown -h", or through instance failure. When you stop an instance, we shut it down but don't charge hourly usage for a stopped instance, or data transfer fees, but we do charge for the storage for any Amazon EBS volumes. To learn more, visit the AWS Documentation.


  res = es.knn_search(index="all_patterns_1500", knn=query, source=["pattern", "response"])


In [98]:
input_keyword = "Billing of Amazon EC2 systems begin and end?"
vector_of_input_keyword = model.encode(input_keyword)

query = {
    "field" : "ResponseVector",
    "query_vector" : vector_of_input_keyword,
    "k" : 3,
    "num_candidates" : 1500, 
}

res = es.knn_search(index="all_patterns_1500", knn=query , source=["pattern","response"])
res["hits"]["hits"]

  res = es.knn_search(index="all_patterns_1500", knn=query , source=["pattern","response"])


[{'_index': 'all_patterns_1500',
  '_id': '339',
  '_score': 0.90958804,
  '_ignored': ['response.keyword'],
  '_source': {'pattern': 'When does billing of my Amazon EC2 systems begin and end?',
   'response': 'Billing commences when Amazon EC2 initiates the boot sequence of an AMI instance. Billing ends when the instance terminates, which could occur through a web services command, by running "shutdown -h", or through instance failure. When you stop an instance, we shut it down but don\'t charge hourly usage for a stopped instance, or data transfer fees, but we do charge for the storage for any Amazon EBS volumes. To learn more, visit the AWS Documentation.'}},
 {'_index': 'all_patterns_1500',
  '_id': '340',
  '_score': 0.84812176,
  '_ignored': ['response.keyword'],
  '_source': {'pattern': 'What defines billable EC2 instance usage?',
   'response': 'Instance usages are billed for any time your instances are in a "running" state. If you no longer wish to be charged for your instance

In [99]:
rdf = df.sample(frac=0.01)

In [100]:
rdf.head()

Unnamed: 0,id,pattern,response,ResponseVector
1049,1050,When should I use Amazon RDS Blue/Green Deploy...,Amazon RDS Blue/Green Deployments allow you to...,"[0.0035824373, -0.0594007, -0.018656341, -0.05..."
686,687,How are compute resources assigned to an AWS L...,"In the AWS Lambda resource model, you choose t...","[0.023873782, -0.0793809, -0.019675193, 0.0186..."
969,970,How do I control the actions that my systems a...,You can control the actions that your AWS IAM ...,"[0.009978332, -0.012073057, -0.012865683, 0.03..."
1011,1012,How do I delete a read replica? Will it be del...,You can delete a read replica with a few steps...,"[0.017723927, 0.009811944, -0.02005789, 0.0690..."
31,32,I need to launch instances quickly without any...,On-Demand Instances provide the flexibility to...,"[0.010348905, -0.07074453, -0.024264289, 0.008..."


In [101]:
def search(input_keyword):
    # model = SentenceTransformer('all-mpnet-base-v2')
    # input_keyword = "Billing of Amazon EC2 systems begin and end?"
    vector_of_input_keyword = model.encode(input_keyword)

    query = {
        "field" : "ResponseVector",
        "query_vector" : vector_of_input_keyword,
        "k" : 3,
        "num_candidates" : 1500, 
    }

    res = es.knn_search(index="all_patterns_1500", knn=query , source=["pattern","response"])
    results = res["hits"]["hits"]

    return results




In [102]:
arr_of_actual_responses = rdf['response'].tolist()
arr_of_predicted_responses = []

for index, row in rdf.iterrows():
    result = search(row["pattern"])
    print(f"Pattern: {result[1]['_source']['pattern']} ")
    print(f"Response: {result[1]['_source']['response']}")
    arr_of_predicted_responses.append(result[1]['_source']['response'])

  res = es.knn_search(index="all_patterns_1500", knn=query , source=["pattern","response"])


Pattern: When should I use Amazon RDS Blue/Green Deployments? 
Response: Amazon RDS Blue/Green Deployments allow you to make safer, simpler, and faster database changes. Blue/Green Deployments are ideal for use cases such as major or minor version database engine upgrades, operating system updates, schema changes on green environments that do not break logical replication, like adding a new column at the end of a table, or database parameter setting changes. You can use Blue/Green Deployments to make multiple database updates at the same time using a single switchover. This allows you to stay current on security patches, improve database performance, and access newer database features with short, predictable downtime. 
Pattern: How does AWS Lambda isolate my code? 
Response: Each AWS Lambda function runs in its own isolated environment, with its own resources and file system view. AWS Lambda uses the same techniques as Amazon EC2 to provide security and separation at the infrastructure

Pattern: How do I control network access to my DB Instance(s)? 
Response: Visit the Security Groups section of the Amazon RDS User Guide to learn about the different ways to control access to your DB Instances. 
Pattern: Do I need to enable automatic backups on my DB instance before I can create read replicas? 
Response: Yes. Enable automatic backups on your source DB Instance before adding read replicas by setting the backup retention period to a value other than 0. Backups must remain enabled for read replicas to work. 
Pattern: I need to launch instances quickly without any upfront commitments. What EC2 option should I choose? 
Response: On-Demand Instances provide the flexibility to launch instances quickly without any upfront commitments. They are suitable for applications with variable workloads.
Pattern: Can I sell any RI on the EC2 RI Marketplace? 
Response: No, AWS prohibits the resale of RIs purchased as part of a discount program per AWS Service Terms. Any All Upfront, Parti

In [103]:
# print(rdf['response'].tolist())
print(arr_of_predicted_responses)

['Amazon RDS Blue/Green Deployments allow you to make safer, simpler, and faster database changes. Blue/Green Deployments are ideal for use cases such as major or minor version database engine upgrades, operating system updates, schema changes on green environments that do not break logical replication, like adding a new column at the end of a table, or database parameter setting changes. You can use Blue/Green Deployments to make multiple database updates at the same time using a single switchover. This allows you to stay current on security patches, improve database performance, and access newer database features with short, predictable downtime. ', 'Each AWS Lambda function runs in its own isolated environment, with its own resources and file system view. AWS Lambda uses the same techniques as Amazon EC2 to provide security and separation at the infrastructure and execution levels. ', 'Visit the Security Groups section of the Amazon RDS User Guide to learn about the different ways t

In [104]:
def precision_for_k(arr_of_actual_responses, arr_of_predicted_responses, k):
    sum = 0

    for i in range(len(arr_of_actual_responses)-1):
        if (arr_of_actual_responses[i] == arr_of_predicted_responses[i]) :
            
            sum += 1
        # else:
            # print(f"{arr_of_actual_responses[i]} !=== {arr_of_predicted_responses[i]}")
    precision = sum / k if k > 0 else 0
    print(f"precision : {precision}")

k = len(rdf.index)
precision_for_k(arr_of_actual_responses, arr_of_predicted_responses, k)
    

precision : 0.2
