# LLM Searching product sets Relevance Example

Jupyter notebook that implements an LLM-based search function over a product catalog and then refines the search results based on relevance.




# Setup

This Jupyter notebook was built on Colab. You need to install the following libraries.

In [None]:
# install required dependencies
! pip install OpenAI

Collecting OpenAI
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━[0m [32m71.7/76.5 kB[0m [31m2.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: OpenAI
Successfully installed OpenAI-0.28.0


# Get OpenAI Key

In [None]:
# Standard-library imports for this cell.
# NOTE(review): `os` is not referenced in the visible cells - confirm it is
# needed elsewhere before removing it.
import os

# Prompt for the OpenAI API key without echoing it, so the secret is never
# stored in the notebook source or its output.
from getpass import getpass
OPENAIKEY = getpass('Please enter your OpenAI API Key: ')

Please enter your OpenAI API Key: ··········


# Define data

In [None]:
# In-memory demo catalog that llm_search() iterates over.
# Each entry is a dict with:
#   id          - integer identifier
#   name        - short display name
#   description - free text; this is the field sent to the LLM for scoring
#   popularity  - integer rating (values in this catalog range from 2 to 9)
products = [
    {"id": 1, "name": "Laptop", "description": "High-performance gaming laptop", "popularity": 5},
    {"id": 2, "name": "Mobile Phone", "description": "Latest model with OLED screen", "popularity": 9},
    {"id": 3, "name": "Headphones", "description": "Noise-cancelling over-ear headphones", "popularity": 8},
    {"id": 4, "name": "Camera", "description": "DSLR with 24.2 megapixels", "popularity": 6},
    {"id": 5, "name": "Smartwatch", "description": "Water-resistant with heart rate monitor", "popularity": 7},
    {"id": 6, "name": "Bluetooth Speaker", "description": "Portable speaker with deep bass", "popularity": 7},
    {"id": 7, "name": "XBOX Console", "description": "Next-gen  console with 4K support", "popularity": 9},
    {"id": 8, "name": "Tablet", "description": "10-inch screen with stylus support", "popularity": 8},
    {"id": 9, "name": "E-reader", "description": "E-ink display for a better reading experience", "popularity": 6},
    {"id": 10, "name": "VR Headset", "description": "Immersive virtual reality experience", "popularity": 7},
    {"id": 11, "name": "Wireless Mouse", "description": "Ergonomic design with long battery life", "popularity": 7},
    {"id": 12, "name": "Keyboard", "description": "Mechanical keyboard with RGB backlight", "popularity": 7},
    {"id": 13, "name": "External Hard Drive", "description": "2TB storage with fast transfer rates", "popularity": 8},
    {"id": 14, "name": "USB Flash Drive", "description": "64GB storage in a compact design", "popularity": 6},
    {"id": 15, "name": "Chair", "description": "Highly comfortable gaming chair", "popularity": 2},
    {"id": 16, "name": "PS4 Console", "description": "games console ", "popularity": 8},
    {"id": 17, "name": "PS5 Console", "description": "Next-gen console with 4K support", "popularity": 9},

]



# Implement LLM Search

In [None]:
import openai
import re

# Hand the key captured in OPENAIKEY to the openai module; the openai.* calls
# made by the functions below rely on this module-level setting.
openai.api_key = OPENAIKEY


def get_first_numeric(s):
    """Return the first run of decimal digits found in ``s`` as an int.

    Only the leading digit run is used, so ``"7.5"`` yields ``7`` and
    ``"10/10"`` yields ``10``.

    Args:
        s: Text to scan.

    Returns:
        The integer value of the first digit sequence, or ``None`` when the
        text contains no digits.
    """
    digit_run = re.search(r"\d+", s)
    if digit_run is None:
        return None
    return int(digit_run.group())

def llm_search_score(query, product_description):
    """Ask the completion model how relevant a product is to a search query.

    Args:
        query: The user's search text.
        product_description: Description of the product being rated.

    Returns:
        Whatever ``get_first_numeric`` extracts from the stripped completion
        text: the first integer in the reply, or ``None`` if the reply
        contains no digits.
    """
    question = f"How relevant is the product '{product_description}' to the search query '{query}'? Rate on a scale of 1 to 10."

    # temperature=0.0 with a single choice (n=1) requests one reply per call
    request_options = dict(
        engine="text-davinci-003",
        prompt=question,
        max_tokens=100,
        n=1,
        stop=None,
        temperature=0.0,
    )
    completion = openai.Completion.create(**request_options)

    reply_text = completion.choices[0].text.strip()
    return get_first_numeric(reply_text)


def llm_search(query, min_relevance=5):
    """Return the catalog products the LLM rates as relevant to ``query``.

    Every entry of the module-level ``products`` list is scored with
    ``llm_search_score``. Entries scoring strictly above ``min_relevance``
    are annotated in place with a ``'relevance'`` key and collected in
    catalog order.

    Args:
        query: The user's search text.
        min_relevance: Exclusive lower bound on the LLM score. Defaults to 5.

    Returns:
        A list of the matching product dicts (the same objects that live in
        ``products``), each carrying its ``'relevance'`` score.
    """
    results = []
    for product in products:
        relevance = llm_search_score(query, product['description'])

        # The score is None when the model's reply contains no digits;
        # comparing None with an int would raise TypeError, so skip it.
        if relevance is None:
            continue

        if relevance > min_relevance:
            product['relevance'] = relevance
            results.append(product)
    return results



# Run the demo search once; improved_search() below reuses `original_results`.
query = "gaming"
original_results = llm_search(query)

# Interpolate the query so the heading always names the search that was run
print(f"\nRelevant results from the LLM search based on word '{query}' :\n")

# One left-aligned row per match: name, description, LLM relevance score
for product in original_results:
    print("{:<15} - {:<50} | LLM Relevance Score: {:<15}".format(
        product['name'], product['description'], product['relevance']))



Relevant results from the LLM search based on word 'gaming' :

Laptop          - High-performance gaming laptop                     | LLM Relevance Score: 10             
XBOX Console    - Next-gen  console with 4K support                  | LLM Relevance Score: 9              
VR Headset      - Immersive virtual reality experience               | LLM Relevance Score: 9              
Keyboard        - Mechanical keyboard with RGB backlight             | LLM Relevance Score: 9              
Chair           - Highly comfortable gaming chair                    | LLM Relevance Score: 10             
PS4 Console     - games console                                      | LLM Relevance Score: 10             
PS5 Console     - Next-gen console with 4K support                   | LLM Relevance Score: 9              


# Rerank products

Rerank based on combined popularity and features, where the LLM score is weighted 50% and the product customer review score (the `popularity` field) is weighted 50%.

In [None]:
def llm_re_rank_score(query, product_description):
    """Ask the completion model for a re-ranking score for one product.

    Note: the prompt asks the model to consider popularity, but only the
    product description and the query are interpolated into it.

    Args:
        query: The user's search text.
        product_description: Description of the product being rated.

    Returns:
        The first integer found in the stripped completion text (via
        ``get_first_numeric``), or ``None`` if the reply has no digits.
    """
    prompt = f"Considering both popularity and features, how relevant is the product '{product_description}' to the search query '{query}'? Rate on a scale of 1 to 10."

    first_choice = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=100,
        n=1,
        stop=None,
        temperature=0.0,
    ).choices[0]

    return get_first_numeric(first_choice.text.strip())

def improved_search(query, candidates=None):
    """Re-rank search results by blending an LLM score with popularity.

    Each candidate gets a ``'score'`` key equal to
    ``0.5 * LLM re-rank score + 0.5 * popularity``. The candidate dicts are
    annotated in place, but the list passed in (or ``original_results``) is
    not reordered; a new sorted list is returned.

    Args:
        query: The user's search text, forwarded to ``llm_re_rank_score``.
        candidates: Product dicts to re-rank; each needs ``'description'``
            and ``'popularity'``. Defaults to the module-level
            ``original_results`` so the earlier search is not repeated.

    Returns:
        A new list of the candidates ordered by ``'score'``, highest first.
        Ties keep their incoming order.
    """
    if candidates is None:
        # Reuse the earlier search instead of calling llm_search(query) again
        candidates = original_results

    for product in candidates:
        llm_score = llm_re_rank_score(query, product['description'])
        if llm_score is None:
            # No digits in the model's reply: count the LLM half as 0 rather
            # than failing on None * 0.5.
            llm_score = 0
        product['score'] = llm_score * 0.5 + product['popularity'] * 0.5

    # sorted() is stable and leaves the shared input list in its original order
    return sorted(candidates, key=lambda item: item['score'], reverse=True)


print("\nResults from the improved LLM search with reranking:\n")
improved_results = improved_search(query)

for product in improved_results:
    # The description column is 50 wide, the same width as the first results
    # table, so catalog descriptions (up to 45 characters) stay aligned.
    print("{:<15} - {:<50} | Score: {:<15}".format(
        product['name'], product['description'], product.get('score', 'N/A')))




Results from the improved LLM search with reranking:

XBOX Console    - Next-gen  console with 4K support | Score: 9.0            
PS4 Console     - games console                  | Score: 9.0            
PS5 Console     - Next-gen console with 4K support | Score: 9.0            
VR Headset      - Immersive virtual reality experience | Score: 8.0            
Keyboard        - Mechanical keyboard with RGB backlight | Score: 8.0            
Laptop          - High-performance gaming laptop | Score: 7.5            
Chair           - Highly comfortable gaming chair | Score: 5.5            


# Astra DB - Vector

How could Astra be used in this example?

Create a vector table in Astra, e.g.:  

 TABLE items
 {
   id int PRIMARY KEY,
   name TEXT,  
   item_vector VECTOR<FLOAT, 512>,
   popularity int,
 }

Create index on the table

Generate embeddings from the description and do a similarity search on the query string.

In [None]:
# Illustrative catalog for the vector-search idea: each entry carries a small
# hand-written 3-element "vector" in place of a text description.
# NOTE(review): this rebinds the module-level name `products`. llm_search()
# reads `products` at call time and indexes product['description'], which
# these entries do not have, so calling it after this cell would fail.
# Consider a distinct name once the later cells have been checked.
products = [
    {"id": 1, "name": "Laptop", "vector": [0.2, 0.4, 0.7], "popularity": 5},
    {"id": 2, "name": "Camera", "vector": [0.5, 0.1, 0.3], "popularity": 9},
    {"id": 3, "name": "Smartwatch", "vector": [0.6, 0.8, 0.2], "popularity": 8},
    {"id": 4, "name": "Tablet", "vector": [0.1, 0.5, 0.6], "popularity": 6},
]



# Request an embedding for the example query string "gaming".
response = openai.Embedding.create(
  input="gaming",
  model="text-embedding-ada-002"
)

# NOTE(review): this prints the entire response, including every element of
# the embedding vector, which produces a very long cell output.
print(response)


{
  "object": "list",
  "data": [
    {
      "object": "embedding",
      "index": 0,
      "embedding": [
        -0.006218219641596079,
        0.0016806430649012327,
        -0.0011076603550463915,
        -0.030542057007551193,
        -0.014198481105268002,
        -0.006259717047214508,
        -0.03064420446753502,
        -0.023953553289175034,
        0.016777701675891876,
        -0.06164592504501343,
        0.031946584582328796,
        0.012793955393135548,
        0.016496796160936356,
        0.0013143491232767701,
        0.0028010718524456024,
        0.003804988693445921,
        0.019484607502818108,
        0.000610889052040875,
        0.022523490712046623,
        -0.03414275124669075,
        -0.005723443813621998,
        0.003370862454175949,
        0.0031617796048521996,
        -0.013508986681699753,
        -0.011829939670860767,
        -0.02455366961658001,
        0.0038209492340683937,
        -0.03728378191590309,
        0.0009999268222600222,
      