# Example of building a search system that combines the PaLM API and Vector Service using LangChain

## Preparation

In [None]:
# Use the explicit %pip magic (rather than relying on automagic for a bare
# `pip`) so the install targets the environment of the running kernel.
# The version is pinned; the import paths used in later cells
# (langchain.vectorstores, langchain.llms, ...) come from this release.
%pip install langchain==0.0.260 --user

## Prepare the product catalog data

In [1]:
# Download the retail toy catalog CSV into the working directory.
# --fail makes curl stop on an HTTP error instead of saving the server's
# error page under the CSV's file name, which would only surface later as a
# confusing parse problem.
!curl --fail --location --remote-name https://github.com/GoogleCloudPlatform/python-docs-samples/raw/main/cloud-sql/postgres/pgvector/data/retail_toy_dataset.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 1351k  100 1351k    0     0  1935k      0 --:--:-- --:--:-- --:--:-- 1935k


In [16]:
from langchain.document_loaders import CSVLoader

# Read the downloaded catalog with an explicit comma delimiter and keep the
# loaded records in `documents` for the embedding and indexing cells below.
loader = CSVLoader(file_path='./retail_toy_dataset.csv', csv_args=dict(delimiter=','))
documents = loader.load()

In [9]:
# Peek at the text of the first loaded record to see how the CSV was rendered.
first_document = documents[0]
print(first_document.page_content)

product_id: 7e8697b5b7cdb5a40daf54caf1435cd5
crawl_timestamp: 2020-01-24 20:51:13 +0000
product_url: https://www.walmart.com/ip/Koplow-Games-Set-of-2-D12-12-Sided-Rock-Paper-Scissors-Game-Dice-White-with-Pink-Letters-13060/125007983
product_name: Koplow Games Set of 2 D12 12-Sided Rock, Paper, Scissors Game Dice - White with Pink Letters #13060
list_price: 3.56
sale_price: 3.56
brand: Koplow Games
item_number: 
gtin: 799443797461
package_size: 
category: Toys | Shop Toys by Price
postal_code: 
available: False


In [10]:
from langchain.embeddings import VertexAIEmbeddings

# Embed a single catalog record; the resulting vector is written to a JSON
# file and uploaded to Cloud Storage in the following cells.
embeddings = VertexAIEmbeddings(model_name='textembedding-gecko')
sample_text = documents[0].page_content
initial_vector = embeddings.embed_documents([sample_text])

In [11]:
# Show the embedding's dimensionality together with its first five components.
first_embedding = initial_vector[0]
len(first_embedding), first_embedding[:5]

(768,
 [-0.0012477353448048234,
  -0.03854205459356308,
  0.02505659870803356,
  0.01457118708640337,
  -0.011549930088222027])

In [12]:
import json

# Persist the sample embedding as a single JSON object with the keys
# `id` and `embedding`; the next cell copies this file to Cloud Storage.
seed_record = {'id': 'initial', 'embedding': initial_vector[0]}
with open('initial_vector.json', 'w') as out_file:
    out_file.write(json.dumps(seed_record))

In [13]:
%%bash
# Stage the seed embedding file in a per-project Cloud Storage bucket named
# <project-id>-embeddings.
PROJECT_ID="$(gcloud config get project)"
BUCKET="${PROJECT_ID}-embeddings"
# Create the bucket only when it is not already visible, so re-running the
# cell does not print a "bucket already exists" error from `gsutil mb`.
if ! gsutil ls -b "gs://${BUCKET}" >/dev/null 2>&1; then
  gsutil mb -l us-central1 "gs://${BUCKET}"
fi
gsutil cp initial_vector.json "gs://${BUCKET}"

Creating gs://cloud-genai-app-403700-embeddings/...
Copying file://initial_vector.json [Content-Type=application/json]...
/ [1 files][ 16.6 KiB/ 16.6 KiB]                                                
Operation completed over 1 objects/16.6 KiB.                                     


## Storing information in Vector Store

In [14]:
from langchain.vectorstores import MatchingEngine


def _first_output_line(lines, what):
    """Return the first non-blank line of captured shell output.

    Args:
        lines: list of output lines captured from a `!command`.
        what: human-readable name of the value being looked up, used in the
            error message.

    Raises:
        RuntimeError: if the capture contains no non-blank line, instead of
            the bare IndexError that indexing an empty capture would produce.
    """
    for line in lines:
        if line.strip():
            return line.strip()
    raise RuntimeError(
        'Could not determine {} from gcloud output.'.format(what))


# Each `!` capture is a list of output lines. The raw captures get their own
# names so PROJECT_ID / INDEX_ID / INDEX_ENDPOINT_ID are only ever strings.
# stderr is discarded on all three lookups so only the requested value lands
# in the capture.
project_lines = !gcloud config get project 2>/dev/null

# `value(name)` prints the full resource name; rev | cut | rev keeps only the
# segment after the last "/".
index_lines = !gcloud ai indexes list \
  --region=us-central1 --format="value(name)" \
  --filter="displayName=retail-data" 2>/dev/null \
  | rev | cut -d "/" -f 1 | rev

endpoint_lines = !gcloud ai index-endpoints list --region=us-central1 \
  --format="value(name)" \
  --filter="displayName=retail-endpoint" 2>/dev/null \
  | rev | cut -d "/" -f 1 | rev

PROJECT_ID = _first_output_line(project_lines, 'the active gcloud project')
INDEX_ID = _first_output_line(index_lines, 'the "retail-data" index ID')
INDEX_ENDPOINT_ID = _first_output_line(
    endpoint_lines, 'the "retail-endpoint" index endpoint ID')

# Connect to the existing index and endpoint; the bucket name matches the one
# created by the %%bash cell (<project-id>-embeddings).
vector_store = MatchingEngine.from_components(
    embedding=VertexAIEmbeddings(model_name='textembedding-gecko'),
    project_id=PROJECT_ID,
    region='us-central1',
    gcs_bucket_name='gs://{}-embeddings'.format(PROJECT_ID),
    index_id=INDEX_ID,
    endpoint_id=INDEX_ENDPOINT_ID
)

In [17]:
# Index the whole catalog; the call's return value (a list of document IDs)
# is displayed as the cell output.
# NOTE(review): the IDs look like freshly generated UUIDs, so re-running this
# cell presumably indexes the same documents again - confirm before re-running.
vector_store.add_documents(documents=documents)

Updating MatchingEngineIndex index: projects/255962005384/locations/us-central1/indexes/7568022096865918976
Update MatchingEngineIndex index backing LRO: projects/255962005384/locations/us-central1/indexes/7568022096865918976/operations/3426995705883918336
MatchingEngineIndex index Updated. Resource name: projects/255962005384/locations/us-central1/indexes/7568022096865918976


['e8c7df93-3562-4b74-9300-696d5fb9dc14',
 '7f4a9ada-ff3a-458a-8f03-c4094abf096d',
 '87930bbd-2091-4ead-80e0-fafbd9bcf5c7',
 'ddf89102-d74d-489f-8439-add5aa35a1b9',
 'e4d29e3d-4138-4c7c-92b6-0899d2505b9b',
 '279238f5-ece5-4dc6-adff-8271ceb0604a',
 '8e8a040b-41a1-4002-82c3-a5baacb4eb33',
 '6c17cd2f-422f-441f-b867-40bfd8c1fb9c',
 '0a1558c2-d4b0-4ba8-971e-b994e1f4e653',
 '3d6849c2-0244-44be-b24d-3590fe012d13',
 'db332b66-d0ff-464a-baf5-77873f6ec753',
 '3f043d92-1888-4313-9d9b-f557dfa39889',
 '4ceef185-1557-474e-ad6d-fb6d61fcb1d6',
 '31b4068b-c169-4164-9dec-0fd5b0f882e2',
 'd143424f-a9fc-454e-a3b1-a8c88b5c27d1',
 'f828b348-7838-4cde-a34f-425b9f45fd6c',
 '3485cb47-12a2-453d-8604-09dac11c3b29',
 '8ff6e11e-fe7f-4435-aa01-fcf66b83ef3c',
 'd6cfe5f8-369e-4775-86ff-262cf5d62dde',
 'f21e6b83-51c8-4e21-8337-887f021e65f2',
 '3f276c5c-8058-40bc-b915-d019b69517de',
 '514aa371-772f-4564-8825-44aa83d7c8c3',
 '2a9a2402-bff5-4d64-80be-9dbe60d585b6',
 'c694d39c-fae1-4ff9-9a39-2a73b8a2da9c',
 '825b334a-4535-

In [18]:
# Sanity-check retrieval: query the store for 'soccer' and print the first hit.
result = vector_store.similarity_search(query='soccer')
best_match = result[0]
print(best_match.page_content)

product_id: 39cee39aee26c1f821836ec38efe4666
crawl_timestamp: 2020-03-10 11:54:48 +0000
product_url: https://www.walmart.com/ip/Sport-Squad-Flux-Magnetic-Reversible-Soccer-Hockey-Tabletop-Multi-Sport-Game-Set-2ct-Soccer-Foosballs-2ct-Hockey-Pucks/554346135
product_name: Sport Squad Flux Magnetic Reversible Soccer & Hockey Tabletop Multi Sport Game Set, 2ct Soccer Foosballs, 2ct Hockey Pucks
description: The Sport Squad Flux is the latest innovation to combination gaming! Play soccer or reverse the playfield and play hockey! Control your players using the magnetic handles under the board and deliver perfect shots against your opponent! The Sport Squad Flux is the perfect miniature table top game for the entire family and measures 20" long, 12" wide, and 6" high. The Sport Squad Flux is also an excellent portable travel game for kids and adults! Make sure you grab a friend because the Flux requires two players for maximum fun! You will need to have a quick reaction time and focus to crus

## Construct the search agent

In [19]:
from langchain.llms import VertexAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Prompt applied to the retrieved product records. It declares only the
# {context} variable, so the LLM sees the retrieved text but not the search
# query itself; its job is purely to reformat each product into
# Name / URL / Summary.
prompt_template = """The string enclosed in the backquotes (```) is product information.
From that information, please extract product_name field, product_url field, and description field.
Your output should be in the following format.

Name: the product_name field
URL: the product_url field
Summary: short summary of the description field less than 100 words

Here's the product information: ```{context}```
"""

toy_search_llm = VertexAI(temperature=0.2, max_output_tokens=1024)
retriever = vector_store.as_retriever(search_kwargs={'include_metadata': True})
prompt = PromptTemplate(input_variables=['context'], template=prompt_template)

# Retrieval + LLM chain: look up matching products in the vector store, then
# run the prompt above over what was retrieved.
toy_search = RetrievalQA.from_llm(llm=toy_search_llm, prompt=prompt, retriever=retriever)

In [20]:
# Try the retrieval chain directly with a keyword query.
sports_answer = toy_search.run('sports')
print(sports_answer)

 Name: Sport Squad Flux Magnetic Reversible Soccer & Hockey Tabletop Multi Sport Game Set, 2ct Soccer Foosballs, 2ct Hockey Pucks
URL: https://www.walmart.com/ip/Sport-Squad-Flux-Magnetic-Reversible-Soccer-Hockey-Tabletop-Multi-Sport-Game-Set-2ct-Soccer-Foosballs-2ct-Hockey-Pucks/554346135
Summary: The Sport Squad Flux is the latest innovation to combination gaming! Play soccer or reverse the playfield and play hockey! Control your players using the magnetic handles under the board and deliver perfect shots against your opponent! 


In [21]:
from langchain.agents import AgentType, Tool, initialize_agent

# Tool description shown to the agent's LLM; it tells the model when to call
# the tool and what shape of input to send.
description = (
    'useful for when you need to answer questions about toys. '
    'Input should be a comma-separated words, do not input a fully formed question.'
)

# Expose the retrieval chain to the agent as its only tool.
toy_qa_tool = Tool(
    name='Retail Toy QA System',
    func=toy_search.run,
    description=description,
)
tools = [toy_qa_tool]

agent_llm = VertexAI(temperature=0.4, max_output_tokens=1024)
# verbose=True prints the agent's intermediate reasoning/tool calls as it runs.
agent = initialize_agent(
    tools,
    llm=agent_llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [22]:
# Instruction wrapper for the agent: the user's request is substituted for
# {query} (between triple backquotes) and the agent is told to answer with
# just a product name and URL.
prompt_template = '\n'.join([
    "You are a recommendation system that introduces the product that best matches the user's request.",
    "The string enclosed in the backquotes (```) is the user's request. You must output only the product name and the url in the format as shown below.",
    "",
    "Name: PRODUCT_NAME",
    "URL: PRODUCT_URL",
    "",
    "Here's the request: ```{query}```",
    "",
])

In [23]:
# Wrap a natural-language request in the recommendation prompt and let the
# agent decide how to query the toy search tool.
query = 'Can you suggest a birthday present for my 9-year-old daughter who loves sports?'
agent_request = prompt_template.format(query=query)
result = agent.run(agent_request)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m The user is looking for a birthday present for their 9-year-old daughter who loves sports.
Action: Retail Toy QA System
Action Input: sports, 9 year old girl[0m
Observation: [36;1m[1;3m Name: Razor V17 Multi-Sport Youth Helmet, Satin Pink
URL: https://www.walmart.com/ip/Razor-V17-Multi-Sport-Youth-Helmet-Satin-Pink/8457151
Summary: The Razor V17 Youth Helmet is perfect for biking, skating, and scootering. It features 17 vents to keep your child's head cool and comfortable, and the sleek finish gives it a professional look. The side release buckles make it easy to get your child in and out of the helmet, and the adjustable fit system ensures a secure fit.

Name: Razor V17 Child Multi Sport Skateboard Scooter Kids Safety Helmet, Pink (2 Pack)
URL: https://www.walmart.com/ip/Razor-V17-Child-Multi-Sport-Skateboard-Scooter-Kids-Safety-Helmet-Pink-2-Pack/791564401
Summary: Keep your kids safe while they're skateboarding, scoote