In [None]:
# install the latest VertexAI and langchain package
! pip install google-cloud-aiplatform google-cloud-discoveryengine langchain[docarray]==0.0.305 pydantic==1.10.8 typing-inspect==0.8.0 typing_extensions==4.5.0 --upgrade --user

Collecting docarray[hnswlib]<0.33.0,>=0.32.0 (from langchain[docarray]==0.0.305)
  Downloading docarray-0.32.1-py3-none-any.whl (215 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m215.3/215.3 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Collecting orjson>=3.8.2 (from docarray[hnswlib]<0.33.0,>=0.32.0->langchain[docarray]==0.0.305)
  Downloading orjson-3.9.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.7/138.7 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
Collecting types-requests>=2.28.11.6 (from docarray[hnswlib]<0.33.0,>=0.32.0->langchain[docarray]==0.0.305)
  Downloading types_requests-2.31.0.7-py3-none-any.whl (14 kB)
Collecting hnswlib>=0.6.2 (from docarray[hnswlib]<0.33.0,>=0.32.0->langchain[docarray]==0.0.305)
  Downloading hnswlib-0.7.0.tar.gz (33 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Prepa

In [None]:
import os
import sys

# Authenticate the user when the notebook runs inside Colab, detected by the
# google.colab module already being loaded; elsewhere this is skipped entirely.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    from google.colab import auth as google_auth

    google_auth.authenticate_user()

In [None]:
# Notebook-wide settings.
# PROJECT_ID may be overridden through the GOOGLE_CLOUD_PROJECT environment variable so
# the notebook can run against another project without editing this cell; an unset or
# empty variable falls back to the original project id.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT") or "cloud-sandbox-danielw"
SEARCH_ENGINE_ID = "unstructured-datastore_1695333435366"
REGION = "us-central1"  # used as the `location` for vertexai.init
MODEL = "text-bison@001"  # used as the `model_name` of the VertexAI LLM

In [None]:
# Initialise the Vertex AI SDK and build the bison LLM wrapper
import vertexai
from langchain.llms import VertexAI

vertexai.init(location=REGION, project=PROJECT_ID)

# The same `bison` object is reused by the generation, Q&A and grading cells
bison = VertexAI(
    temperature=0.0,
    model_name=MODEL,
)

In [None]:
# Load the catalog CSV through LangChain's CSVLoader
from langchain.document_loaders import CSVLoader

file = '/content/OutdoorClothingCatalog_1000.csv'

# `loader` is kept as its own variable because the vector index is built from it later
loader = CSVLoader(encoding='utf-8', file_path=file)
data = loader.load()

In [None]:
# Let the LLM write question/answer pairs from the loaded documents
from langchain.evaluation.qa import generate_chain

example_gen_chain = generate_chain.QAGenerateChain.from_llm(bison)

# Only the first five documents are used as generation inputs
generation_inputs = [{"doc": document} for document in data[:5]]
qa_pairs = example_gen_chain.apply_and_parse(generation_inputs)

# Show every generated pair
for item in qa_pairs:
    generated = item['qa_pairs']
    print(f"Question: {generated['query']}")
    print(f"Answer: {generated['answer']}")



Question: What is the name of the shoe that is made of soft canvas?
Answer: Women's Campside Oxfords
Question: What is the name of the dog mat?
Answer: Recycled Waterhog Dog Mat, Chevron Weave
Question: What is the name of the toddler's two-piece swimsuit?
Answer: Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece
Question: What is the name of the swimwear item?
Answer: Refresh Swimwear, V-Neck Tankini Contrasts
Question: What is the name of the pants that are made of 100% nylon?
Answer: EcoFlex 3L Storm Pants


In [None]:
# Hand-written Q&A pairs, in the same {"qa_pairs": {"query", "answer"}} shape as the
# entries already stored in qa_pairs.
manual_examples = [
  {
    "qa_pairs": {
      "query": "Do the Cozy Comfort Pullover Set have side pockets?",
      "answer": "Yes"
    }
  },{
    "qa_pairs": {
        "query": "What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?",
        "answer": "The DownTek collection"
    }
  }
]

# Append only the pairs that are not present yet, so re-running this cell does not
# duplicate the hand-written examples inside qa_pairs.
qa_pairs.extend([pair for pair in manual_examples if pair not in qa_pairs])

# Flatten every pair into a {"query", "answer"} record; `examples` only ever holds
# this flat shape.
examples = [
  {
    "query": item['qa_pairs']['query'],
    "answer": item['qa_pairs']['answer']
  }
  for item in qa_pairs
]

In [None]:
# Show the flat {"query", "answer"} records built in the previous cell
print(examples)

[{'query': 'What is the name of the shoe that is made of soft canvas?', 'answer': "Women's Campside Oxfords"}, {'query': 'What is the name of the dog mat?', 'answer': 'Recycled Waterhog Dog Mat, Chevron Weave'}, {'query': "What is the name of the toddler's two-piece swimsuit?", 'answer': "Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece"}, {'query': 'What is the name of the swimwear item?', 'answer': 'Refresh Swimwear, V-Neck Tankini Contrasts'}, {'query': 'What is the name of the pants that are made of 100% nylon?', 'answer': 'EcoFlex 3L Storm Pants'}, {'query': 'Do the Cozy Comfort Pullover Set have side pockets?', 'answer': 'Yes'}, {'query': 'What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?', 'answer': 'The DownTek collection'}]


In [None]:
from langchain.chains import RetrievalQA
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.embeddings import VertexAIEmbeddings

# Build the in-memory vector index from the CSV loader, embedding with Vertex AI
index_creator = VectorstoreIndexCreator(
    embedding=VertexAIEmbeddings(),
    vectorstore_cls=DocArrayInMemorySearch,
)
index = index_creator.from_loaders([loader])

# Retrieval Q&A chain: retrieved documents are "stuffed" into a single prompt,
# joined by the separator string below
stuff_chain_options = {"document_separator": "<<<<>>>>>"}
catalog_retriever = index.vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=bison,
    chain_type="stuff",
    retriever=catalog_retriever,
    verbose=True,
    chain_type_kwargs=stuff_chain_options,
)

In [None]:
# `examples` holds flat {"query", "answer"} records at this point, so the question is
# read from the "query" key directly; the previous ['qa_pairs'] lookup raised KeyError
# when the notebook was run top to bottom on a fresh kernel.
question = examples[0]['query']
print(f"question: {question}")
answer = qa.run(question)
print(f"answer: {answer}")

question: Do the Cozy Comfort Pullover Set have side pockets?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
answer: Yes


In [None]:
import langchain

# Switch on LangChain's global debug flag for this single run only
langchain.debug = True
try:
    # `examples` entries are flat {"query", "answer"} records, so index "query" directly
    qa.run(examples[0]['query'])
finally:
    # Turn off the debug mode, even when the run above raised
    langchain.debug = False

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Do the Cozy Comfort Pullover Set have side pockets?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do the Cozy Comfort Pullover Set have side pockets?",
  "context": "Berber fleece-lined pockets keep hands warm. One pocket has a hidden security pocket tucked inside. Imported.<<<<>>>>>: 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farth

In [None]:
# One {"query": ...} input per example, in the same order as `examples`
questions = [{"query": example['query']} for example in examples]

# Run the retrieval Q&A chain over every question
predictions = qa.apply(questions)

print(f"\n{predictions}")



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m

[{'query': 'What is the name of the shoe that is made of soft canvas?', 'result': "I don't know the answer to this question."}, {'query': 'What is the name of the dog mat?', 'result': 'The name of the dog mat is Recycled Waterhog Dog Mat, Chevron Weave.'}, {'query': "What is the name of the toddler's two-piece swimsuit?", 'result': "Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece"}, {'query': 'What is the name of the swimwear item?', 'result': 'The name of the swimwear it

In [None]:
from langchain.evaluation.qa import QAEvalChain

# Grade each prediction against its reference answer, using the same LLM as the grader
eval_chain = QAEvalChain.from_llm(bison)
graded_outputs = eval_chain.evaluate(examples, predictions)

# (label, source list, key) for each line of the per-example report, in print order
report_fields = (
    ("Question: ", examples, 'query'),
    ("Real Answer: ", examples, 'answer'),
    ("Predicted Answer: ", predictions, 'result'),
    ("Predicted Grade: ", graded_outputs, 'results'),
)

for idx in range(len(examples)):
    print(f"Example {idx}:")
    for label, rows, key in report_fields:
        print(label + rows[idx][key])
    print()

Example 0:
Question: What is the name of the shoe that is made of soft canvas?
Real Answer: Women's Campside Oxfords
Predicted Answer: I don't know the answer to this question.
Predicted Grade: INCORRECT

Example 1:
Question: What is the name of the dog mat?
Real Answer: Recycled Waterhog Dog Mat, Chevron Weave
Predicted Answer: The name of the dog mat is Recycled Waterhog Dog Mat, Chevron Weave.
Predicted Grade: CORRECT

Example 2:
Question: What is the name of the toddler's two-piece swimsuit?
Real Answer: Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece
Predicted Answer: Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece
Predicted Grade: CORRECT

Example 3:
Question: What is the name of the swimwear item?
Real Answer: Refresh Swimwear, V-Neck Tankini Contrasts
Predicted Answer: The name of the swimwear item is Mix-It-Up Tankini Print.
Predicted Grade: INCORRECT

Example 4:
Question: What is the name of the pants that are made of 100% nylon?
Real Answer: EcoFlex 3