In [2]:
# install the latest VertexAI and langchain package
! pip install google-cloud-aiplatform google-cloud-discoveryengine langchain[docarray]==0.0.305 pydantic==1.10.8 typing-inspect==0.8.0 typing_extensions==4.5.0 --upgrade --user

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.34.0-py2.py3-none-any.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting google-cloud-discoveryengine
  Downloading google_cloud_discoveryengine-0.11.1-py3-none-any.whl (523 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m523.5/523.5 kB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain[docarray]==0.0.305
  Downloading langchain-0.0.305-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydantic==1.10.8
  Downloading pydantic-1.10.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-inspect==0.8.0
  Downloading t

In [1]:
# Authenticate with GCP.
# The interactive login is only triggered when the notebook runs inside
# Google Colab, detected by the presence of the `google.colab` module.

import os
import sys

running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    from google.colab import auth as google_auth

    google_auth.authenticate_user()

In [2]:
# Notebook configuration (plain Python constants, not OS environment variables).
# Replace the <...> placeholders with your own values before running.

# Google Cloud project and region used to initialise Vertex AI.
PROJECT_ID = "<project_id>"
REGION = "us-central1"

# Name of the Vertex AI text model to use.
MODEL = "text-bison@001"

# Search engine identifier (placeholder).
SEARCH_ENGINE_ID = "<search_engine_id>"

In [3]:
# Initialise the Vertex AI SDK and create the LangChain LLM wrapper ("bison")
import vertexai
from langchain.llms import VertexAI

vertexai.init(location=REGION, project=PROJECT_ID)

# temperature is set to 0.0 for this LLM instance
bison = VertexAI(temperature=0.0, model_name=MODEL)

In [43]:
# Load the catalog CSV into LangChain documents
from langchain.document_loaders import CSVLoader

file = "/content/catalog_1000.csv"
loader = CSVLoader(encoding="utf-8", file_path=file)
data = loader.load()

In [44]:
# Generate question/answer pairs from a handful of the loaded documents
from langchain.evaluation.qa import generate_chain

example_gen_chain = generate_chain.QAGenerateChain.from_llm(bison)

# Only the documents at positions 11-15 are used as source material.
source_docs = data[11:16]
qa_pairs = example_gen_chain.apply([{"doc": doc} for doc in source_docs])

# Print every generated pair, each followed by a blank line.
for generated in qa_pairs:
    pair = generated["qa_pairs"]
    print(f"Question: {pair['query']}")
    print(f"Answer: {pair['answer']}")
    print()

Question: What is the name of the jacket that is suitable for light activity up to 20 degrees and moderate activity up to -30 degrees?
Answer: Ultra-Lofty 850 Stretch Down Hooded Jacket

Question: What is the name of the sheet set that is made of brushed cotton flannel?
Answer: CozyCloud Flannel Sheet Set

Question: What is the name of the product that is made of 85% wool and 15% nylon?
Answer: Men's Maine Explorer Zip-Front Jac-Shirt

Question: What is the name of the shirt that is made of 100% cotton?
Answer: Easy-Care Classic Stripe Short-Sleeve Shirt, Traditional Fit

Question: What is the weight of the kayak with the seat?
Answer: 38 lb.



In [45]:
# Hand-written Q&A pairs that complement the LLM-generated ones.
# They use the same {"qa_pairs": {"query", "answer"}} layout as `qa_pairs`.
manual_qa_pairs = [
  {
    "qa_pairs": {
      "query": "Do the Cozy Comfort Pullover Set have side pockets?",
      "answer": "Yes"
    }
  },{
    "qa_pairs": {
        "query": "What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?",
        "answer": "The DownTek collection"
    }
  }
]

# Add the manual pairs to `qa_pairs`, skipping any that are already present,
# so re-running this cell does not append duplicates.
for pair in manual_qa_pairs:
  if pair not in qa_pairs:
    qa_pairs.append(pair)

# Flatten every pair into the {"query", "answer"} dicts used by the later cells.
examples = [
  {
    "query": item['qa_pairs']['query'],
    "answer": item['qa_pairs']['answer']
  }
  for item in qa_pairs
]

In [46]:
# Show the flattened list of {"query", "answer"} examples.
print(examples)

[{'query': 'What is the name of the jacket that is suitable for light activity up to 20 degrees and moderate activity up to -30 degrees?', 'answer': 'Ultra-Lofty 850 Stretch Down Hooded Jacket'}, {'query': 'What is the name of the sheet set that is made of brushed cotton flannel?', 'answer': 'CozyCloud Flannel Sheet Set'}, {'query': 'What is the name of the product that is made of 85% wool and 15% nylon?', 'answer': "Men's Maine Explorer Zip-Front Jac-Shirt"}, {'query': 'What is the name of the shirt that is made of 100% cotton?', 'answer': 'Easy-Care Classic Stripe Short-Sleeve Shirt, Traditional Fit'}, {'query': 'What is the weight of the kayak with the seat?', 'answer': '38 lb.'}, {'query': 'Do the Cozy Comfort Pullover Set have side pockets?', 'answer': 'Yes'}, {'query': 'What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?', 'answer': 'The DownTek collection'}]


In [47]:
from langchain.chains import RetrievalQA
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.embeddings import VertexAIEmbeddings

# Build the in-memory vector index from the CSV loader, using Vertex AI embeddings.
index_creator = VectorstoreIndexCreator(
    embedding=VertexAIEmbeddings(),
    vectorstore_cls=DocArrayInMemorySearch,
)
index = index_creator.from_loaders([loader])

# Retrieval Q&A chain on top of the index: "stuff" chain type with a custom
# document separator passed through chain_type_kwargs.
qa = RetrievalQA.from_chain_type(
    llm=bison,
    chain_type="stuff",
    retriever=index.vectorstore.as_retriever(),
    chain_type_kwargs={"document_separator": "<<<<>>>>>"},
    verbose=True,
)

In [49]:
# Smoke test: ask the chain the first example question and show its answer.
first_query = examples[0]["query"]
print(f"question: {first_query}")
answer = qa.run(first_query)
print(f"answer: {answer}")

question: What is the name of the jacket that is suitable for light activity up to 20 degrees and moderate activity up to -30 degrees?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
answer: Ultra-Lofty 850 Stretch Down Hooded Jacket


In [50]:
import langchain

# Re-run the first example with LangChain's global debug flag enabled to trace
# the chain's intermediate steps. The `finally` clause guarantees the flag is
# switched off again even if the chain raises, so later cells are not affected.
langchain.debug = True
try:
    qa.run(examples[0]["query"])
finally:
    # Turn off the debug mode
    langchain.debug = False

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What is the name of the jacket that is suitable for light activity up to 20 degrees and moderate activity up to -30 degrees?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is the name of the jacket that is suitable for light activity up to 20 degrees and moderate activity up to -30 degrees?",
  "context": ": 11\nname: Ultra-Lofty 850 Stretch Down Hooded Jacket\ndescription: This technical stretch down jacket from our DownTek collection is sure to keep you warm and comfortable with its full-stretch construction providing exceptional range of motion. With a slightly fitted style that falls at the hip and best with a midweight layer, this 

In [51]:
# Ask the chain every example question; only the "query" field is passed on,
# the reference answers are left out.
questions = [{"query": example["query"]} for example in examples]

predictions = qa.apply(questions)

print(f"\n{predictions}")



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m

[{'query': 'What is the name of the jacket that is suitable for light activity up to 20 degrees and moderate activity up to -30 degrees?', 'result': 'Ultra-Lofty 850 Stretch Down Hooded Jacket'}, {'query': 'What is the name of the sheet set that is made of brushed cotton flannel?', 'result': 'The Cosy Cushion Flannel Sheet Set, Stripe is made of brushed cotton flannel.'}, {'query': 'What is the name of the product that is made of 85% wool and 15% nylon?', 'result': 'The product that 

In [52]:
from langchain.evaluation.qa import QAEvalChain

# Use the same LLM to grade the predicted answers against the reference answers.
eval_chain = QAEvalChain.from_llm(bison)

graded_outputs = eval_chain.evaluate(examples, predictions)

# Print question, reference answer, prediction and grade for each example.
for idx, example in enumerate(examples):
    print(f"Example {idx}:")
    print("Question: " + example["query"])
    print("Real Answer: " + example["answer"])
    print("Predicted Answer: " + predictions[idx]["result"])
    print("Predicted Grade: " + graded_outputs[idx]["results"])
    print()

Example 0:
Question: What is the name of the jacket that is suitable for light activity up to 20 degrees and moderate activity up to -30 degrees?
Real Answer: Ultra-Lofty 850 Stretch Down Hooded Jacket
Predicted Answer: Ultra-Lofty 850 Stretch Down Hooded Jacket
Predicted Grade: CORRECT

Example 1:
Question: What is the name of the sheet set that is made of brushed cotton flannel?
Real Answer: CozyCloud Flannel Sheet Set
Predicted Answer: The Cosy Cushion Flannel Sheet Set, Stripe is made of brushed cotton flannel.
Predicted Grade: INCORRECT

Example 2:
Question: What is the name of the product that is made of 85% wool and 15% nylon?
Real Answer: Men's Maine Explorer Zip-Front Jac-Shirt
Predicted Answer: The product that is made of 85% wool and 15% nylon is the Vista Wool Midweight Performance Crew, Print.
Predicted Grade: INCORRECT

Example 3:
Question: What is the name of the shirt that is made of 100% cotton?
Real Answer: Easy-Care Classic Stripe Short-Sleeve Shirt, Traditional Fit
