### setup

In [1]:
import socket
import re

PROJECT_ID = !(gcloud config get-value core/project)
PROJECT_ID = PROJECT_ID[0]

SVC_ACC = !(gcloud config get-value core/account)
SVC_ACC = SVC_ACC[0]

PROJECT_NUMBER=str(re.search(r'\d+', SVC_ACC).group())

LOCATION="us-central1"

UNIQUE_PREFIX = socket.gethostname()
UNIQUE_PREFIX = re.sub('[^A-Za-z0-9]+', '', UNIQUE_PREFIX)

BUCKET_NAME = f"{PROJECT_ID}-{UNIQUE_PREFIX}-{LOCATION}"

BUCKET_URI = f"gs://{BUCKET_NAME}"  # @param {type:"string"}

! gcloud config set project $PROJECT_ID
! gcloud storage buckets create {BUCKET_URI} --project={PROJECT_ID} --location={LOCATION}
! mkdir output

import vertexai

vertexai.init(project=PROJECT_ID, location=LOCATION, staging_bucket=BUCKET_URI)

Updated property [core/project].
Creating gs://my-project-0004-346516-pytorch112kagglewbi-us-central1/...
[1;31mERROR:[0m (gcloud.storage.buckets.create) HTTPError 409: Your previous request to create the named bucket succeeded and you already own it.
mkdir: cannot create directory ‘output’: File exists


### Set up the Gemini model, embeddings and local FAISS FAQ chain

In [2]:
# from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_vertexai import VertexAI
from langchain_google_vertexai import VertexAIEmbeddings


# from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
import os 
from dotenv import load_dotenv
load_dotenv()

# Local folder the FAISS index is saved to and loaded from by the helpers below.
db_file_path='FAISS_Index'
# Embedding model shared by index creation and index loading.
embeddings = VertexAIEmbeddings('textembedding-gecko@latest')

def creation_of_vectorDB_in_local(loader):
    """Embed the loader's documents into a FAISS index and persist it locally.

    Args:
        loader: Object exposing ``load()``; its result is handed to
            ``FAISS.from_documents`` together with the module-level
            ``embeddings``.

    The resulting index is written to ``db_file_path``. Nothing is returned.
    """
    FAISS.from_documents(loader.load(), embeddings).save_local(db_file_path)

def creation_FAQ_chain():
    """Build a RetrievalQA chain over the FAISS index stored at ``db_file_path``.

    The index is loaded with the module-level ``embeddings``, wrapped in a
    retriever that filters on a relevance-score threshold of 0.7, and combined
    with a Gemini model through a "stuff" chain using a custom prompt.

    Returns:
        The ``RetrievalQA`` chain; it takes its input under the ``"query"`` key
        and does not return source documents.
    """
    db = FAISS.load_local(db_file_path, embeddings)

    # A score threshold has to be supplied via `search_type` / `search_kwargs`;
    # passing `score_threshold` directly to `as_retriever` does not configure
    # the search, so the 0.7 cut-off was never applied.
    retriever = db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.7},
    )

    llm = VertexAI(model_name="gemini-pro")

    prompt_temp="""Given the following context and a question, generate an answer based on this context only.
    In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
    If the answer is not found in the context, kindly state "This Question not Present in My Database." Don't try to make up an answer.
    CONTEXT: {context}
    QUESTION: {question}"""

    PROMPT = PromptTemplate(template=prompt_temp, input_variables=["context", "question"])
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        input_key="query",
        return_source_documents=False,
        chain_type_kwargs={"prompt": PROMPT},
    )
    return chain


### connect to FAQ Datastore

In [3]:
#@title ### You will need to update these values

import vertexai

# Initialise the SDK with the project and region resolved in the setup cell.
vertexai.init(location=LOCATION, project=PROJECT_ID)

# Identifier and location of the data store queried by the retriever cells.
DATA_STORE_ID = "faq-ds_1715661604491"  # @param {type:"string"}
DATA_STORE_LOCATION = "global"  # @param {type:"string"}

# Model name handed to the VertexAI LLM wrapper.
MODEL = "gemini-1.0-pro"  # @param {type:"string"}

# Fail fast while either template placeholder is still in place.
if not (PROJECT_ID != "YOUR_PROJECT_ID" and DATA_STORE_ID != "YOUR_DATA_STORE_ID"):
    raise ValueError(
        "Please set the PROJECT_ID, DATA_STORE_ID constants to reflect your environment."
    )

In [None]:
### 

In [None]:
from langchain.chains import RetrievalQA
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

from langchain_google_vertexai import VertexAI
from langchain_google_community import VertexAISearchRetriever
from langchain_google_community import VertexAIMultiTurnSearchRetriever

In [None]:
# LLM that generates the final answers.
llm = VertexAI(model_name=MODEL)

# Retriever settings gathered in one place, then unpacked into the constructor.
_retriever_settings = {
    "project_id": PROJECT_ID,
    "location_id": DATA_STORE_LOCATION,
    "data_store_id": DATA_STORE_ID,
    "get_extractive_answers": True,
    "max_documents": 10,
    "max_extractive_segment_count": 1,
    "max_extractive_answer_count": 5,
}
retriever = VertexAISearchRetriever(**_retriever_settings)


In [21]:
search_query = "How do I track my package on the portal"  # @param {type:"string"}

# Type 1: plain RetrievalQA chain over the Vertex AI Search retriever.
retrieval_qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)

# Last expression of the cell, so the response dict is displayed.
retrieval_qa.invoke(search_query)

{'query': 'How do I track my package on the portal',
 'result': '## How to Track Your Package on the Portal\n\nBased on the provided information, there are two ways to track your package on the portal:\n\n1. **From the Ready to Ship list:** After completing the SED declaration, the item will be available in the **Ready to Ship list**. This list can be accessed through the following steps:\n    * Go to the **Package Summary** section (link: https://drive.google.com/file/d/14-ELQYg19D3ypTZDspqKnk6PcGucs8Qu/view?usp=drive_link).\n    * Click on the **SED** tab.\n    * Find the item you are looking for in the **Ready to Ship list**.\n    * Click on the **Tracking** link next to the item.\n\n2. **From the Package Summary:** You can also track your package directly from the **Package Summary** section. To do this:\n    * Go to the **Package Summary** section (link: https://drive.google.com/file/d/14-ELQYg19D3ypTZDspqKnk6PcGucs8Qu/view?usp=drive_link).\n    * Find the package you are looking 

In [22]:
import pandas as pd

# Load the FAQ sheet; its "question" column supplies the prompts sent to the chains.
df = pd.read_csv('Singpost_QnA_doc.csv')

questions_list = df["question"].to_list()

['I need to update my email address', 'How do I get my package shipped', 'How do I navigate the members portal', 'How do I navigate the address section on the portal', 'How do I perform declaration on my package', 'What is SED and what does the customer need to do?', 'How do I navigate the package summary', 'How do I track my package on the portal', 'What kind of cases can CS Ops assist?', 'What kind of case can vPost FFPs assist with?', 'Which team can provide assistance for unclaimed vPost packages', 'Is bundling discount available?', 'What is VP ID']


In [42]:
# One chain call per question; each cell holds the full response dict returned by invoke().
df["all_ans_type1"] = list(map(retrieval_qa.invoke, questions_list))


In [43]:
# all_ans = [] 
# for quest_i in questions_list: 
#     ans = retrieval_qa.invoke(quest_i)
#     all_ans += ans
#     # print(ans)

### Type 2: retrieval QA with sources

In [44]:
# Type 2: same LLM and retriever, but through the chain variant that also reports sources.
retrieval_qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)

# Last expression of the cell, so the response dict is displayed.
retrieval_qa_with_sources.invoke(search_query, return_only_outputs=False)

{'question': 'How do I track my package on the portal',
 'answer': "I apologize, but the text you've provided doesn't include instructions on how to track your package on the portal. Please provide more information or specific instructions on how to track your package on the portal, and I'd be happy to help!\n",
 'sources': ''}

In [27]:
# questions_list

['I need to update my email address',
 'How do I get my package shipped',
 'How do I navigate the members portal',
 'How do I navigate the address section on the portal',
 'How do I perform declaration on my package',
 'What is SED and what does the customer need to do?',
 'How do I navigate the package summary',
 'How do I track my package on the portal',
 'What kind of cases can CS Ops assist?',
 'What kind of case can vPost FFPs assist with?',
 'Which team can provide assistance for unclaimed vPost packages',
 'Is bundling discount available?',
 'What is VP ID']

In [46]:
# One chain call per question; each cell holds the full response dict returned by invoke().
df["all_ans_type2"] = list(map(retrieval_qa_with_sources.invoke, questions_list))


In [47]:
# Display the frame, including the answer columns added by the cells above.
df

Unnamed: 0,question,answer,all_ans_type2,all_ans_type1
0,I need to update my email address,As each account is tied to a unique email addr...,{'question': 'I need to update my email addres...,"{'query': 'I need to update my email address',..."
1,How do I get my package shipped,Your unique VP ID that forms part of your over...,{'question': 'How do I get my package shipped'...,"{'query': 'How do I get my package shipped', '..."
2,How do I navigate the members portal,Refer to the following image(s) https://drive....,{'question': 'How do I navigate the members po...,{'query': 'How do I navigate the members porta...
3,How do I navigate the address section on the p...,Refer to the following image(s) https://drive....,{'question': 'How do I navigate the address se...,{'query': 'How do I navigate the address secti...
4,How do I perform declaration on my package,Refer to the following image(s) https://drive....,{'question': 'How do I perform declaration on ...,{'query': 'How do I perform declaration on my ...
5,What is SED and what does the customer need to...,SED refers to the United States Shipper’s Expo...,{'question': 'What is SED and what does the cu...,{'query': 'What is SED and what does the custo...
6,How do I navigate the package summary,Refer to the following image(s) https://drive....,{'question': 'How do I navigate the package su...,{'query': 'How do I navigate the package summa...
7,How do I track my package on the portal,Refer to the following image(s) https://drive....,{'question': 'How do I track my package on the...,{'query': 'How do I track my package on the po...
8,What kind of cases can CS Ops assist?,Item matched to wrong VP number (provide suppo...,{'question': 'What kind of cases can CS Ops as...,{'query': 'What kind of cases can CS Ops assis...
9,What kind of case can vPost FFPs assist with?,1. Request to match to correct owner / Item ta...,{'question': 'What kind of case can vPost FFPs...,{'query': 'What kind of case can vPost FFPs as...


In [41]:
# df["all_ans_type2"][0]

{'query': 'I need to update my email address',
 'result': "I'm sorry, but you cannot update the email address associated with your account. To change your email address, you will need to create a new account."}

### Type 3: direct Discovery Engine search client

In [54]:
from google.cloud import discoveryengine_v1alpha as discoveryengine


In [55]:
# Location and engine id used to build the serving-config resource name for the search call.
SEARCH_APP_LOCATION = "global"  # @param {type:"string"}
SEARCH_ENGINE_ID = "faq-app_1715661153113"  # @param {type:"string"}

In [56]:
# Rebinds search_query (set earlier for the chain examples) for the direct search request.
search_query = "How do I get my package shipped?"


In [57]:
# ClientOptions is needed whenever SEARCH_APP_LOCATION is not "global"; without
# this import the conditional below raises NameError for regional locations.
from google.api_core.client_options import ClientOptions

# Create a client, pointing it at the regional endpoint unless the app is global
# (in which case the library's default endpoint is used).
client = discoveryengine.SearchServiceClient(
    client_options=(
        ClientOptions(
            api_endpoint=f"{SEARCH_APP_LOCATION}-discoveryengine.googleapis.com"
        )
        if SEARCH_APP_LOCATION != "global"
        else None
    )
)

# The full resource name of the search app serving config
serving_config = f"projects/{PROJECT_ID}/locations/{SEARCH_APP_LOCATION}/collections/default_collection/engines/{SEARCH_ENGINE_ID}/servingConfigs/default_config"

# Issue the search; the result is a pager over matching documents.
response = client.search(
    discoveryengine.SearchRequest(
        serving_config=serving_config,
        query=search_query,
        page_size=10,
    )
)

In [58]:
# Display the pager returned by client.search().
response

SearchPager<results {
  id: "be88ac33cbb68d7b5309767940eb0989"
  document {
    name: "projects/255766800726/locations/global/collections/default_collection/dataStores/faq-ds_1715661604491/branches/0/documents/be88ac33cbb68d7b5309767940eb0989"
    id: "be88ac33cbb68d7b5309767940eb0989"
    struct_data {
      fields {
        key: "answer"
        value {
          string_value: "Your unique VP ID that forms part of your overseas address is very important as it helps us to identify your package. Please ensure it is in your address fdields as shown. In the event you are not bale to enter the VP ID as part of your name, you may indicate the VP ID in other parts of the address"
        }
      }
      fields {
        key: "question"
        value {
          string_value: "How do I get my package shipped"
        }
      }
    }
    derived_struct_data {
      fields {
        key: "extractive_answers"
        value {
          list_value {
            values {
              struct_value

### helper function -- just in case

In [None]:
# !gsutil cp  ./Singpost_QnA_doc.csv gs://my-project-0004-bucket02/llms