In [10]:
import os

import pinecone
from dotenv import load_dotenv
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Pinecone
from transformers import BloomTokenizerFast, BloomForQuestionAnswering, BloomForCausalLM, TrainingArguments, Trainer

from config.config import BASE_DIR, DATA_DIR, EMBEDDING_MODEL_NAME, PINECONE_INDEX_NAME


In [2]:
# Load variables from a local .env file so the os.environ lookups in the
# configuration cell can find the API keys. The call's return value is this
# cell's displayed output.
load_dotenv()

True

In [11]:

# Connection settings, read from the environment (nothing is written to
# Pinecone in this cell).

# Pinecone: the API key has no default; the environment name falls back to
# "us-west4-gcp-free" when PINECONE_ENVIRONMENT is unset.
API_KEY = os.getenv("PINECONE_API_KEY")
YOUR_ENV = os.getenv("PINECONE_ENVIRONMENT", "us-west4-gcp-free")
index_name = PINECONE_INDEX_NAME

# OpenAI key used by the chat model.
# NOTE(review): the variable is read from "OPEN_AI_KEY", not the more common
# "OPENAI_API_KEY" -- confirm this matches the .env file.
OPENAI_API_KEY = os.getenv("OPEN_AI_KEY")

In [12]:
# Connect the Pinecone client using the credentials read from the environment.
pinecone.init(
    api_key=API_KEY,
    environment=YOUR_ENV,
)

# Create the target index only when it is missing. The check is on the index
# *name* (not on "no indexes at all"), so an unrelated pre-existing index is
# never mistaken for ours.
if index_name not in pinecone.list_indexes():
    # The index dimension must equal the embedding size. Derive it from the
    # embedding model itself instead of relying on a variable left over from
    # another notebook/kernel session (the previous `res` was never defined
    # here and raised NameError on a fresh kernel).
    probe_vector = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME).embed_query(
        "dimension probe"
    )
    pinecone.create_index(
        name=index_name,
        metric="cosine",
        shards=1,
        dimension=len(probe_vector),
    )

# Show the configuration of the index this notebook actually uses, rather than
# whichever index happens to be listed first.
pinecone.describe_index(index_name)

IndexDescription(name='test-docs', metric='cosine', replicas=1, dimension=384.0, shards=1, pods=1, pod_type='p1', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')

In [13]:
# Handle to the Pinecone index named by `index_name`.
index = pinecone.Index(index_name)

# Key handed to the vector store as its text field (third positional argument
# when the store is constructed).
text_field = "text"

In [14]:
# Embedding model; its `embed_query` method is what the vector store uses to
# embed incoming queries.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [15]:
# Wrap the Pinecone index as a LangChain vector store: queries are embedded
# with `embeddings.embed_query`, and `text_field` names the stored text key.
vectorstore = Pinecone(
    index,
    embeddings.embed_query,
    text_field,
)

In [16]:
# Chat completion model that generates the answers. temperature=0.0 is used to
# minimise sampling randomness between runs.
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name="gpt-3.5-turbo",
    temperature=0.0,
)

# Retrieval-augmented QA chain without source attribution. The "stuff" chain
# type places the retrieved documents into a single prompt for the LLM.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

In [18]:
# Same retrieval setup as a plain QA chain, but the result also carries a
# "sources" entry alongside "answer" (both are read in the cells below).
qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

In [40]:
# Question about a topic used to probe the QA chain.
# NOTE(review): "NVIDIa" ends with a lowercase "a" -- likely a typo for
# "NVIDIA"; confirm before changing, since it alters the query sent to the chain.
query_NVIDIA = "What is NVIDIa?"


In [17]:
# Query the vector store directly (bypassing the LLM) to inspect which
# documents are retrieved for the term "NVIDIA"; k=3 asks for the three most
# relevant documents.
vectorstore.similarity_search("NVIDIA", k=3)

[Document(page_content="The second phase of the cycle.\n\nIn this phase the numbers  N and  M are compared. If  N is twice  M, then (i) number POLY is extended by k bits, (ii) number  M is doubled, and (iii) number  N is set to 0. If  N is less than twice  M, nothing happens.\n\nThe left rule of the second phase doubles the number  M in the second and the third equation. The test ``Is  N equal to 2 M?'' therefore reduces to one (the first) equation. The fourth equation extend the number  POLY with k bits. The fifth and sixth equations set the number  N to 0.\n\nThe right rule is always applicable. If the right rule is used where the left rule was applicable, then the number  N will never be equal to\n\nin the rest of the derivation. Thus  POLY will not be extended any more.\n\nWe claim that the left rule appears\n\ntimes and the right rule O(n) times in a derivation for input of size n. Obviously, the number  POLY is\n\nwhen the number  SIZE is i.\n\nFrom AVG to HP\n\n\n\nAVG\n\nIn thi

In [41]:
# Run the sources-aware QA chain on the NVIDIA question. The result is indexed
# by "answer" and "sources" in the next cell.
response = qa_with_sources(query_NVIDIA)

In [44]:
# Display the generated answer together with the cited sources.
response["answer"], response["sources"]

('NVIDIA is not mentioned in the provided document.\nSOURCES:', '')

In [45]:
# Question about implementing a disambiguation mechanism, passed to the
# sources-aware QA chain in the next cell.
query_disambiguation = "How could i implement a disambiguation mechanism?"

In [46]:
# Run the sources-aware QA chain on the disambiguation question. This rebinds
# `response`, so the display cell below must be run after this one.
response = qa_with_sources(query_disambiguation)

In [47]:
# Display the generated answer together with the cited sources.
response["answer"], response["sources"]

('To implement a disambiguation mechanism, you can use an algorithm that coordinates anaphora resolution and prepositional phrase (PP) disambiguation. The algorithm applies resolution rules based on the focusing approach to the conceptual representation and uses attachment rules to fill empty roles in the Conceptual Structures (CSs). The algorithm is applied sentence by sentence, and the resolution of an anaphor is postponed if it is preceded by an unattached preposition. The algorithm was developed in the context of the COBALT project and is described in detail in the paper "An Algorithm to Co-Ordinate Anaphora Resolution and PPS Disambiguation Process" by Azzam (1994).\n',
 '9502033.xml')

In [51]:
# Open-ended question about senior living.
# NOTE(review): the string ends mid-sentence ("what are ") and looks
# truncated -- confirm the intended wording.
query_senior_living = "I have some questions about senior living, what are "

In [54]:
# Run the sources-aware QA chain on the senior-living question. This rebinds
# `response`, so the display cell below must be run after this one.
response = qa_with_sources(query_senior_living)

In [55]:
# Display the generated answer together with the cited sources.
response["answer"], response["sources"]

("The best way to get started with senior living is to have open conversations with friends, family, healthcare professionals, and senior living experts. They can guide and support you through the decision-making process. Additionally, you can consider factors such as whether household chores and daily tasks have become overwhelming, if you need assistance with personal care, and if you have safety concerns. It's important to note that senior living offers various lifestyle options that prioritize independence while providing necessary care and support. The affordability of senior living can vary depending on factors such as location, level of care required, amenities offered, and the specific community chosen. However, many people are surprised to learn that the cost of senior living is often lower than the cost of staying in their current homes. It's recommended to use a Cost Comparison Calculator to get a more accurate comparison. Senior living offers benefits such as socializing on

In [57]:
# Spanish-language query (roughly: "Tell me about life in old age"), used to
# see how the chain handles a non-English question.
query_espanol = "Dime acerca de la vida de la tercera edad"

In [58]:
# Run the sources-aware QA chain on the Spanish question. This rebinds
# `response`, so the display cell below must be run after this one.
response = qa_with_sources(query_espanol)

In [59]:
# Display the generated answer together with the cited sources.
response["answer"], response["sources"]

("I don't know the answer to this question.\nSOURCES:", '')