In [18]:
# Notebook setup: imports, .env loading, and endpoint configuration.
import json
import os
import sys

# Put the parent directory on sys.path before the local imports below.
# NOTE(review): presumably this is what lets `config` and `src` resolve when
# the notebook is run from a sub-directory — confirm against the repo layout.
sys.path.append(os.path.abspath(os.path.join('../')))

from dotenv import load_dotenv
from langchain import LLMChain
from langchain import SagemakerEndpoint
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
# CohereEmbeddings is used by a later cell; import it explicitly so the
# notebook does not depend on the star import below (or on leftover kernel
# state) to provide it. It is placed before `from config import *` so that a
# same-named export from `config`, if any, still takes precedence as before.
from langchain.embeddings import CohereEmbeddings
from langchain.llms.sagemaker_endpoint import LLMContentHandler
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Pinecone

# Load variables from a .env file into the process environment before the
# project config is imported and before os.environ is read below.
load_dotenv()

# NOTE(review): the star import hides where names such as COHERE_MODEL_NAME,
# COHERE_API_KEY and PINECONE_INDEX_NAME come from; consider explicit imports.
from config import *
from src.utils import connect_index

# Both variables are required; a missing one raises KeyError here rather than
# failing later with a less obvious error.
region = os.environ["AWS_REGION"]
endpoint_name = os.environ["SAGEMAKER_ENDPOINT_NAME"]
print(endpoint_name)

huggingface-pytorch-tgi-inference-2023-08-09-22-11-09-254


In [19]:
# Embedding client configured from the COHERE_* constants.
# NOTE(review): these constants presumably come from `from config import *` — confirm.
embeddings = CohereEmbeddings(
    model=COHERE_MODEL_NAME,
    cohere_api_key=COHERE_API_KEY,
)

In [20]:
# Connect to the named Pinecone index via the project helper.
index = connect_index(PINECONE_INDEX_NAME)

# Wrap the index as a LangChain vector store: queries are embedded with
# embeddings.embed_query, and "text" is passed as the third positional argument.
vectorstore = Pinecone(
    index,
    embeddings.embed_query,
    "text",
)

In [21]:
class ContentHandler(LLMContentHandler):
    """Translate between LangChain prompts and the endpoint's JSON payloads.

    Requests are sent and responses are accepted as ``application/json``.
    """

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        """Serialize a prompt and its generation parameters into a request body.

        Args:
            prompt: Text placed under the ``"inputs"`` key.
            model_kwargs: Generation parameters, shallow-copied under the
                ``"parameters"`` key.

        Returns:
            The UTF-8 encoded JSON document.
        """
        payload = {"inputs": prompt, "parameters": {**model_kwargs}}
        return json.dumps(payload).encode("utf-8")

    def transform_output(self, output) -> str:
        """Extract the generated text from an endpoint response.

        Args:
            output: Either a readable object exposing ``read()`` that yields
                the response bytes, or the response bytes/str directly. The
                original implementation only accepted the former, although it
                was annotated as ``bytes``.

        Returns:
            The ``"generated_text"`` field of the first element of the decoded
            JSON response.

        Raises:
            IndexError, KeyError, TypeError: If the decoded JSON is not a
                non-empty sequence whose first element has ``"generated_text"``.
            json.JSONDecodeError: If the body is not valid JSON.
        """
        # Accept a stream-like body as well as an already-read payload.
        raw = output.read() if hasattr(output, "read") else output
        if isinstance(raw, (bytes, bytearray)):
            raw = raw.decode("utf-8")
        response_json = json.loads(raw)
        return response_json[0]["generated_text"]

In [22]:
# Prompt text with two placeholders, {context} and {question}, that are
# declared as the template's input variables below.
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum and keep the answer as concise as possible. 
Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:
"""

QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)



In [23]:
# Handler that encodes requests and decodes responses for the endpoint.
content_handler = ContentHandler()

# LLM wrapper around the SageMaker endpoint named by `endpoint_name`.
# NOTE(review): the credentials profile name is hard-coded here; consider
# reading it from configuration so the notebook runs on other machines.
llm = SagemakerEndpoint(
    endpoint_name=endpoint_name,
    region_name=region,
    credentials_profile_name="fundamentl-ai",
    content_handler=content_handler,
    endpoint_kwargs={"CustomAttributes": "accept_eula=true"},
    model_kwargs={
        "max_new_tokens": 200,
        "top_p": 0.9,
        "max_length": None,
        "temperature": 1e-10,
    },
)

2023-08-09 18:01:38.840 Found credentials in shared credentials file: ~/.aws/credentials


In [24]:
# QA chain built directly from the LLM and the custom QA_CHAIN_PROMPT.
chain = load_qa_chain(
    llm=llm,
    prompt=QA_CHAIN_PROMPT,
)

In [25]:
# Ad-hoc sample question; no later cell visible in this notebook reads it.
question = 'What is the capital of France?'

In [26]:
from langchain.docstore.document import Document

# Short hand-written passage used as the content of a single Document.
example_doc_1 = """
Peter and Elizabeth took a taxi to attend the night party in the city. While in the party, Elizabeth collapsed and was rushed to the hospital.
Since she was diagnosed with a brain injury, the doctor told Peter to stay besides her until she gets well.
Therefore, Peter stayed with her at the hospital for 3 days without leaving.
"""

# One-element document list wrapping the passage above.
docs = [Document(page_content=example_doc_1)]

In [27]:
#qa = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever(), chain_type="stuff", return_source_documents=True, chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})

In [28]:
# prompt_template = """
# The following is a friendly conversation between a human and an AI. 
# The AI is talkative and provides lots of specific details from its context.
# If the AI does not know the answer to a question, it truthfully says it 
# does not know.
# {context}
# Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" 
# if not present in the document. 
# Solution:"""
# PROMPT = PromptTemplate(
#     template=prompt_template, input_variables=["context", "question"]
# )
# chain_type_kwargs = {"prompt": PROMPT}
# Retrieval QA chain of type "stuff" over the Pinecone vector store; the
# source documents are returned alongside the answer.
# chain_type_kwargs is deliberately not passed, so the commented-out PROMPT
# above is not applied to this chain.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    return_source_documents=True,
)

In [29]:
# Run the retrieval QA chain on a single query and keep the response dict.
result = qa_chain(
    {"query": "What is rainfall data?"}
)

ValueError: Error raised by inference endpoint: An error occurred (ValidationError) when calling the InvokeEndpoint operation: Endpoint huggingface-pytorch-tgi-inference-2023-08-09-22-11-09-254 of account 653844554812 not found.

In [None]:
# Display the value bound to `result` by the qa_chain call in the previous cell.
result

{'query': 'What is rainfall data?',
  Document(page_content='Observational rainfall data is produced by measuring the amount of rainfall at precise locations, which are called measuring stations or gauges (e.g., the stars in Figure 1 below). Station data is typically obtained from national or regional meteorological services and should only be used in accordance with their guidance. Some of the advantages of station data are that it is accurate at the specific locations, it is collected frequently and it has been collected over the long-term, allowing for trend analysis. On the other hand, station data is only available for specific locations and is subject to bias due to wind, evaporation, and changes in measurement devices. Contact the local or regional meteorological services to learn whether station data is available, where the stations are located, how often the data is updated, and whether historical records are available.\n\nCENTRE FOR HUMANITARIAN DATA\n\n2\n\nRainfall can also

In [None]:
# Show the seventh newline-separated line (index 6) of the answer text.
# NOTE(review): the fixed index raises IndexError if the answer has fewer lines.
result["result"].split("\n")[6]

'CENTRE FOR HUMANITARIAN DATA'

In [None]:
# Show the documents returned with the answer (return_source_documents=True).
result["source_documents"]

 Document(page_content='Observational rainfall data is produced by measuring the amount of rainfall at precise locations, which are called measuring stations or gauges (e.g., the stars in Figure 1 below). Station data is typically obtained from national or regional meteorological services and should only be used in accordance with their guidance. Some of the advantages of station data are that it is accurate at the specific locations, it is collected frequently and it has been collected over the long-term, allowing for trend analysis. On the other hand, station data is only available for specific locations and is subject to bias due to wind, evaporation, and changes in measurement devices. Contact the local or regional meteorological services to learn whether station data is available, where the stations are located, how often the data is updated, and whether historical records are available.\n\nCENTRE FOR HUMANITARIAN DATA\n\n2\n\nRainfall can also be estimated using satellite or rada

In [None]:
# Ask a second question and display the raw response dict.
qa_chain(
    {"query": "What is Twitter?"}
)

{'query': 'What is Twitter?',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nEnguehard, C. 1994. Acquisition of terminology from colloquial texts. In Proceedings, Computational Linguistics for Speech and Handwriting Recognition (CLSHR).\n\nGrefenstette, G. 1994. Explorations in Automatic Thesaurus Discovery. Dordrecht, The Netherlands: Kluwer Academic Publisher.\n\nJacquemin, C. 1994. Recycling terms into a partial parser. In Proceedings, 4th Conference on Applied Natural Language Processing (ANLP'94),  113-118.\n\nLewis, D. D., and Croft, W. B. 1990. Term clustering of syntactic phrasess. In Proceedings, 13th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR'90),  385-404.\n\nResnik, P. 1993. Selection and Information : A Class-Based Approach to Lexical Relationships. Ph.D. thesis, University of Pennsylva