In [28]:
from langchain.prompts import PromptTemplate
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI
from chromadb.api.models.Collection import Collection
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI
from langchain.vectorstores.chroma import Chroma
from langchain.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import VectorStoreRetriever

In [5]:
def create_vector_database_from_pdf(pdf_path: str):
    """Creates a Vector Database on memory based on a pdf file and returns a Vector Store Retriever.

    Args:
        pdf_path: Path to pdf to convert into a Vector Database.

    Returns:
        VectorStoreRetriever with unstructured data.
    """

    # Load pdf and split it
    loader = PyPDFLoader(pdf_path)
    data = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = 500,
        length_function=len,
        chunk_overlap=50
    )
    documents = text_splitter.split_documents(data)

    # Create Vector Store with Embeddings
    embedding_openai = OpenAIEmbeddings(model = "text-embedding-ada-002")
    vector_store = Chroma.from_documents(
        documents = documents,
        embedding = embedding_openai
    )

    # vector_store.persist()
    retriever = vector_store.as_retriever(search_kwargs = {"k":4})
    return retriever

retriever = create_vector_database_from_pdf("./src/frontend/tmp/atenttion_is_all_you_need.pdf")

In [24]:
template = '''
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know.
Don't try to make up an answer. The context could be in any language, but you will always answer in the language of the user's question.
{context}

Question: {question}
Answer:
'''

llm = ChatOpenAI(
    model_name = "gpt-3.5-turbo",
    temperature = 0.0,
    streaming=True
)

# Initialise RetrievalQA Chain
chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type ="stuff",
    chain_type_kwargs = {
        "prompt": PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        ),
    },
)

response = chain.invoke({"query": "What are transformers?"})

print(response) 

{'query': 'What are transformers?', 'result': 'Transformers are a type of sequence transduction model that rely entirely on self-attention to compute representations of input and output without using sequence-aligned RNNs or convolution. They replace the recurrent layers commonly used in encoder-decoder architectures with multi-headed self-attention, allowing for faster training on translation tasks compared to architectures based on recurrent or convolutional layers.', 'source_documents': [Document(page_content='The Transformer allows for significantly more parallelization and can reach a new state of the art in\ntranslation quality after being trained for as little as twelve hours on eight P100 GPUs.\n2 Background\nThe goal of reducing sequential computation also forms the foundation of the Extended Neural GPU\n[16], ByteNet [ 18] and ConvS2S [ 9], all of which use convolutional neural networks as basic building', metadata={'page': 1, 'source': './src/frontend/tmp/atenttion_is_all_yo

In [21]:
response = chain.invoke({"query": "Que son transformers?"})
print(response)

{'query': 'Que son transformers?', 'result': 'Los transformers son un tipo de modelo de transducción de secuencias basado completamente en atención, que reemplaza las capas recurrentes más comúnmente utilizadas en arquitecturas codificador-decodificador con auto-atención multi-cabeza.', 'source_documents': [Document(page_content='The Transformer allows for significantly more parallelization and can reach a new state of the art in\ntranslation quality after being trained for as little as twelve hours on eight P100 GPUs.\n2 Background\nThe goal of reducing sequential computation also forms the foundation of the Extended Neural GPU\n[16], ByteNet [ 18] and ConvS2S [ 9], all of which use convolutional neural networks as basic building', metadata={'page': 1, 'source': './src/frontend/tmp/atenttion_is_all_you_need.pdf'}), Document(page_content='language modeling tasks [34].\nTo the best of our knowledge, however, the Transformer is the first transduction model relying\nentirely on self-atten

In [16]:
chain.combine_documents_chain.llm_chain.prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="Use the following pieces of context to answer the user's question. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{context}")),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='{question}'))]

In [37]:
from src.frontend._templates import template_customer_service

llm = ChatOpenAI(
    model_name = "gpt-3.5-turbo",
    temperature = 0.0,
    streaming=True
)

# Initialise RetrievalQA Chain
chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type ="stuff",
    chain_type_kwargs = {
        "prompt": PromptTemplate(
            template=template_customer_service,
            input_variables=["company","context", "question"],
        ),
    },
)

response = chain.invoke({"query": "What are transformers?", "company":"jota"})

print(response) 

ValueError: Missing some input keys: {'company'}

In [None]:
qa = ConversationalRetrievalChain.from_llm(
    llm=llm, 
    chain_type="stuff", 
    retriever=retriever, 
    return_source_documents=True,
    verbose = True,
    combine_docs_chain_kwargs={'prompt': promptHist},
    memory = memory,
)