In [None]:
!pip install PdfReader
!pip install langchain
!pip install PyPDF2
!pip install InstructorEmbedding
!pip install sentence_transformers
!pip install faiss
!pip install faiss-gpu

In [5]:
from google.colab import drive
drive.mount("/content/drive")


Mounted at /content/drive


In [6]:
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings

from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory

from langchain.chains import ConversationalRetrievalChain
from langchain.chains import RetrievalQA

from langchain.llms import HuggingFaceHub
from langchain.prompts.prompt import PromptTemplate

from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS


In [8]:
def getPdf(pdf_docs):
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text += page.extract_text()
    return text

## Cria os chunks do texto
def getChunks(text):
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    return chunks

## Cria embeddings para os chunks do testo
def getVectorstore(text_chunks):
    embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    vectorstore = FAISS.from_texts(
        texts=text_chunks,
        embedding=embeddings)
    return vectorstore

## Cria um retrieval-based question-answering llm chain usando o Hugging Face model pré treinado
def retrievalChain(db,return_source_documents):
    llm = HuggingFaceHub(
        repo_id="tiiuae/falcon-7b-instruct",
        model_kwargs={"temperature":0.6,"max_length":500, "max_new_tokens":700}
      )

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=db,
        return_source_documents=return_source_documents,
      )

    return qa_chain

# Vetorização e criação dos indices

In [9]:
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"
raw_text = getPdf(['/content/drive/MyDrive/dados/artigos/english.pdf'])

text_chunks = getChunks(raw_text)

## Cria um vector store
## db contém os 3 maiores matched vectors => search_kwargs={‘k’: 3}`
vectorstore = getVectorstore(text_chunks)
db = vectorstore.as_retriever(search_kwargs={'k': 3})

model = retrievalChain(db,True)

question = "what is score matrix?"
answer=model(question)

print(answer['result'])
print(answer['source_documents'])


  from tqdm.autonotebook import trange


.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

2_Dense/config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.15M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/66.3k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.40k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

load INSTRUCTOR_Transformer
max_seq_length  512





The score matrix is a matrix that contains the weights assigned to each word in a word vector model. The score matrix is used to calculate the relative weight that each word should have in relation to the other words, and is used to map the queries onto the keys. The higher the score for a specific word, the more it is attended to.
[Document(page_content='32The intuition of the preceding steps is to capture the closeness of words in vector space to each other and then assign weights based on that closeness. Then the neighbor vectors are weighted according to the weight they exercise and added together to give a representation of a word, which takes into account the closeness of words in its neighborhood.Though this mechanism is simple, there is no learning of weights happening in this. And this is where the mechanism of query and key matrices comes into the picture. The weights in these matrices are what are learned by the network. Each individual word vector does a dot product with t

In [10]:
## Não fornece o contexto
llm = HuggingFaceHub(
    repo_id="tiiuae/falcon-7b-instruct",
    model_kwargs={"temperature":0.7,"max_length":500, "max_new_tokens":700})

llm(question)

questions=['why perform a softmax?',
           'how neighbor vectors are weighted?',
           'why take the softmax of the scaled score?']

answer=model(questions[0])
print(questions[0])
print(answer['result'])

answer=model(questions[1])
print(questions[1])
print(answer['result'])

answer=model(questions[2])
print(questions[2])
print(answer['result'])



why perform a softmax?


The softmax is performed to give a score to each word in the word list, which is then used to determine the attention weights in order to capture the close proximity of words in the query and key matrices.
how neighbor vectors are weighted?


The neighbor vectors are weighted based on the closeness of words in their neighborhood. This means that the closer a word is to a neighbor, the more weight it will be assigned to that word. The weights can be learned by the network through training, and each individual word vector is weighted based on the query and key matrices. The resulting score matrix is then used to calculate the attention weights, which are used to map the queries onto the keys. The higher the softmax scores are, the more attention is given to the words.
why take the softmax of the scaled score?

The softmax function is used to normalize the scores of the words so that they can be compared in a consistent way. The output of the softmax function is u

In [11]:
#https://krishnayogi.medium.com/building-a-question-answering-system-using-llm-50904793ae07