### Installing dependencies

In [None]:
!pip install torch
!pip install datasets
!pip install langchain
!pip install faiss-cpu
!pip install faiss-gpu
!pip install tensorflow
!pip install transformers

Collecting datasets
  Downloading datasets-2.16.1-py3-none-any.whl (507 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dill, multiprocess, datasets
Successfully installed datasets-2.16.1 dill-0.3.7 multiprocess-0.70.15
Collecting langchain
  Downloading langchain-0.1.0-py3-none-any.whl (797 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m798.0/798.0 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json

### Importing dependencies

In [None]:
import json
import os

import faiss
import numpy as np
import torch

from langchain.chains import LLMChain
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate

from transformers import AutoTokenizer, AutoModel

### Common Functions

In [None]:
def load_model(model_name):
    """Return the pretrained model obtained from ``AutoModel.from_pretrained``.

    Args:
        model_name: Identifier forwarded unchanged to ``from_pretrained``.

    Returns:
        The object returned by ``AutoModel.from_pretrained(model_name)``.
    """
    print('Loading model...')
    return AutoModel.from_pretrained(model_name)

def load_tokenizer(model_name):
    """Return the tokenizer obtained from ``AutoTokenizer.from_pretrained``.

    Args:
        model_name: Identifier forwarded unchanged to ``from_pretrained``.

    Returns:
        The object returned by ``AutoTokenizer.from_pretrained(model_name)``.
    """
    print('Loading tokenizer...')
    return AutoTokenizer.from_pretrained(model_name)

def load_document(filepath):
    """Read a UTF-8 encoded JSON file and return its parsed content.

    Args:
        filepath: Path of the JSON file to open.

    Returns:
        The Python object produced by parsing the file's JSON text.
    """
    print('Loading document...')
    with open(filepath, mode='r', encoding='utf-8') as handle:
        return json.loads(handle.read())

def load_QA_database(model, tokenizer, document):
    """Split a Q&A document into questions/answers and embed every question.

    Each question is tokenized (truncated to 128 tokens), passed through
    ``model`` without gradient tracking, and represented by the mean of
    ``last_hidden_state`` over dimension 1.

    Args:
        model: Callable accepting the tokenizer output as keyword arguments and
            returning an object with a ``last_hidden_state`` tensor.
        tokenizer: Callable turning a question string into model inputs.
        document: Iterable of mappings with ``"question"`` and ``"answer"`` keys.

    Returns:
        A tuple ``(embeddings, question_data, answer_data)`` where
        ``embeddings`` is the row-wise stack of the question vectors and the
        two lists keep the document order.
    """
    print('Tokenizing data...')
    pairs = [(entry["question"], entry["answer"]) for entry in document]
    question_data = [question for question, _ in pairs]
    answer_data = [answer for _, answer in pairs]

    # Phase 1: tokenize every question before any model call.
    encoded_questions = []
    for text in question_data:
        encoded_questions.append(
            tokenizer(text, return_tensors="pt", max_length=128, truncation=True)
        )

    def _embed(tokens):
        # Inference only, so no autograd graph is needed.
        with torch.no_grad():
            hidden = model(**tokens).last_hidden_state
        return hidden.mean(dim=1).squeeze().numpy()

    # Phase 2: one forward pass per question, stacked into a single matrix.
    embeddings = np.vstack([_embed(tokens) for tokens in encoded_questions])
    return embeddings, question_data, answer_data

def index_into_faiss(embeddings):
    """Build a flat L2 FAISS index over L2-normalized copies of the embeddings.

    Args:
        embeddings: 2-D array-like of vectors; its second dimension is used as
            the index dimensionality.

    Returns:
        A ``faiss.IndexFlatL2`` containing the normalized vectors.
    """
    print('Indexing database in faiss...')
    # faiss.normalize_L2 works in place, so normalize a private float32,
    # C-contiguous copy instead of silently rewriting the caller's array.
    vectors = np.array(embeddings, dtype=np.float32, order='C', copy=True)
    vector_dimension = vectors.shape[1]
    index = faiss.IndexFlatL2(vector_dimension)
    faiss.normalize_L2(vectors)
    index.add(vectors)
    return index

def search_into_faiss(model, tokenizer, query, index, max_results):
    """Embed ``query`` and return its nearest neighbours from ``index``.

    The query goes through the same steps as the indexed questions: it is
    tokenized with truncation at 128 tokens, mean-pooled over dimension 1 of
    ``last_hidden_state`` and L2-normalized, so the distances returned by the
    index compare unit vectors with a unit vector.

    Args:
        model: Callable accepting the tokenizer output as keyword arguments and
            returning an object with a ``last_hidden_state`` tensor.
        tokenizer: Callable turning the query string into model inputs.
        query: Text to search for.
        index: Object exposing ``search(vectors, k)``.
        max_results: Number of neighbours requested from the index.

    Returns:
        The ``(distance, nearest_neighbours)`` pair returned by ``index.search``.
    """
    # Same truncation settings as the database side, so an over-long query
    # cannot exceed the encoder's input limit.
    query_tokens = tokenizer(query, return_tensors="pt", max_length=128, truncation=True)
    with torch.no_grad():
        query_embedding = model(**query_tokens).last_hidden_state.mean(dim=1).squeeze().numpy()

    # The index expects a float32 matrix with one row per query.
    query_matrix = np.ascontiguousarray(query_embedding, dtype=np.float32).reshape(1, -1)
    # Indexed vectors are L2-normalized before being added; normalize the query
    # too so the returned distances are on a consistent scale.
    norm = np.linalg.norm(query_matrix)
    if norm > 0:
        query_matrix = query_matrix / norm

    distance, nearest_neighbours = index.search(query_matrix, max_results)

    return distance, nearest_neighbours

def get_data_neighbours(question_data, answer_data, nearest_neighbours):
    """Map the neighbour ids of the first query to their questions and answers.

    Args:
        question_data: Sequence of stored questions, indexed by neighbour id.
        answer_data: Sequence of stored answers, aligned with ``question_data``.
        nearest_neighbours: 2-D collection of neighbour ids; only the first row
            is used.

    Returns:
        A tuple ``(similar_queries, context_queries)`` with the questions and
        answers of every valid neighbour, in ranking order.
    """
    # FAISS fills missing results with -1 when fewer than k vectors are
    # available. Indexing a Python list with -1 would silently return the last
    # entry, so negative ids are dropped.
    valid_ids = [int(idx) for idx in nearest_neighbours[0] if idx >= 0]

    similar_queries = [question_data[idx] for idx in valid_ids]
    context_queries = [answer_data[idx] for idx in valid_ids]

    return similar_queries, context_queries

def generate_answer(model, input_text, input_context, prompt):
    """Run an ``LLMChain`` built from ``prompt`` and ``model`` on the inputs.

    Args:
        model: LLM object handed to the chain as ``llm``.
        input_text: Value supplied for the ``input_text`` chain input.
        input_context: Value supplied for the ``input_context`` chain input.
        prompt: Prompt object handed to the chain.

    Returns:
        Whatever the chain call returns; the chain is configured with
        ``output_key="answer"``.
    """
    print('Generating answer...')
    chain_inputs = {"input_text": input_text, "input_context": input_context}
    answer_chain = LLMChain(llm=model, prompt=prompt, output_key="answer")
    return answer_chain(chain_inputs)

def load_llm(model_name, api_token=None):
    """Create the ``HuggingFaceHub`` LLM wrapper for ``model_name``.

    The Hugging Face API token is never stored in the notebook: it is taken
    from ``api_token`` when given, otherwise from the
    ``HUGGINGFACEHUB_API_TOKEN`` environment variable.

    Args:
        model_name: Hub repository id passed as ``repo_id``.
        api_token: Optional token overriding the environment variable.

    Returns:
        The configured ``HuggingFaceHub`` instance.

    Raises:
        ValueError: If no token was passed and the environment variable is
            unset or empty.
    """
    print('Loading LLM model...')
    model_kwargs = {"temperature": 0.1, "max_new_tokens": 200, "repetition_penalty": 2}

    # Credentials must come from the environment, not from source code.
    token = api_token or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        raise ValueError(
            "Hugging Face API token not found: set the HUGGINGFACEHUB_API_TOKEN "
            "environment variable or pass api_token to load_llm()."
        )

    model = HuggingFaceHub(repo_id=model_name, model_kwargs=model_kwargs, huggingfacehub_api_token=token)

    return model

### Instantiating methods and objects

In [None]:
# Encoder checkpoint: the same model/tokenizer pair embeds the stored questions
# (load_QA_database) and, later, the user's query (search_into_faiss).
model_name = 'microsoft/MiniLM-L12-H384-uncased'
# Hub repository of the LLM that writes the final answer (see load_llm).
# NOTE(review): the recorded run of this notebook ended with an inference API
# time-out for this model — consider whether a smaller hosted model is needed.
llm_model_name = 'tiiuae/falcon-40b-instruct'
# Alternative dataset kept for reference:
# document = load_document('./documents/mastercard_8fa6dd.html.json')
# NOTE(review): absolute Google Drive path — presumably requires Drive to be
# mounted at /content/drive before this cell runs; confirm.
document = load_document('/content/drive/MyDrive/TCC/Documents Json/nubank_4uersz.html.json')
model = load_model(model_name)
tokenizer = load_tokenizer(model_name)
# Embed every question of the document, then index those vectors for search.
embeddings, question_data, answer_data = load_QA_database(model, tokenizer, document)
faiss_index = index_into_faiss(embeddings)
llm_model = load_llm(llm_model_name)

Loading document...
Loading model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/133M [00:00<?, ?B/s]

Loading tokenizer...


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Tokenizing data...
Indexing database in faiss...
Loading LLM model...




### Main execution

In [None]:
# Interesting example queries (the same question typed with and without accents):
#   Quais sao as taxas do cartao
#   Quais são as taxas do cartao
#   Quais são as taxas do cartão

# Number of neighbours requested from the index.
result_number = 1
input_text = input('Type your question: ')

# Retrieval step: closest stored questions and their answers.
distance, nearest_neighbours = search_into_faiss(
    model, tokenizer, input_text, faiss_index, result_number
)
similar_queries, context_queries = get_data_neighbours(
    question_data, answer_data, nearest_neighbours
)

print()
print("Faiss results:")
print()

for i in range(result_number):
    print('Question:', similar_queries[i])
    print('Answer:', context_queries[i])
    print()

# The retrieved answers, one per line, form the context given to the LLM.
input_context = '\n'.join(context_queries)

# Portuguese instruction prompt; it is lower-cased before being handed to PromptTemplate.
template = """Você é um assistente de IA e analisará o contexto fornecido a seguir e com base nesse contexto informado, responderá à pergunta de forma resumida e clara.\n\nContexto: {input_context}\n\nPergunta: {input_text}\n\nResposta:"""
prompt = PromptTemplate(
    template=template.lower(),
    input_variables=["input_context", "input_text"],
)

# Generation step: ask the LLM to answer from the retrieved context.
result = generate_answer(llm_model, input_text, input_context, prompt)

print()
print('Generated Answer:')
print(result['answer'])

Type your question: Quais sao as taxas do cartao

Faiss results:

Question: quais sao as tarifas do cartao de credito nubank?
Answer: somos contra cobrar tarifas para voce cuidar do seu dinheiro. como usamos canais 100% digitais e reduzimos a burocracia e a papelada ao maximo, conseguimos repassar apenas economias, nunca gastos. a unica tarifa existente no cartao de credito nubank e para quem aderir ao programa de beneficios nubank rewards. diferentemente da maioria dos outros cartoes, essa tarifa e opcional e nos possibilita oferecermos ainda mais beneficios para voce otimizar a sua vida.e possivel acessar nossa tabela de tarifas na sessao de contratos.

Generating answer...


ValueError: Error raised by inference API: Model tiiuae/falcon-40b-instruct time out