### Installing dependencies

In [None]:
%%writefile requirements.txt

huggingface-hub==0.19.4
langchain==0.0.348
langsmith==0.0.69
langchain-core==0.0.12

Overwriting requirements.txt


In [None]:
# Install the notebook's runtime dependencies. No versions are pinned here.
!pip install nltk
!pip install torch
!pip install datasets
!pip install langchain
!pip install faiss-cpu
# NOTE(review): both faiss-cpu and faiss-gpu are installed; presumably only one build is needed — confirm.
!pip install faiss-gpu
!pip install tensorflow
!pip install rouge-score
!pip install transformers

# Disabled alternative: install the pinned versions written to requirements.txt instead.
# !pip uninstall langchain-community -y
# !pip install -r requirements.txt

Collecting datasets
  Downloading datasets-2.17.0-py3-none-any.whl (536 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.6/536.6 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow>=12.0.0 (from datasets)
  Downloading pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (38.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.3/38.3 MB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyarrow, dill, multiprocess, datasets
  Attempting uninstall: pyarrow
    Found existi

### Importing dependencies

In [None]:
import json
import faiss
import torch
import numpy as np

from langchain.chains import LLMChain
from langchain.llms import HuggingFaceHub
# from langchain_community.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain import PromptTemplate, HuggingFaceHub, LLMChain, OpenAI, HuggingFacePipeline

from transformers import AutoTokenizer, AutoModel

### Common Functions

In [None]:
def load_model(model_name):
    """Load a pretrained model through ``AutoModel``.

    Args:
        model_name: Identifier passed unchanged to ``AutoModel.from_pretrained``.

    Returns:
        The object returned by ``AutoModel.from_pretrained``.
    """
    print('Loading model...')
    return AutoModel.from_pretrained(model_name)

def load_tokenizer(model_name):
    """Load the tokenizer for a checkpoint through ``AutoTokenizer``.

    Args:
        model_name: Identifier passed unchanged to ``AutoTokenizer.from_pretrained``.

    Returns:
        The object returned by ``AutoTokenizer.from_pretrained``.
    """
    print('Loading tokenizer...')
    return AutoTokenizer.from_pretrained(model_name)

def load_document(filepath):
    """Read a UTF-8 encoded JSON file and return its parsed content.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The Python object obtained by parsing the file's JSON text.
    """
    print('Loading document...')
    with open(filepath, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def load_QA_database(model, tokenizer, document):
    """Embed every question of an FAQ document.

    Args:
        model: Callable invoked as ``model(**tokens)``; its result must expose
            ``last_hidden_state``.
        tokenizer: Callable invoked once per question with
            ``return_tensors="pt"``, ``max_length=512`` and ``truncation=True``.
        document: Iterable of mappings with ``"question"`` and ``"answer"`` keys.

    Returns:
        A tuple ``(embeddings, question_data, answer_data)``: the per-question
        vectors stacked with ``np.vstack``, then the questions and the answers
        as lists in document order.
    """
    print('Tokenizing data...')
    qa_pairs = [(entry["question"], entry["answer"]) for entry in document]
    question_data = [question for question, _ in qa_pairs]
    answer_data = [answer for _, answer in qa_pairs]

    # All questions are tokenized first, then embedded one by one.
    encoded_questions = [
        tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
        for text in question_data
    ]

    def _embed(encoded):
        # Inference only, so gradient tracking is switched off for the forward pass.
        with torch.no_grad():
            model_output = model(**encoded)
        # Average last_hidden_state over dim 1 (presumably the token axis).
        pooled = model_output.last_hidden_state.mean(dim=1)
        return pooled.squeeze().numpy()

    embeddings = np.vstack([_embed(encoded) for encoded in encoded_questions])
    return embeddings, question_data, answer_data

def index_into_faiss(embeddings):
    """Build a flat L2 FAISS index over unit-normalised copies of the vectors.

    Args:
        embeddings: 2-D array-like of vectors, one row per item to index.

    Returns:
        A ``faiss.IndexFlatL2`` containing the L2-normalised rows of
        ``embeddings``. The caller's array is left untouched.
    """
    print('Indexing database in faiss...')
    # normalize_L2 rewrites its argument in place, so work on a private
    # float32, C-contiguous copy instead of silently changing the caller's data.
    vectors = np.array(embeddings, dtype=np.float32, order='C')
    vector_dimension = vectors.shape[1]
    index = faiss.IndexFlatL2(vector_dimension)
    faiss.normalize_L2(vectors)
    index.add(vectors)
    return index

def search_into_faiss(model, tokenizer, query, index, max_results):
    """Embed a query and look up its nearest neighbours in a FAISS index.

    Args:
        model: Callable invoked as ``model(**tokens)``; its result must expose
            ``last_hidden_state``.
        tokenizer: Callable that tokenizes ``query`` into the model's inputs.
        query: Text to search for.
        index: Object with a ``search(vectors, k)`` method, as built by
            ``index_into_faiss``.
        max_results: Number of neighbours to request.

    Returns:
        The ``(distance, nearest_neighbours)`` pair returned by ``index.search``.
    """
    query_tokens = tokenizer(query, return_tensors="pt", max_length=512, truncation=True)
    with torch.no_grad():
        hidden_states = model(**query_tokens).last_hidden_state
    query_embedding = hidden_states.mean(dim=1).squeeze().numpy()

    # One float32 row, owned by this function.
    query_vector = np.array(query_embedding, dtype=np.float32).reshape(1, -1)

    # index_into_faiss stores unit-length vectors, so the query must be
    # unit-length too; otherwise the L2 ranking depends on the query's magnitude.
    norm = np.linalg.norm(query_vector)
    if norm > 0:
        query_vector /= norm

    distance, nearest_neighbours = index.search(query_vector, max_results)

    return distance, nearest_neighbours

def get_data_neighbours(question_data, answer_data, nearest_neighbours):
    """Map neighbour ids from a search back to their questions and answers.

    Args:
        question_data: Sequence of indexed questions.
        answer_data: Sequence of answers, aligned with ``question_data``.
        nearest_neighbours: Search result ids; only the first row is read.

    Returns:
        A tuple ``(similar_queries, context_queries)`` with the questions and
        answers of the valid neighbours, in search order.
    """
    similar_queries = []
    context_queries = []

    for idx in nearest_neighbours[0]:
        # FAISS fills missing neighbours with -1 when fewer than k vectors are
        # available; indexing with -1 would silently return the last entry.
        if idx < 0:
            continue
        similar_queries.append(question_data[idx])
        context_queries.append(answer_data[idx])

    return similar_queries, context_queries

def generate_answer(model, input_text, input_context, prompt):
    """Run one prompt/LLM chain call for a question and its context.

    Args:
        model: LLM object handed to ``LLMChain`` as ``llm``.
        input_text: Value for the prompt's ``input_text`` variable.
        input_context: Value for the prompt's ``input_context`` variable.
        prompt: Prompt template handed to ``LLMChain``.

    Returns:
        The result of calling the chain; the generated text is stored under
        the ``"answer"`` output key.
    """
    print('Generating answer...')
    chain_inputs = {"input_text": input_text, "input_context": input_context}
    answer_chain = LLMChain(prompt=prompt, llm=model, output_key="answer", verbose=True)
    return answer_chain(chain_inputs)

def load_llm(model_name):
    """Load a causal language model and wrap it as a LangChain LLM.

    The model and tokenizer are both loaded from ``model_name``. The function
    no longer reads the notebook-level ``model``/``tokenizer`` globals, which
    hold the embedding model.

    Authentication for gated checkpoints must be set up beforehand (for
    example with ``notebook_login()`` or an ``HF_TOKEN`` secret). Access
    tokens must never be written into the notebook.

    Args:
        model_name: Hugging Face checkpoint identifier of the generative model.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text-generation`` pipeline,
        usable as the ``llm`` argument of ``LLMChain``.
    """
    # Imported locally: the notebook only imports these names in a later cell.
    from transformers import AutoModelForCausalLM, pipeline as hf_pipeline

    print('Loading LLM model...')
    llm_tokenizer = AutoTokenizer.from_pretrained(model_name)
    llm = AutoModelForCausalLM.from_pretrained(model_name)

    text_generator = hf_pipeline(
        "text-generation",
        model=llm,
        tokenizer=llm_tokenizer,
        max_new_tokens=200,
        # temperature/top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=0.1,
        repetition_penalty=2,
        top_p=0.95
    )

    # Return the LangChain wrapper, not the raw model, so LLMChain can call it.
    return HuggingFacePipeline(pipeline=text_generator)

### Instantiating methods and objects

In [None]:
# Checkpoint used to embed FAQ questions and user queries.
model_name = 'microsoft/MiniLM-L12-H384-uncased'
# Checkpoint name handed to load_llm; the commented line is an alternative.
# llm_model_name = 'meta-llama/Llama-2-7b-hf'
llm_model_name = 'meta-llama/Llama-2-7b-chat-hf'
# FAQ document to index; the commented lines are alternative FAQ files.
# NOTE(review): absolute path, presumably a mounted Google Drive in Colab — confirm before running elsewhere.
document = load_document('/content/drive/MyDrive/TCC/Documents Json/ferreiracosta_all_results.json')
# document = load_document('/content/drive/MyDrive/TCC/Documents Json/mastercard_8fa6dd_v2.html.json')
# document = load_document('/content/drive/MyDrive/TCC/Documents Json/nubank_4uersz.html.json')
# document = load_document('/content/drive/MyDrive/TCC/Documents Json/caixa_ummipx_v2.html.json')
# Embedding model and tokenizer; later cells pass both to search_into_faiss.
model = load_model(model_name)
tokenizer = load_tokenizer(model_name)
# Embed every FAQ question, then index the vectors for nearest-neighbour search.
embeddings, question_data, answer_data = load_QA_database(model, tokenizer, document)
faiss_index = index_into_faiss(embeddings)
# Result of load_llm for the generative checkpoint; later cells pass it to generate_answer.
llm_model = load_llm(llm_model_name)

Loading document...
Loading model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/133M [00:00<?, ?B/s]

Loading tokenizer...


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Tokenizing data...
Indexing database in faiss...
Loading LLM model...


### Main execution

In [None]:
!pip install accelerate

Collecting accelerate
  Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/280.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/280.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.27.2


In [None]:
from huggingface_hub import notebook_login
# Start the Hugging Face login prompt in the notebook.
# NOTE(review): presumably required before downloading the Llama-2 checkpoints — confirm.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# from langchain import PromptTemplate, HuggingFaceHub, LLMChain, OpenAI, HuggingFacePipeline
# from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.vectorstores import FAISS, Pinecone
# from langchain.schema import retriever
# from langchain.chains.question_answering import load_qa_chain
# from langchain.chains import RetrievalQA, ChatVectorDBChain, LLMChain, SequentialChain, TransformChain, ConversationalRetrievalChain
# from langchain.chat_models import ChatOpenAI
# from langchain.document_loaders import CSVLoader

import transformers
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Generative checkpoint. It gets its own names (`llm_checkpoint`, `llm`,
# `llm_tokenizer`) so this cell no longer overwrites `model_name` and `model`,
# which hold the embedding model that search_into_faiss() still needs.
llm_checkpoint = 'meta-llama/Llama-2-7b-chat-hf'

# Optional 4-bit loading: build a BitsAndBytesConfig and pass it to
# from_pretrained as `quantization_config=` together with `device_map="auto"`.

# The tokenizer must come from the same checkpoint as the generative model;
# the notebook-level `tokenizer` belongs to the embedding model.
llm_tokenizer = AutoTokenizer.from_pretrained(llm_checkpoint)
llm = AutoModelForCausalLM.from_pretrained(llm_checkpoint)

pipeline = transformers.pipeline(
    "text-generation",
    model=llm,
    tokenizer=llm_tokenizer,
    max_new_tokens=200,
    # temperature/top_p only take effect when sampling is enabled.
    do_sample=True,
    temperature=0.1,
    repetition_penalty=2,
    top_p=0.95
)

# Authentication: use notebook_login() or an HF_TOKEN secret / environment
# variable. Never paste an access token into the notebook, not even in a comment.

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

In [None]:
from langchain import PromptTemplate, HuggingFaceHub, LLMChain, OpenAI, HuggingFacePipeline

# Wrap the transformers pipeline so LangChain can drive it as an LLM.
# No LLMChain is built here: `prompt` is only defined in a later cell, so
# constructing a chain at this point fails on a fresh kernel. generate_answer()
# builds its own chain (output key "answer") for every call.
pipe = HuggingFacePipeline(pipeline=pipeline)

NameError: name 'pipeline' is not defined

In [None]:
# Interesting example questions to try:
#   Sobre qual banco estamos falando
#   Quais sao as taxas do cartao
#   Quais são as taxas do cartao
#   Quais são as taxas do cartão

result_number = 1
input_text = 'Sobre o que estamos falando'
# input_text = input('Type your question: ')

# Retrieve the FAQ entries closest to the question.
distance, nearest_neighbours = search_into_faiss(model, tokenizer, input_text, faiss_index, result_number)
similar_queries, context_queries = get_data_neighbours(question_data, answer_data, nearest_neighbours)

print("\nFaiss results:\n")

for i in range(result_number):
    print(f'Question: {similar_queries[i]}')
    print(f'Answer: {context_queries[i]}')
    print()

# The retrieved answers, one per line, form the context given to the LLM.
input_context = '\n'.join(context_queries)

template = """Você é um assistente de IA que analisará o contexto fornecido a seguir fornecido a partir de um FAQ e com base nesse contexto informado, responderá à pergunta de forma resumida e clara, totalmente com base no contexto informado.\n\nContexto: {input_context}\n\nPergunta: {input_text}\n\nResposta:"""

# The template text is lower-cased before it is turned into a prompt.
prompt = PromptTemplate(template=template.lower(), input_variables=["input_context", "input_text"])

result = generate_answer(llm_model, input_text, input_context, prompt)

print('\nGenerated Answer:')
print(result['answer'])


Faiss results:

Question: meu pedido esta atrasado. o que faco?
Answer: pedimos que por favor, entre em contato com um de nossos atendentes, enviando uma solicitacao por e-mail ou atraves do telefone 81 3338.8333. e nao esqueca de ter o cpf em maos. se sua compra foi realizada em nossa loja on-line, voce tambem pode acompanhar o status do seu pedido clicando em...

Generating answer...


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mvocê é um assistente de ia que analisará o contexto fornecido a seguir fornecido a partir de um faq e com base nesse contexto informado, responderá à pergunta de forma resumida e clara, totalmente com base no contexto informado.

contexto: pedimos que por favor, entre em contato com um de nossos atendentes, enviando uma solicitacao por e-mail ou atraves do telefone 81 3338.8333. e nao esqueca de ter o cpf em maos. se sua compra foi realizada em nossa loja on-line, voce tambem pode acompanhar o status do seu pedido clicando 

ValueError: Error raised by inference API: Model requires a Pro subscription; check out hf.co/pricing to learn more. Make sure to include your HF token in your query.

### Generating Metrics

In [None]:
import time
import pandas as pd
from rouge_score import rouge_scorer
import nltk.translate.bleu_score as bleu

In [None]:
result_number = 1
generated_answers = []

# The prompt does not depend on the question, so it is built once instead of
# being re-created on every iteration.
template = """Você é um assistente de IA que analisará o contexto fornecido a seguir fornecido a partir de um FAQ e com base nesse contexto informado, responderá à pergunta de forma resumida e clara, totalmente com base no contexto informado.\n\nContexto: {input_context}\n\nPergunta: {input_text}\n\nResposta:"""
prompt = PromptTemplate(template=template.lower(), input_variables=["input_context", "input_text"])

# Answer every FAQ question using its retrieved context.
for i, input_text in enumerate(question_data):
    print(f"[{i}] Question: {input_text}")
    distance, nearest_neighbours = search_into_faiss(model, tokenizer, input_text, faiss_index, result_number)
    similar_queries, context_queries = get_data_neighbours(question_data, answer_data, nearest_neighbours)
    input_context = '\n'.join(context_queries)
    result = generate_answer(llm_model, input_text, input_context, prompt)
    generated_answers.append(result['answer'])
    # One-second pause between generations, kept from the original loop.
    time.sleep(1)

In [None]:
# sentence_bleu expects a list of tokenised references and a tokenised
# hypothesis. With raw strings NLTK iterates over characters and treats every
# character of the reference as a separate reference, so the result is not a
# word-level BLEU. Both texts are therefore split into whitespace tokens.
chencherry = bleu.SmoothingFunction()
bleu.sentence_bleu(
    [answer_data[0].split()],
    generated_answers[0].split(),
    smoothing_function=chencherry.method4
)

In [None]:
# Word-level BLEU of every generated answer against its FAQ answer.
# sentence_bleu needs token lists: the reference goes in as a one-element list
# of token lists and the hypothesis as a token list. Raw strings would be
# scored character by character.
chencherry = bleu.SmoothingFunction()

bleu_score_results = [
    bleu.sentence_bleu(
        [answer_data[i].split()],
        generated_answers[i].split(),
        smoothing_function=chencherry.method4
    )
    for i in range(len(question_data))
]

In [None]:
# ROUGE-L of the first generated answer against the first FAQ answer.
scorer = rouge_scorer.RougeScorer(rouge_types=['rougeL'], use_stemmer=True)
scorer.score(target=answer_data[0], prediction=generated_answers[0])

In [None]:
# ROUGE-L F-measure of every generated answer against its FAQ answer.
scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

rouge_score_results = [
    scorer.score(answer_data[i], generated_answers[i])['rougeL'].fmeasure
    for i in range(len(question_data))
]

In [None]:
# One row per FAQ entry: the question, the reference answer, the generated
# answer and both scores.
df_data = dict(
    faq_question=question_data,
    faq_answers=answer_data,
    generated_answers=generated_answers,
    bleu_score=bleu_score_results,
    rouge_score=rouge_score_results,
)

df = pd.DataFrame(data=df_data)

In [None]:
# Preview the first rows of the results table.
df.head()

In [None]:
# Write the results table to CSV without the index column.
# NOTE(review): the file name spells "FerreiaCosta" while the loaded document is "ferreiracosta" — confirm the intended spelling.
df.to_csv('llama2-7b_FerreiaCosta.csv', index=False)