<a href="https://colab.research.google.com/github/Vicky-Panchal/Navigatron/blob/main/navigatron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **NAVIGATRON**

In [3]:
! nvidia-smi -L


GPU 0: Tesla T4 (UUID: GPU-c1e5e000-8ac1-dfba-3b29-c17154284e6c)


**Installs**

In [2]:
%%time

from IPython.display import clear_output

! pip install sentence_transformers==2.2.2

! pip install -qq -U langchain
! pip install -qq -U tiktoken
! pip install -qq -U pypdf
! pip install -qq -U faiss-gpu
! pip install -qq -U InstructorEmbedding

! pip install -qq -U transformers
! pip install -qq -U accelerate
! pip install -qq -U bitsandbytes

clear_output()

CPU times: user 843 ms, sys: 125 ms, total: 968 ms
Wall time: 2min 7s


**Imports**

In [4]:
import warnings
warnings.filterwarnings("ignore")

import os
import glob
import textwrap
import time

import langchain

# loaders
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader

# splits
from langchain.text_splitter import RecursiveCharacterTextSplitter

# prompts
from langchain import PromptTemplate, LLMChain

# vector stores
from langchain.vectorstores import FAISS

# models
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings


# retrievers
from langchain.chains import RetrievalQA

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

print('langchain:', langchain.__version__)
print('torch:', torch.__version__)
print('transformers:', transformers.__version__)


langchain: 0.1.17
torch: 2.2.1+cu121
transformers: 4.40.1


In [5]:
sorted(glob.glob('/content/NLP_Project/*'))

['/content/NLP_Project/Harry Potter - Book 1 - The Sorcerers Stone.pdf',
 '/content/NLP_Project/Harry Potter - Book 2 - The Chamber of Secrets.pdf',
 '/content/NLP_Project/Harry Potter - Book 3 - The Prisoner of Azkaban.pdf',
 '/content/NLP_Project/Harry Potter - Book 4 - The Goblet of Fire.pdf',
 '/content/NLP_Project/Harry Potter - Book 5 - The Order of the Phoenix.pdf',
 '/content/NLP_Project/Harry Potter - Book 6 - The Half-Blood Prince.pdf',
 '/content/NLP_Project/Harry Potter - Book 7 - The Deathly Hallows.pdf',
 '/content/NLP_Project/index.faiss',
 '/content/NLP_Project/index.pkl']

**CFG**


*   CFG class enables easy and organized experimentation



In [29]:
class CFG:
    # LLMs
    model_name = 'llama2-13b-chat' # wizardlm, llama2-7b-chat, llama2-13b-chat, mistral-7B
    temperature = 0
    top_p = 0.95
    repetition_penalty = 1.15

    # splitting
    split_chunk_size = 800
    split_overlap = 0

    # embeddings
    embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'

    # similar passages
    k = 6

    # paths
    PDFs_path = '/content/NLP_Project/'
    Embeddings_path =  '/content/faiss-hp-sentence-transformers'
    Output_folder = './harry-potter-vectordb'

**Define Model**

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

def get_model(model = CFG.model_name):

    print('\nDownloading model: ', model, '\n\n')

    if model == 'wizardlm':
        model_repo = 'TheBloke/wizardLM-7B-HF'

        tokenizer = AutoTokenizer.from_pretrained(model_repo)

        bnb_config = BitsAndBytesConfig(
            load_in_4bit = True,
            bnb_4bit_quant_type = "nf4",
            bnb_4bit_compute_dtype = torch.float16,
            bnb_4bit_use_double_quant = True,
        )

        model = AutoModelForCausalLM.from_pretrained(
            model_repo,
            quantization_config = bnb_config,
            device_map = 'auto',
            low_cpu_mem_usage = True
        )

        max_len = 1024

    elif model == 'llama2-7b-chat':
        model_repo = 'daryl149/llama-2-7b-chat-hf'

        tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)

        bnb_config = BitsAndBytesConfig(
            load_in_4bit = True,
            bnb_4bit_quant_type = "nf4",
            bnb_4bit_compute_dtype = torch.float16,
            bnb_4bit_use_double_quant = True,
        )

        model = AutoModelForCausalLM.from_pretrained(
            model_repo,
            quantization_config = bnb_config,
            device_map = 'auto',
            low_cpu_mem_usage = True,
            trust_remote_code = True
        )

        max_len = 2048

    elif model == 'llama2-13b-chat':
        model_repo = 'daryl149/llama-2-13b-chat-hf'

        tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)

        bnb_config = BitsAndBytesConfig(
            load_in_4bit = True,
            bnb_4bit_quant_type = "nf4",
            bnb_4bit_compute_dtype = torch.float16,
            bnb_4bit_use_double_quant = True,
        )

        model = AutoModelForCausalLM.from_pretrained(
            model_repo,
            quantization_config = bnb_config,
            device_map = 'auto',
            low_cpu_mem_usage = True,
            trust_remote_code = True
        )

        max_len = 2048 # 8192

    elif model == 'mistral-7B':
        model_repo = 'mistralai/Mistral-7B-v0.1'

        tokenizer = AutoTokenizer.from_pretrained(model_repo)

        bnb_config = BitsAndBytesConfig(
            load_in_4bit = True,
            bnb_4bit_quant_type = "nf4",
            bnb_4bit_compute_dtype = torch.float16,
            bnb_4bit_use_double_quant = True,
        )

        model = AutoModelForCausalLM.from_pretrained(
            model_repo,
            quantization_config = bnb_config,
            device_map = 'auto',
            low_cpu_mem_usage = True,
        )

        max_len = 1024

    else:
        print("Not implemented model (tokenizer and backbone)")

    return tokenizer, model, max_len

In [8]:
%%time

tokenizer, model, max_len = get_model(model = CFG.model_name)

clear_output()

CPU times: user 47.5 s, sys: 1min, total: 1min 48s
Wall time: 9min 44s


In [9]:
model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 5120, padding_idx=0)
    (layers): ModuleList(
      (0-39): 40 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=5120, out_features=5120, bias=False)
          (k_proj): Linear4bit(in_features=5120, out_features=5120, bias=False)
          (v_proj): Linear4bit(in_features=5120, out_features=5120, bias=False)
          (o_proj): Linear4bit(in_features=5120, out_features=5120, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=5120, out_features=13824, bias=False)
          (up_proj): Linear4bit(in_features=5120, out_features=13824, bias=False)
          (down_proj): Linear4bit(in_features=13824, out_features=5120, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )


In [10]:
### check how Accelerate split the model across the available devices (GPUs)
model.hf_device_map

{'': 0}

🤗 **pipeline**


*   Hugging Face pipeline



In [11]:
### hugging face pipeline
pipe = pipeline(
    task = "text-generation",
    model = model,
    tokenizer = tokenizer,
    pad_token_id = tokenizer.eos_token_id,
#     do_sample = True,
    max_length = max_len,
    temperature = CFG.temperature,
    top_p = CFG.top_p,
    repetition_penalty = CFG.repetition_penalty
)

### langchain pipeline
llm = HuggingFacePipeline(pipeline = pipe)

In [12]:
llm

HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7ea8b00fcb20>)

In [13]:
%%time
### testing model, not using the harry potter books yet
### answer is not necessarily related to harry potter
query = "Give me 5 examples of cool potions and explain what they do"
llm.invoke(query)

CPU times: user 31.8 s, sys: 285 ms, total: 32.1 s
Wall time: 33.2 s


'Give me 5 examples of cool potions and explain what they do.\n\nSure thing! Here are five examples of cool potions that you might find in a fantasy world, along with their effects:\n\n1. Potion of Healing: This potion restores health to the drinker, healing wounds and injuries. It might also grant temporary immunity to future damage or disease.\n2. Potion of Strength: This potion grants the drinker increased physical strength and endurance for a short period of time, allowing them to lift heavier objects, run faster, and fight longer.\n3. Potion of Speed: This potion allows the drinker to move at incredible speeds for a short period of time, making it easier to escape danger or chase down enemies.\n4. Potion of Invisibility: This potion makes the drinker temporarily invisible, allowing them to sneak past guards, avoid detection by monsters, or steal valuable items without being caught.\n5. Potion of Flight: This potion gives the drinker the ability to fly for a short period of time, a

**🦜🔗 Langchain**


*   Multiple document retriever with LangChain



In [14]:
CFG.model_name

'llama2-13b-chat'

**Loader**

In [15]:
%%time

loader = DirectoryLoader(
    CFG.PDFs_path,
    glob="./*.pdf",
    loader_cls=PyPDFLoader,
    show_progress=True,
    use_multithreading=True
)

documents = loader.load()

100%|██████████| 7/7 [01:26<00:00, 12.30s/it]

CPU times: user 1min 25s, sys: 632 ms, total: 1min 26s
Wall time: 1min 26s





In [16]:
print(f'We have {len(documents)} pages in total')

We have 4114 pages in total


In [17]:
documents[8].page_content

'P a g e  | 9 Harry Potter and the Chamber of Secrets – J.K.  Rowling Harry. “You stay out of your aunt’s way while she’s \ncleaning.” \nHarry left through the back door. It was a brilliant, \nsunny day. He crossed the lawn, slumped down on \nthe garden bench, and sang under his breath: \n“Happy birthday to me … happy birthday to me …” \nNo cards, no presents, and he would be spending the \nevening pretending not to exist. He gazed miserably \ninto the hedge. He had never felt so lonely. More than \nanything else at Hogwarts, more even than playing \nQuidditch, Harry missed his best friends, Ron \nWeasley and Hermione Granger. They, however, didn’t \nseem to be missing him at all. Neither of them had \nwritten to him all summer, even though Ron had said \nhe was going to ask Harry to come and stay. \nCountless times, Harry had been on the point of \nunlocking Hedwig’s cage by magic and sending her to \nRon and Hermione with a letter, but it wasn’t worth \nthe risk. Underage wizards wer

**Splitter**


*   Splitting the text into chunks so its passages are easily searchable for similarity



In [18]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = CFG.split_chunk_size,
    chunk_overlap = CFG.split_overlap
)

texts = text_splitter.split_documents(documents)

print(f'We have created {len(texts)} chunks from {len(documents)} pages')

We have created 10519 chunks from 4114 pages


**Create Embeddings**


*   If you use Chroma vector store it will take ~35 min to create embeddings
*   If you use FAISS vector store on GPU it will take just ~3 min

We need to create the embeddings only once, and then we can just load the vector store and query the database using similarity search.

Loading the embeddings takes only a few seconds.



In [19]:
%%time

### we create the embeddings only if they do not exist yet
if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):

    ### download embeddings model
    embeddings = HuggingFaceInstructEmbeddings(
        model_name = CFG.embeddings_model_repo,
        model_kwargs = {"device": "cuda"}
    )

    ### create embeddings and DB
    vectordb = FAISS.from_documents(
        documents = texts,
        embedding = embeddings
    )

    ### persist vector database
    vectordb.save_local(f"{CFG.Output_folder}/faiss_index_hp") # save in output folder
#     vectordb.save_local(f"{CFG.Embeddings_path}/faiss_index_hp") # save in input folder

CPU times: user 29 µs, sys: 10 µs, total: 39 µs
Wall time: 41.5 µs


**Load vector database**


In [20]:
%%time

### download embeddings model
embeddings = HuggingFaceInstructEmbeddings(
    model_name = CFG.embeddings_model_repo,
    model_kwargs = {"device": "cuda"}
)

### load vector DB embeddings
vectordb = FAISS.load_local(
    CFG.Embeddings_path,  # from input folder
    embeddings,
    allow_dangerous_deserialization=True  # Add this line to allow deserialization
)


clear_output()

CPU times: user 1.02 s, sys: 587 ms, total: 1.61 s
Wall time: 5.66 s


In [21]:
### test if vector DB was loaded correctly
vectordb.similarity_search('magic creatures')

[Document(page_content='“Magic?” he repeated in a whisper. \n“That’s right,” said Dumbledore. \n“It’s … it’s magic, what I can do?” \n“What is it that you can do?” \n“All sorts,” breathed Riddle. A flush of excitement was \nrising up his neck into his hollow cheeks; he looked \nfevered. “I can make things move without touching \nthem. I can make animals do what I want them to do, \nwithout training them. I can make bad things happen \nto people who annoy me. I can make them hurt if I \nwant to.”', metadata={'source': '/kaggle/input/harry-potter-books-in-pdf-1-7/HP books/Harry Potter - Book 6 - The Half-Blood Prince.pdf', 'page': 302}),
 Document(page_content='91"Shut up, Malfoy," said Harry quietly. Hagrid was looking downcast and\nHarry wanted Hagrid\'s first lesson to be a success.\n"Righ\' then," said Hagrid, who seemed to have lost his thread, "so -- so\nyeh\'ve got yer books an\' -- an\' - - now yeh need the Magical Creatures.Yeah. So I\'ll go an\' get \'em. Hang on... "\nHe strod

**Prompt Template**

In [22]:
prompt_template = """
Don't try to make up an answer, if you don't know just say that you don't know.
Answer in the same language the question was asked.
Use only the following pieces of context to answer the question at the end.

{context}

Question: {question}
Answer:"""


PROMPT = PromptTemplate(
    template = prompt_template,
    input_variables = ["context", "question"]
)

**Retriever chain**


*   Retriever to retrieve relevant passages
*   Chain to answer questions
*   [RetrievalQA: Chain for question-answering](https://python.langchain.com/docs/modules/data_connection/retrievers/)





In [23]:
class CFG:
    # Other attributes...
    k = 6 # Or whatever value you need

# Assuming the rest of your code is unchanged
retriever = vectordb.as_retriever(search_kwargs={"k": CFG.k, "search_type": "similarity"})

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # map_reduce, map_rerank, stuff, refine
    retriever=retriever,
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,
    verbose=False
)

In [24]:
### testing MMR search
question = "Which are Hagrid's favorite animals?"
vectordb.max_marginal_relevance_search(question, k = CFG.k)

[Document(page_content='would warn Hagrid myself, but I am  banished — it would be unwise \nfor me to go too near the forest now — Hagrid has troubles enough, \nwithout a centaurs’ battle.” \n“But — what’s Hagrid attempting to do?” said Harry nervously. \nFirenze looked at Harry impassively. \n“Hagrid has recently rendered me a great service,” said Firenze,', metadata={'source': '/kaggle/input/harry-potter-books-in-pdf-1-7/HP books/Harry Potter - Book 5 - The Order of the Phoenix.pdf', 'page': 619}),
 Document(page_content="wisely. Behind him, Buckbeak spat a few ferret bones onto Hagrid'spillow.", metadata={'source': '/kaggle/input/harry-potter-books-in-pdf-1-7/HP books/Harry Potter - Book 3 - The Prisoner of Azkaban.pdf', 'page': 228}),
 Document(page_content='says Draco Malfoy, a fourth-year student. “We all hate Hagrid, but we’re just too scared to say \nanything.” \nHagrid has no intention of ceasing his campaign \nof intimidation, however. In conversation with a \nDaily Prophet  

In [25]:
### testing similarity search
question = "Which are Hagrid's favorite animals?"
vectordb.similarity_search(question, k = CFG.k)

[Document(page_content='would warn Hagrid myself, but I am  banished — it would be unwise \nfor me to go too near the forest now — Hagrid has troubles enough, \nwithout a centaurs’ battle.” \n“But — what’s Hagrid attempting to do?” said Harry nervously. \nFirenze looked at Harry impassively. \n“Hagrid has recently rendered me a great service,” said Firenze,', metadata={'source': '/kaggle/input/harry-potter-books-in-pdf-1-7/HP books/Harry Potter - Book 5 - The Order of the Phoenix.pdf', 'page': 619}),
 Document(page_content="Harry could sort of see what Hagrid meant. Once you got over the first\nshock of seeing something that was, half horse, half bird, you startedto appreciate the hippogriffs' gleaming coats, changing smoothly fromfeather to hair, each of them a different color: stormy gray, bronze,", metadata={'source': '/kaggle/input/harry-potter-books-in-pdf-1-7/HP books/Harry Potter - Book 3 - The Prisoner of Azkaban.pdf', 'page': 91}),
 Document(page_content='CHAPTER  THIRTEEN \n\

**Post-process outputs**

In [26]:
def wrap_text_preserve_newlines(text, width=700):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text


def process_llm_response(llm_response):
    ans = wrap_text_preserve_newlines(llm_response['result'])

    sources_used = ' \n'.join(
        [
            source.metadata['source'].split('/')[-1][:-4]
            + ' - page: '
            + str(source.metadata['page'])
            for source in llm_response['source_documents']
        ]
    )

    ans = ans + '\n\nSources: \n' + sources_used
    return ans

In [27]:
def llm_ans(query):
    start = time.time()

    llm_response = qa_chain.invoke(query)
    ans = process_llm_response(llm_response)

    end = time.time()

    time_elapsed = int(round(end - start, 0))
    time_elapsed_str = f'\n\nTime elapsed: {time_elapsed} s'
    return ans + time_elapsed_str

**Ask questions**

*   Question Answering from multiple documents
*   Invoke QA Chain
*   Talk to your data



In [30]:
CFG.model_name

'llama2-13b-chat'

In [31]:
query = "Which challenges does Harry face during the Triwizard Tournament?"
print(llm_ans(query))


Don't try to make up an answer, if you don't know just say that you don't know.
Answer in the same language the question was asked.
Use only the following pieces of context to answer the question at the end.

“Speak for yourself,” said George  shortly. “You’ll try and get in,
won’t you, Harry?”
Harry thought briefly of Dumble dore’s insistence that nobody
under seventeen should submit th eir name, but then the wonder-
ful picture of himself winning the Triwizard Tournament filled his

whether it had anything to do with entering the Triwizard Tournament.
As Harry watched, George shook his head at Fred, scratched out
something with his quill, and said, in a very quiet voice that never-

Harry asked. “Thought any more about trying to enter?”
“I asked McGonagall how the champions are chosen but she

P a g e  | 117 Harry Potter and the Chamber of Secrets – J. K. Rowling “Whassamatter?” said Harry groggily.
“Quidditch practice!” said Wood. “Come on!”
Harry squinted at the window. There was 

In [32]:
query = "Is Malfoy an ally of Voldemort?"
print(llm_ans(query))


Don't try to make up an answer, if you don't know just say that you don't know.
Answer in the same language the question was asked.
Use only the following pieces of context to answer the question at the end.

infatuation with the Dark Arts, but now the tiniest
drop of pity mingled with his dislike. Where, Harry
wondered, was Malfoy now, and what was Voldemort
making him do under threat of killing him and his
parents?
Harry’s thoughts were interrupted by a nudge in the
ribs from Ginny. Professor McGonagall had risen to

ter directed toward finding and aiding your master?”
“My Lord, I was constantly on th e alert,” came Lucius Malfoy’s
voice swiftly from beneath the h ood. “Had there been any sign
from you, any whisper of your wh ereabouts, I would have been at
your side immediately, nothin g could have prevented me —”
“And yet you ran from my Mark, when a faithful Death Eater
sent it into the sky last summer?” said Voldemort lazily, and Mr.
Malfoy stopped talking abruptly. “Yes, I know

In [33]:
query = "What are horcrux?"
print(llm_ans(query))


Don't try to make up an answer, if you don't know just say that you don't know.
Answer in the same language the question was asked.
Use only the following pieces of context to answer the question at the end.

It was very well done, thought Harry, the hesitancy,
the casual tone, the careful flattery, none of it
overdone. He, Harry, had had too much experience of
trying to wheedle information out of reluctant people
not to recognize a master at work. He could tell that
Riddle wanted the information very, very much;
perhaps had been working toward this moment for
weeks.
“Well,” said Slughorn, not looking at Riddle, but
fiddling with the ribbon on top of his box of
crystalized pineapple, “well, it can’t hurt to give you
an overview, of course. Just so that you understand
the term. A Horcrux is the word used for an object in
which a person has concealed part of their soul.”
“I don’t quite understand how that works, though,
sir,” said Riddle.

low voice, as they stood in the deserted, snowy

In [34]:
query = "Give me 5 examples of cool potions and explain what they do"
print(llm_ans(query))


Don't try to make up an answer, if you don't know just say that you don't know.
Answer in the same language the question was asked.
Use only the following pieces of context to answer the question at the end.

“Ah, yes, Professor McGonagall did mention … not to
worry, my dear boy, not to worry at all. You can use
ingredients from the store cupboard today, and I’m
sure we can lend you some scales, and we’ve got a
small stock of old books here, they’ll do until you can
write to Flourish and Blotts. …”
Slughorn strode over to a corner cupboard and, after
a moment’s foraging, emerged with two very battered-
looking copies of Advanced Potion-Making  by Libatius
Borage, which he gave to Harry and Ron along with
two sets of tarnished scales.
“Now then,” said Slughorn, returning to the front of
the class and inflating his already bulging chest so
that the buttons on his waistcoat threatened to burst
off, “I’ve prepared a few potions for you to have a look

P a g e  | 205 Harry Potter and the H

**Gradio Chat UI**

In [35]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [36]:
! pip install --upgrade gradio -qq
clear_output()

In [37]:
import gradio as gr
print(gr.__version__)

4.28.3


In [38]:
def predict(message, history):
    # output = message # debug mode

    output = str(llm_ans(message)).replace("\n", "<br/>")
    return output

demo = gr.ChatInterface(
    predict,
    title = f' Open-Source LLM ({CFG.model_name}) for Harry Potter Question Answering'
)

demo.queue()
demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://9bf1675a7fc9141553.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


