In [3]:
# ! pip install langchain tiktoken pypdf faiss-gpu
# ! pip install transformers InstructorEmbedding sentence_transformers
# ! pip install accelerate bitsandbytes xformers einops

In [4]:
! nvidia-smi -L

GPU 0: NVIDIA GeForce RTX 3060 (UUID: GPU-cb28bdfd-369b-2760-c83c-ec4bb5b3e433)
GPU 1: NVIDIA GeForce RTX 3060 (UUID: GPU-09417bfe-6780-ab5d-e066-08ff6cd5b8bf)


In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import glob
import textwrap
import time

import langchain

# loaders
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader

# splits
from langchain.text_splitter import RecursiveCharacterTextSplitter

# prompts
from langchain import PromptTemplate, LLMChain

# vector stores
from langchain.vectorstores import FAISS

# models
from langchain.llms import HuggingFacePipeline
from InstructorEmbedding import INSTRUCTOR
from langchain.embeddings import HuggingFaceInstructEmbeddings

# retrievers
from langchain.chains import RetrievalQA

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

print('LangChain:', langchain.__version__)

LangChain: 0.0.315


In [2]:
glob.glob(r"geeta/*")

['geeta/geeta.pdf']

In [3]:
class CFG:
    model_name = 'wizardlm'
    temperature = 0.25,
    top_p = 0.95
    repetition_penalty = 1.15
    
    split_chunk_size =  800
    split_overlap = 0
    
    embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'
    
    k = 3
    
    PDFs_path = "geeta/"
    Embeddings_path = "faiss_index_hp/"
    Persist_directory = "./geeta-vectordb"

In [4]:
def get_model(model = CFG.model_name):
    
    print('\nDownloading model: ', model, '\n\n')
    
    if model == 'wizardlm':
        model_repo = 'TheBloke/wizardLM-7B-HF'
        
        tokenizer = AutoTokenizer.from_pretrained(model_repo)
        
        model = AutoModelForCausalLM.from_pretrained(model_repo,
                                                     load_in_4bit=True,
                                                     device_map='auto',
                                                     torch_dtype=torch.float16,
                                                     low_cpu_mem_usage=True
                                                    )
        
        max_len = 1024
        
    elif model == 'bloom':
        model_repo = 'bigscience/bloom-7b1'
        
        tokenizer = AutoTokenizer.from_pretrained(model_repo)

        model = AutoModelForCausalLM.from_pretrained(
            model_repo,
            load_in_4bit=True,
            device_map='auto',
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
        )
        
        max_len = 1024
        
    elif model == 'falcon':
        model_repo = 'h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2'
        
        tokenizer = AutoTokenizer.from_pretrained(model_repo)

        model = AutoModelForCausalLM.from_pretrained(
            model_repo,
            load_in_4bit=True,
            device_map='auto',
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            trust_remote_code=True
        )
        
        max_len = 1024

    else:
        print("Not implemented model (tokenizer and backbone)")

    return tokenizer, model, max_len

In [5]:
%%time

tokenizer, model, max_len = get_model(model=CFG.model_name)


Downloading model:  wizardlm 




Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

CPU times: user 12 s, sys: 5.84 s, total: 17.8 s
Wall time: 55.9 s


In [6]:
model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32001, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )


In [7]:
model.hf_device_map

{'model.embed_tokens': 0,
 'model.layers.0': 0,
 'model.layers.1': 0,
 'model.layers.2': 0,
 'model.layers.3': 0,
 'model.layers.4': 0,
 'model.layers.5': 0,
 'model.layers.6': 0,
 'model.layers.7': 0,
 'model.layers.8': 0,
 'model.layers.9': 0,
 'model.layers.10': 0,
 'model.layers.11': 0,
 'model.layers.12': 0,
 'model.layers.13': 1,
 'model.layers.14': 1,
 'model.layers.15': 1,
 'model.layers.16': 1,
 'model.layers.17': 1,
 'model.layers.18': 1,
 'model.layers.19': 1,
 'model.layers.20': 1,
 'model.layers.21': 1,
 'model.layers.22': 1,
 'model.layers.23': 1,
 'model.layers.24': 1,
 'model.layers.25': 1,
 'model.layers.26': 1,
 'model.layers.27': 1,
 'model.layers.28': 1,
 'model.layers.29': 1,
 'model.layers.30': 1,
 'model.layers.31': 1,
 'model.norm': 1,
 'lm_head': 1}

In [8]:
pipe = pipeline(
    task = "text-generation",
    model = model,
    tokenizer = tokenizer,
    pad_token_id = tokenizer.eos_token_id,
    max_length = max_len,
    temperature = CFG.temperature,
    top_p = CFG.top_p,
    repetition_penalty = CFG.repetition_penalty
)

llm = HuggingFacePipeline(pipeline = pipe)

In [9]:
llm

HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7fcd70ba10c0>)

In [10]:
%%time
### testing model, not using the harry potter books yet
### answer is not necessarily related to harry potter
query = "Give me 5 examples of cool potions and explain what they do"
print(llm(query))

.
1. Fire Potion: This potion gives the drinker the ability to control fire. It is made by boiling a combination of flammable ingredients together for several hours until it turns into a deep red liquid with a strong, pungent odor. When consumed, the drinker's body becomes surrounded in an aura of fire, making them nearly invulnerable to harm. However, the downside is that the user's emotions become highly unstable, causing them to lash out violently at anything or anyone who threatens their personal space.
2. Ice Potion: This potion freezes whatever it comes into contact with. It is made by combining a mixture of ice and water with a few drops of a rare plant extract. When drunk, the potion causes the drinker's skin to turn blue and their breath to fog up in the air. The user can then use this power to create an icy shield or even freeze opponents in place. However, like the fire potion, the user's emotions become highly unstable, causing them to become extremely cold and distant towa

In [11]:
%%time

loader = DirectoryLoader(
    CFG.PDFs_path,
    glob = './*.pdf',
    loader_cls=PyPDFLoader,
    show_progress=True,
    use_multithreading=True
    )

documents = loader.load()

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:10<00:00, 10.15s/it]

CPU times: user 8.15 s, sys: 337 ms, total: 8.49 s
Wall time: 10.2 s





In [12]:
print(f"We have {len(documents)} pages in total.")

We have 1051 pages in total.


In [13]:
print(documents[800].page_content)

Copyright © 1998 The Bhaktivedanta Book Trust Int'l. All Rights Reserved.eka-stham —situated in one; anupaçyati —one tries to see through authority;
tataù eva —thereafter; ca—also; vistäram —the expansion; brahma —the
Absolute; sampadyate —he attains; tadä—at th at time.
TRANSLATION
When a sensible man ceases to s ee different identities due to different
material bodies and he sees how beings  are expanded everywhere, he attains to
the Brahman conception.
PURPORT
When one can see that the various bodies  of living entities arise due to the
different desires of the individual soul and do not actually belong to the soul
itself, one actually sees. In the material conception of life, we find someone a
demigod, someone a  human being, a dog, a cat, etc. This is material vision, not
actual vision. This material differentiat ion is due to a material conception of
life. After the destruction of the material body, the spirit soul is one. The spirit
soul, due to contact with mat erial nature, ge

In [14]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = CFG.split_chunk_size,
    chunk_overlap = CFG.split_overlap
    )

texts = text_splitter.split_documents(documents)

print(f'We have craeted {len(texts)} chunks from {len(documents)} pages')

We have craeted 2774 chunks from 1051 pages


In [15]:
# %%time

# ### download embeddings model
# embeddings = HuggingFaceInstructEmbeddings(
#     model_name = CFG.embeddings_model_repo,
#     model_kwargs = {"device": "cuda"}
# )

# ### create embeddings and DB
# vectordb = FAISS.from_documents(
#     documents = texts, 
#     embedding = embeddings
# )

# ### persist vector database
# vectordb.save_local("faiss_index_hp")

load INSTRUCTOR_Transformer
max_seq_length  512
CPU times: user 7.99 s, sys: 188 ms, total: 8.17 s
Wall time: 8.05 s


In [16]:
%%time

### download embeddings model
embeddings = HuggingFaceInstructEmbeddings(
    model_name = CFG.embeddings_model_repo,
    model_kwargs = {"device": "cuda"}
)

### load vector DB embeddings
vectordb = FAISS.load_local(
    CFG.Embeddings_path,
    embeddings
)

load INSTRUCTOR_Transformer
max_seq_length  512
CPU times: user 312 ms, sys: 77.3 ms, total: 389 ms
Wall time: 215 ms


In [17]:
### test if vector DB was loaded correctly
vectordb.similarity_search('magic creatures')

[Document(page_content='becomes phantasmagoria, and the senses  are like serpents’ teeth that are', metadata={'source': 'geeta/geeta.pdf', 'page': 994}),
 Document(page_content='also in Him, and because they are condit ioned, they are averse to serving the\nSupreme Lord. Thus they are not allowed to enter into the spiritual sky. But\nwith the coming forth of material nature these living entities are ag ain given a\nchance to act in the material world an d prepare themselves to enter into the\nspiritual world. That is the mystery of  this material creation. Actually the\nliving entity is originally the spiritual part and parcel of the Supreme Lord, but\ndue to his rebellious nature, he is conditioned within material nature. It really\ndoes not matter how these living entities or superior entities of the Supreme\nLord have come in contact with mate rial nature. The Supreme Personality of\nGodhead knows, however, how and why this actually took place. In the', metadata={'source': 'geeta/ge

In [54]:
prompt_template = """
Give the answers as you are a spiritual speaker. Calmness should reflect in words.
Use only the following pieces of context to answer the question at the end.
You can make up the factual answer only if asked out of the context.
{context}

Question: {question}
Answer:"""


PROMPT = PromptTemplate(
    template = prompt_template, 
    input_variables = ["context", "question"]
)

In [55]:
# llm_chain = LLMChain(prompt=PROMPT, llm=llm)
# llm_chain

In [56]:
retriever = vectordb.as_retriever(search_kwargs = {"k": CFG.k, "search_type" : "similarity"})

qa_chain = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff", # map_reduce, map_rerank, stuff, refine
    retriever = retriever, 
    chain_type_kwargs = {"prompt": PROMPT},
    return_source_documents = True,
    verbose = False
)

In [57]:
### testing MMR search
question = "What is the meaning of life?"
vectordb.max_marginal_relevance_search(question, k = CFG.k)

[Document(page_content='a blissful eternal life of knowledge.\nTEXT  24', metadata={'source': 'geeta/geeta.pdf', 'page': 790}),
 Document(page_content='the problems of life. In the Båhad-äraëyaka Upaniñad  (3.8.10 ) the perplexed\nman is described as follows: yo vä etad akñaraà gärgy aviditväsmäû lokät praiti sa\nkåpaëaù.  “He is a miser ly man who does not solve the problems of life as a\nhuman and who thus quits this world like the cats and dogs, without\nunderstanding the science of self-realization.” This human form of life is a\nmost valuable asset for the living entity who can utilize i t for solving the\nproblems of life; therefore, one who does not utilize this opportunity properly', metadata={'source': 'geeta/geeta.pdf', 'page': 100}),
 Document(page_content='factually, although the demons say that life is a dream, they are very expert in\nenjoying this dream. And so, instead of acquiring knowledge, they become\nmore and more implicated in their dreaml and. They conclude that 

In [58]:
### testing similarity search
question = "Which are Hagrid's favorite animals?"
vectordb.similarity_search(question, k = CFG.k)

[Document(page_content="Copyright © 1998 The Bhaktivedanta Book Trust Int'l. All Rights Reserved.prahlädaç cäsmi daityänäà\nkälaù kalayatäm aham\nmågäëäà ca mågendro ’haà\nvainateyaç ca pakñiëäm\nSYNONYMS\nprahlädaù —Prahläda; ca—also; asmi —I am; daityänäm —of the demons;\nkälaù —time; kalayatäm —of subduers; aham —I am; mågäëäm —of animals;\nca—and; måga-indraù —the lion; aham —I am; vainateyaù —Garuòa; ca—also;\npakñiëäm —of birds.\nTRANSLATION\nAmong the Daitya demons I am the de voted Prahläda, among subduers I am\ntime, among beasts I am the lion, and among birds I am Garuòa.\nPURPORT\nDiti and Aditi are two sisters. The sons  of Aditi are called Ädityas, and the\nsons of Diti are called Daityas. All th e Ädityas are devotees of the Lord, and\nall the Daityas are atheistic. Although Pr ahläda was born in the family of the", metadata={'source': 'geeta/geeta.pdf', 'page': 642}),
 Document(page_content="Copyright © 1998 The Bhaktivedanta Book Trust Int'l. All Rights Reserved.Yakñas 

In [59]:
def wrap_text_preserve_newlines(text, width=700):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text


def process_llm_response(llm_response):
    ans = wrap_text_preserve_newlines(llm_response['result'])
    
#     sources_used = ' \n'.join(
#         [
#             source.metadata['source'].split('/')[-1][:-4] #+ ' - page: ' + str(source.metadata['page'])
#             for source in llm_response['source_documents']
#         ]
#     )
    
    ans = ans #+ '\n\nSources: \n' + sources_used
    return ans

In [60]:
def llm_ans(query):
    start = time.time()
    llm_response = qa_chain(query)
    ans = process_llm_response(llm_response)
    end = time.time()

    time_elapsed = int(round(end - start, 0))
    time_elapsed_str = f'\n\nTime elapsed: {time_elapsed} s'
    return ans + time_elapsed_str

In [61]:
CFG.model_name

'wizardlm'

In [62]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32001, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )


In [63]:
query = "What is the meaning of life"
print(llm_ans(query))

 The meaning of life is to achieve self-realization or enlightenment through the process of yoga.

Time elapsed: 3 s


In [64]:
query = "How to live a happy life?"
print(llm_ans(query))

 By practicing detachment from material happiness and distress, and
engaging oneself in devotional service to Lord Krishna.

Time elapsed: 5 s


In [65]:
query = ""
while query != "quit":
    query = input("User: ")
    print("bot: ",llm_ans(query))

User: I am not happy, but why?
bot:   You have forgotten your true self and gotten lost in the world of
material objects. To find happiness again, focus on your inner Self and let go
of attachments to external things. Remember that all things are temporary and
changing, so it's important to maintain equanimity and balance in life.

Time elapsed: 9 s
User: quit
bot:   Refrain from doing something.

Time elapsed: 2 s
