In [1]:
## comment
## Agent + LLM + RAG + PromptTemplate 

In [2]:
## install
# !pip install langchain deeplake openai transformers sentencepiece sentence-transformers python-dotenv
# !pip install --upgrade openai
# /bin/bash: pip cache purge
# /bin/bash: conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
# /bin/bash: pip uninstall -y langchain openai
# /bin/bash: pip install langchain==0.0.316 openai==0.28.1
# /bin/bash: pip freeze > requirements.txt

In [3]:
## import
from dotenv import load_dotenv
load_dotenv()
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import DeepLake
from langchain.chains import RetrievalQA, LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.schema import BaseRetriever, Document
from langchain import PromptTemplate
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer 
from typing import List
from langchain.agents import Tool, initialize_agent, AgentType
from langchain.chat_models import ChatOpenAI  # Use ChatOpenAI for chat-based models

In [4]:
## class, function, global-variable

## Custom Retriever that limits the number of documents returned
class CustomRetriever:
    def __init__(self, retriever, limit=1):
        self.retriever = retriever
        self.limit = limit

    def get_relevant_documents(self, query: str) -> List[Document]:
        try:
            # Retrieve documents and limit the number returned
            docs = self.retriever.get_relevant_documents(query)
            return docs[:self.limit]
        except ValueError as e:
            print(f"Error in CustomRetriever: {str(e)}")
            raise e

    async def aget_relevant_documents(self, query: str) -> List[Document]:
        try:
            # Asynchronous retrieval (if needed)
            docs = await self.retriever.aget_relevant_documents(query)
            return docs[:self.limit]
        except ValueError as e:
            print(f"Error in CustomRetriever: {str(e)}")
            raise e
    
## Open-source embeddings using SentenceTransformer
class CustomEmbeddings:
    def __init__(self):
        super(CustomEmbeddings, self).__init__()
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        
    def embed_documents(self, texts):
        return self.embedding_model.encode(texts, convert_to_tensor=False).tolist()

    def embed_query(self, text):
        return self.embedding_model.encode(text, convert_to_tensor=False).tolist()
    
## return context based on question, file resource, and limit
def get_context(question, filename, limit):
    ## documents
    loader = TextLoader(filename, encoding='utf-8')
    docs_from_file = loader.load()
    ## chunks
    text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=20)
    docs = text_splitter.split_documents(docs_from_file)
    ## embeddings
    embeddings = CustomEmbeddings()
    dataset_path = './my_deeplake_dataset'
    db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings, overwrite=True)
    db.add_documents(docs)
    ## retriever
    retriever = db.as_retriever()
    custom_retriever = CustomRetriever(retriever, limit=limit)
    ## context
    retrieved_docs = custom_retriever.get_relevant_documents(question)
    context = "\n".join([doc.page_content for doc in retrieved_docs])
    return context

In [5]:
## LLM, tokenizer, pipeline
try:
    print(model)
except Exception as e:
    model_name = "tiiuae/falcon-7b"
    cache_dir = "/scratch/tmp/"
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
    model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir)
    llm_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=1, max_length=512)  
# llm = HuggingFacePipeline(pipeline=llm_pipeline) ## open source
llm=ChatOpenAI(model_name='gpt-3.5-turbo') ## proprietary



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  return self.fget.__get__(instance, owner)()


In [6]:
## run: LLM + RAG + PromptTemplate

question = 'who is the ultimate boss?'
context = get_context(question, 'my_file.txt', 1) ## RAG
template = """
You are a helpful assistant. Use the following context to answer the question very very concisely.

Context: {context}

Question: {question}

Answer:
"""
prompt = PromptTemplate(input_variables=["context", "question"], template=template) ## prompt-engineering
llm_chain = LLMChain(llm=llm, prompt=prompt) ## llm
response = llm_chain.run({"context": context, "question": question})
print('#########################################################################')
print(response)
print('#########################################################################')

Created a chunk of size 302, which is longer than the specified 200
Created a chunk of size 545, which is longer than the specified 200
Created a chunk of size 548, which is longer than the specified 200
Using embedding function is deprecated and will be removed in the future. Please use embedding instead.
Creating 4 embeddings in 1 batches of size 4:: 100%|█████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s]


Dataset(path='./my_deeplake_dataset', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype     shape     dtype  compression
  -------    -------   -------   -------  ------- 
   text       text      (4, 1)     str     None   
 metadata     json      (4, 1)     str     None   
 embedding  embedding  (4, 384)  float32   None   
    id        text      (4, 1)     str     None   
#########################################################################
Meruem
#########################################################################


In [7]:
print('#########################################################################')
print(model)
print('#########################################################################')
print(model.transformer.word_embeddings)
print(model.transformer.h[0])
print(model.transformer.ln_f)
print(model.lm_head)
print('#########################################################################')

#########################################################################
FalconForCausalLM(
  (transformer): FalconModel(
    (word_embeddings): Embedding(65024, 4544)
    (h): ModuleList(
      (0-31): 32 x FalconDecoderLayer(
        (self_attention): FalconAttention(
          (rotary_emb): FalconRotaryEmbedding()
          (query_key_value): FalconLinear(in_features=4544, out_features=4672, bias=False)
          (dense): FalconLinear(in_features=4544, out_features=4544, bias=False)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (mlp): FalconMLP(
          (dense_h_to_4h): FalconLinear(in_features=4544, out_features=18176, bias=False)
          (act): GELUActivation()
          (dense_4h_to_h): FalconLinear(in_features=18176, out_features=4544, bias=False)
        )
        (input_layernorm): LayerNorm((4544,), eps=1e-05, elementwise_affine=True)
      )
    )
    (ln_f): LayerNorm((4544,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(

In [8]:
## run: Agent + LLM + RAG + PromptTemplate

question = 'who is the ultimate boss?'
context = get_context(question, 'my_file.txt', 1) ## RAG
template = """
You are a helpful assistant. Use the following context to answer the question very concisely.

Context: {context}

Question: {question}

Answer:
"""
prompt = PromptTemplate(input_variables=['context', 'question'], template=template)
llm_chain = LLMChain(llm=llm, prompt=prompt)  # LLM setup
tools = [
    Tool(
        name='tool_1',
        description='Use this tool to generate a response given the context and question.',
        func=lambda input: llm_chain.run({
            'context': input[0],
            'question': input[1]
        }),
    ),
]
agent = initialize_agent(
    tools,
    llm,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)
response = agent.run(input=[context, question])
print('#########################################################################')
print(response)
print('#########################################################################')

Created a chunk of size 302, which is longer than the specified 200
Created a chunk of size 545, which is longer than the specified 200
Created a chunk of size 548, which is longer than the specified 200
Using embedding function is deprecated and will be removed in the future. Please use embedding instead.
Creating 4 embeddings in 1 batches of size 4:: 100%|█████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 14.48it/s]


Dataset(path='./my_deeplake_dataset', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype     shape     dtype  compression
  -------    -------   -------   -------  ------- 
   text       text      (4, 1)     str     None   
 metadata     json      (4, 1)     str     None   
 embedding  embedding  (4, 384)  float32   None   
    id        text      (4, 1)     str     None   


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use tool_1 to generate a response based on the context and question.
Action: tool_1
Action Input: meruem is the ultimate boss.[0m
Observation: [36;1m[1;3mI'm sorry, but your question is unclear. Can you please provide more information or clarify what you are asking?[0m
Thought:[32;1m[1;3mI need to provide a more specific question about who the ultimate boss is.
Action: tool_1
Action Input: who is the ultimate boss?[0m
Observation: [36;1m[1;3mI'm sorry, but it seems like the question is incomplete. Can you please provide