# Initialize Model

In [1]:
from package import embedding, llm, agent_llm

# Import lib

In [2]:
from langchain.document_loaders import PyPDFLoader, PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Qdrant
from operator import itemgetter
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda

# Setup Environment Variables

In [3]:
from dotenv import load_dotenv

# load and store your secret API key
load_dotenv()

True

# UDF

In [4]:
def format_docs(docs):
    """Concatenate the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with a blank line between them;
        an empty string when ``docs`` is empty.
    """
    page_texts = (document.page_content for document in docs)
    return "\n\n".join(page_texts)

# Chunking

In [5]:
# Splitter settings. Note that `chunk_overlap` here is a fraction of
# `chunk_size`; it is converted to an absolute count (truncated to an int)
# when the splitter is constructed below.
chunk_size = 1000
chunk_overlap = 0.1

text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=int(chunk_overlap * chunk_size),
    chunk_size=chunk_size,
)

In [6]:
# Load the paper's pages, then cut them into chunks with the splitter
# configured earlier in the notebook.
loader = PyMuPDFLoader("https://arxiv.org/pdf/2404.19553")
docs = text_splitter.split_documents(loader.load())

In [7]:
# Number of chunks produced by splitting the PDF.
len(docs)

16

# Create vectorstore obj

In [8]:
# Index the chunks in an in-memory Qdrant collection, using the project's
# embedding model to vectorise them.
vectorstore = Qdrant.from_documents(
    docs,
    embedding,
    collection_name="extending_context_window_llama_3",
    location=":memory:",
)

In [9]:
# Expose the vector store as a retriever; no search options are passed, so
# the library defaults apply.
retriever = vectorstore.as_retriever()

# RAG-LLM

## Define Prompt

In [10]:
# Prompt for the RAG chain. It has two placeholders:
#   {context}  - the retrieved passages, already formatted as one string
#   {question} - the user's question
# The instructions tell the model to refuse rather than invent an answer when
# the context is insufficient.
RAG_TEMPLATE = """\
You are a helpful assistant. Use the available context to answer the question.
If you can't answer the question, say "I don't have enough information" and don't make your own answer.
Discard irrelevant information.

CONTEXT:
{context}

QUESTION:
{question}
"""

rag_prompt = PromptTemplate.from_template(RAG_TEMPLATE)

## Define Chain with LCEL

In [11]:
# RAG chain taking a dict input of the form {"question": ...}.
# The two branches run on the same input: one retrieves and formats the
# context for the question, the other forwards the question unchanged.
llm_chain = (
    RunnableParallel(
        context=itemgetter("question") | retriever | format_docs,
        question=itemgetter("question"),
    )
    | rag_prompt
    | llm
)

## Debug

In [12]:
%%time
# Dict-style input: the chain extracts the text with itemgetter("question").
response = llm_chain.invoke({"question": "what does the 'context' in 'long context' refer to?"})

CPU times: total: 0 ns
Wall time: 15 s


In [13]:
# Print the chain's answer.
print(response)

Based on the provided context, I can help you with that!

The "context" in "long context" refers to the maximum number of tokens (or words) that a language model can process or understand at once. In other words, it's the length of the input text that the model can handle before making predictions or generating output.

In this specific context, there are mentions of extending the context window beyond 2 million tokens ([8]), scaling language models to 128k context ([9]), and evaluating long-context evaluation beyond 100k tokens ([17]). These references suggest that "context" is indeed related to the maximum input length that a model can handle.


In [14]:
# Rebinds `llm_chain` to a variant that takes the question as a plain string:
# RunnablePassthrough forwards the raw input both to the retriever branch and
# to the prompt's {question} slot.
llm_chain = (
    RunnableParallel(
        context=RunnablePassthrough() | retriever | format_docs,
        question=RunnablePassthrough(),
    )
    | rag_prompt
    | llm
)

In [15]:
%%time
# Plain-string input: this matches the RunnablePassthrough-based chain, which
# uses the raw input as the question.
response = llm_chain.invoke("what does the 'context' in 'long context' refer to?")

CPU times: total: 15.6 ms
Wall time: 15.3 s


In [16]:
# Print the answer from the string-input chain.
print(response)

Based on the provided context, I can help you with that!

The "context" in "long context" refers to the maximum number of tokens (or words) that a language model can process or understand at once. In other words, it's the length of the input text that the model can handle before making predictions or generating output.

In this specific context, there are mentions of extending the context window beyond 2 million tokens ([8]), scaling language models to 128k context ([9]), and evaluating long-context evaluation beyond 100k tokens ([17]). These references suggest that "context" is indeed related to the maximum input length that a model can handle.


# Retriever methods

In [17]:
# Exploratory: list the retriever's attributes to see which methods it offers.
dir(retriever)

['Config',
 'InputType',
 'OutputType',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__class_vars__',
 '__config__',
 '__custom_root_type__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__exclude_fields__',
 '__fields__',
 '__fields_set__',
 '__format__',
 '__ge__',
 '__get_validators__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__include_fields__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__json_encoder__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__or__',
 '__orig_bases__',
 '__parameters__',
 '__post_root_validators__',
 '__pre_root_validators__',
 '__pretty__',
 '__private_attributes__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__repr_args__',
 '__repr_name__',
 '__repr_str__',
 '__rich_repr__',
 '__ror__',
 '__schema_cache__',
 '__setattr__',
 '__setstate__',
 '__signature__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__try_update_forward_ref

In [26]:
# Sample question reused by the retrieval calls that follow.
query = "what does the 'context' in 'long context' refer to?"

In [27]:
# Set k through search_kwargs: depending on the installed langchain-core
# version, extra keyword arguments given to invoke() may be dropped before
# they reach the vector store search, so `k=3` passed there does not reliably
# limit the number of results. Show the top hit.
vectorstore.as_retriever(search_kwargs={"k": 3}).invoke(query)[0]

Document(page_content='[8] Y. Ding, L. L. Zhang, C. Zhang, Y. Xu, N. Shang, J. Xu, F. Yang, and M. Yang. Longrope:\nExtending llm context window beyond 2 million tokens, 2024.\n[9] Y. Fu, R. Panda, X. Niu, X. Yue, H. Hajishirzi, Y. Kim, and H. Peng. Data engineering for\nscaling language models to 128k context, 2024.\n[10] D. Hendrycks, C. Burns, S. Basart, A. Zou, M. Mazeika, D. Song, and J. Steinhardt. Measuring\nmassive multitask language understanding, 2021.\n[11] A. Q. Jiang, A. Sablayrolles, A. Mensch, C. Bamford, D. S. Chaplot, D. de las Casas, F. Bressand,\nG. Lengyel, G. Lample, L. Saulnier, L. R. Lavaud, M.-A. Lachaux, P. Stock, T. L. Scao, T. Lavril,\nT. Wang, T. Lacroix, and W. E. Sayed. Mistral 7b, 2023.\n[12] D. Li*, R. Shao*, A. Xie, Y. Sheng, L. Zheng, J. E. Gonzalez, I. Stoica, X. Ma, , and H. Zhang.\nHow long can open-source llms truly promise on context length?, June 2023.\n[13] OpenAI. Gpt-4 technical report, 2024.', metadata={'source': 'https://arxiv.org/pdf/2404.1

In [25]:
# Legacy entry point, kept for comparison with invoke().
# NOTE(review): newer langchain-core releases deprecate
# get_relevant_documents() in favour of invoke() - confirm against the
# installed version.
# k is configured via search_kwargs because a `k=` keyword passed directly to
# this method may be ignored by the retriever. Show the top hit.
vectorstore.as_retriever(search_kwargs={"k": 3}).get_relevant_documents(query)[0]

Document(page_content='[8] Y. Ding, L. L. Zhang, C. Zhang, Y. Xu, N. Shang, J. Xu, F. Yang, and M. Yang. Longrope:\nExtending llm context window beyond 2 million tokens, 2024.\n[9] Y. Fu, R. Panda, X. Niu, X. Yue, H. Hajishirzi, Y. Kim, and H. Peng. Data engineering for\nscaling language models to 128k context, 2024.\n[10] D. Hendrycks, C. Burns, S. Basart, A. Zou, M. Mazeika, D. Song, and J. Steinhardt. Measuring\nmassive multitask language understanding, 2021.\n[11] A. Q. Jiang, A. Sablayrolles, A. Mensch, C. Bamford, D. S. Chaplot, D. de las Casas, F. Bressand,\nG. Lengyel, G. Lample, L. Saulnier, L. R. Lavaud, M.-A. Lachaux, P. Stock, T. L. Scao, T. Lavril,\nT. Wang, T. Lacroix, and W. E. Sayed. Mistral 7b, 2023.\n[12] D. Li*, R. Shao*, A. Xie, Y. Sheng, L. Zheng, J. E. Gonzalez, I. Stoica, X. Ma, , and H. Zhang.\nHow long can open-source llms truly promise on context length?, June 2023.\n[13] OpenAI. Gpt-4 technical report, 2024.', metadata={'source': 'https://arxiv.org/pdf/2404.1

In [19]:
# Exploratory: list the attributes of the vector store behind the retriever.
dir(retriever.vectorstore)

['CONTENT_KEY',
 'METADATA_KEY',
 'VECTOR_NAME',
 '__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_aembed_query',
 '_aembed_texts',
 '_agenerate_rest_batches',
 '_asimilarity_search_with_relevance_scores',
 '_build_condition',
 '_build_payloads',
 '_cosine_relevance_score_fn',
 '_document_from_scored_point',
 '_embed_query',
 '_embed_texts',
 '_embeddings',
 '_embeddings_function',
 '_euclidean_relevance_score_fn',
 '_generate_clients',
 '_generate_rest_batches',
 '_get_retriever_tags',
 '_max_inner_product_relevance_score_fn',
 '_qdrant_filter_from_dict',
 '_select_relevance_score_fn',
 '_similarity_

In [23]:
# Query the vector store directly, asking for 3 hits together with their
# scores, and show the top one.
retriever.vectorstore.similarity_search_with_score(query, k=3)[0]

(Document(page_content='[8] Y. Ding, L. L. Zhang, C. Zhang, Y. Xu, N. Shang, J. Xu, F. Yang, and M. Yang. Longrope:\nExtending llm context window beyond 2 million tokens, 2024.\n[9] Y. Fu, R. Panda, X. Niu, X. Yue, H. Hajishirzi, Y. Kim, and H. Peng. Data engineering for\nscaling language models to 128k context, 2024.\n[10] D. Hendrycks, C. Burns, S. Basart, A. Zou, M. Mazeika, D. Song, and J. Steinhardt. Measuring\nmassive multitask language understanding, 2021.\n[11] A. Q. Jiang, A. Sablayrolles, A. Mensch, C. Bamford, D. S. Chaplot, D. de las Casas, F. Bressand,\nG. Lengyel, G. Lample, L. Saulnier, L. R. Lavaud, M.-A. Lachaux, P. Stock, T. L. Scao, T. Lavril,\nT. Wang, T. Lacroix, and W. E. Sayed. Mistral 7b, 2023.\n[12] D. Li*, R. Shao*, A. Xie, Y. Sheng, L. Zheng, J. E. Gonzalez, I. Stoica, X. Ma, , and H. Zhang.\nHow long can open-source llms truly promise on context length?, June 2023.\n[13] OpenAI. Gpt-4 technical report, 2024.', metadata={'source': 'https://arxiv.org/pdf/2404.