# **Import necessary libraries**

In [1]:
# Import necessary libraries
from dotenv import load_dotenv
import os
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
from langchain_text_splitters import Language, RecursiveCharacterTextSplitter
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames
from langchain_ibm import WatsonxEmbeddings
from langchain.vectorstores import Chroma
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from langchain_ibm import WatsonxLLM
from langchain.chains import RetrievalQA
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()

True

# **Task 1: Load document using LangChain for different sources**

In [3]:
# PDF URL
pdf_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/WgM1DaUn2SYPcCg_It57tA/A-Comprehensive-Review-of-Low-Rank-Adaptation-in-Large-Language-Models-for-Efficient-Parameter-Tuning-1.pdf"

In [4]:
# Load the PDF Loader
loader = PyMuPDFLoader(pdf_url)
data = loader.load()
data

[Document(metadata={'source': 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/WgM1DaUn2SYPcCg_It57tA/A-Comprehensive-Review-of-Low-Rank-Adaptation-in-Large-Language-Models-for-Efficient-Parameter-Tuning-1.pdf', 'file_path': 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/WgM1DaUn2SYPcCg_It57tA/A-Comprehensive-Review-of-Low-Rank-Adaptation-in-Large-Language-Models-for-Efficient-Parameter-Tuning-1.pdf', 'page': 0, 'total_pages': 11, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'TeX', 'producer': 'pdfTeX-1.40.26', 'creationDate': 'D:20240910215042Z', 'modDate': 'D:20240910215042Z', 'trapped': ''}, page_content='A Comprehensive Review of Low-Rank\nAdaptation in Large Language Models for\nEfficient Parameter Tuning\nSeptember 10, 2024\nAbstract\nNatural Language Processing (NLP) often involves pre-training large\nmodels on extensive datasets and then adapting them for specific tasks\nthrough fine-tuning. Ho

In [5]:
# Print number of pages
len(data)

11

In [6]:
# Print the contents of very first page
print(data[0].page_content)

A Comprehensive Review of Low-Rank
Adaptation in Large Language Models for
Efficient Parameter Tuning
September 10, 2024
Abstract
Natural Language Processing (NLP) often involves pre-training large
models on extensive datasets and then adapting them for specific tasks
through fine-tuning. However, as these models grow larger, like GPT-3
with 175 billion parameters, fully fine-tuning them becomes computa-
tionally expensive. We propose a novel method called LoRA (Low-Rank
Adaptation) that significantly reduces the overhead by freezing the orig-
inal model weights and only training small rank decomposition matrices.
This leads to up to 10,000 times fewer trainable parameters and reduces
GPU memory usage by three times. LoRA not only maintains but some-
times surpasses fine-tuning performance on models like RoBERTa, De-
BERTa, GPT-2, and GPT-3.
Unlike other methods, LoRA introduces
no extra latency during inference, making it more efficient for practical
applications.
All relevant code an

In [7]:
# Print first 1000 characters of the pdf
print(data[0].page_content[:1000])

A Comprehensive Review of Low-Rank
Adaptation in Large Language Models for
Efficient Parameter Tuning
September 10, 2024
Abstract
Natural Language Processing (NLP) often involves pre-training large
models on extensive datasets and then adapting them for specific tasks
through fine-tuning. However, as these models grow larger, like GPT-3
with 175 billion parameters, fully fine-tuning them becomes computa-
tionally expensive. We propose a novel method called LoRA (Low-Rank
Adaptation) that significantly reduces the overhead by freezing the orig-
inal model weights and only training small rank decomposition matrices.
This leads to up to 10,000 times fewer trainable parameters and reduces
GPU memory usage by three times. LoRA not only maintains but some-
times surpasses fine-tuning performance on models like RoBERTa, De-
BERTa, GPT-2, and GPT-3.
Unlike other methods, LoRA introduces
no extra latency during inference, making it more efficient for practical
applications.
All relevant code an

## SCR

In [8]:
# PDF URL
pdf_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/WgM1DaUn2SYPcCg_It57tA/A-Comprehensive-Review-of-Low-Rank-Adaptation-in-Large-Language-Models-for-Efficient-Parameter-Tuning-1.pdf"

# Load the PDF Loader
loader = PyMuPDFLoader(pdf_url)
data = loader.load()

# Print first 1000 characters of the pdf
print(data[0].page_content[:1000])

A Comprehensive Review of Low-Rank
Adaptation in Large Language Models for
Efficient Parameter Tuning
September 10, 2024
Abstract
Natural Language Processing (NLP) often involves pre-training large
models on extensive datasets and then adapting them for specific tasks
through fine-tuning. However, as these models grow larger, like GPT-3
with 175 billion parameters, fully fine-tuning them becomes computa-
tionally expensive. We propose a novel method called LoRA (Low-Rank
Adaptation) that significantly reduces the overhead by freezing the orig-
inal model weights and only training small rank decomposition matrices.
This leads to up to 10,000 times fewer trainable parameters and reduces
GPU memory usage by three times. LoRA not only maintains but some-
times surpasses fine-tuning performance on models like RoBERTa, De-
BERTa, GPT-2, and GPT-3.
Unlike other methods, LoRA introduces
no extra latency during inference, making it more efficient for practical
applications.
All relevant code an

# **Task 2: Apply text splitting techniques**

In [9]:
[e.value for e in Language]

['cpp',
 'go',
 'java',
 'kotlin',
 'js',
 'ts',
 'php',
 'proto',
 'python',
 'rst',
 'ruby',
 'rust',
 'scala',
 'swift',
 'markdown',
 'latex',
 'html',
 'sol',
 'csharp',
 'cobol',
 'c',
 'lua',
 'perl',
 'haskell',
 'elixir',
 'powershell']

In [10]:
# LATEX code
latex_text = """
\documentclass{article}

\begin{document}

\maketitle

\section{Introduction}
Large language models (LLMs) are a type of machine learning model that can be trained on vast amounts of text data to generate human-like language. In recent years, LLMs have made significant advances in various natural language processing tasks, including language translation, text generation, and sentiment analysis.

\subsection{History of LLMs}
The earliest LLMs were developed in the 1980s and 1990s, but they were limited by the amount of data that could be processed and the computational power available at the time. In the past decade, however, advances in hardware and software have made it possible to train LLMs on massive datasets, leading to significant improvements in performance.

\subsection{Applications of LLMs}
LLMs have many applications in the industry, including chatbots, content creation, and virtual assistants. They can also be used in academia for research in linguistics, psychology, and computational linguistics.

\end{document}
"""

In [11]:
# Latex code splitter
latex_splitter = RecursiveCharacterTextSplitter.from_language(language=Language.LATEX, chunk_size = 60, chunk_overlap=0)
latex_docs = latex_splitter.create_documents([latex_text])
latex_docs

[Document(page_content='\\documentclass{article}\n\n\x08egin{document}\n\n\\maketitle'),
 Document(page_content='\\section{Introduction}\nLarge language models (LLMs) are a'),
 Document(page_content='type of machine learning model that can be trained on vast'),
 Document(page_content='amounts of text data to generate human-like language. In'),
 Document(page_content='recent years, LLMs have made significant advances in'),
 Document(page_content='various natural language processing tasks, including'),
 Document(page_content='language translation, text generation, and sentiment'),
 Document(page_content='analysis.'),
 Document(page_content='\\subsection{History of LLMs}\nThe earliest LLMs were'),
 Document(page_content='developed in the 1980s and 1990s, but they were limited by'),
 Document(page_content='the amount of data that could be processed and the'),
 Document(page_content='computational power available at the time. In the past'),
 Document(page_content='decade, however, advances 

# **Task 3: Embed documents**

In [12]:
embed_params = {
    EmbedTextParamsMetaNames.TRUNCATE_INPUT_TOKENS: 3,
    EmbedTextParamsMetaNames.RETURN_OPTIONS: {"input_text": True},
}

watsonx_embedding = WatsonxEmbeddings(
    model_id="ibm/slate-125m-english-rtrvr",
    url="https://us-south.ml.cloud.ibm.com",
    project_id=os.getenv("PROJECT_ID"),
    params=embed_params,
    apikey=os.getenv("API_KEY")
)

In [13]:
query = "How are you?"
query_result = watsonx_embedding.embed_query(query)
query_result[:5]

[-0.06722454, -0.023729993, 0.017487843, -0.013195328, -0.039584607]

In [14]:
len(query_result)

768

# **Task 4: Create and configure vector databases to store embeddings**

In [15]:
# Load the Text Loader
txt_loader = TextLoader("new-Policies.txt")
txt_data = txt_loader.load()
print(txt_data[0].page_content)

1. Code of Conduct

Our Code of Conduct establishes the core values and ethical standards that all members of our organization must adhere to. We are committed to fostering a workplace characterized by integrity, respect, and accountability.

Integrity: We commit to the highest ethical standards by being honest and transparent in all our dealings, whether with colleagues, clients, or the community. We protect sensitive information and avoid conflicts of interest.

Respect: We value diversity and every individual's contribution. Discrimination, harassment, or any form of disrespect is not tolerated. We promote an inclusive environment where differences are respected, and everyone is treated with dignity.

Accountability: We are responsible for our actions and decisions, complying with all relevant laws and regulations. We aim for continuous improvement and report any breaches of this code, supporting investigations into such matters.

Safety: We prioritize the safety of our employees, c

In [16]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len
)

chunks = text_splitter.split_documents(txt_data)
chunks

[Document(metadata={'source': 'new-Policies.txt'}, page_content='1. Code of Conduct'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='Our Code of Conduct establishes the core values and ethical standards that all members of our'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='all members of our organization must adhere to. We are committed to fostering a workplace'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='a workplace characterized by integrity, respect, and accountability.'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='Integrity: We commit to the highest ethical standards by being honest and transparent in all our'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='in all our dealings, whether with colleagues, clients, or the community. We protect sensitive'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='protect sensitive information and avoid conflicts of interest.')

In [17]:
len(chunks)

92

In [18]:
ids = [str(i) for i in range(0, len(chunks))]

In [19]:
vectordb = Chroma.from_documents(chunks, watsonx_embedding, ids=ids)
vectordb._collection.count()

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


92

In [20]:
for i in range(3):
    print(vectordb._collection.get(ids=str(i)))

Failed to send telemetry event CollectionGetEvent: capture() takes 1 positional argument but 3 were given


{'ids': ['0'], 'embeddings': None, 'metadatas': [{'source': 'new-Policies.txt'}], 'documents': ['1. Code of Conduct'], 'uris': None, 'data': None}
{'ids': ['1'], 'embeddings': None, 'metadatas': [{'source': 'new-Policies.txt'}], 'documents': ['Our Code of Conduct establishes the core values and ethical standards that all members of our'], 'uris': None, 'data': None}
{'ids': ['2'], 'embeddings': None, 'metadatas': [{'source': 'new-Policies.txt'}], 'documents': ['all members of our organization must adhere to. We are committed to fostering a workplace'], 'uris': None, 'data': None}


In [21]:
query = "Smoking Policy"
docs = vectordb.similarity_search(query, k=5)
docs

Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


[Document(metadata={'source': 'new-Policies.txt'}, page_content='This policy encourages the responsible use of mobile devices in line with legal and ethical'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='This policy promotes the safe and responsible use of digital communication tools in line with our'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='This policy lays the foundation for a diverse, inclusive, and talented workforce. It ensures that'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='Environmental Responsibility: We strive to reduce our environmental impact and promote sustainable'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='guidelines. The policy is regularly reviewed to stay current with evolving technology and security')]

In [22]:
ids = vectordb.get()["ids"]
vectordb.delete(ids)

Failed to send telemetry event CollectionDeleteEvent: capture() takes 1 positional argument but 3 were given


## SCR

In [23]:
# Load the Text Loader
txt_loader = TextLoader("new-Policies.txt")
txt_data = txt_loader.load()

# Text Splitting
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len
)

chunks = text_splitter.split_documents(txt_data)

# Create Chroma Vector Database
vectordb = Chroma.from_documents(chunks, watsonx_embedding)

# Conduct a similarity search and retrieve top 5 results
query = "Smoking policy"
docs = vectordb.similarity_search(query, k=5)
docs


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


[Document(metadata={'source': 'new-Policies.txt'}, page_content='This policy encourages the responsible use of mobile devices in line with legal and ethical'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='This policy promotes the safe and responsible use of digital communication tools in line with our'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='This policy lays the foundation for a diverse, inclusive, and talented workforce. It ensures that'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='guidelines. The policy is regularly reviewed to stay current with evolving technology and security'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='We encourage a culture of safety, including reporting any unsafe practices or conditions.')]

# **Task 5: Develop a retriever to fetch document segments based on queries**

In [24]:
retriever = vectordb.as_retriever(search_kwargs={"k":2})
query = "Email policy"
docs = retriever.invoke(query)
docs

[Document(metadata={'source': 'new-Policies.txt'}, page_content='and email use, including copyright and data protection laws.'),
 Document(metadata={'source': 'new-Policies.txt'}, page_content='This policy encourages the responsible use of mobile devices in line with legal and ethical')]

# **Task 6: Construct a QA Bot that leverages the LangChain and LLM to answer questions**

In [25]:
def llm_model(model_id):
    parameters = {
        GenParams.MAX_NEW_TOKENS: 256,  # this controls the maximum number of tokens in the generated output
        GenParams.TEMPERATURE: 0.5, # this randomness or creativity of the model's responses
    }
    
    credentials = {
        "url": "https://us-south.ml.cloud.ibm.com",
        "api_key": os.getenv("API_KEY")
    }
    
    project_id = os.getenv("PROJECT_ID")
    
    model = ModelInference(
        model_id=model_id,
        params=parameters,
        credentials=credentials,
        project_id=project_id
    )
    
    llm = WatsonxLLM(watsonx_model = model)
    
    return llm

In [26]:
mixtral_llm = llm_model("mistralai/mistral-large")



In [27]:
loaded_doc = ""
glob_vectordb = None

In [28]:
def retriever_qa(file, query):

    global loaded_doc
    
    if file.name != loaded_doc:

        ### print("New file detected")

        loaded_doc = file.name
        
        ### Load the PDF Loader
        loader = PyMuPDFLoader(file)
        data = loader.load()

        ### Text Splitting
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=50,
            length_function=len
        )

        chunks = text_splitter.split_documents(data)

        ids = [str(i) for i in range(0, len(chunks))]

        ### Create Chroma Vector Database
        vectordb1 = Chroma.from_documents(chunks, watsonx_embedding, ids=ids)

        global glob_vectordb 
        glob_vectordb = vectordb1

        ### print(glob_vectordb._collection.count())

    ## QA Bot
    qa = RetrievalQA.from_chain_type(
        llm = mixtral_llm,
        chain_type = "stuff",
        retriever = glob_vectordb.as_retriever(),
        return_source_documents = False
    )

    ## Generated output
    response = qa.invoke(query)["result"]

    return response

In [None]:
# Gradio interface
rag_application = gr.Interface(
    fn=retriever_qa,
    allow_flagging="never",
    inputs=[
        gr.File(label="Upload PDF File", file_count="single", file_types=['.pdf'], type="filepath"),
        gr.Textbox(label="Input Query", lines=2, placeholder="Type your question here...")
    ],
    outputs=gr.Textbox(label="Output"),
    title="AI RAG Assistant",
    description="Upload a PDF document and ask any question. The chatbot will try to answer using the provided document."
)

rag_application.launch(server_name="0.0.0.0", server_port= 7860)

Running on local URL:  http://0.0.0.0:7860


--------



To create a public link, set `share=True` in `launch()`.




Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
