Import necessary libraries

In [None]:
# Importing necessary libraries
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames
from ibm_watsonx_ai import Credentials
from langchain_ibm import WatsonxLLM, WatsonxEmbeddings

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA

import gradio as gr

# Optional: silence every warning raised while this notebook runs.
import warnings


def warn(*args, **kwargs):
    """Accept any arguments and do nothing (no-op stand-in for ``warnings.warn``)."""
    return None


# Route calls to warnings.warn through the no-op above, and additionally
# install an "ignore" filter for warnings raised through other paths.
warnings.warn = warn
warnings.filterwarnings(action='ignore')

### LLM
Initialize the LLM by creating an instance of `WatsonxLLM`, a class in `langchain_ibm`. `WatsonxLLM` can use several underlying foundation models. In this project, Mixtral 8x7B is used.
The model is initialized with a temperature of 0.5 and a maximum of 256 newly generated tokens.

In [None]:
## LLM
def get_llm():
    """Build the watsonx-hosted LLM used to generate answers.

    Returns:
        A ``WatsonxLLM`` instance configured for the Mixtral 8x7B Instruct
        model with temperature 0.5 and at most 256 new tokens.
    """
    generation_params = {
        GenParams.MAX_NEW_TOKENS: 256,
        GenParams.TEMPERATURE: 0.5,
    }
    return WatsonxLLM(
        model_id='mistralai/mixtral-8x7b-instruct-v01',
        url="https://us-south.ml.cloud.ibm.com",
        project_id="skills-network",
        params=generation_params,
    )

### Document loader
Function to load the PDF file. `PyPDFLoader` is used in this project.

In [None]:
# Document loader function to load pdf file
def document_loader(file):
    """Load a PDF with ``PyPDFLoader`` and return what its ``load()`` yields.

    Args:
        file: Path of the PDF to read.

    Returns:
        The list of documents produced by ``PyPDFLoader(file).load()``.
    """
    return PyPDFLoader(file).load()

In [None]:
# Loading a sample research document
# `file` is the path of the sample PDF; `sample_doc` holds the result of
# document_loader(file) and is reused by the cells below.
file='/content/A_Comprehensive_Review_of_Low_Rank_Adaptation_in_Large_Language_Models_for_Efficient_Parameter_Tuning-1.pdf'
sample_doc= document_loader(file)

In [None]:
# Echo the loaded document list as the cell output.
sample_doc # each page is a Document object

[Document(metadata={'source': '/content/A_Comprehensive_Review_of_Low_Rank_Adaptation_in_Large_Language_Models_for_Efficient_Parameter_Tuning-1.pdf', 'page': 0}, page_content='A Comprehensive Review of Low-Rank\nAdaptation in Large Language Models for\nEfficient Parameter Tuning\nSeptember 10, 2024\nAbstract\nNatural Language Processing (NLP) often involves pre-training large\nmodels on extensive datasets and then adapting them for specific tasks\nthrough fine-tuning. However, as these models grow larger, like GPT-3\nwith 175 billion parameters, fully fine-tuning them becomes computa-\ntionally expensive. We propose a novel method called LoRA (Low-Rank\nAdaptation) that significantly reduces the overhead by freezing the orig-\ninal model weights and only training small rank decomposition matrices.\nThis leads to up to 10,000 times fewer trainable parameters and reduces\nGPU memory usage by three times. LoRA not only maintains but some-\ntimes surpasses fine-tuning performance on models

In [None]:
# Printing first 1000 characters of the doc
# (taken from the page_content of the first element of sample_doc)
print(sample_doc[0].page_content[:1000])

A Comprehensive Review of Low-Rank
Adaptation in Large Language Models for
Efficient Parameter Tuning
September 10, 2024
Abstract
Natural Language Processing (NLP) often involves pre-training large
models on extensive datasets and then adapting them for specific tasks
through fine-tuning. However, as these models grow larger, like GPT-3
with 175 billion parameters, fully fine-tuning them becomes computa-
tionally expensive. We propose a novel method called LoRA (Low-Rank
Adaptation) that significantly reduces the overhead by freezing the orig-
inal model weights and only training small rank decomposition matrices.
This leads to up to 10,000 times fewer trainable parameters and reduces
GPU memory usage by three times. LoRA not only maintains but some-
times surpasses fine-tuning performance on models like RoBERTa, De-
BERTa, GPT-2, and GPT-3. Unlike other methods, LoRA introduces
no extra latency during inference, making it more efficient for practical
applications. All relevant code an

In [None]:
# Doc is stored as a list. Each element in the list corresponds to a page in the doc
print(type(sample_doc))  # container type of the loaded document
print(sample_doc[0])     # first element of the list

<class 'list'>
page_content='A Comprehensive Review of Low-Rank
Adaptation in Large Language Models for
Efficient Parameter Tuning
September 10, 2024
Abstract
Natural Language Processing (NLP) often involves pre-training large
models on extensive datasets and then adapting them for specific tasks
through fine-tuning. However, as these models grow larger, like GPT-3
with 175 billion parameters, fully fine-tuning them becomes computa-
tionally expensive. We propose a novel method called LoRA (Low-Rank
Adaptation) that significantly reduces the overhead by freezing the orig-
inal model weights and only training small rank decomposition matrices.
This leads to up to 10,000 times fewer trainable parameters and reduces
GPU memory usage by three times. LoRA not only maintains but some-
times surpasses fine-tuning performance on models like RoBERTa, De-
BERTa, GPT-2, and GPT-3. Unlike other methods, LoRA introduces
no extra latency during inference, making it more efficient for practical
appli

### Text Splitter
Function to split the loaded PDF file into chunks, using `RecursiveCharacterTextSplitter`.

In [None]:
# Text Splitter to chunk the loaded pdf file
def text_splitter(loaded_doc, chunk_size=250, chunk_overlap=20):
    """Split loaded documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        loaded_doc: Documents to split (e.g. the output of ``document_loader``).
        chunk_size: Chunk size handed to the splitter. Defaults to 250.
        chunk_overlap: Overlap between chunks handed to the splitter.
            Defaults to 20.

    Returns:
        The chunks returned by the splitter's ``split_documents``.
    """
    # Defaults exist so that one-argument calls such as text_splitter(docs)
    # work instead of raising TypeError; callers may still pass both sizes
    # positionally, as before.
    doc_splitter = RecursiveCharacterTextSplitter.from_language(
        language=Language.MARKDOWN,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return doc_splitter.split_documents(loaded_doc)

In [None]:
# Chunk the sample document with chunk_size=250 and chunk_overlap=20.
sample_chunks= text_splitter(sample_doc,250,20)

In [None]:
# Echo the list of chunks as the cell output.
sample_chunks

[Document(metadata={'source': '/content/A_Comprehensive_Review_of_Low_Rank_Adaptation_in_Large_Language_Models_for_Efficient_Parameter_Tuning-1.pdf', 'page': 0}, page_content='A Comprehensive Review of Low-Rank\nAdaptation in Large Language Models for\nEfficient Parameter Tuning\nSeptember 10, 2024\nAbstract\nNatural Language Processing (NLP) often involves pre-training large'),
 Document(metadata={'source': '/content/A_Comprehensive_Review_of_Low_Rank_Adaptation_in_Large_Language_Models_for_Efficient_Parameter_Tuning-1.pdf', 'page': 0}, page_content='models on extensive datasets and then adapting them for specific tasks\nthrough fine-tuning. However, as these models grow larger, like GPT-3\nwith 175 billion parameters, fully fine-tuning them becomes computa-'),
 Document(metadata={'source': '/content/A_Comprehensive_Review_of_Low_Rank_Adaptation_in_Large_Language_Models_for_Efficient_Parameter_Tuning-1.pdf', 'page': 0}, page_content='tionally expensive. We propose a novel method calle

In [None]:
print(len(sample_chunks))              # number of chunks produced
print(sample_chunks[10].page_content)  # text of the chunk at index 10
print(type(sample_chunks[10]))         # type of a single chunk
print(type(sample_chunks))             # type of the chunk container

124
model depth or reducing the usable sequence length. Furthermore, these meth-
ods typically do not perform as well as full fine-tuning, leading to a trade-off
between efficiency and model performance.
<class 'langchain_core.documents.base.Document'>
<class 'list'>


### Embedding Model
Embeddings are generated using IBM's Slate 125M English embeddings model.



In [None]:
## Embedding model
def watsonx_embedding(truncate_input_tokens=512):
    """Build the watsonx embedding model used to vectorize text.

    Args:
        truncate_input_tokens: Value sent as ``TRUNCATE_INPUT_TOKENS``; input
            longer than this many tokens is truncated before embedding.
            Defaults to 512.

    Returns:
        A ``WatsonxEmbeddings`` instance for ``ibm/slate-125m-english-rtrvr``.
    """
    # The previous value of 3 cut every input down to its first three tokens,
    # so the embeddings ignored almost all of each chunk's content.
    # NOTE(review): 512 is assumed to be the model's maximum input length —
    # confirm against the watsonx.ai model documentation.
    embed_params = {
        EmbedTextParamsMetaNames.TRUNCATE_INPUT_TOKENS: truncate_input_tokens,
        EmbedTextParamsMetaNames.RETURN_OPTIONS: {"input_text": True},
    }
    embedding_model = WatsonxEmbeddings(
        model_id="ibm/slate-125m-english-rtrvr",
        url="https://us-south.ml.cloud.ibm.com",
        project_id="skills-network",
        params=embed_params,
    )
    return embedding_model

### Vector Store
A Chroma vector store is used to store the generated vector embeddings.

In [None]:
def vector_database(chunks):
    """Embed ``chunks`` and store them in a Chroma vector store.

    Args:
        chunks: Documents to index.

    Returns:
        The store created by ``Chroma.from_documents`` using the model from
        ``watsonx_embedding()``.
    """
    return Chroma.from_documents(chunks, watsonx_embedding())

In [None]:
# Build a vector store from the sample chunks.
db = vector_database(sample_chunks)

In [None]:
print(len(db))   # size reported by the vector store
print(type(db))  # concrete vector store class

124
<class 'langchain_community.vectorstores.chroma.Chroma'>


### Retriever
A retriever is an interface designed to return documents based on an unstructured query. Unlike a vector store, which stores and retrieves documents, a retriever's primary function is to find and return relevant documents. While vector stores can serve as the backbone of a retriever, there are various other types of retrievers that can be used as well.

Retrievers take a string `query` as input and output a list of `Documents`.


In [None]:
## Retriever
def retriever(file, chunk_size=250, chunk_overlap=20):
    """Build a similarity retriever over the contents of a PDF file.

    The file is loaded, split into chunks, embedded into a vector store, and
    the store is exposed as a retriever returning the top 4 matches.

    Args:
        file: Path of the PDF to index.
        chunk_size: Chunk size passed to ``text_splitter``. Defaults to 250.
        chunk_overlap: Chunk overlap passed to ``text_splitter``.
            Defaults to 20.

    Returns:
        The object returned by ``vectordb.as_retriever`` with
        ``search_type="similarity"`` and ``k=4``.
    """
    pages = document_loader(file)
    # text_splitter needs the chunk size and overlap; the earlier
    # single-argument call raised TypeError.
    chunks = text_splitter(pages, chunk_size, chunk_overlap)
    vectordb = vector_database(chunks)
    # Named doc_retriever so the local does not shadow this function's name.
    doc_retriever = vectordb.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 4},
    )
    return doc_retriever

### Question-Answering Chain
In this project, `RetrievalQA` from LangChain is used. It is a chain that performs natural-language question answering over a data source using retrieval-augmented generation (RAG).

In [None]:
# RetrievalQA Chain
# LLM and retriever obj are taken by RetrievalQA
def retriever_qa(doc, query):
    """Answer ``query`` about the PDF at ``doc`` with a ``RetrievalQA`` chain.

    Args:
        doc: Path of the PDF file to answer from.
        query: The question to ask about the document.

    Returns:
        The value stored under the ``'result'`` key of the chain's output.
    """
    llm = get_llm()
    # NOTE: the document is loaded, chunked and embedded again on every call.
    retriever_obj = retriever(doc)
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever_obj,
        return_source_documents=False,
    )
    # The chain output was previously bound to `result` but read back as
    # `response`, which raised NameError on every call.
    response = qa.invoke(query)
    return response['result']

### Set up the Gradio interface
The web interface is customizable and provides the following:
* A file upload component (provided by the `File` class in Gradio)
* An input textbox where the question is entered (provided by the `Textbox` class in Gradio)
* An output textbox where the answer is displayed (provided by the `Textbox` class in Gradio)



In [None]:
# Create the Gradio interface
# Two inputs (a PDF upload and a question box) are passed to retriever_qa;
# its return value is shown in a single output textbox.
rag_application = gr.Interface(
    fn=retriever_qa,
    allow_flagging="never",
    inputs=[
        # Drag-and-drop upload restricted to one .pdf file, passed as a file path.
        gr.File(
            label="Upload PDF File",
            file_count="single",
            file_types=['.pdf'],
            type="filepath",
        ),
        # Input query box.
        gr.Textbox(
            label="Input Query",
            lines=2,
            placeholder="Type your question here...",
        ),
    ],
    # Output response box.
    outputs=gr.Textbox(label="Summary of the Research Paper"),
    title="Scholar Synopsis AI",
    description="Upload a research paper. The assistant can summarize and answer any questions you have to help you grasp key ideas and insights regarding the paper.",
)