<a href="https://colab.research.google.com/github/huishingchong/agile_llm/blob/main/pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RAG Pipeline

## Set up
### Import Packages and API keys

In [7]:
!pip install transformers datasets torch langchain-community faiss-cpu sentence-transformers langchain gradio


Collecting langchain
  Downloading langchain-0.1.9-py3-none-any.whl (816 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m817.0/817.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: langchain
Successfully installed langchain-0.1.9


In [3]:
# get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token

from getpass import getpass

# Read the token without echoing it. The labelled prompt tells the user what
# the otherwise blank input box expects, and strip() drops whitespace that
# often comes along when a token is pasted.
HUGGINGFACEHUB_API_TOKEN = getpass("Hugging Face API token: ").strip()

··········


In [4]:
import os

# Export the token entered above as an environment variable of this process.
os.environ.update({"HUGGINGFACEHUB_API_TOKEN": HUGGINGFACEHUB_API_TOKEN})

### Model selection

In [26]:
from langchain_community.llms import HuggingFaceHub
from langchain_community.llms import HuggingFaceEndpoint
model_name = "tiiuae/falcon-7b-instruct"

# Build the LLM through the Hugging Face endpoint wrapper. Only `repo_id`
# identifies the model; the earlier call also passed `model=` with the same
# value, which was redundant.
# NOTE(review): the recorded run of this cell ended in a ValidationError asking
# for `endpoint_url` or `repo_id` — re-run after this change to confirm the
# cell now succeeds with the installed langchain-community version.
llm = HuggingFaceEndpoint(
    repo_id=model_name,
    task="text-generation",
    temperature=0.5,
    max_new_tokens=200,  # cap on tokens generated per request
)


ValidationError: 1 validation error for HuggingFaceEndpoint
__root__
  Please specify an `endpoint_url` or `repo_id` for the model. (type=value_error)

## RAG from synthetic data set

### Set up embeddings and documents to retrieve from

In [9]:
# Use langchain packages to help with implementing retrieval augmentation generation
from datasets import load_dataset
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
# from langchain_core.prompts import ChatPromptTemplate
# from langchain_core.runnables import RunnablePassthrough
# from langchain_core.output_parsers import StrOutputParser

# --- Step 1: load the retrieval corpus -------------------------------------
# Everything CSVLoader reads from rag_sample.csv becomes the source documents.
loader = CSVLoader("rag_sample.csv")
documents = loader.load()

# --- Step 2: split the documents into chunks -------------------------------
# Chunks are at most 1000 characters, with a 50-character overlap between them.
text_split = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=1000)
d = text_split.split_documents(documents)

# --- Step 3: embed the chunks and index them with FAISS --------------------
# gtr-t5-base is a T5 sentence-transformer that maps sentences and paragraphs
# to a 768-dimensional dense vector space.
modelPath = "sentence-transformers/gtr-t5-base"
model_kwargs = dict(device='cpu')
# Normalising keeps embedding magnitude from influencing similarity scores.
encode_kwargs = dict(normalize_embeddings=True)

# Embedding wrapper configured with the model and options chosen above.
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=modelPath,
)

# Vector store built from the chunked documents.
db = FAISS.from_documents(d, embedding=embeddings)

modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/1.86k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/219M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


tokenizer_config.json:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

2_Dense/config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

In [10]:
# Sanity check: run one sample question through the retriever and show the
# text of the first document it returns.
r = db.as_retriever()
docs = r.get_relevant_documents("What skills should an AI solution analyst have?")
first_hit = docs[0]
print(first_hit.page_content)

Prompt: AI solutions analyst
Output: Job Description:
As an AI Solutions Analyst, you will play a crucial role in driving organizational transformations for medium- and large-scale businesses by documenting, analyzing, and improving business processes. You will work within projects to map as-is processes to to-be processes, aligning them with the future operating model. Acting as a liaison between clients and project teams, you will coordinate and collaborate with stakeholders during workshops and contribute to the design and support of ongoing solutions post-transition. Additionally, you will collaborate with teammates on the analysis and design of complex business applications using the latest technologies, ensuring successful delivery of business solutions.
Responsibilities:
Document and analyze as-is processes (functional specs and user stories) and make recommendations for improvement by mapping to-be business processes aligned with the future operating model.


### Specify model: Falcon

In [None]:
from langchain_community.llms import HuggingFaceHub
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.prompts import PromptTemplate

# Model used for this section of the notebook.
model_name = "tiiuae/falcon-7b-instruct"

# NOTE(review): the tokenizer is only referenced by the commented-out local
# pipeline below; the HuggingFaceHub call at the end of this cell does not
# take it.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# llm = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

# Disabled alternative: define a text-generation pipeline from the model and
# tokenizer and wrap it in HuggingFacePipeline.
# hf = pipeline(
#     "text-generation",
#     model=model_name,
#     tokenizer=tokenizer,
#     max_new_tokens = 200,
#     temperature = 0.1,
#     eos_token_id=tokenizer.eos_token_id,
#     do_sample=True
# )
# llm = HuggingFacePipeline(pipeline=hf)
# Active path: create the LLM via HuggingFaceHub, passing the generation
# settings through model_kwargs.
llm = HuggingFaceHub(repo_id=model_name, model_kwargs={"temperature":0.5, "max_length":1024, "max_new_tokens":200})
# https://medium.com/international-school-of-ai-data-science/implementing-rag-with-langchain-and-hugging-face-28e3ea66c5f7

# TO DELETE CACHE: huggingface-cli delete-cache

RetrievalQA

In [11]:
from langchain.prompts import PromptTemplate

# Prompt text for the RetrievalQA chain, assembled one line per entry.
# {context} and {question} are the two placeholders filled in at query time.
template = "\n".join([
    "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.",
    "Answer professionally, to the best of your ability, and where appropriate, in a Computer Science educational context.",
    "Use the context and be specific as you can.",
    "Context: {context}",
    "Question: {question}",
    "Helpful Answer:",
])

QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

In [12]:
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
# Retriever with the vector store's default search settings
# (variant tried earlier: db.as_retriever(search_kwargs={"k": 2})).
retriever = db.as_retriever()

# RetrievalQA chain that uses the custom prompt and also returns the
# documents it retrieved.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    verbose=True,
)

question = "What skills should an AI solution analyst have?"
result = qa.invoke({"query": question})
# Print the retrieved sources first, then the whole result dictionary.
print(result.get("source_documents", []))
print(result)
# print(result["result"].split('\nHelpful Answer:')[1].strip())

# Need to be on all the documents?




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
[Document(page_content='Prompt: AI solutions analyst\nOutput: Job Description:\nAs an AI Solutions Analyst, you will play a crucial role in driving organizational transformations for medium- and large-scale businesses by documenting, analyzing, and improving business processes. You will work within projects to map as-is processes to to-be processes, aligning them with the future operating model. Acting as a liaison between clients and project teams, you will coordinate and collaborate with stakeholders during workshops and contribute to the design and support of ongoing solutions post-transition. Additionally, you will collaborate with teammates on the analysis and design of complex business applications using the latest technologies, ensuring successful delivery of business solutions.\nResponsibilities:\nDocument and analyze as-is processes (functional specs and user stories) and make recommendations for improvem

In [24]:
import gradio as gr

def chat_interface(textbox, chat):
    """Answer one chat message with the RetrievalQA chain `qa`.

    Args:
        textbox: The text the user submitted.
        chat: Second argument passed by the Gradio chat UI (presumably the
            chat history); it is not used here.

    Returns:
        The value stored under 'result' in the chain's output.
    """
    response = qa.invoke({'query': textbox})
    print(response)  # echo the full output dictionary into the cell log
    return response['result']

# gr.Interface(
#     fn=chat_interface,
#     inputs=[gr.Textbox(placeholder="Ask me a question", label="Question", lines=5, type="text"),
#         gr.UploadButton(label="Upload File", file_types=[".pdf", ".csv", ".doc"])],
#     outputs=gr.Chatbot(height=300),
#     title="Chatbot",
#     description="Ask Chatbot any question",
#     theme="soft",
#     examples=[
#         ["What does AI stand for?"],
#         ["What is Software Engineering?"],
#         ["What is Cybersecurity?"]
#     ],
#     cache_examples=False,
# ).launch()

# Build the chat UI around chat_interface, then start it with debug output on.
demo = gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=["What does AI stand for?", "What is Software Engineering?", "What is Cybersecurity?"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)
demo.launch(debug=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://a211ce8df700dd0eda.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
{'query': 'How many rows are in the csv file?', 'result': '\nThe number of rows in the CSV file can be determined by opening the file in a text editor and counting the number of lines. Alternatively, you can use the Python `len()` function to determine the number of rows in the file.', 'source_documents': [Document(page_content='Prompt: Data Scientist\nOutput: Job Title: Data Scientist\n\nJob Description:\nWe are seeking a talented Data Scientist to join our team. The ideal candidate will be responsible for designing, implementing, and deploying data-driven solutions to extract insights from large datasets. You will work closely with cross-functional teams to identify business opportunities and provide actionable recommendations. The role involves conducting statistical analysis, developing machine learning models, and creating data visualizations to communicate findings effectively.\n\nResponsibilities:\n- Clean,



With a `load_qa_chain` question-answering chain

In [17]:
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import pipeline
from langchain.prompts import PromptTemplate

template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
Answer professionally, to the best of your ability, and where appropriate, in a Computer Science educational context.
Don't copy from the context.
Context: {context}
Question: {question}
Helpful Answer:"""

QA_CHAIN_PROMPT = PromptTemplate(
    template=template, input_variables=["context", "question"]
)

# The custom prompt has to be handed to the chain under the `prompt` keyword.
# Previously it was stored under "questionPrompt" and never passed to
# load_qa_chain, so the chain ran with its built-in default prompt.
chain_type_kwargs = {"prompt": QA_CHAIN_PROMPT}

# "stuff" chain: every supplied document is placed into the prompt's
# {context} slot in a single LLM call.
qa_chain = load_qa_chain(llm, chain_type="stuff", verbose=True, **chain_type_kwargs)


In [23]:
import gradio as gr

def chat_interface(textbox, chat):
    """Answer one chat message with the document QA chain `qa_chain`.

    The documents retrieved for the message are what is handed to the chain.
    Previously the retrieved documents were discarded and the entire chunked
    corpus `d` was sent as context on every question.

    Args:
        textbox: The text the user submitted.
        chat: Second argument passed by the Gradio chat UI (presumably the
            chat history); it is not used here.

    Returns:
        The value stored under 'output_text' in the chain's output.
    """
    docs = retriever.get_relevant_documents(textbox)
    result = qa_chain.invoke(
        {'question': textbox, 'input_documents': docs},
        return_only_outputs=True,
    )
    print(result)  # echo the chain output into the cell log
    return result['output_text']

# Build the chat UI around chat_interface, then start it with debug output on.
demo = gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=["What does AI stand for?", "What is Software Engineering?", "What is Cybersecurity?"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)
demo.launch(debug=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://324612e13f85ae3873.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


ReadTimeout: The read operation timed out

## Evaluation

In [None]:
# RAG evaluation

# Load standardized test set
    # IT Consultant, Cloud Engineer...

# ROUGE?
# F1 Score: common between prediction and gold output
from langchain.evaluation import load_evaluator, EvaluatorType




### T5

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain import HuggingFacePipeline
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Hand the already-loaded model object to the pipeline. Passing the model
# name here made the pipeline load the weights a second time and left
# `model` unused.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    temperature=0.1,
    eos_token_id=tokenizer.eos_token_id,
    do_sample=True,
)
# Wrap the transformers pipeline so the LangChain chains can use it as `llm`.
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
from langchain.prompts import PromptTemplate

# Prompt text for the T5 RetrievalQA chain, assembled one line per entry.
# {context} and {question} are the placeholders filled in at query time.
template = "\n".join([
    "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.",
    "{context}",
    "Question: {question}",
    "Helpful Answer:",
])
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [None]:
from langchain.chains import RetrievalQA
# Restrict retrieval to two documents per query.
retriever = db.as_retriever(search_kwargs={"k": 2})

# RetrievalQA chain with the custom prompt; source documents are not returned.
# (A chain_type="refine" variant with return_source_documents=True was also
# tried here.)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=False,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
print(qa)

question = "What skills should an AI solution analyst have?"
# Use invoke(), as the other cells of this notebook do; calling the chain
# object directly is the deprecated form.
result = qa.invoke({"query": question})
print(result["result"])

In [None]:
import gradio as gr

def chat_interface(textbox, chat):
    """Answer one chat message with the RetrievalQA chain `qa`.

    The chain is called with its 'query' input and its 'result' output is
    read. The earlier version passed 'question'/'context' and read 'text',
    which do not match the RetrievalQA chain built in the cell above, and it
    indexed the split result unconditionally.

    Args:
        textbox: The text the user submitted.
        chat: Second argument passed by the Gradio chat UI (presumably the
            chat history); it is not used here.

    Returns:
        The answer text with surrounding whitespace removed. If the model
        echoed the prompt, only the text after the 'Helpful Answer:' marker
        is returned.
    """
    result = qa.invoke({'query': textbox})
    print(result)  # echo the chain output into the cell log
    text = result['result']
    # partition() leaves `marker` empty when the marker is absent, so outputs
    # without an echoed prompt are returned whole instead of raising.
    _, marker, answer = text.partition('\nHelpful Answer:')
    return (answer if marker else text).strip()

# Build the chat UI around chat_interface, then start it.
demo = gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=["What does AI stand for?", "What is Software Engineering?", "What is Cybersecurity?"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)
demo.launch()

In [None]:
import gradio as gr

def chat_interface(textbox, chat):
    """Answer one chat message with the RetrievalQA chain `qa`.

    Uses qa.invoke(), matching the other cells of this notebook, instead of
    the deprecated direct call on the chain object.

    Args:
        textbox: The text the user submitted.
        chat: Second argument passed by the Gradio chat UI (presumably the
            chat history); it is not used here.

    Returns:
        The value stored under 'result' in the chain's output.
    """
    result = qa.invoke({'query': textbox})
    # Log whatever source documents the chain returned; the list is empty
    # when the output has no 'source_documents' entry.
    docs = result.get("source_documents", [])
    print(docs)
    return result["result"]

# Build the chat UI around chat_interface, then start it.
demo = gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=["What does AI stand for?", "What is Software Engineering?", "What is Cybersecurity?"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)
demo.launch()

In [None]:
# MAYBE DO THIS FIRST? AND SEE THE DOWNSIDE, AND LEARN THAT IT IS NOT REQUIRED (doesn't solve hallucinations and timely context!)
# Fine-tune with input and output example data sets

# Compare with different models (one fine-tuned one just pre-trained)

## Full adapted model (combined of all approaches)

In [None]:
# Knowledge retrieved
# Augmented Prompt
# Fine-tuned/pre-trained LLM

import gradio as gr
def chat_interface(textbox, chat):
    """Answer one chat message by running `llm_chain` on the question.

    NOTE(review): `llm_chain` is not defined in the cells visible here —
    confirm it is created before this cell is run.

    Args:
        textbox: The text the user submitted.
        chat: Second argument passed by the Gradio chat UI (presumably the
            chat history); it is not used here.

    Returns:
        Whatever llm_chain.run returns for the question.
    """
    response = llm_chain.run({'question': textbox})

    # Log both sides of the exchange to the cell output.
    print("user:", textbox)
    print("bot:", response)
    return response

# Build the chat UI around chat_interface, then start it.
demo = gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=["What does AI stand for?", "What is Software Engineering?", "What is Cybersecurity?"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)
demo.launch()