# RAG Pipeline

## Set up
### Import Packages and API keys 

In [None]:
# !pip install transformers datasets torch langchain-community faiss-cpu sentence-transformers
from getpass import getpass
from dotenv import load_dotenv
import os
from pathlib import Path

# Load variables from a local .env file (if one exists) into the process
# environment before looking up the token.
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)

# Prefer a token that is already configured in the environment / .env file.
huggingface_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')

if not huggingface_api_token:
    # Fall back to an interactive prompt so the token never has to be
    # hard-coded in the notebook.
    huggingface_api_token = getpass("Enter your Hugging Face Hub API token: ")
    if huggingface_api_token:
        # The HuggingFaceHub(...) calls later in this notebook are not given
        # the token explicitly, so they can only pick it up from the
        # environment. Export the prompted value; otherwise it is never used.
        os.environ['HUGGINGFACEHUB_API_TOKEN'] = huggingface_api_token

### Model selection

In [None]:
from langchain_community.llms import HuggingFaceHub
from transformers import AutoTokenizer, AutoModelForCausalLM

# Alternative: load the model locally instead of calling it through the Hub.
# tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
# model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-7b-instruct")

# Candidate open-source models from the Hugging Face Hub. Exactly one
# `model_name` line should be active; the active choice is Falcon-7B.
# model_name = "mistralai/Mistral-7B-v0.1"
# model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# model_name = "tiiuae/falcon-7b-instruct"
model_name = "tiiuae/falcon-7b"

# model_name = "meta-llama/Llama-2-7b"

# LLM wrapper used by the prompt-only chain below. No API token is passed
# here, so the wrapper has to obtain it on its own (presumably from the
# HUGGINGFACEHUB_API_TOKEN environment variable — confirm).
llm = HuggingFaceHub(repo_id=model_name, model_kwargs={"temperature":0.5, "max_length":1024, "max_new_tokens":200})

## Template-based Prompting

In [None]:
# I will be using Langchain

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import pipeline

# Prompt-only baseline: the question is inserted into a fixed template with no
# retrieved context. The trailing "Response:" line is the marker that
# chat_interface later splits on to separate the answer from the prompt.
template= """
Please answer the question.
Question: {question}
Response:
"""

# `question` is the only placeholder the template expects.
prompt = PromptTemplate(template=template, input_variables=["question"])
# # llm_chain = load_qa_chain(llm, chain_type="stuff")
# Chain that formats the prompt and sends it to the `llm` configured above.
llm_chain = LLMChain(prompt=prompt, llm=llm)


### Chat Interface

In [None]:
import gradio as gr
# def chat_interface(textbox, chat):
#     input_dict = {'question': textbox}
#     response = llm_chain.run(input_dict)

#     print("user:", textbox)
#     print("bot:", response)
#     return response

def chat_interface(textbox, chat):
    """Answer one chat message with the prompt-only LLM chain.

    Args:
        textbox: The user's message; inserted into the prompt as ``question``.
        chat: Second positional argument supplied by ``gr.ChatInterface``
            (presumably the chat history); unused.

    Returns:
        The text following the ``"Response:"`` marker, stripped of
        surrounding whitespace. If the marker is not present in the model
        output, the whole output is returned (stripped) instead.
    """
    response_dict = llm_chain.invoke({'question': textbox})
    text = response_dict['text']  # Raw model output, possibly echoing the prompt
    print(text)

    # The prompt template ends with "Response:", so an echoed prompt is removed
    # by taking the segment after the first marker. Models that do not echo the
    # prompt produce no marker; fall back to the full text in that case rather
    # than raising IndexError.
    parts = text.split("Response:")
    response_text = parts[1] if len(parts) > 1 else text
    return response_text.strip()

# Build the Gradio chat UI around the chat_interface function defined above
# and start it. The retry button is disabled and the undo/clear buttons get
# custom labels.
# NOTE(review): retry_btn / undo_btn / clear_btn appear to have been removed
# from gr.ChatInterface in Gradio 5 — confirm the installed version before
# upgrading.
gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=["What does AI stand for?", "What is Software Engineering?", "What is Cybersecurity?"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
).launch()

#I am a final year Computer Science student seeking to find a graduate role in __. What are practical skills required for a career in __?
#I am a beginner that wants to get into __, where should I start?


### Evaluation
Evaluating the model with prompting

In [None]:
# Load standardized test set
    # IT Consultant, Cloud Engineer...

# ROUGE? BLEU?

## RAG from synthetic data set

### Set up embeddings and documents to retrieve from

In [1]:
# Use langchain packages to help with implementing retrieval augmentation generation
from datasets import load_dataset
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
# from langchain_core.prompts import ChatPromptTemplate
# from langchain_core.runnables import RunnablePassthrough
# from langchain_core.output_parsers import StrOutputParser

# Step 1: Load Documents
# Read the CSV file that serves as the retrieval corpus.
loader = CSVLoader(file_path="rag_sample.csv")
documents = loader.load()  # Load data for retrieval

# Step 2: Split Documents
# Split into chunks of at most 1000 characters with a 50-character overlap.
text_split = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
# text_split = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
d = text_split.split_documents(documents)

# Step 3: Create a FAISS Index
modelPath = "sentence-transformers/gtr-t5-base" # Use a t5 sentence transformer model that maps sentences & paragraphs to a 768 dimensional dense vector space
model_kwargs = {'device':'cpu'}  # Run the embedding model on CPU
encode_kwargs = {'normalize_embeddings': False}  # Keep raw (un-normalised) vectors

# Initialize an instance of HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,     # Provide the pre-trained model's path
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# Embed every chunk and build the vector store; `db` is reused by the
# retrieval cells further down.
db = FAISS.from_documents(d, embedding=embeddings)

# Sanity check: retrieve for a sample question and print the top hit.
r = db.as_retriever()
docs = r.get_relevant_documents("What skills should an AI solution analyst have?")
print(docs[0].page_content)

  from .autonotebook import tqdm as notebook_tqdm
  return self.fget.__get__(instance, owner)()


Prompt: AI solutions analyst
Output: Job Description:
As an AI Solutions Analyst, you will play a crucial role in driving organizational transformations for medium- and large-scale businesses by documenting, analyzing, and improving business processes. You will work within projects to map as-is processes to to-be processes, aligning them with the future operating model. Acting as a liaison between clients and project teams, you will coordinate and collaborate with stakeholders during workshops and contribute to the design and support of ongoing solutions post-transition. Additionally, you will collaborate with teammates on the analysis and design of complex business applications using the latest technologies, ensuring successful delivery of business solutions.
Responsibilities:
Document and analyze as-is processes (functional specs and user stories) and make recommendations for improvement by mapping to-be business processes aligned with the future operating model.


### Falcon

In [2]:
from langchain_community.llms import HuggingFaceHub
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.prompts import PromptTemplate
# RAG in model
# template= """
# Try to be helpful as you can in a Computer Science context.
# Question: {question}
# Response:
# """
# prompt = PromptTemplate.from_template(template)
# model_name = "tiiuae/falcon-7b-instruct"
# Falcon-7B checkpoint (the "-instruct" variant is the commented-out
# alternative) used for the RAG experiments below.
model_name = "tiiuae/falcon-7b"

# NOTE(review): within this cell the tokenizer is only referenced by the
# commented-out local pipeline below; the active HuggingFaceHub LLM is not
# given it.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# llm = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

# Alternative (disabled): run the model locally through a transformers
# text-generation pipeline instead of HuggingFaceHub.
# hf = pipeline(
#     "text-generation", 
#     model=model_name, 
#     tokenizer=tokenizer,
#     max_new_tokens = 200,
#     temperature = 0.1,
#     eos_token_id=tokenizer.eos_token_id,
#     do_sample=True
# )
# llm = HuggingFacePipeline(pipeline=hf)
# Active configuration: rebinds the notebook-wide `llm` used by the chains
# built in the following cells.
llm = HuggingFaceHub(repo_id=model_name, model_kwargs={"temperature":0.5, "max_length":1024, "max_new_tokens":200})
# https://medium.com/international-school-of-ai-data-science/implementing-rag-with-langchain-and-hugging-face-28e3ea66c5f7

# TO DELETE CACHE: huggingface-cli delete-cache

RetrievalQA

In [None]:
from langchain.prompts import PromptTemplate

# Question-answering prompt with two placeholders: {context} for retrieved
# text and {question} for the user's query.
# NOTE(review): the RetrievalQA chain built in the next cell is not given
# this prompt, so QA_CHAIN_PROMPT is currently unused in the Falcon section.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [None]:
from langchain.chains import RetrievalQA
# Retriever over the FAISS index with its default search settings; the
# commented-out variant limits retrieval to the top 2 chunks.
# retriever = db.as_retriever(search_kwargs={"k": 2})
retriever = db.as_retriever()
# Retrieval QA chain using the "map_reduce" chain type. Source documents are
# not included in the result (return_source_documents=False).
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="map_reduce", retriever=retriever, return_source_documents=False)
#combine_documents_chain=RefineDocumentsChain(initial_llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['context_str', 'question'], template='Context information is below. \n------------\n{context_str}\n------------\nGiven the context information and not prior knowledge, answer the question: {question}\n'), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x2a1c4f410>, model_kwargs={'temperature': 0.7, 'max_length': 512, 'max_new_tokens': 10})), refine_llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['context_str', 'existing_answer', 'question'], template="The original question is as follows: {question}\nWe have provided an existing answer: {existing_answer}\nWe have the opportunity to refine the existing answer (only if needed) with some more context below.\n------------\n{context_str}\n------------\nGiven the new context, refine the original answer to better answer the question. If the context isn't useful, return the original answer."), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x2a1c4f410>, model_kwargs={'temperature': 0.7, 'max_length': 512, 'max_new_tokens': 10})), document_variable_name='context_str', initial_response_name='existing_answer') retriever=VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x28a12a390>, search_kwargs={'k': 4})

# Smoke test: run one sample question through the chain and print the full
# result dictionary.
question = "What skills should an AI solution analyst have?"
result = qa.invoke({"query": question})
print(result)

In [None]:
import gradio as gr

def chat_interface(textbox, chat):
    """Answer one chat message through the retrieval QA chain ``qa``.

    Args:
        textbox: The user's message, passed to the chain as ``query``.
        chat: Second positional argument supplied by ``gr.ChatInterface``
            (presumably the chat history); unused.

    Returns:
        The chain's ``"result"`` entry.
    """
    qa_output = qa.invoke({'query': textbox})
    # Log whatever source documents the chain returned (an empty list when the
    # chain does not include them in its output).
    print(qa_output.get("source_documents", []))
    return qa_output["result"]
    
# Chat UI for the retrieval QA chain: wraps the chat_interface function
# defined above and starts the Gradio app.
# NOTE(review): retry_btn / undo_btn / clear_btn appear to have been removed
# from gr.ChatInterface in Gradio 5 — confirm the installed version before
# upgrading.
gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=["What does AI stand for?", "What is Software Engineering?", "What is Cybersecurity?"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
).launch()

With LLMChain

In [3]:
from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import pipeline
from langchain.prompts import PromptTemplate

# Context-augmented prompt: {context} receives retrieved text and {question}
# the user's query. The trailing "Answer:" line is the marker the chat
# function in the next cell splits on to extract the model's answer.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context: {context}
Question: {question}
Answer:"""

prompt = PromptTemplate(template=template, input_variables=["question", "context"])
# # llm_chain = load_qa_chain(llm, chain_type="stuff")
# NOTE: this rebinds the notebook-wide name `qa` from a RetrievalQA chain to a
# plain LLMChain that expects `question` and `context` inputs.
qa = LLMChain(prompt=prompt, llm=llm)

In [4]:
import gradio as gr

def chat_interface(textbox, chat):
    """Answer one chat message using manual retrieval plus the LLMChain ``qa``.

    The top-ranked chunk from the FAISS index is used as context for the
    prompt.

    Args:
        textbox: The user's message; used both as the retrieval query and as
            the ``question`` prompt variable.
        chat: Second positional argument supplied by ``gr.ChatInterface``
            (presumably the chat history); unused.

    Returns:
        The text following the ``"\\nAnswer:"`` marker, stripped. If the
        marker is absent from the model output, the whole output is returned
        (stripped) instead.
    """
    retriever = db.as_retriever()
    docs = retriever.get_relevant_documents(textbox)
    # Only the best-matching chunk is used. Fall back to an empty context when
    # nothing is retrieved instead of raising IndexError.
    context = docs[0].page_content if docs else ""

    result = qa.invoke({'question': textbox, 'context': context})
    print(result)
    text = result['text']

    # The prompt ends with "Answer:", so an echoed prompt is removed by taking
    # the segment after the first marker. When the model does not echo the
    # prompt there is no marker; return the full text rather than failing.
    parts = text.split('\nAnswer:')
    answer = parts[1] if len(parts) > 1 else text
    return answer.strip()
    
# Chat UI for the LLMChain-with-context variant: wraps the chat_interface
# function defined above and starts the Gradio app.
# NOTE(review): retry_btn / undo_btn / clear_btn appear to have been removed
# from gr.ChatInterface in Gradio 5 — confirm the installed version before
# upgrading.
gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=["What does AI stand for?", "What is Software Engineering?", "What is Cybersecurity?"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
).launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




{'question': 'Hi, how are you doing?', 'context': 'Skills and qualifications requirements:\nBachelor’s degree in computer science, information technology or a related field\nProven experience with C#, JavaScript, Selenium and Playwright\nExcellent problem solving and analytical skills\nKnowledge and experience in the following would be desirable: CI/CD, Non-functional automated testing, Page object model and API mocking.', 'text': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\nSkills and qualifications requirements:\nBachelor’s degree in computer science, information technology or a related field\nProven experience with C#, JavaScript, Selenium and Playwright\nExcellent problem solving and analytical skills\nKnowledge and experience in the following would be desirable: CI/CD, Non-functional automated testing, Page object model and API mocking.\nQuestion: Hi, how are you

### T5

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain import HuggingFacePipeline
# Small FLAN-T5 checkpoint, run locally through a transformers pipeline.
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Pass the already-loaded model object rather than the checkpoint name so the
# weights are not loaded a second time inside pipeline().
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    temperature=0.1,
    eos_token_id=tokenizer.eos_token_id,
    do_sample=True,
)
# Rebinds the notebook-wide `llm` to the local T5 pipeline for the cells below.
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
from langchain.prompts import PromptTemplate

# Question-answering prompt for the T5 section: {context} receives retrieved
# text and {question} the user's query. It is handed to RetrievalQA in the
# next cell via chain_type_kwargs.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [None]:
from langchain.chains import RetrievalQA 
# Limit retrieval to the two most similar chunks per query.
retriever = db.as_retriever(search_kwargs={"k": 2})

# Alternative (disabled): "refine" chain type that also returns its sources.
# qa = RetrievalQA.from_chain_type(
#   llm=llm,
#   chain_type="refine",
#   retriever=retriever,
#   return_source_documents=True
# )
# Retrieval QA chain using the custom QA_CHAIN_PROMPT; source documents are
# not included in the result.
qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=False, chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})
print(qa)

# Smoke test. Uses .invoke(), matching the other cells in this notebook,
# instead of calling the chain object directly (deprecated in LangChain).
question = "What skills should an AI solution analyst have?"
result = qa.invoke({"query": question})
print(result["result"])

In [None]:
import gradio as gr

def chat_interface(textbox, chat):
    """Answer one chat message through the T5 retrieval QA chain ``qa``.

    In this section ``qa`` is a RetrievalQA chain, which performs retrieval
    itself, takes its input under ``query`` and returns its answer under
    ``result``. The previous body passed ``question``/``context`` and read
    ``result['text']`` (the LLMChain interface), which does not match it.

    Args:
        textbox: The user's message, passed to the chain as ``query``.
        chat: Second positional argument supplied by ``gr.ChatInterface``
            (presumably the chat history); unused.

    Returns:
        The chain's answer, stripped. If the output echoes the prompt, only
        the text after the ``"\\nHelpful Answer:"`` marker is returned.
    """
    result = qa.invoke({'query': textbox})
    print(result)
    text = result['result']

    # Strip an echoed prompt when present; otherwise keep the full output
    # instead of raising IndexError on a missing marker.
    parts = text.split('\nHelpful Answer:')
    answer = parts[1] if len(parts) > 1 else text
    return answer.strip()
    
# Chat UI for the T5 section: wraps the chat_interface function defined above
# and starts the Gradio app.
# NOTE(review): retry_btn / undo_btn / clear_btn appear to have been removed
# from gr.ChatInterface in Gradio 5 — confirm the installed version before
# upgrading.
gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=["What does AI stand for?", "What is Software Engineering?", "What is Cybersecurity?"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
).launch()

In [None]:
import gradio as gr

def chat_interface(textbox, chat):
    """Answer one chat message through the retrieval QA chain ``qa``.

    Retrieval and prompt construction are handled inside the chain, so no
    manual retriever call is needed here.

    Args:
        textbox: The user's message, passed to the chain as ``query``.
        chat: Second positional argument supplied by ``gr.ChatInterface``
            (presumably the chat history); unused.

    Returns:
        The chain's ``"result"`` entry.
    """
    # Use .invoke(), matching the other cells in this notebook, instead of
    # calling the chain object directly (deprecated in LangChain).
    result = qa.invoke({'query': textbox})
    # Log whatever source documents the chain returned (an empty list when the
    # chain does not include them in its output).
    docs = result.get("source_documents", [])
    print(docs)
    return result["result"]

# Chat UI for the T5 retrieval QA chain: wraps the chat_interface function
# defined above and starts the Gradio app.
# NOTE(review): retry_btn / undo_btn / clear_btn appear to have been removed
# from gr.ChatInterface in Gradio 5 — confirm the installed version before
# upgrading.
gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=["What does AI stand for?", "What is Software Engineering?", "What is Cybersecurity?"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
).launch()

### Evaluation

## Fine Tuning

In [None]:
# MAYBE DO THIS FIRST? AND SEE THE DOWNSIDE, AND LEARN THAT IT IS NOT REQUIRED (doesn't solve hallucinations and timely context!)
# Fine-tune with input and output example data sets

# Compare with different models (one fine-tuned one just pre-trained)

### Evaluation

In [None]:
# Load test set
# Find that fine-tuning is not needed?

## Fully adapted model (combination of all approaches)

In [None]:
# Knowledge retrieved
# Augmented Prompt
# Fine-tuned/pre-trained LLM

import gradio as gr
def chat_interface(textbox, chat):
    """Answer one chat message with ``llm_chain`` and log the exchange.

    Args:
        textbox: The user's message, passed to the chain as ``question``.
        chat: Second positional argument supplied by ``gr.ChatInterface``
            (presumably the chat history); unused.

    Returns:
        The chain's output, unmodified.
    """
    # Retrieval is not wired in yet; an earlier draft passed
    # db.similarity_search(textbox) results as 'input_documents'.
    bot_reply = llm_chain.run({'question': textbox})

    print("user:", textbox)
    print("bot:", bot_reply)
    return bot_reply

# Chat UI for the combined model: wraps the chat_interface function defined
# above and starts the Gradio app.
# NOTE(review): retry_btn / undo_btn / clear_btn appear to have been removed
# from gr.ChatInterface in Gradio 5 — confirm the installed version before
# upgrading.
gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=["What does AI stand for?", "What is Software Engineering?", "What is Cybersecurity?"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
).launch()