# RAG Pipeline

## Set up
### Import Packages and API keys 

In [1]:
# !pip install transformers datasets torch pinecone-client langchain-community faiss-cpu sentence-transformers
from getpass import getpass
from dotenv import load_dotenv
import os
from pathlib import Path

# Load variables from a local .env file (if present) into the process environment.
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)

huggingface_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')

if not huggingface_api_token:
    # No token in the environment: ask for it interactively without echoing it.
    huggingface_api_token = getpass("Enter your Hugging Face Hub API token: ")
    # The HuggingFaceHub client below is created without an explicit token
    # argument, so it can only pick the token up from the environment. Export
    # the prompted value; otherwise the prompt has no effect.
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = huggingface_api_token

### Model selection

In [2]:
from langchain_community.llms import HuggingFaceHub
# Open-source FLAN-T5 XXL checkpoint, addressed by its Hugging Face repo id.
model_name = "google/flan-t5-xxl"
llm = HuggingFaceHub(
    repo_id=model_name,
    model_kwargs=dict(temperature=0.5, max_length=1024, max_new_tokens=200),
)

  from .autonotebook import tqdm as notebook_tqdm


## Template-based Prompting

In [3]:
# I will be using Langchain
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, ConversationalRetrievalChain

# Prompt skeleton: a fixed Computer Science framing, then the user's question
# in the {question} slot; the model's completion follows "Response:".
template = """
Try to be helpful as you can in Computer Science context.
Question: {question}
Response:
"""

# Bind the single {question} variable and pair the prompt with the LLM.
prompt = PromptTemplate(input_variables=["question"], template=template)
llm_chain = LLMChain(llm=llm, prompt=prompt)
# Alternative left disabled: llm_chain = load_qa_chain(llm, chain_type="stuff")


### Chat Interface

In [6]:
import gradio as gr
def chat_interface(textbox, chat):
    """Answer a single chat message with the template-prompted LLM chain.

    Args:
        textbox: The user's message text, as passed in by the chat UI.
        chat: Second positional argument supplied by ``gr.ChatInterface``
            (the conversation so far). It is not used, so every question is
            answered independently of earlier turns.

    Returns:
        The text generated by ``llm_chain`` for this question.
    """
    # `Chain.run` is deprecated in LangChain; `invoke` returns a dict holding
    # the inputs and outputs, so read the generated text via the output key.
    result = llm_chain.invoke({'question': textbox})
    response = result[llm_chain.output_key]

    # Echo the exchange into the cell output as a lightweight transcript.
    print("user:", textbox)
    print("bot:", response)
    return response

# Assemble the chat UI first, then start it as a separate step.
chat_ui = gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(
        placeholder="Ask me a question",
        container=False,
        scale=7,
    ),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=[
        "What does AI stand for?",
        "What is Software Engineering?",
        "What is Cybersecurity?",
    ],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)
chat_ui.launch()


Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




  warn_deprecated(


user: What does AI stand for?
bot: Artificial Intelligence
user: What is Software Engineering?
bot: Software Engineering is a discipline that focuses on the development of software and the engineering practices that support it.
user: What is Cybersecurity?
bot: Cybersecurity is the science and practice of protecting information systems from cyber attacks.


### Evaluation
Evaluate the baseline model before applying retrieval augmentation or fine-tuning.

In [None]:
# Load standardized test set

# ROUGE? BLEU?

## RAG from synthetic data set

In [None]:
# Use LangChain packages to help implement retrieval-augmented generation (RAG)
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain


In [None]:
import gradio as gr
def chat_interface(textbox, chat):
    """Answer a single chat message; retrieval is not wired in yet.

    Args:
        textbox: The user's message text, as passed in by the chat UI.
        chat: Second positional argument supplied by ``gr.ChatInterface``
            (the conversation so far). It is not used here.

    Returns:
        The text generated by ``llm_chain`` for this question.
    """
    # TODO: retrieval step, currently disabled because no vector store `db`
    # is built in this notebook yet:
    #   docs = db.similarity_search(textbox)
    #   input_dict = {'question': textbox, 'input_documents': docs}
    # `Chain.run` is deprecated in LangChain; `invoke` returns a dict holding
    # the inputs and outputs, so read the generated text via the output key.
    result = llm_chain.invoke({'question': textbox})
    response = result[llm_chain.output_key]

    # Echo the exchange into the cell output as a lightweight transcript.
    print("user:", textbox)
    print("bot:", response)
    return response

# Assemble the chat UI first, then start it as a separate step.
chat_ui = gr.ChatInterface(
    fn=chat_interface,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(
        placeholder="Ask me a question",
        container=False,
        scale=7,
    ),
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    examples=[
        "What does AI stand for?",
        "What is Software Engineering?",
        "What is Cybersecurity?",
    ],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)
chat_ui.launch()

### Evaluation

## Fine Tuning

### Evaluation

## Fully adapted model (combination of all approaches)

In [None]:
# Knowledge retrieved
# Augmented Prompt
# Fine-tuned LLM
