In [2]:
# !pip install transformers datasets torch langchain-community faiss-cpu sentence-transformers
from getpass import getpass
from dotenv import load_dotenv
import os
from pathlib import Path

env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)

huggingface_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')

if not huggingface_api_token:
    huggingface_api_token = getpass("Enter your Hugging Face Hub API token: ")

In [14]:
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA, LLMChain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub

# Define the RAG prompt template
rag_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
Answer professionally, to the best of your ability, and where appropriate, in a Computer Science educational context.
Use the context and be specific as you can.
Context: {context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(template=rag_template, input_variables=["context", "question"])

# Load data for retrieval from CSV
def load_csv(file):
    loader = CSVLoader(file_path=file)
    return loader.load()

# Split Documents
def split_documents(documents):
    text_split = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    return text_split.split_documents(documents)

# Create a FAISS Index
def create_index(documents):
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/gtr-t5-base",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True}
    )
    db = FAISS.from_documents(documents, embedding=embeddings)
    return db

# Set up the LLM model for RAG
llm = HuggingFaceHub(repo_id="tiiuae/falcon-7b-instruct", model_kwargs={"temperature": 0.5, "max_length": 1024, "max_new_tokens": 200})

# Initialize the RetrievalQA chain with RAG prompt
qa = None

def initialize_qa_chain(llm, retriever):
    global qa
    qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True, chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}, verbose=True)

def chat_interface(textbox, file):
    if file is not None:
        # Load CSV file
        loader = CSVLoader(file_path=file)
        documents = loader.load()
        # Split documents
        text_split = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
        d = text_split.split_documents(documents)

        modelPath = "sentence-transformers/gtr-t5-base" # Use a t5 sentence transformer model that maps sentences & paragraphs to a 768 dimensional dense vector space
        model_kwargs = {'device':'cpu'}
        encode_kwargs = {'normalize_embeddings': True} # Normalizing embeddings can help improve similarity metrics by ensuring that embeddings magnitude does not affect the similarity scores

        embeddings = HuggingFaceEmbeddings(
            model_name=modelPath,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs
        )

        db = FAISS.from_documents(d, embedding=embeddings)
        r = db.as_retriever()
        docs = r.get_relevant_documents("What skills should an AI solution analyst have?")
        print(docs[0].page_content)
        # Initialize QA chain
        initialize_qa_chain(llm, db.as_retriever())
        preview = "File uploaded successfully."

    else:
        preview = "No file uploaded. No RAG."

    if qa is not None:
        input_dict = {'query': textbox}
        result = qa.invoke(input_dict)
        text = result['result']
        print(text)
        answer = text.split('\nHelpful Answer:')[1].strip()
        return answer, preview
    
    # Communicate directly with the language model
    template= """
    Please answer the question.
    Answer professionally, and where appropriate, in a Computer Science educational context.
    Question: {question}
    Response:
    """
    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)
    input_dict = {'question': textbox}
    response_dict = llm_chain.invoke(input_dict)
    response = response_dict['text'].split("Response:")[1].strip()
    return response, preview

def create_demo():
    with gr.Blocks(title="RAG Chatbot Q&A", theme="Soft") as demo:
        with gr.Column():
            with gr.Row():
                response = gr.Text(type="text")

        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    show_label=False,
                    placeholder="Ask here",
                container=False)

            with gr.Column():
                submit_button = gr.Button('Send')

            with gr.Column():
                # uploaded_pdf = gr.UploadButton("Upload file", file_types=[".csv"])
                uploaded_file = gr.File(type="filepath")

            with gr.Column():
                preview = gr.Textbox(label="Preview", type="text")

    # Launch the custom interface with the components
    gr.Interface(
        fn=chat_interface,
        inputs=[text_input, uploaded_file],
        outputs=[response, preview],
        title="Chatbot",
        description="Ask Chatbot any question",
        theme="soft",
        examples=[
            ["What does AI stand for?"],
            ["What is Software Engineering?"],
            ["What is Cybersecurity?"]
        ],
        cache_examples=False,
    ).launch()

create_demo()


Running on local URL:  http://127.0.0.1:7882

To create a public link, set `share=True` in `launch()`.


Prompt: AI solutions analyst
Output: Job Description:
As an AI Solutions Analyst, you will play a crucial role in driving organizational transformations for medium- and large-scale businesses by documenting, analyzing, and improving business processes. You will work within projects to map as-is processes to to-be processes, aligning them with the future operating model. Acting as a liaison between clients and project teams, you will coordinate and collaborate with stakeholders during workshops and contribute to the design and support of ongoing solutions post-transition. Additionally, you will collaborate with teammates on the analysis and design of complex business applications using the latest technologies, ensuring successful delivery of business solutions.
Responsibilities:
Document and analyze as-is processes (functional specs and user stories) and make recommendations for improvement by mapping to-be business processes aligned with the future operating model.


[1m> Entering new