In [None]:
'''
Simple RAG Implementation
Author: Christian Sarmiento
Purpose: This notebook is intended to get a simple implementation of RAG set up with LangChain.
Date Created: 10/1/24
Last Updated: 11/28/24
Data: https://archive.ics.uci.edu/dataset/450/sports+articles+for+objectivity+analysis
Sources:
- https://python.langchain.com/docs/tutorials/rag/
- https://python.langchain.com/docs/tutorials/llm_chain/
- https://medium.com/@dinabavli/rag-basics-basic-implementation-of-retrieval-augmented-generation-rag-e80e0791159d
- ChatGPT: o1-preview
-----------------------------------------------------------------------------------------------------------------------
RAG Research             |               Machine Learning Independent Study             |              DR. EITEL LAURIA
'''

In [44]:
# Install the ragas and nltk packages; ragas supplies the RAG evaluation metrics imported below.
# %pip (rather than !pip) installs into the environment the running kernel uses.
%pip install ragas
%pip install nltk

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [1]:
# Imports
import sys
sys.path.append("/Users/christiansarmiento/Library/CloudStorage/OneDrive-MaristCollege/Machine Learning/Private Code")
from api_keys import openAIKey
from api_keys import langchainKey
from langchain_openai import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain import hub  # for RAG prompt
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.docstore.document import Document
from langchain_core.messages import AIMessage, HumanMessage
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pandas as pd
import os
import gradio as gr  # easy frontend implementation
import numpy as np
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, SemanticSimilarity
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas import SingleTurnSample, EvaluationDataset
from ragas import evaluate


  from .autonotebook import tqdm as notebook_tqdm




In [2]:
# LangChain / OpenAI environment variables: tracing flag plus both API keys
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_API_KEY": langchainKey(),
        "OPENAI_API_KEY": openAIKey(),
    }
)

In [3]:
# Load the OpenAI chat model used by the chains in this notebook.
# The two commented-out lines are an earlier variant that wrapped the model and the
# embeddings for RAGAS right here; the active code keeps `llm` unwrapped and applies
# the RAGAS wrappers where the evaluation metrics are created instead.
#llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
#evalEmbeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())
llm = ChatOpenAI(model="gpt-4o-mini")

In [13]:
# Load Data
# Every regular file in the corpus folder is read with TextLoader and its Document(s)
# are collected into sportsArticles.
folderPath = "/Users/christiansarmiento/Library/CloudStorage/OneDrive-MaristCollege/Machine Learning/Data/sports_articles_corpus/Raw data"
sportsArticles = []

# os.listdir returns entries in arbitrary order and also lists sub-directories and
# hidden files (e.g. macOS ".DS_Store"). Because latin1 can decode any byte sequence,
# such files would otherwise be loaded silently as garbage documents. Sort the names
# for a reproducible load order and keep only regular, non-hidden files.
for fileName in sorted(os.listdir(folderPath)):
    filePath = os.path.join(folderPath, fileName)
    if fileName.startswith(".") or not os.path.isfile(filePath):
        continue
    loader = TextLoader(filePath, encoding='latin1')  # UTF-8 not working for the files
    sportsArticles.extend(loader.load())

In [14]:
# Break the sports articles into overlapping chunks
textSplitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
texts = textSplitter.split_documents(sportsArticles)

In [15]:
# Embed the chunks with OpenAI embeddings and index them in a Chroma vector store
embeddingModel = OpenAIEmbeddings()
vectorDB = Chroma.from_documents(documents=texts, embedding=embeddingModel)

In [16]:
# Similarity-search retriever over the vector store; k=3 -> three documents per query
retrievalSettings = {"k": 3}
retriever = vectorDB.as_retriever(
    search_type="similarity",
    search_kwargs=retrievalSettings,
)

# Example of fetching documents directly:
# retrievedDocuments = retriever.invoke("query")

In [None]:
# Setting up the RAG Chain

# Function to format documents into the prompt
def formatDocs(docs):
    """Concatenate the page_content of each document in docs, separated by a blank line."""
    pageTexts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(pageTexts)

# Setup RAG Chain
# The prompt template is pulled from the LangChain hub; the chain feeds it the
# formatted retrieved documents as "context" and the raw query as "question",
# then pipes the prompt through the model and parses the reply to a string.
prompt = hub.pull("rlm/rag-prompt")
chainInputs = {
    "context": retriever | formatDocs,
    "question": RunnablePassthrough(),
}
ragChain = chainInputs | prompt | llm | StrOutputParser()

In [22]:
# Results: stream the answer to a sample question, printing pieces as they arrive
sampleQuestion = "Explain the offside rule in soccer."
for piece in ragChain.stream(sampleQuestion):
    print(piece, end="", flush=True)

The offside rule in soccer states that a player is in an offside position if they are nearer to the opponent's goal line than both the ball and the second-to-last opponent when the ball is played to them, unless they are in their own half or level with the second-to-last opponent. Being in an offside position is not an offense in itself; the player must become involved in active play to be penalized. The rule aims to prevent players from gaining an unfair advantage by lingering near the opponent's goal.

In [2]:
# Test if answers are coming from the llm or from the documents
# Try giving documents that aren't real then asking questions on things off of that
# Avoids the model relying on trained info 
# Play with the system prompt

# Next step after QA - feed answers into the system to make it more conversational
# Implement Gradio
# Knowledge Graph 
# Identifying Metrics - do research!!


In [None]:
'''
Implement with Marist Data
'''

In [4]:
# Load Data (the CSV has no header row, so columns are labelled 0, 1, ...)
csvPath = "/Users/christiansarmiento/Library/CloudStorage/OneDrive-MaristCollege/Machine Learning/Data/Marist_QA.csv"
maristQA = pd.read_csv(csvPath, header=None)

# Wrap every value of column 1 in a Document so the text splitter can consume them
contextPassages = maristQA[1].tolist()
maristContext = [Document(page_content=passage) for passage in contextPassages]

In [5]:
# Break the Marist context documents into overlapping chunks
textSplitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
texts = textSplitter.split_documents(maristContext)

In [6]:
# Embed the chunks with OpenAI embeddings and index them in a Chroma vector store
embeddingModel = OpenAIEmbeddings()
vectorDB = Chroma.from_documents(documents=texts, embedding=embeddingModel)

In [7]:
# Similarity-search retriever over the vector store; k=3 -> three documents per query
retrievalSettings = {"k": 3}
retriever = vectorDB.as_retriever(
    search_type="similarity",
    search_kwargs=retrievalSettings,
)

In [8]:
# Prompts

# System instructions for answering: keep answers short, admit when the answer is
# unknown, and answer from the text substituted for the {context} placeholder.
systemPrompt = (
    
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
    
)

# Single-turn prompt (system instructions + the user's {input}, no chat history).
# NOTE(review): the chains built later in this notebook use qaPrompt, not this
# template — confirm whether `prompt` is still needed.
prompt = ChatPromptTemplate.from_messages(
    
    [
        ("system", systemPrompt),
        ("human", "{input}"),
    ]
    
)

# Instructions for rewriting a follow-up question into a standalone question,
# so retrieval does not depend on earlier turns of the conversation.
contextualizeSystemPrompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Rewriting prompt: instructions, then the prior messages, then the new user input.
contextualizePrompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualizeSystemPrompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
# Retriever built from the model, the base retriever and the rewriting prompt.
historyAwareRetriever = create_history_aware_retriever(
    llm, retriever, contextualizePrompt
)

# Answering prompt: same system instructions as above, plus the chat history
# placeholder so the model sees earlier turns before the new {input}.
qaPrompt = ChatPromptTemplate.from_messages(
    [
        ("system", systemPrompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [9]:
# Make chains: an answering chain over the QA prompt, then a retrieval chain that
# feeds it documents from the history-aware retriever
questionAnswerChain = create_stuff_documents_chain(llm=llm, prompt=qaPrompt)
ragChain = create_retrieval_chain(
    retriever=historyAwareRetriever,
    combine_docs_chain=questionAnswerChain,
)

In [11]:
# Talking to ChatGPT, making RAG conversational
conversationHistory = []
while True:

    # Ask for the next prompt; entering 0 ends the session
    userQuery = input("Prompt (0 to quit): ")
    if userQuery == '0':
        break

    # Echo the prompt so input and output appear together in a VS Code environment;
    # feel free to comment this out in Jupyter
    print(f"User: {userQuery}")

    # Call ChatGPT using the RAG chain, passing along the conversation so far
    llmResponse = ragChain.invoke({"input": userQuery, "chat_history": conversationHistory})
    print(f"LLM: {llmResponse['answer']}")
    print()

    # Remember this turn for the following prompts
    conversationHistory += [
        HumanMessage(content=userQuery),
        AIMessage(content=llmResponse["answer"]),
    ]

Marist College is located on the banks of the Hudson River and also has a campus in Florence, Italy.

Marist College is in Poughkeepsie, New York, situated along the Hudson River. The Florence campus is located in Florence, Italy.



In [15]:
# Frontend with Gradio

# Samples collected for the RAG metrics; simpleRAG appends one entry per call.
evaluationSamples = []


def simpleRAG(userQuery, history, correctAnswer):
    """
    Run one conversational turn through the RAG chain and record it for evaluation.

    Args:
        userQuery: The user's question for this turn.
        history: List of prior HumanMessage/AIMessage objects, or None to start
            a new conversation. When a list is given it is extended in place.
        correctAnswer: Ground-truth answer stored as the evaluation reference.

    Returns:
        The chat history list with this turn's HumanMessage and AIMessage
        appended exactly once.

    Side effects:
        Appends a dict with the keys "user_input", "retrieved_contexts",
        "response" and "reference" to the module-level evaluationSamples list.
    """

    # Ensure there is a list to use for the conversation history
    if history is None:
        history = []

    # Call ChatGPT using RAG chain
    llmResponse = ragChain.invoke({"input": userQuery, "chat_history": history})
    responseText = llmResponse["answer"]

    # Use the documents the chain actually retrieved for this answer (returned under
    # "context") instead of issuing a second retrieval with the raw query: with chat
    # history the retriever may search on a reformulated question, so a separate
    # lookup can return different documents than the ones the answer was based on.
    retrievedContexts = [document.page_content for document in llmResponse["context"]]

    # Save information for RAG metrics
    evaluationSamples.append({
        "user_input": userQuery,
        "retrieved_contexts": retrievedContexts,
        "response": responseText,
        "reference": correctAnswer    # ground truth
    })

    # Save this turn to the chat history exactly once (it was previously added twice,
    # which duplicated every message the model saw on later turns)
    history.extend([
        HumanMessage(content=userQuery),
        AIMessage(content=responseText),
    ])

    return history


In [None]:
# Frontend
# Gradio UI around simpleRAG: a text box for the query, a state slot for the chat
# history, and a text box for the ground-truth answer used in evaluation.
# NOTE(review): two outputs ("chatbot" and "state") are declared here, but simpleRAG
# returns a single value (the history list of message objects) — confirm the return
# value matches what these output components expect before relying on the UI.
interface = gr.Interface(
    fn=simpleRAG,  
    inputs=["text", "state", gr.Textbox(label="Correct Answer")],  
    outputs=["chatbot", "state"],  
    title="Simple RAG",  
    description="Initial setup for a simple conversational RAG process."
)

# Launch the frontend
interface.launch()

In [11]:
# Function to evaluate our RAG pipeline
def _isValidScore(value):
    """Return True when value is a real number (Python or numpy scalar) that is not NaN."""
    if not isinstance(value, (int, float, np.integer, np.floating)):
        return False
    # NaN is the only number that is not equal to itself
    return bool(value == value)


async def pipelineEvaluation(dataset, metrics):
    """
    Score every recorded run with every metric and summarise the scores.

    Args:
        dataset: Iterable of dicts with the keys "user_input", "reference",
            "retrieved_contexts" and "response".
        metrics: Iterable of metric objects exposing an async
            single_turn_ascore(sample) method.

    Returns:
        A tuple (results, metricsStats):
        - results: one dict per run holding "input_query" plus one entry per
          metric, keyed by the metric's class name. The entry is the score, or
          the string "Error: <message>" when the metric raised.
        - metricsStats: per metric class name, a dict with "mean" and "std_dev"
          over the valid scores, or the string "No valid scores" for both when
          no run produced a usable score.

    Error strings and NaN scores stay in `results` but are left out of the
    statistics, so a single failed sample cannot turn the mean into NaN.
    """

    # Run through our runs
    results = []
    for run in dataset:

        # Create a SingleTurnSample object from the saved inputs/outputs
        sample = SingleTurnSample(
            user_input=run["user_input"],
            response=run["response"],
            reference=run["reference"],
            retrieved_contexts=run["retrieved_contexts"],
        )

        # Evaluate metrics
        runResults = {"input_query": run["user_input"]}
        for metric in metrics:
            metricName = type(metric).__name__

            # Get the score for the given metric
            try:
                runResults[metricName] = await metric.single_turn_ascore(sample)

            except Exception as e:
                # Keep going and record the error so one failing metric
                # does not abort the whole evaluation
                runResults[metricName] = f"Error: {str(e)}"

        # Save metric results
        results.append(runResults)

    # Calculate mean and standard deviation for each metric
    metricsStats = {}
    for metric in metrics:
        metricName = type(metric).__name__
        scores = [result[metricName] for result in results if _isValidScore(result[metricName])]

        # Only calculate stats if there are valid scores
        if scores:
            metricsStats[metricName] = {
                "mean": np.mean(scores),
                "std_dev": np.std(scores),
            }

        else:
            metricsStats[metricName] = {
                "mean": "No valid scores",
                "std_dev": "No valid scores",
            }

    return results, metricsStats

In [12]:
# Load metrics
# The three LLM-judged metrics each receive their own wrapped instance of the chat
# model; semantic similarity is embedding-based and gets wrapped OpenAI embeddings.
evalMetrics = [
    metricClass(llm=LangchainLLMWrapper(llm))
    for metricClass in (LLMContextRecall, FactualCorrectness, Faithfulness)
]
evalMetrics.append(
    SemanticSimilarity(embeddings=LangchainEmbeddingsWrapper(OpenAIEmbeddings()))
)

In [13]:
# Evaluate our pipeline responses
evalResults = await pipelineEvaluation(evaluationSamples, evalMetrics)
for result in evalResults:
    print(result)

{'input_query': 'Who is Carolyn Matheus?', 'LLMContextRecall': 0.0, 'FactualCorrectness': 0.0, 'Faithfulness': 1.0, 'SemanticSimilarity': 0.9328844088386496}


In [19]:
# Sample 40 records (without replacement) from our dataset.
# A fixed random_state keeps the evaluation subset identical across kernel restarts,
# so metric results from different runs are comparable.
maristTestSample = maristQA.sample(40, replace=False, random_state=42)
maristTestSample.head()

Unnamed: 0,0,1
147,Can the college help me plan for graduate school?,"""Center for Career ServicesYour Path to Succes..."
136,When was the tunnel made?,"""About Marist CollegeMarist History: 2008-Pres..."
362,When are computer science classes held?,"""Master of Science in Computer Science \u2013 ..."
192,How many people are on the presidential search...,"""Presidential SearchApril 15, 2021 Memo to the..."
588,Where is student financial services?,"""Transfer Student Admission Department Student..."


In [20]:
# Run our chain with each question and evaluate
chatHistory = None
for row in maristTestSample.iterrows():
    chatHistory = simpleRAG(row[1][0], chatHistory, row[1][1])

## Evaluation
evalResults, metricStats = await pipelineEvaluation(evaluationSamples, evalMetrics)
for result in evalResults:
    print(result)

for metric in metricStats.keys():
    print(f"{metric} - Mean: {metricStats[metric]['mean']}, St. Dev: {metricStats[metric]['std_dev']}")

{'input_query': 'Can visitors park on campus?', 'LLMContextRecall': 1.0, 'FactualCorrectness': 0.12, 'Faithfulness': 1.0, 'SemanticSimilarity': 0.8174370158601927}
{'input_query': 'What is the cost of ASIP Program?', 'LLMContextRecall': 0.8, 'FactualCorrectness': 0.19, 'Faithfulness': 1.0, 'SemanticSimilarity': 0.8295583480298927}
{'input_query': "Wat is Dr. Wermuth's phone number?", 'LLMContextRecall': 0.2, 'FactualCorrectness': 0.0, 'Faithfulness': 0.0, 'SemanticSimilarity': 0.7584736795890439}
{'input_query': 'Who is Desiree Dighton?', 'LLMContextRecall': 1.0, 'FactualCorrectness': 0.41, 'Faithfulness': 1.0, 'SemanticSimilarity': 0.9484875438147378}
{'input_query': 'How many courses should be applied by a student in  global studies.', 'LLMContextRecall': 0.5, 'FactualCorrectness': 0.11, 'Faithfulness': 0.6666666666666666, 'SemanticSimilarity': 0.8698464308443776}
{'input_query': 'What are some places students have been able to intern?', 'LLMContextRecall': 0.6, 'FactualCorrectness':