<a href="https://colab.research.google.com/github/ad71/ragbot/blob/master/self_eval_rag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the notebook's third-party dependencies; the %pip magic installs into the environment of the running kernel.
%pip install langchain openai langchain_openai langchain_community transformers pypdf faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m43.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0


In [None]:
import os
import json
import pandas as pd
import numpy as np

from io import StringIO
from collections import Counter
from operator import itemgetter
from enum import Enum

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS, Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_community.vectorstores.faiss import DistanceStrategy

In [None]:
# Source document for the knowledge base (path inside the Colab runtime).
pdf_filepath = '/content/sample_data/machine_learning_basics.pdf'

# Despite its name, `pdf_text` holds the loader's Document objects (they are fed to
# `split_documents` in the next cell), not one raw string.
loader = PyPDFLoader(file_path=pdf_filepath)
pdf_text = loader.load()

In [None]:
# Break the loaded documents into overlapping pieces for retrieval:
# chunk_size=500 with chunk_overlap=100 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
chunks = text_splitter.split_documents(pdf_text)

In [None]:
# Preview the first five chunks as a sanity check on the split.
chunks[0:5]

[Document(page_content='THE FUNDAMENTALS  \nOF MACHINE LEARNING', metadata={'source': '/content/sample_data/machine_learning_basics.pdf', 'page': 0}),
 Document(page_content='WHAT IS MACHINE LEARNING?\nBRIEF HISTORY OF MACHINE LEARNING\nHOW IT WORKS\nMACHINE LEARNING TECHNIQUES\nTHE IMPORTANCE OF THE HUMAN ELEMENT\nWHO’S USING IT?\nCHALLENGES AND HESITATIONS\nTHE FUTURE OF MACHINE LEARNING\nCONTRIBUTORS3\n5\n8\n9\n11\n12\n1514\n16TABLE OF CONTENTS\n2', metadata={'source': '/content/sample_data/machine_learning_basics.pdf', 'page': 1}),
 Document(page_content='WHAT IS MACHINE LEARNING?\nWhether we realize it or not, machine learning is something we \nencounter on a daily basis. While the technology is not new, \nwith the rise of artificial intelligence (AI) and the digital age, it is \nbecoming increasingly important to understand what it is, how it \ndiffers from AI, and the major role it will play in the future. This \nwhitepaper will discuss all of the above, and explore different \n

In [9]:
# Answering prompt for the RAG chain. It is filled with the user's `question` and the
# retrieved `context`, and asks the model for a short answer.
qa_prompt_text = '''
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
'''

qa_prompt = ChatPromptTemplate.from_template(qa_prompt_text)

In [10]:
# prompt template for LLM QA self eval

# Closed set of grade labels for the evaluator's verdict. The `str` mix-in makes each
# member a plain string as well as an Enum member.
# NOTE(review): the member values are lower-case, while the `grade` field description in
# LLMEvalResult advertises 'Correct' / 'Incorrect' — confirm which casing is intended.
# A `#` comment is used instead of a docstring because a docstring on this class may be
# emitted into the JSON schema that is embedded in the prompts — TODO confirm.
class gradeEnum(str, Enum):
    correct = 'correct'
    incorrect = 'incorrect'


# Shape of the evaluator LLM's verdict. The class is handed to JsonOutputParser, which
# derives the format instructions that are injected into the evaluation prompts.
# Documented with `#` comments rather than a docstring because a class docstring may be
# emitted into that schema and thereby change the prompt text — TODO confirm.
class LLMEvalResult(BaseModel):
    # Verdict label, typed as a gradeEnum member.
    grade: gradeEnum = Field(description='Final grade label. Accepted labels : Correct, Incorrect')
    # Short justification for the verdict (the model is asked for at most two sentences).
    description: str = Field(description='Explanation of why the specific grade was assigned. Must be concise. Not more than 2 sentences')

# Parser for the evaluator's JSON reply. It also supplies the format instructions
# (derived from LLMEvalResult) that are baked into the evaluation prompts.
json_parser = JsonOutputParser(pydantic_object=LLMEvalResult)

# Context-free grading prompt: the evaluator sees only the question and the answer.
qa_eval_prompt_text = '''
You are a teacher evaluating a test.
You are provided with a question along with an answer for the question written by a student. Evaluate the question-answer pair and provide feedback.
{format_instructions}
Question: {question}
Answer: {answer}
'''

# `format_instructions` is pre-filled here, so callers provide `question` and `answer`.
qa_eval_prompt = PromptTemplate(
    input_variables=['question', 'answer'],
    partial_variables=dict(format_instructions=json_parser.get_format_instructions()),
    template=qa_eval_prompt_text,
)

# Context-aware grading prompt: the evaluator sees the retrieved context in addition to
# the question and the answer, and is told to mark the answer correct only if it agrees
# with that context.
qa_eval_prompt_with_context_text = '''
You are a teacher evaluating a test.
You are provided with a question along with an answer for the question written by a student. Evaluate the question-answer pair using the provided context and provide feedback. Only mark the answer as correct if it agrees with the provided context.

{format_instructions}
Context: {context}
Question: {question}
Answer: {answer}
'''

qa_eval_prompt_with_context = PromptTemplate(
    # Fix: this prompt was previously built from `qa_eval_prompt_text`, which has no
    # {context} slot, so the context-aware instructions above were never used.
    template=qa_eval_prompt_with_context_text,
    input_variables=['question', 'answer', 'context'],
    # `format_instructions` is pre-filled; callers provide question, answer and context.
    partial_variables={'format_instructions': json_parser.get_format_instructions()},
)

In [11]:
# Colab-only step: read the secret stored under 'OPENAI_API_KEY' from Colab's userdata
# and export it as an environment variable. Presumably this is where the OpenAI clients
# look for the key — the visible cells never pass a key to them explicitly.
from google.colab import userdata
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

In [12]:
# Two separate chat clients with identical settings: `llm` writes the answers and
# `llm_self_eval` grades them. Both use temperature=0.
llm = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0,
)
llm_self_eval = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0,
)

In [13]:
# Embed every chunk with OpenAI embeddings and index the vectors in FAISS.
store = FAISS.from_documents(
    chunks,
    OpenAIEmbeddings(),
    distance_strategy=DistanceStrategy.COSINE,
)

# Retriever view of the store; k=3 is passed as a search argument for each query.
retriever = store.as_retriever(search_kwargs=dict(k=3))

In [14]:
# utils
def format_docs(docs):
    """Concatenate the ``page_content`` of each document into one context string.

    Consecutive documents are separated by a blank line followed by a dashed rule;
    an empty ``docs`` yields an empty string.
    """
    separator = '\n\n ----------------'
    page_texts = [doc.page_content for doc in docs]
    return separator.join(page_texts)

def retrieve_answer(output):
    """Return the ``content`` attribute of ``output`` (used on the LLM's reply message)."""
    answer_text = output.content
    return answer_text

In [15]:
# simple qa chain
# Simple QA chain: retrieve and format the context while passing the question through
# unchanged, fill the answering prompt, then call the LLM.
rag_chain = (
    {
        'context': retriever | format_docs,
        'question': RunnablePassthrough(),
    }
    | qa_prompt
    | llm
)

In [16]:
# Run the simple QA chain on a sample question; the result is the LLM's reply message.
rag_chain.invoke('When was SVM invented?')

AIMessage(content='Support Vector Machines (SVMs) were invented in 1992.', response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 369, 'total_tokens': 384}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-c5fe000c-7136-4ea3-b517-25b454315b02-0')

In [17]:
# same RAG without LCEL
# The same pipeline as the LCEL chain, run one stage at a time.
question = 'When was SVM invented?'

# 1) retrieve the chunks for the question and flatten them into one context string
context = format_docs(retriever.invoke(question))

# 2) fill the answering prompt
prompt = qa_prompt.invoke(dict(question=question, context=context))

# 3) ask the LLM
answer = llm.invoke(prompt)

answer

AIMessage(content='Support Vector Machines (SVMs) were invented in 1992.', response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 369, 'total_tokens': 384}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-07024e7e-edb1-464e-b10b-6890fe4396a2-0')

### Self-evaluating the RAG system on the question-answer pair
In this scenario, the retrieved context is not used as part of the evaluation.

In [20]:
# QA chain with self-evaluation on the question-answer pair only:
#   1) retrieve context and pass the question through,
#   2) produce the answer and keep the question,
#   3) grade the pair with the context-free eval prompt and keep the answer.
rag_chain = (
    RunnableParallel(
        context=retriever | format_docs,
        question=RunnablePassthrough(),
    )
    | {
        'answer': qa_prompt | llm | retrieve_answer,
        'question': itemgetter('question'),
    }
    | {
        'evaluation': qa_eval_prompt | llm_self_eval | json_parser,
        'answer': itemgetter('answer'),
    }
)

In [21]:
# Run the self-evaluating chain; the result has the keys 'evaluation' and 'answer'.
rag_chain.invoke('When was SVM invented?')

{'evaluation': {'grade': 'Correct',
  'description': 'The answer is accurate and provides the necessary information about the invention of Support Vector Machines (SVMs) in 1992 by researchers at AT&T.'},
 'answer': 'Support Vector Machines (SVMs) were invented in 1992 by researchers at AT&T.'}

### QA RAG with self eval II
In addition to the question-answer pair, we also pass the retrieved context to the evaluator LLM.

In [31]:
# QA chain with context-aware self-evaluation, spelled out one stage per step. The
# retrieved context is carried alongside every stage so it reaches the final output:
#   1) retrieve context and pass the question through,
#   2) produce the answer, keeping question and context,
#   3) format the evaluation prompt,
#   4) call the evaluator LLM,
#   5) parse its JSON reply.
rag_chain = (
    RunnableParallel(context=retriever | format_docs, question=RunnablePassthrough())
    | RunnableParallel(
        answer=qa_prompt | llm | retrieve_answer,
        question=itemgetter('question'),
        context=itemgetter('context'),
    )
    # Fix: format the prompt that declares `context` as an input. This step previously
    # used `qa_eval_prompt`, which only takes question and answer, so the evaluator
    # never received the retrieved context.
    | RunnableParallel(input=qa_eval_prompt_with_context, context=itemgetter('context'))
    | RunnableParallel(input=itemgetter('input') | llm_self_eval, context=itemgetter('context'))
    | RunnableParallel(input=itemgetter('input') | json_parser, context=itemgetter('context'))
)

In [32]:
# Run the chain; the result has 'input' (the parsed evaluation) and 'context' (the formatted retrieved text).
rag_chain.invoke('When was SVM invented?')

{'input': {'grade': 'Correct',
  'description': 'The answer correctly states that Support Vector Machines (SVMs) were invented in 1992 by researchers at AT&T.'},
 'context': 'learning approach called Hidden Markov Models (HMMs). This \nsaved billions of dollars in operating costs by spotting things like \ncollect calls.\nSUPPORT VECTOR MACHINES  1992\nResearchers at AT&T invented Support Vector Machines (SVMs) \nin 1992, a technique that revolutionized large scale classification \nbecause of its predictable performance.\n5\n\n ----------------CONVOLUTIONAL NEURAL NETWORK  1996\nPatrick Haffner (Lead Inventive Scientist at Interactions) and \nresearchers from AT&T proposed the first convolutional neural \nnetwork (CNN) in 1996, with a large scale application to check \nrecognition. The influence of this technology was not appreciated \nuntil 10 years later when it became rebranded as deep learning, and \nmachine learning researchers began to focus on another technique \ndeveloped by the

In [33]:
# more concise
rag_chain = (
    RunnableParallel(context=retriever | format_docs, question=RunnablePassthrough())
    | RunnableParallel(answer=qa_prompt | llm | retrieve_answer, question=itemgetter('question'), context=itemgetter('context'))
    | RunnableParallel(input=qa_eval_prompt | llm_self_eval | json_parser, context=itemgetter('context'))
)

In [34]:
# Side channel for intermediate results: the chain below stores the formatted retrieval
# context in the module-level name `context` so it can be inspected after a run.
# The `global` declaration inside the function is what binds the assignment to module
# scope; a module-level `global` statement has no effect and is omitted.

def save_context(x):
    """Store ``x`` in the module-level ``context`` variable and return it unchanged.

    Returning the input lets this function sit in the middle of a pipeline without
    altering what flows through it.
    """
    global context
    context = x
    return x

# Self-evaluating chain that returns only the parsed evaluation. The formatted context
# is recorded through `save_context` instead of being threaded through the pipeline;
# the evaluation itself uses the question/answer-only prompt.
rag_chain = (
    RunnableParallel(
        context=retriever | format_docs | save_context,
        question=RunnablePassthrough(),
    )
    | {
        'answer': qa_prompt | llm | retrieve_answer,
        'question': itemgetter('question'),
    }
    | qa_eval_prompt
    | llm_self_eval
    | json_parser
)

In [35]:
# Run the chain; the result is just the parsed evaluation (grade and description).
rag_chain.invoke('When was SVM invented?')

{'grade': 'Correct',
 'description': 'The answer correctly states that Support Vector Machines (SVMs) were invented in 1992 by researchers at AT&T.'}

In [36]:
# Show the retrieval context that `save_context` recorded during the run above.
context

'learning approach called Hidden Markov Models (HMMs). This \nsaved billions of dollars in operating costs by spotting things like \ncollect calls.\nSUPPORT VECTOR MACHINES  1992\nResearchers at AT&T invented Support Vector Machines (SVMs) \nin 1992, a technique that revolutionized large scale classification \nbecause of its predictable performance.\n5\n\n ----------------CONVOLUTIONAL NEURAL NETWORK  1996\nPatrick Haffner (Lead Inventive Scientist at Interactions) and \nresearchers from AT&T proposed the first convolutional neural \nnetwork (CNN) in 1996, with a large scale application to check \nrecognition. The influence of this technology was not appreciated \nuntil 10 years later when it became rebranded as deep learning, and \nmachine learning researchers began to focus on another technique \ndeveloped by the same group at AT&T: Support Vector Machines.\n\n ----------------Voice Response (IVR) systems in 2001, combining 3 of its machine \nlearning technologies: SVMs, HMMs, and Ad