In [1]:
import os
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from langchain.embeddings import HuggingFaceEmbeddings
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
import logging
import sys
from dotenv import load_dotenv
from llama_index.llms import Ollama
import numpy as np
from trulens_eval import TruLlama, Feedback, Tru, feedback
from trulens_eval.tru_custom_app import instrument
# Start the TruLens session; it reports the database URL it is using.
tru = Tru()

# Load environment variables from the .env file in the current user's home
# directory instead of an absolute path that exists on only one machine.
load_dotenv(os.path.expanduser('~/.env'))

# Route log records to stdout so they show up in the notebook cell output.
logging.basicConfig(
    stream=sys.stdout, level=logging.WARNING
)  # logging.DEBUG for more verbose output

# basicConfig() does nothing when the root logger already has handlers, so an
# explicit fallback handler is kept -- but it is attached only when no stdout
# handler is present. Adding it unconditionally prints every record twice and
# stacks one more duplicate each time this cell is re-run.
_root_logger = logging.getLogger()
if not any(
    isinstance(_handler, logging.StreamHandler)
    and getattr(_handler, "stream", None) is sys.stdout
    for _handler in _root_logger.handlers
):
    _root_logger.addHandler(logging.StreamHandler(stream=sys.stdout))

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


### Input and Parameters

In [2]:
# Documents to index, given as a list of file paths.
# NOTE(review): this is an absolute, machine-specific path. The RAG class in
# this notebook carries its own copy of the same path and does not read this
# variable -- confirm which of the two is meant to be authoritative.
input_file_location = [r"/Users/jeana/Retrieval-Augmented-Generation/LlamaIndex/paul_graham_essay.txt"]

### Define Exceptions

In [3]:
class modelException(Exception):
    """Raised when a requested model name is not one of the supported options.

    Args:
        invalid_value: The value that was rejected.
        allowed_values: Iterable of the values that would have been accepted.

    Both arguments are kept on the instance (``invalid_value`` and
    ``allowed_values``) so callers can inspect them after catching the error.
    """

    def __init__(self, invalid_value, allowed_values):
        self.invalid_value = invalid_value
        self.allowed_values = allowed_values
        # str() every entry so that non-string options (ints, enums, ...) do not
        # make building the error message itself fail with a TypeError.
        allowed_text = ', '.join(str(value) for value in allowed_values)
        message = f"Invalid value: {invalid_value}. Allowed values are: {allowed_text}"
        super().__init__(message)

### Define Evaluator with Feedback functions

In [4]:
def feedbacks():
    """Build the TruLens feedback functions used to score each query record.

    The Azure OpenAI provider is configured from the environment variables
    OPENAI_DEPLOYMENT_NAME, OPENAI_API_KEY, OPENAI_DEPLOYMENT_VERSION and
    OPENAI_DEPLOYMENT_ENDPOINT; a missing variable raises ``KeyError``.

    Returns:
        list: The groundedness, answer-relevance and context-relevance
        ``Feedback`` objects, in that order.
    """
    env = os.environ

    def source_node_text():
        # Selector for the text of every source node returned by the query.
        return TruLlama.select_source_nodes().node.text

    # Provider whose methods back all three feedback functions.
    provider = feedback.AzureOpenAI(
        deployment_name=env['OPENAI_DEPLOYMENT_NAME'],
        api_key=env['OPENAI_API_KEY'],
        api_version=env['OPENAI_DEPLOYMENT_VERSION'],
        azure_endpoint=env['OPENAI_DEPLOYMENT_ENDPOINT'],
    )

    # Relevance of the final answer to the question that was asked.
    answer_relevance = Feedback(
        provider.relevance, name="Answer Relevance"
    ).on_input_output()

    # Relevance of each retrieved chunk to the question, averaged over chunks.
    context_relevance = (
        Feedback(provider.qs_relevance, name="Context Relevance")
        .on_input()
        .on(source_node_text())
        .aggregate(np.mean)
    )

    # Groundedness of the answer on the retrieved chunks.
    grounded = feedback.Groundedness(groundedness_provider=provider)
    groundedness_score = (
        Feedback(grounded.groundedness_measure, name="Groundedness")
        .on(source_node_text())
        .on_output()
    )

    return [groundedness_score, answer_relevance, context_relevance]


In [5]:
# Build the feedback list. The assignment rebinds the name ``feedbacks`` from
# the factory function to the list it returns, so without this guard a second
# run of the cell would fail with "'list' object is not callable".
if callable(feedbacks):
    feedbacks = feedbacks()



Arguments ['api_key', 'api_version', 'azure_endpoint'] are ignored as `client` was provided.
✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input statement will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


### RAG from Scratch

In [6]:
class RAG():
    """Retrieval-augmented generation pipeline with TruLens recording.

    Construction selects an LLM and an embedding model by name, loads the
    input documents and builds a ``VectorStoreIndex`` over them. ``query``
    then answers a question against that index while a ``TruLlama`` recorder
    is active.

    Class attributes:
        llm_list: Accepted values for ``llm_name``.
        embedding_list: Accepted values for ``embedding_model``.
        default_input_files: Files indexed when ``input_files`` is not given.
    """

    llm_list = ['llama2', 'gpt3.5Turbo']
    embedding_list = ['text-embedding-ada-002', 'sentence-transformers/all-mpnet-base-v2', "BAAI/bge-small-en-v1.5"]
    default_input_files = [r"/Users/jeana/Retrieval-Augmented-Generation/LlamaIndex/paul_graham_essay.txt"]

    def __init__(self, llm_name, embedding_model, feedbacks, input_files=None):
        """Validate the model choices and build the index.

        Args:
            llm_name: One of ``llm_list``.
            embedding_model: One of ``embedding_list``.
            feedbacks: Feedback functions handed to the TruLlama recorder
                on every ``query`` call.
            input_files: Optional path, or iterable of paths, of the files to
                index. Defaults to ``default_input_files``.

        Raises:
            modelException: If ``llm_name`` or ``embedding_model`` is not an
                accepted value.
            KeyError: If an Azure option is selected and one of the
                environment variables it reads is missing.
        """
        if llm_name not in self.llm_list:
            raise modelException(llm_name, self.llm_list)
        if embedding_model not in self.embedding_list:
            raise modelException(embedding_model, self.embedding_list)

        # Keep the feedback functions passed in so query() uses them instead
        # of depending on a module-level variable that happens to share the name.
        self.feedbacks = feedbacks

        # Accept a single path as well as a collection of paths.
        if input_files is None:
            input_files = self.default_input_files
        elif isinstance(input_files, (str, os.PathLike)):
            input_files = [input_files]
        self.input_files = list(input_files)

        ## Getting the LLM for prediction
        if llm_name == 'gpt3.5Turbo':
            # NOTE(review): the notebook's own label 'gpt3.5Turbo' is passed as
            # the model name; Azure model ids are normally spelled like
            # 'gpt-35-turbo' -- confirm the library accepts this value.
            self.llm = AzureOpenAI(
                    model=llm_name,
                    deployment_name=os.environ['OPENAI_DEPLOYMENT_NAME'],
                    api_key=os.environ['OPENAI_API_KEY'],
                    azure_endpoint=os.environ['OPENAI_DEPLOYMENT_ENDPOINT'],
                    api_version=os.environ['OPENAI_DEPLOYMENT_VERSION'],
                )
        elif llm_name == 'llama2':
            self.llm = Ollama(model="llama2")

        ## Getting the embedding model
        if embedding_model == 'text-embedding-ada-002':
            self.embedding_model = AzureOpenAIEmbedding(
                    model=embedding_model,
                    deployment_name=os.environ['OPENAI_EMBEDDING_DEPLOYMENT_NAME'],
                    api_key=os.environ['OPENAI_API_KEY'],
                    azure_endpoint=os.environ['OPENAI_DEPLOYMENT_ENDPOINT'],
                    api_version=os.environ['OPENAI_DEPLOYMENT_VERSION'],
                )
        elif embedding_model in ['sentence-transformers/all-mpnet-base-v2', "BAAI/bge-small-en-v1.5"]:
            self.embedding_model = HuggingFaceEmbeddings(
                    model_name=embedding_model)

        # Set service context, documents and index
        self.service_context = ServiceContext.from_defaults(embed_model=self.embedding_model, llm=self.llm)

        self.documents = SimpleDirectoryReader(input_files=self.input_files).load_data()
        self.index = VectorStoreIndex.from_documents(self.documents, service_context=self.service_context)

    def query(self, query: str) -> str:
        """Answer ``query`` against the index while TruLens records the call.

        Prints the formatted sources, the query and the answer.

        Args:
            query: The question to ask.

        Returns:
            The answer converted to ``str``.
        """
        ### INSTRUMENT CHAIN FOR LOGGING WITH TRULENS
        query_engine = self.index.as_query_engine()

        tru_query_engine_recorder = TruLlama(query_engine,
                app_id='LlamaIndex_App1',
                feedbacks=self.feedbacks)

        # The query has to run inside the recorder context to be recorded.
        with tru_query_engine_recorder:
            answer = query_engine.query(query)
            print(answer.get_formatted_sources())
            print("query was:", query)
            print("answer was:", answer)

        return str(answer)

In [7]:
# Model selection: each value must appear in RAG.llm_list / RAG.embedding_list,
# otherwise the constructor raises modelException.
llm_name =  'llama2' # options: gpt3.5Turbo , llama2
embedding_model = 'sentence-transformers/all-mpnet-base-v2' # options: text-embedding-ada-002 , sentence-transformers/all-mpnet-base-v2 , BAAI/bge-small-en-v1.5
# Constructing the object loads the document and builds the vector index.
rag = RAG(llm_name, embedding_model, feedbacks)

In [9]:
# Show the object's repr to confirm the pipeline was constructed.
rag

<__main__.RAG at 0x167b80890>