# Initial Framework RAG Model Support

## Pre-requisites

In [1]:
%pip install -q qdrant-client

Note: you may need to restart the kernel to use updated packages.


In [2]:
# load openai api key
import os

from dotenv import load_dotenv
load_dotenv()

# Fail fast with a clear message when the OpenAI credentials were not loaded.
if 'OPENAI_API_KEY' not in os.environ:
    raise ValueError('OPENAI_API_KEY is not set')

## Dataset Loader

In [3]:
# load documents
import os
from csv import DictReader
from uuid import uuid4

import pandas as pd


column_map = {"RFP_Question": "question", "RFP_Answer": "ground_truth"}


def load_documents(prefix, root_dir="datasets/rag/"):
    """Load every matching RFP CSV file into a single DataFrame.

    Args:
        prefix: Only files whose name starts with this prefix (and ends with
            ``.csv``) are read.
        root_dir: Directory that is scanned for CSV files. Defaults to
            ``datasets/rag/``.

    Returns:
        A DataFrame restricted to the columns ``id``, ``RFP_Question`` and
        ``RFP_Answer``. ``id`` is a freshly generated UUID4 string per row.

    Raises:
        FileNotFoundError: If ``root_dir`` does not exist (from ``os.listdir``).
        ValueError: If no rows could be loaded for ``prefix``.
        KeyError: If the loaded rows lack the ``RFP_Question`` or
            ``RFP_Answer`` column.
    """
    documents = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any later split) reproducible across machines.
    for file in sorted(os.listdir(root_dir)):
        if not (file.startswith(prefix) and file.endswith(".csv")):
            continue

        # newline="" lets the csv module handle line endings itself, so quoted
        # answers that span several lines are parsed intact; the explicit
        # encoding avoids depending on the platform's locale default.
        with open(os.path.join(root_dir, file), newline="", encoding="utf-8") as f:
            for row in DictReader(f):
                # add a unique id to the row
                row["id"] = str(uuid4())
                documents.append(row)

    if not documents:
        # Without this guard the column selection below fails with an opaque
        # KeyError on an empty frame.
        raise ValueError(
            f"No rows loaded from CSV files starting with {prefix!r} in {root_dir!r}"
        )

    df = pd.DataFrame(documents)

    return df[["id", "RFP_Question", "RFP_Answer"]]

def load_dataset_split(limit=None, random_state=42):
    """Load the existing RFP questions and split them 80/20.

    Args:
        limit: Optional maximum number of rows to keep (taken from the top of
            the loaded data) before splitting. ``None`` keeps every row.
        random_state: Seed forwarded to ``DataFrame.sample`` so the split is
            the same on every run. Pass ``None`` for a different random split
            on each call.

    Returns:
        A ``(train_df, test_df)`` tuple. ``train_df`` is the 80% sample that
        gets inserted into the vector store; ``test_df`` holds the remaining
        rows and is used to evaluate the search results.
    """
    df = load_documents("rfp_existing_questions")

    # Compare against None so that an explicit limit of 0 is honoured instead
    # of being treated as "no limit".
    if limit is not None:
        df = df.head(limit)

    # split the dataset into a "train" - which gets inserted into the vector store
    # and a "test" - which is used to evaluate the search results
    train_df = df.sample(frac=0.8, random_state=random_state)
    test_df = df.drop(train_df.index)

    return train_df, test_df

## Embedding Model Selection

First, let's set up our embedding model and run some tests to make sure it's working well.

In [4]:
from openai import OpenAI

from validmind.models import FunctionModel

client = OpenAI()


def embed(input):
    """Attach an OpenAI embedding of the row's question to ``input``.

    ``input["RFP_Question"]`` is sent to the ``text-embedding-3-small`` model
    and the first vector in the response is stored under
    ``input["embedding"]``. ``input`` is modified in place and returned.
    """
    response = client.embeddings.create(
        input=input["RFP_Question"],
        model="text-embedding-3-small",
    )
    first_item = response.data[0]
    input["embedding"] = first_item.embedding

    return input

# Wrap `embed` as a ValidMind FunctionModel identified by "embedding_model".
vm_embedder = FunctionModel(input_id="embedding_model", predict_fn=embed)

Let's create our test dataset so we can run it through our different models.

In [5]:
import validmind as vm

# Keep the first 20 loaded rows and split them into the vector-store ("train")
# and evaluation ("test") sets.
train_df, test_df = load_dataset_split(20)

# Wrap the evaluation split as a ValidMind dataset.
vm_test_ds = vm.init_dataset(
    test_df,
    text_column="RFP_Question", # some NLP tests that work with text data require a `text_column` to be specified
    target_column="RFP_Answer",
    __log=False,
)

# Preview the wrapped dataset.
vm_test_ds.df.head()

2024-05-07 13:34:35,965 - INFO(validmind.client): Pandas dataset detected. Initializing VM Dataset instance...


Unnamed: 0,id,RFP_Question,RFP_Answer
3,b91395d4-44f8-45f3-bfcf-95dc50812ffc,What actions do you undertake to secure user d...,User privacy and data security are paramount. ...
9,7d878600-9fd4-4d91-997e-ae4e800407f0,How do your LLMs continuously learn and update...,We implement advanced continuous learning mech...
11,61acbd2c-e04f-4b59-a38f-f902f9ae552b,How do you ensure your LLMs can efficiently ha...,We conduct extensive performance testing under...
14,f1f4289e-b136-4cb2-a124-ef4d02dc7c59,What is your approach to maintaining and suppo...,Our post-deployment support is designed to ens...


In [6]:
# Run the embedding model over the test dataset; the result is stored in an
# `embedding_model_prediction` column.
vm_test_ds.assign_predictions(vm_embedder)

2024-05-07 13:34:35,972 - INFO(validmind.vm_models.dataset.utils): Running predict_proba()... This may take a while
2024-05-07 13:34:35,972 - INFO(validmind.vm_models.dataset.utils): Not running predict_proba() for unsupported models.
2024-05-07 13:34:35,972 - INFO(validmind.vm_models.dataset.utils): Running predict()... This may take a while
2024-05-07 13:34:36,959 - INFO(validmind.vm_models.dataset.utils): Done running predict()


In [7]:
# Preview the dataset, now including the embedding predictions.
vm_test_ds.df.head()

Unnamed: 0,id,RFP_Question,RFP_Answer,embedding_model_prediction
3,b91395d4-44f8-45f3-bfcf-95dc50812ffc,What actions do you undertake to secure user d...,User privacy and data security are paramount. ...,"{'embedding': [0.007698851637542248, 0.0075916..."
9,7d878600-9fd4-4d91-997e-ae4e800407f0,How do your LLMs continuously learn and update...,We implement advanced continuous learning mech...,"{'embedding': [-0.010829819366335869, 0.029368..."
11,61acbd2c-e04f-4b59-a38f-f902f9ae552b,How do you ensure your LLMs can efficiently ha...,We conduct extensive performance testing under...,"{'embedding': [0.02485279180109501, 0.03754840..."
14,f1f4289e-b136-4cb2-a124-ef4d02dc7c59,What is your approach to maintaining and suppo...,Our post-deployment support is designed to ens...,"{'embedding': [-0.004068069159984589, 0.049269..."


Let's go ahead and run one of the ValidMind embeddings stability analysis tests to make sure our embeddings model is working properly.

In [8]:
from validmind.tests import run_test

# NOTE(review): the stability test call below is commented out, so this cell
# currently only imports `run_test` — TODO confirm whether it should be re-enabled.
# result = run_test(
#     "validmind.model_validation.embeddings.StabilityAnalysisRandomNoise",
#     inputs={"model": vm_embedder, "dataset": vm_test_ds},
#     params={"probability": 0.3},
# )

## Setup Vector Store

#### Generate embeddings for the questions

> Note: We use the name `train_df` to refer to the dataset that is loaded into the vector store and used as context. This is not a great name, but it's consistent with data science terminology.

In [9]:
# Embed every question in the context ("train") split; `embed` issues one
# embeddings API request per row.
train_df["embedding"] = [embed(row)["embedding"] for _, row in train_df.iterrows()]
train_df.head()

Unnamed: 0,id,RFP_Question,RFP_Answer,embedding
10,68d94049-74b8-465f-ac5b-4d4179d54032,What measures do you employ to ensure your LLM...,We prioritize transparency and explainability ...,"[0.010077687911689281, 0.02444615587592125, 0...."
4,964fb6ab-68c8-4248-989a-74a74eec7d03,What considerations do you take into account f...,Our design philosophy centers on simplicity an...,"[-0.0029329643584787846, -0.003287967294454574..."
12,c41ed83b-a827-456f-b6da-f4256aaca536,Could you provide case studies on successful L...,We can share case studies of successful LLM-ba...,"[-0.002833468606695533, 0.011640194803476334, ..."
6,3e598d8d-55c4-4946-a9d7-a580344e941b,How do you evaluate the success of your AI app...,Success measurement is tailored to each projec...,"[0.014263585209846497, 0.022252684459090233, 0..."
17,b0d46173-6f05-4ac4-9700-a6d8bcb61e43,How do you perform risk assessment and identif...,We conduct thorough assessments of AI systems ...,"[-0.011703639291226864, 0.023393888026475906, ..."


#### Insert embeddings and questions into Vector DB

In [10]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams

# ":memory:" selects qdrant-client's local in-memory mode, so no server is needed.
qdrant = QdrantClient(":memory:")
# `size` must equal the length of the vectors produced by `embed`;
# 1536 is the default output dimension of text-embedding-3-small.
qdrant.recreate_collection(
    "rfp_rag_collection",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)
# Insert one point per row of the context ("train") split: the row's UUID is the
# point id, its embedding is the vector, and the question/answer text is the payload.
qdrant.upsert(
    "rfp_rag_collection",
    points=[
        PointStruct(
            id=row["id"],
            vector=row["embedding"],
            payload={"RFP_Question": row["RFP_Question"], "RFP_Answer": row["RFP_Answer"]},
        )
        for _, row in train_df.iterrows()
    ],
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

## Setup Retrieval Model

In [11]:
def retrieve(input):
    """Look up the stored Q/A pairs closest to ``input["embedding"]``.

    Searches the ``rfp_rag_collection`` collection and stores one formatted
    ``"Q: ...\\nA: ...\\n"`` string per hit under ``input["contexts"]``.
    At most ``input["limit"]`` hits are requested (10 when the key is absent).
    ``input`` is modified in place and returned.
    """
    input["contexts"] = []

    # we could add a row to the dataset to specify a limit
    max_hits = input.get("limit", 10)
    hits = qdrant.search(
        "rfp_rag_collection",
        query_vector=input["embedding"],
        limit=max_hits,
    )

    input["contexts"].extend(
        f"Q: {hit.payload['RFP_Question']}\nA: {hit.payload['RFP_Answer']}\n"
        for hit in hits
    )

    return input


# Wrap `retrieve` as a ValidMind FunctionModel identified by "retrieval_model".
vm_retriever = FunctionModel(input_id="retrieval_model", predict_fn=retrieve)

In [12]:
# Run the retrieval model over the test dataset; the result is stored in a
# `retrieval_model_prediction` column.
vm_test_ds.assign_predictions(vm_retriever)

2024-05-07 13:34:41,159 - INFO(validmind.vm_models.dataset.utils): Running predict_proba()... This may take a while
2024-05-07 13:34:41,159 - INFO(validmind.vm_models.dataset.utils): Not running predict_proba() for unsupported models.
2024-05-07 13:34:41,159 - INFO(validmind.vm_models.dataset.utils): Running predict()... This may take a while
2024-05-07 13:34:41,161 - INFO(validmind.vm_models.dataset.utils): Done running predict()


In [13]:
# Preview the dataset, now including the retrieved contexts.
vm_test_ds.df.head()

Unnamed: 0,id,RFP_Question,RFP_Answer,embedding_model_prediction,retrieval_model_prediction
3,b91395d4-44f8-45f3-bfcf-95dc50812ffc,What actions do you undertake to secure user d...,User privacy and data security are paramount. ...,"{'embedding': [0.007698851637542248, 0.0075916...",{'contexts': ['Q: What steps do you take to en...
9,7d878600-9fd4-4d91-997e-ae4e800407f0,How do your LLMs continuously learn and update...,We implement advanced continuous learning mech...,"{'embedding': [-0.010829819366335869, 0.029368...",{'contexts': ['Q: Please outline the training ...
11,61acbd2c-e04f-4b59-a38f-f902f9ae552b,How do you ensure your LLMs can efficiently ha...,We conduct extensive performance testing under...,"{'embedding': [0.02485279180109501, 0.03754840...",{'contexts': ['Q: Describe your strategy for i...
14,f1f4289e-b136-4cb2-a124-ef4d02dc7c59,What is your approach to maintaining and suppo...,Our post-deployment support is designed to ens...,"{'embedding': [-0.004068069159984589, 0.049269...",{'contexts': ['Q: Could you provide case studi...


## Setup Generation Model

In [14]:
# System message sent with every generation request; `.strip()` removes the
# leading/trailing newlines introduced by the triple-quoted literal.
system_prompt = """
You are an expert RFP AI assistant.
You are tasked with answering new RFP questions based on existing RFP questions and answers.
You will be provided with the existing RFP questions and answer pairs that are the most relevant to the new RFP question.
After that you will be provided with a new RFP question.
You will generate an answer and respond only with the answer.
Ignore your pre-existing knowledge and answer the question based on the provided context.
""".strip()


def generate(input):
    """Answer the row's RFP question from its retrieved contexts.

    Sends three chat messages to ``gpt-3.5-turbo``: the system prompt, the
    entries of ``input["contexts"]`` joined by blank lines, and
    ``input["RFP_Question"]``. The content of the first returned choice is
    stored under ``input["answer"]``. ``input`` is modified in place and
    returned.
    """
    context_block = "\n\n".join(input["contexts"])
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": context_block},
        {"role": "user", "content": input["RFP_Question"]},
    ]

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    first_choice = completion.choices[0]
    input["answer"] = first_choice.message.content

    return input

# Wrap `generate` as a ValidMind FunctionModel identified by "generation_model".
vm_generator = FunctionModel(input_id="generation_model", predict_fn=generate)

In [15]:
# Run the generation model over the test dataset; the result is stored in a
# `generation_model_prediction` column.
vm_test_ds.assign_predictions(vm_generator)

2024-05-07 13:34:41,247 - INFO(validmind.vm_models.dataset.utils): Running predict_proba()... This may take a while
2024-05-07 13:34:41,248 - INFO(validmind.vm_models.dataset.utils): Not running predict_proba() for unsupported models.
2024-05-07 13:34:41,249 - INFO(validmind.vm_models.dataset.utils): Running predict()... This may take a while
2024-05-07 13:34:52,296 - INFO(validmind.vm_models.dataset.utils): Done running predict()


In [16]:
# Preview the dataset, now including the generated answers.
vm_test_ds.df.head()

Unnamed: 0,id,RFP_Question,RFP_Answer,embedding_model_prediction,retrieval_model_prediction,generation_model_prediction
3,b91395d4-44f8-45f3-bfcf-95dc50812ffc,What actions do you undertake to secure user d...,User privacy and data security are paramount. ...,"{'embedding': [0.007698851637542248, 0.0075916...",{'contexts': ['Q: What steps do you take to en...,{'answer': 'We prioritize data security and pr...
9,7d878600-9fd4-4d91-997e-ae4e800407f0,How do your LLMs continuously learn and update...,We implement advanced continuous learning mech...,"{'embedding': [-0.010829819366335869, 0.029368...",{'contexts': ['Q: Please outline the training ...,{'answer': 'Our LLMs are designed to adapt and...
11,61acbd2c-e04f-4b59-a38f-f902f9ae552b,How do you ensure your LLMs can efficiently ha...,We conduct extensive performance testing under...,"{'embedding': [0.02485279180109501, 0.03754840...",{'contexts': ['Q: Describe your strategy for i...,{'answer': 'To ensure our Large Language Model...
14,f1f4289e-b136-4cb2-a124-ef4d02dc7c59,What is your approach to maintaining and suppo...,Our post-deployment support is designed to ens...,"{'embedding': [-0.004068069159984589, 0.049269...",{'contexts': ['Q: Could you provide case studi...,{'answer': 'We provide ongoing support and mai...


## Setup RAG Model (Pipeline of "Component" Models)

Now that we have our individual models set up, let's create a `PipelineModel` instance that will chain them together and give us a single RAG model that can be evaluated end-to-end.

In [17]:
from validmind.models import PipelineModel

# Chain the three component models with `|` (embed -> retrieve -> generate)
# and wrap the chain in a single PipelineModel.
vm_rag_model = PipelineModel(vm_embedder | vm_retriever | vm_generator)

Let's run the test dataset through the entire pipeline. It will overwrite the current predictions that we generated from the individual models, but the key here is that calling `predict` on the pipeline model (`vm_rag_model`) will run the entire pipeline and store the intermediate predictions in the result.

In [18]:
# Run the whole pipeline on the test rows.
# NOTE(review): the recorded output shows `predict` returning a dict keyed by
# model id (e.g. 'embedding_model') rather than a DataFrame, despite the
# variable name — TODO confirm the intended return type.
result_df = vm_rag_model.predict(vm_test_ds.df)

AttributeError: 'dict' object has no attribute 'head'

In [19]:
# Display the raw pipeline output (this prints full embedding vectors, so it is very long).
result_df

{'embedding_model': [{'embedding': [0.007698851637542248,
    0.007591660600155592,
    0.07263781875371933,
    0.028500253334641457,
    0.023102860897779465,
    -0.020782485604286194,
    0.00657649664208293,
    0.023304631933569908,
    -0.0067026037722826,
    0.007244865410029888,
    0.026356428861618042,
    -0.03884105384349823,
    0.0004776314599439502,
    -0.03026575595140457,
    -0.01844950020313263,
    -0.012497235089540482,
    -0.0060310824774205685,
    0.015006771311163902,
    -0.01144423894584179,
    -0.0029162317514419556,
    0.0024244131054729223,
    0.05072036385536194,
    0.00574418855831027,
    0.016835326328873634,
    -0.015826469287276268,
    -0.0134052075445652,
    0.022560598328709602,
    0.014149241149425507,
    -0.025612397119402885,
    -0.03901760280132294,
    0.03510827571153641,
    -0.021955283358693123,
    -0.06623156368732452,
    0.029055126011371613,
    0.015322038903832436,
    -0.02414955198764801,
    -0.01686054840683937,
  

## Experiment with some RAGAS Metrics

Below I am just experimenting to see how the RAGAS metrics can work with the RAG `PipelineModel` instance. This is not a full implementation of the RAGAS metrics, just a proof of concept. We'll want to make this work in a more general way so that the columns can be properly mapped from the user-provided `predict_col` or the default `predict_col` to the column names that RAGAS expects, i.e. `question`, `contexts`, `answer`, `ground_truth`.

In [None]:
# Wrap the pipeline output as a ValidMind dataset for the RAGAS tests below.
# NOTE(review): `result_df` appears to be a dict in the recorded output — confirm
# that `init_dataset` accepts it.
vm_ragas_ds = vm.init_dataset(result_df, __log=False)

In [None]:
import plotly.express as px

def plot_distribution(scores):
    """Show a 10-bin histogram of RAGAS metric scores.

    Args:
        scores: list of floats (each between 0 and 1) produced by a RAGAS metric.
    """
    px.histogram(x=scores, nbins=10).show()

In [None]:
import warnings

# Suppress every Python warning from here on (no category filter is given),
# which keeps the test output below uncluttered but can also hide real problems.
warnings.filterwarnings("ignore")

In [None]:
# Run the RAGAS AnswerSimilarity test without its built-in display
# (show=False), then plot the scores ourselves.
result = run_test(
    "validmind.model_validation.ragas.AnswerSimilarity",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
# assumes results[0].data holds the per-row scores — TODO confirm
plot_distribution(result.metric.summary.results[0].data)

In [None]:
# Run the RAGAS ContextEntityRecall test without its built-in display
# (show=False), then plot the scores ourselves.
result = run_test(
    "validmind.model_validation.ragas.ContextEntityRecall",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
# assumes results[0].data holds the per-row scores — TODO confirm
plot_distribution(result.metric.summary.results[0].data)

In [None]:
# Run the RAGAS ContextPrecision test without its built-in display
# (show=False), then plot the scores ourselves.
result = run_test(
    "validmind.model_validation.ragas.ContextPrecision",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
# assumes results[0].data holds the per-row scores — TODO confirm
plot_distribution(result.metric.summary.results[0].data)

In [None]:
# Run the RAGAS ContextRelevancy test without its built-in display
# (show=False), then plot the scores ourselves.
result = run_test(
    "validmind.model_validation.ragas.ContextRelevancy",
    inputs={"dataset": vm_ragas_ds},
    show=False,
)
# assumes results[0].data holds the per-row scores — TODO confirm
plot_distribution(result.metric.summary.results[0].data)