# Model experimentation

In [None]:
import mlflow
import pandas as pd
from langchain.llms import GPT4All
import pinecone
from langchain.embeddings import GPT4AllEmbeddings
import os
from langchain.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from dotenv import load_dotenv
from langchain import hub

In [None]:
# Load settings from ../.env; later cells read PINECONE_API_KEY, PINECONE_ENV
# and MLFLOW_TRACKING_URI through os.getenv.
load_dotenv("../.env")

## Evaluation data

Create the evaluation questions and their reference answers (the answers are approximate, not necessarily accurate).

In [None]:
# Questions put to each QA chain during evaluation. Order matters: they are
# paired by position with ANSWERS when the `questions` DataFrame is built.
EVAL_QUESTIONS = [
    "What is SageMaker?",
    "What are all AWS regions where SageMaker is available?",
    "How to check if an endpoint is KMS encrypted?",
    "What are SageMaker Geospatial capabilities?"
]

In [None]:
# Reference answers, aligned by position with EVAL_QUESTIONS. They become the
# "answer" column that mlflow.evaluate uses as `targets`.
# NOTE(review): the last answer contains "rolesAs", which looks like a missing
# separator from a copy/paste — confirm whether the text should be cleaned up.
ANSWERS = [
    "Amazon SageMaker is a fully managed service that provides every developer and data scientist with the ability to build, train, and deploy machine learning (ML) models",
    "All supported AWS regions except China (Beijing), Asia Pacific (Jakarta), Middle East (UAE), Asia Pacific (Hyderabad), Asia Pacific (Melbourne), AWS GovCloud (US-East), AWS GovCloud (US-West), Europe (Spain), China (Ningxia), Europe (Zurich) Region",
    "Checks whether AWS Key Management Service (KMS) key is configured for an Amazon SageMaker endpoint configuration. The rule is NON_COMPLIANT if 'KmsKeyId' is not specified for the Amazon SageMaker endpoint configuration.",
    "SageMaker geospatial capabilities rolesAs a managed service, Amazon SageMaker geospatial capabilities perform operations on your behalf on the AWS hardware that is managed by SageMaker. It can perform only operations that the user permits."
]

In [None]:
# Evaluation set: one row per question, with its reference answer alongside.
questions = pd.DataFrame({"query": EVAL_QUESTIONS, "answer": ANSWERS})

## Retriever

Vectors will be retrieved from Pinecone.

In [None]:
# Configure the Pinecone client from environment variables.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENV"),
)

In [None]:
# Handle to the "clementine-loka" Pinecone index, the same index name the
# vector store and retriever use.
index = pinecone.Index("clementine-loka")

In [None]:
# Display the index statistics as the cell output (quick connectivity check).
index.describe_index_stats()

In [None]:
# Wrap the existing Pinecone index as a LangChain vector store; the GPT4All
# embeddings object is handed to the store as its embedding model.
embeddings = GPT4AllEmbeddings()
vectorstore = Pinecone.from_existing_index("clementine-loka", embeddings)

In [None]:
def load_retriever(persist_directory):
    """Return a retriever over the "clementine-loka" Pinecone index.

    The index is wrapped with GPT4All embeddings. ``persist_directory`` is
    accepted but never read; this function is passed to
    ``mlflow.langchain.log_model`` as ``loader_fn``, which is presumably why
    the single-argument signature is kept.
    """
    store = Pinecone.from_existing_index("clementine-loka", GPT4AllEmbeddings())
    return store.as_retriever()

Example usage:

In [None]:
# Ad-hoc similarity search against the vector store for a sample question.
retrieved = vectorstore.similarity_search("What is Sagemaker?")

In [None]:
# Text of the first retrieved document.
print(retrieved[0].page_content)

In [None]:
# Value stored under the "source" metadata key of the first retrieved document.
print(retrieved[0].metadata["source"])

## RAG: Retrieval Augmented Generation

A prompt is required to carry out retrieval from the external knowledge base.

In [None]:
# Pull the "rlm/rag-prompt" prompt from the LangChain hub; it is passed to the
# QA chains via chain_type_kwargs.
rag_prompt = hub.pull("rlm/rag-prompt")

## LLM & Chain

Two different LLMs will be evaluated

In [None]:
# Candidate LLMs: short name (used in the MLflow run name and logged as the
# "model_name" param) -> path of the model file given to GPT4All.
models = {
    "orca-mini-3b": "../models/orca-mini-3b-gguf2-q4_0.gguf",
    "gpt4all-falcon": "../models/gpt4all-falcon-q4_0.gguf"
}

In [None]:
# Build a single RetrievalQA chain to try things out before the full
# evaluation. load_retriever ignores its argument, so an empty string is passed.
retriever = load_retriever("")
# Reuse the path registered in `models` instead of repeating the literal, so
# the example and the evaluation loop cannot drift apart.
llm = GPT4All(model=models["orca-mini-3b"])
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    verbose=True,
    retriever=retriever,
    chain_type_kwargs={"prompt": rag_prompt},
    return_source_documents=False,
)

An example of the results:

In [None]:
# Ask one of the evaluation questions. The chain is called with a {"query": ...}
# mapping and the "result" entry of its output is shown as the cell output.
question = "What are all AWS regions where SageMaker is available?"
result = qa_chain({"query": question})
result["result"]

## Model evaluation

The evaluation results will be stored on an MLflow server.

In [None]:
# Directory handed to mlflow.langchain.log_model as persist_dir (alongside
# loader_fn). load_retriever itself ignores the path it receives.
# NOTE(review): MLflow may expect this directory to exist locally — confirm.
persist_dir="database" # Mandatory parameter by MLFlow
# The tracking server URI comes from the MLFLOW_TRACKING_URI environment variable.
mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI"))

Let's evaluate the two models.

In [None]:
# Evaluate every candidate model: build its QA chain, log the chain to MLflow,
# then score it on the `questions` DataFrame. One MLflow run per model.
for model_name, model_path in models.items():
    llm = GPT4All(model=model_path)
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        chain_type="stuff",
        verbose=True,
        retriever=retriever,
        chain_type_kwargs={"prompt": rag_prompt},
        return_source_documents=False,
    )

    # The context manager ends the run when the block exits, so no explicit
    # mlflow.end_run() call is needed inside it.
    with mlflow.start_run(run_name=f"log_model_{model_name}"):
        mlflow.log_param("model_name", model_name)

        # loader_fn and persist_dir are supplied so the retriever can be
        # rebuilt when the logged chain is loaded back from its model URI.
        logged_model = mlflow.langchain.log_model(
            qa_chain,
            artifact_path="model",
            loader_fn=load_retriever,
            persist_dir=persist_dir,
        )

        # model_type (e.g. "question-answering") is left unset on purpose: it
        # is not necessary when the metrics are listed explicitly.
        generated = mlflow.evaluate(
            model=logged_model.model_uri,
            data=questions,
            targets="answer",
            extra_metrics=[
                mlflow.metrics.toxicity(),
                mlflow.metrics.latency(),
                mlflow.metrics.ari_grade_level(),
                mlflow.metrics.flesch_kincaid_grade_level(),
                mlflow.metrics.exact_match(),
            ],
        )