In [9]:
import os
import mlflow
import pandas as pd
import chromadb
from chromadb.utils import embedding_functions
from dotenv import load_dotenv
from openai import OpenAI

# Load settings from a .env file (if one is present) before reading the environment.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
CHROMA_HOST = os.environ.get("CHROMA_HOST")
# Chroma port: stays 8000 unless the optional CHROMA_PORT variable overrides it.
CHROMA_PORT = int(os.environ.get("CHROMA_PORT", "8000"))

# Fail fast with an actionable message instead of handing None to the clients
# below, where a missing setting would only show up as an opaque auth or
# connection error.
_missing_settings = [
    name
    for name, value in (
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("CHROMA_HOST", CHROMA_HOST),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# OpenAI client used for the chat-completion calls later in the notebook.
openai_client = OpenAI(api_key=OPENAI_API_KEY)

# Embedding function attached to the collection so that text queries are
# embedded with the same OpenAI model on every call.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=OPENAI_API_KEY,
    model_name="text-embedding-ada-002",
)
chroma_client = chromadb.HttpClient(host=CHROMA_HOST, port=CHROMA_PORT)

# NOTE(review): get_collection presumably requires 'content_collection' to
# already exist on the server -- confirm it is populated before running.
collection = chroma_client.get_collection(
    name="content_collection",
    embedding_function=openai_ef,
)

In [10]:
def query(query_texts, n_results=2, max_distance=0.5):
    """Fetch documents from the module-level ``collection`` that are close to the query.

    Args:
        query_texts: A single query or a list of queries. Anything that is
            not a list is wrapped in a one-element list.
        n_results: Number of nearest documents requested per query text.
            Defaults to 2.
        max_distance: Exclusive upper bound on the distance reported by the
            collection; a hit is kept only when its distance is strictly
            below this value. Defaults to 0.5.

    Returns:
        One of three shapes, which callers must be prepared for:

        * a list of ``{'document': ..., 'metadatas': ...}`` dicts, one per
          hit that passed the distance filter (hits of all query texts are
          flattened into a single list, in result order);
        * the string ``"No relevant documents"`` when no hit passed the
          filter (this sentinel is also printed);
        * an empty list when any exception was raised while querying (the
          error is printed, not re-raised).
    """
    try:
        # Ensure that query_texts is a list
        if not isinstance(query_texts, list):
            query_texts = [query_texts]

        results = collection.query(query_texts=query_texts, n_results=n_results)

        documents = []
        # results[...] holds one inner list per query text; i selects the
        # query, j the hit within that query.
        for i, document_list in enumerate(results['documents']):
            for j, document in enumerate(document_list):
                if results['distances'][i][j] < max_distance:
                    metadata = results['metadatas'][i][j]
                    documents.append({'document': document, 'metadatas': metadata})

        if not documents:
            # Sentinel string (not an empty list) is kept on purpose: callers
            # interpolate the return value into the prompt text.
            print("No relevant documents")
            return "No relevant documents"

        return documents

    except Exception as e:
        # Best effort: report the failure and let the caller continue with
        # no context rather than aborting the whole run.
        print(f"Error during query: {e}")
        return []

In [11]:
def gpt_model(input_df):
    """Answer every question in ``input_df`` with the chat model, using retrieved context.

    For each row, the value of the ``"questions"`` column is passed to
    ``query`` to obtain context; question and context are then sent to the
    OpenAI chat-completions endpoint together with a fixed system prompt.

    Args:
        input_df: DataFrame that has a ``"questions"`` column.

    Returns:
        list[dict]: One dict per row, in row order, with the keys
        ``"system_prompt"``, ``"response"`` (the text of the first choice)
        and ``"context"`` (whatever ``query`` returned for that row).
    """
    # The instructions are the same for every row, so the text is built once.
    system_prompt = """
        You are a helpful assistant who can answer question with information from the provided context, which comes from our platform content. 
        Make sure to only use information from the provided documents. 
        If the context does not have the information to answer the question reply `I'm sorry but I'm not sure about that.`
        Do not respond with anything outside of the scope of financial wellness or financial literacy. 
        """

    answers = []
    for _, record in input_df.iterrows():
        questions = record["questions"]
        context = query(questions)

        chat_messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"The user question:{questions} \n The context:{context}"},
        ]
        reply = openai_client.chat.completions.create(
            model="gpt-3.5-turbo-1106",
            messages=chat_messages,
        )

        answers.append(
            {
                "system_prompt": system_prompt,
                "response": reply.choices[0].message.content,
                "context": context,
            }
        )

    return answers


In [12]:
# Evaluation set: a single "questions" column. Three prompts are on the
# financial topics the assistant is meant to cover and one ("Super Bowl") is
# deliberately off-topic.
eval_df = pd.DataFrame(
    data=[
        "What exactly is credit score?",
        "Give me a couple tips on how to budget.",
        "Who won the Super Bowl?",
        "What are the pros and cons of getting a credit card?",
    ],
    columns=["questions"],
)

In [13]:
# Evaluate `gpt_model` on `eval_df` with MLflow's "question-answering" model type.
# `gpt_model` returns one dict per row; `predictions="response"` names the key
# that holds the generated answer.
results = mlflow.evaluate(
    gpt_model,
    eval_df,
    model_type="question-answering",
    predictions="response",
    evaluator_config={
        "col_mapping": {
            # The question text lives in the "questions" column of eval_df.
            "inputs": "questions",
            # No entry is needed for "context" or "system_prompt": gpt_model
            # already emits them under exactly those names. Mapping "context"
            # to any other column name would point at a column that nothing
            # in this notebook produces.
        }
    },
)


2023/12/16 18:19:54 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
2023/12/16 18:19:54 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2023/12/16 18:20:06 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2023/12/16 18:20:06 INFO mlflow.models.evaluation.default_evaluator: Evaluating builtin metrics: token_count
2023/12/16 18:20:06 INFO mlflow.models.evaluation.default_evaluator: Evaluating builtin metrics: toxicity
2023/12/16 18:20:06 INFO mlflow.models.evaluation.default_evaluator: Evaluating builtin metrics: flesch_kincaid_grade_level
2023/12/16 18:20:06 INFO mlflow.models.evaluation.default_evaluator: Evaluating builtin metrics: ari_grade_level
2023/12/16 18:20:06 INFO mlflow.models.evaluation.default_evaluator: Evaluating builtin metrics: exact_match


In [14]:
# Display the per-row table stored under "eval_results_table" in the evaluation result.
results.tables["eval_results_table"]


Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 598.50it/s]


Unnamed: 0,questions,outputs,token_count
0,What exactly is credit score?,A credit score is essentially a measure of the...,126
1,Give me a couple tips on how to budget.,"Based on the provided information, here are a ...",95
2,Who won the Super Bowl?,I'm sorry but I'm not sure about that.,11
3,What are the pros and cons of getting a credit...,The pros of getting a credit card include conv...,230


In [None]:
# mlflow is already imported in the first cell; the repeated import is harmless
# and lets this cell run on its own.
import mlflow

# Print the version of the installed mlflow package.
print(mlflow.__version__)