-1. Constants

In [None]:
# Models that support the OpenAI "functions" API.
openai_function_compatible_models = [
    "gpt-3.5-turbo-0613",
    "gpt-4-0613",
]

# Models for which streamed answers are supported.
# Each model name is its own list entry so that `model in streaming_compatible_models`
# works; a single comma-joined string would never match any model name.
streaming_compatible_models = ["gpt-3.5-turbo", "gpt4all-j-1.3"]

# Models that are treated as private.
private_models = ["gpt4all-j-1.3"]

0. OpenAI configuration

In [None]:
import asyncio
import json
from typing import AsyncIterable, Awaitable

from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.base import LLM
from logger import get_logger
from models.chat import ChatHistory
from repository.chat.format_chat_history import format_chat_history
from repository.chat.get_chat_history import get_chat_history
from repository.chat.update_chat_history import update_chat_history
from repository.chat.update_message_by_id import update_message_by_id
from supabase import Client, create_client
from vectorstore.supabase import (
    CustomSupabaseVectorStore,
)  # Custom class for handling vector storage with Supabase

from .base import BaseBrainPicking
from .prompts.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT

# Module-level logger, named after this module.
logger = get_logger(__name__)


class OpenAIBrainPicking(BaseBrainPicking):
    """
    Main class for the OpenAI Brain Picking functionality.
    It allows to initialize a Chat model, generate questions and retrieve answers using ConversationalRetrievalChain.

    Every helper object (embeddings, Supabase client, vector store, LLMs and
    chains) is exposed as a property that builds a fresh instance on each access.
    """

    # Default class attributes
    model: str = "gpt-3.5-turbo"

    def __init__(
        self,
        model: str,
        brain_id: str,
        temperature: float,
        chat_id: str,
        max_tokens: int,
        user_openai_api_key: str,
        streaming: bool = False,
    ) -> None:
        """
        Initialize the BrainPicking class by forwarding every setting to the base class.
        :param model: Language model name to be used.
        :param brain_id: Identifier of the brain whose vectors are searched.
        :param temperature: Sampling temperature forwarded to the base class.
        :param chat_id: Identifier of the chat whose history is read and updated.
        :param max_tokens: Token limit forwarded to the base class.
        :param user_openai_api_key: OpenAI API key supplied by the user.
        :param streaming: Whether the answering LLM should stream its tokens.
        """
        super().__init__(
            model=model,
            brain_id=brain_id,
            chat_id=chat_id,
            max_tokens=max_tokens,
            temperature=temperature,
            user_openai_api_key=user_openai_api_key,
            streaming=streaming,
        )

    @property
    def embeddings(self) -> OpenAIEmbeddings:
        """Build the OpenAI embeddings client with this instance's API key."""
        return OpenAIEmbeddings(openai_api_key=self.openai_api_key)

    @property
    def supabase_client(self) -> Client:
        """Build a Supabase client from the URL and service key in the brain settings."""
        return create_client(
            self.brain_settings.supabase_url, self.brain_settings.supabase_service_key
        )

    @property
    def vector_store(self) -> CustomSupabaseVectorStore:
        """Build the vector store over the "vectors" table, scoped to this brain."""
        return CustomSupabaseVectorStore(
            self.supabase_client,
            self.embeddings,
            table_name="vectors",
            brain_id=self.brain_id,
        )

    @property
    def question_llm(self) -> LLM:
        """Build the LLM used to condense the question; it never streams."""
        return self._create_llm(model=self.model, streaming=False)

    @property
    def doc_llm(self) -> LLM:
        """Build the LLM used to answer from documents, with this instance's streaming flag and callbacks."""
        return self._create_llm(
            model=self.model, streaming=self.streaming, callbacks=self.callbacks
        )

    @property
    def question_generator(self) -> LLMChain:
        """Build the chain that rewrites a follow-up question with CONDENSE_QUESTION_PROMPT."""
        return LLMChain(llm=self.question_llm, prompt=CONDENSE_QUESTION_PROMPT)

    @property
    def doc_chain(self) -> LLMChain:
        """Build the "stuff" question-answering chain on top of the document LLM."""
        return load_qa_chain(llm=self.doc_llm, chain_type="stuff")

    @property
    def qa(self) -> ConversationalRetrievalChain:
        """Assemble the retrieval chain from the retriever, question generator and document chain."""
        return ConversationalRetrievalChain(
            retriever=self.vector_store.as_retriever(),
            question_generator=self.question_generator,
            combine_docs_chain=self.doc_chain,
            verbose=True,
        )

    def _create_llm(self, model, streaming=False, callbacks=None) -> LLM:
        """
        Create the chat model to be used.
        :param model: Language model name to be used.
        :param streaming: Whether the model should stream its tokens.
        :param callbacks: Callback handlers attached to the model.
        :return: Language model instance
        """
        # The temperature is fixed at 0 here, independently of the value given
        # to the constructor.
        return ChatOpenAI(
            temperature=0,
            model=model,
            streaming=streaming,
            callbacks=callbacks,
            # Use the same key as the embeddings client so that chat calls are
            # made with the user's key as well.
            openai_api_key=self.openai_api_key,
        )

    def _call_chain(self, chain, question, history):
        """
        Call a chain with a given question and history.
        :param chain: The chain eg QA (ConversationalRetrievalChain)
        :param question: The user prompt
        :param history: The chat history from DB
        :return: The answer.
        """
        return chain(
            {
                "question": question,
                "chat_history": history,
            }
        )

    def generate_answer(self, question: str) -> ChatHistory:
        """
        Generate an answer to a given question by interacting with the language model.
        :param question: The question
        :return: The generated answer.
        """
        # Get the history from the database
        history = get_chat_history(self.chat_id)

        # Format the chat history into a list of tuples (human, ai)
        transformed_history = format_chat_history(history)

        # Generate the model response using the QA chain
        model_response = self._call_chain(self.qa, question, transformed_history)

        answer = model_response["answer"]

        # Update chat history
        chat_answer = update_chat_history(
            chat_id=self.chat_id,
            user_message=question,
            assistant=answer,
        )

        return chat_answer

    async def _acall_chain(self, chain, question, history):
        """
        Asynchronously call a chain with a given question and history.
        :param chain: The chain eg QA (ConversationalRetrievalChain)
        :param question: The user prompt
        :param history: The chat history from DB
        :return: The answer.
        """
        # The inner coroutine must be awaited here: returning it un-awaited
        # would hand the caller a coroutine object and the chain would never run.
        return await chain.acall(
            {
                "question": question,
                "chat_history": history,
            }
        )

    async def generate_stream(self, question: str) -> AsyncIterable:
        """
        Generate a streaming answer to a given question by interacting with the language model.
        :param question: The question
        :return: An async iterable which generates the answer.
        """

        history = get_chat_history(self.chat_id)
        # The first registered callback is the one iterated for tokens below.
        callback = self.callbacks[0]

        # Format the chat history into a list of tuples (human, ai)
        transformed_history = format_chat_history(history)

        # Initialize a list to hold the tokens
        response_tokens = []

        # Wrap an awaitable with a event to signal when it's done or an exception is raised.
        async def wrap_done(fn: Awaitable, event: asyncio.Event):
            try:
                await fn
            except Exception as e:
                logger.error(f"Caught exception: {e}")
            finally:
                # Always set the event, on success and on failure alike.
                event.set()

        # `_acall_chain` is a method of this class, not of the chain, so it is
        # called on `self` with the chain passed as an argument.
        task = asyncio.create_task(
            wrap_done(
                self._acall_chain(self.qa, question, transformed_history),
                callback.done,
            )
        )

        # Store the exchange with an empty answer first; the stored message is
        # completed by `update_message_by_id` once streaming has finished.
        streamed_chat_history = update_chat_history(
            chat_id=self.chat_id,
            user_message=question,
            assistant="",
        )

        # Use the aiter method of the callback to stream the response with server-sent-events
        async for token in callback.aiter():
            logger.info("Token: %s", token)

            # Add the token to the response_tokens list
            response_tokens.append(token)
            # Each event carries only the latest token, not the accumulated text.
            streamed_chat_history.assistant = token

            yield f"data: {json.dumps(streamed_chat_history.to_dict())}"

        await task

        # Join the tokens to create the assistant's response
        assistant = "".join(response_tokens)

        update_message_by_id(
            message_id=streamed_chat_history.message_id,
            user_message=question,
            assistant=assistant,
        )

1. Model function call 

In [None]:
from typing import Optional
from typing import Any, Dict


class FunctionCall:
    """Plain holder for a function call: the function's name and its arguments."""

    def __init__(
        self,
        name: Optional[str] = None,
        arguments: Optional[Dict[str, Any]] = None,
    ):
        # Both values are stored exactly as given; nothing is validated or copied.
        self.name, self.arguments = name, arguments

2. Model answer 

In [None]:
from typing import Optional
from .FunctionCall import FunctionCall


class OpenAiAnswer:
    """Holder for a model answer: its text content and an optional function call."""

    def __init__(
        self,
        content: Optional[str] = None,
        # The default is None, so the annotation is spelled as Optional
        # explicitly instead of relying on implicit Optional.
        function_call: Optional["FunctionCall"] = None,
    ):
        # Both values are stored exactly as given.
        self.content = content
        self.function_call = function_call

3. Condense prompt 

In [None]:
from langchain.prompts.prompt import PromptTemplate

# Prompt text that asks the model to rewrite a follow-up question as a
# standalone question, given the chat history.
# Placeholders filled at run time: {chat_history} and {question}.
# NOTE(review): the trailing sentence "include it in the standalone question."
# has no clear antecedent — confirm the intended wording.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language. include it in the standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
# Prompt template built from the text above.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

4. Language prompt 

In [None]:
from langchain.prompts.prompt import PromptTemplate

# Prompt text for answering a question from retrieved context.
# Placeholders filled at run time: {context} and {question}.
prompt_template = """Your name is Quivr. You are a second brain. A person will ask you a question and you will provide a helpful answer. Write the answer in the same language as the question. If you don't know the answer, just say that you don't know. Don't try to make up an answer. Use the following context to answer the question:


{context}

Question: {question}
Helpful Answer:"""
# Prompt template with its two input variables declared explicitly.
QA_PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

5. Summarization 

In [None]:
import os

import guidance
import openai
from logger import get_logger

# Module-level logger, named after this module.
logger = get_logger(__name__)

# The OpenAI key is read from the environment once, when this module is
# imported; it is None when OPENAI_API_KEY is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = openai_api_key
# Guidance LLM shared by llm_summerize; created with caching=False.
summary_llm = guidance.llms.OpenAI("gpt-3.5-turbo-0613", caching=False)


def llm_summerize(document):
    """
    Summarize a document with the module-level summary LLM.
    :param document: The text to summarize; it is inserted into the user turn of the program.
    :return: The value generated under the 'summarization' key.
    """
    # Guidance program: a system instruction, the document as the user turn,
    # and one generation step named 'summarization'.
    program_text = """
{{#system~}}
You are a world best summarizer. \n
Condense the text, capturing essential points and core ideas. Include relevant \
examples, omit excess details, and ensure the summary's length matches the \
original's complexity.
{{/system~}}
{{#user~}}
Summarize the following text:
---
{{document}}
{{/user~}}

{{#assistant~}}
{{gen 'summarization' temperature=0.2 max_tokens=100}}
{{/assistant~}}
"""
    program = guidance(program_text, llm=summary_llm)

    # Run the program on the given document and log the executed program.
    executed = program(document=document)
    logger.info("Summarization: %s", executed)
    return executed["summarization"]


def llm_evaluate_summaries(question, summaries, model):
    """
    Score how relevant each summary is to a question and keep the relevant ones.

    The model is asked to answer with CSV rows of the form
    ``summary_id,document_id,evaluation,reason``. Rows that cannot be parsed,
    or whose score is outside 3..5, are dropped.

    :param question: The user question the summaries are evaluated against.
    :param summaries: The summaries to evaluate; the program reads ``id``,
        ``document_id``, ``content`` and ``metadata.file_name`` from each one.
    :param model: Model name. Names that do not start with "gpt-" are replaced
        by "gpt-3.5-turbo-0613".
    :return: A list of dicts with keys ``evaluation``, ``reason``,
        ``summary_id`` and ``document_id``, sorted by descending score. When a
        summary_id appears on several rows, the last kept row wins.
    """
    # Match on "gpt-" rather than "gpt" so that names such as "gpt4all-j-1.3",
    # which are not OpenAI models, fall back to the default as well.
    if not model.startswith("gpt-"):
        logger.info("Model %s not supported. Using gpt-3.5-turbo instead.", model)
        model = "gpt-3.5-turbo-0613"
    logger.info("Evaluating summaries with %s", model)
    evaluation_llm = guidance.llms.OpenAI(model, caching=False)
    evaluation_program = guidance(
        """
{{#system~}}
You are a world best evaluator. You evaluate the relevance of summaries based \
on user input question. Return evaluation in following csv format, csv headers \
are [summary_id,document_id,evaluation,reason].
Evaluator Task
- Evaluation should be a score number between 0 and 5.
- Reason should be a short sentence within 20 words explain why the evaluation.
---
Example
summary_id,document_id,evaluation,reason
1,4,3,"not mentioned about topic A"
2,2,4,"It is not relevant to the question"
{{/system~}}
{{#user~}}
Based on the question, do Evaluator Task for each summary.
---
Question: {{question}}
{{#each summaries}}
Summary
    summary_id: {{this.id}}
    document_id: {{this.document_id}}
    evaluation: ""
    reason: ""
    Summary Content: {{this.content}}
    File Name: {{this.metadata.file_name}}
{{/each}}
{{/user~}}
{{#assistant~}}
{{gen 'evaluation' temperature=0.2 stop='<|im_end|>'}}
{{/assistant~}}
""",
        llm=evaluation_llm,
    )
    result = evaluation_program(question=question, summaries=summaries)

    evaluations = {}
    for row in result["evaluation"].split("\n"):
        row = row.strip()
        # Skip blank lines and anything that is not a data row (e.g. the header).
        if not row or not row[0].isdigit():
            continue
        logger.info("Evaluation Row: %s", row)

        fields = row.split(",")
        # A data row needs at least id, document id and score; a shorter row is
        # skipped instead of failing the whole evaluation.
        if len(fields) < 3:
            continue
        summary_id, document_id, score = (field.strip() for field in fields[:3])
        if not score.isdigit():
            continue
        score = int(score)
        if score < 3 or score > 5:
            continue

        evaluations[summary_id] = {
            "evaluation": score,
            # The reason may itself contain commas, so the rest is re-joined.
            "reason": ",".join(fields[3:]),
            "summary_id": summary_id,
            "document_id": document_id,
        }

    return sorted(evaluations.values(), key=lambda x: x["evaluation"], reverse=True)

6. Function call configuration 

In [None]:
from typing import Any, Dict, List, Optional

from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from llm.models.FunctionCall import FunctionCall
from llm.models.OpenAiAnswer import OpenAiAnswer
from logger import get_logger
from models.chat import ChatHistory
from repository.chat.get_chat_history import get_chat_history
from repository.chat.update_chat_history import update_chat_history
from supabase import Client, create_client
from vectorstore.supabase import CustomSupabaseVectorStore

from .base import BaseBrainPicking

# Module-level logger, named after this module.
logger = get_logger(__name__)


def format_answer(model_response: Dict[str, Any]) -> OpenAiAnswer:
    """
    Convert a raw model response into an OpenAiAnswer.

    Only the message of the first choice is read. Its "function_call" entry,
    when present and not None, is wrapped in a FunctionCall built from the
    entry's "name" and "arguments".
    """
    message = model_response["choices"][0]["message"]
    text = message["content"]

    requested_call = message.get("function_call", None)
    if requested_call is None:
        return OpenAiAnswer(content=text, function_call=None)

    return OpenAiAnswer(
        content=text,
        function_call=FunctionCall(
            message["function_call"]["name"],
            message["function_call"]["arguments"],
        ),
    )


class OpenAIFunctionsBrainPicking(BaseBrainPicking):
    """
    Class for the OpenAI Brain Picking functionality using OpenAI Functions.
    It asks the chat model for an answer directly and, when the model requests it
    through a function call, asks again with the chat history and the documents
    found in the vector store added to the prompt.
    """

    # Default class attributes
    model: str = "gpt-3.5-turbo-0613"

    def __init__(
        self,
        model: str,
        chat_id: str,
        temperature: float,
        max_tokens: int,
        brain_id: str,
        user_openai_api_key: str,
        # TODO: add streaming
    ) -> None:
        """
        Forward every setting to the base class; streaming is always disabled.
        :param model: Language model name to be used.
        :param chat_id: Identifier of the chat whose history is read and updated.
        :param temperature: Sampling temperature sent with each request.
        :param max_tokens: Token limit sent with each request.
        :param brain_id: Identifier of the brain; converted to str before being forwarded.
        :param user_openai_api_key: OpenAI API key supplied by the user.
        """
        super().__init__(
            model=model,
            chat_id=chat_id,
            max_tokens=max_tokens,
            user_openai_api_key=user_openai_api_key,
            temperature=temperature,
            brain_id=str(brain_id),
            streaming=False,
        )

    @property
    def openai_client(self) -> ChatOpenAI:
        """Build a chat client with this instance's API key (a new one on each access)."""
        return ChatOpenAI(openai_api_key=self.openai_api_key)

    @property
    def embeddings(self) -> OpenAIEmbeddings:
        """Build the OpenAI embeddings client with this instance's API key."""
        return OpenAIEmbeddings(openai_api_key=self.openai_api_key)

    @property
    def supabase_client(self) -> Client:
        """Build a Supabase client from the URL and service key in the brain settings."""
        return create_client(
            self.brain_settings.supabase_url, self.brain_settings.supabase_service_key
        )

    @property
    def vector_store(self) -> CustomSupabaseVectorStore:
        """Build the vector store over the "vectors" table, scoped to this brain."""
        return CustomSupabaseVectorStore(
            self.supabase_client,
            self.embeddings,
            table_name="vectors",
            brain_id=self.brain_id,
        )

    def _get_model_response(
        self,
        messages: List[Dict[str, str]],
        functions: Optional[List[Dict[str, Any]]] = None,
    ) -> Any:
        """
        Retrieve a model response given messages and functions
        :param messages: Chat messages, each with a "role" and a "content".
        :param functions: Function definitions offered to the model; left out of
            the request when None or empty.
        :return: Whatever the client's completion_with_retry returns.
        """
        logger.info("Getting model response")
        kwargs = {
            "messages": messages,
            "model": self.model,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        }

        if functions:
            logger.info("Adding functions to model response")
            kwargs["functions"] = functions

        return self.openai_client.completion_with_retry(**kwargs)

    def _get_chat_history(self) -> List[Dict[str, str]]:
        """
        Retrieves the chat history in a formatted list
        :return: A flat list with, for every stored exchange, the user message
            followed by the assistant message.
        """
        logger.info("Getting chat history")
        history = get_chat_history(self.chat_id)
        return [
            item
            for chat in history
            for item in [
                {"role": "user", "content": chat.user_message},
                {"role": "assistant", "content": chat.assistant},
            ]
        ]

    def _get_context(self, question: str) -> Any:
        """
        Retrieve documents related to the question
        :param question: The text used as the similarity-search query.
        :return: The result of the vector store's similarity_search, unchanged
            (presumably a list of documents — confirm against the vector store).
        """
        logger.info("Getting context")

        return self.vector_store.similarity_search(query=question)

    def _construct_prompt(
        self, question: str, useContext: bool = False, useHistory: bool = False
    ) -> List[Dict[str, str]]:
        """
        Constructs a prompt given a question, and optionally include context and history
        :param question: The user question; always the last message of the prompt.
        :param useContext: When True, add a user message listing the documents found for the question.
        :param useHistory: When True, add a system note followed by the stored chat history.
        :return: The list of chat messages to send to the model.
        """
        logger.info("Constructing prompt")
        messages = [
            {
                "role": "system",
                "content": """Your name is Quivr. You are an assistant that has access to a person's documents and that can answer questions about them.
                A person will ask you a question and you will provide a helpful answer. 
                Write the answer in the same language as the question. 
                You have access to functions to help you answer the question.
                If you don't know the answer, just say that you don't know but be helpful and explain why you can't answer""",
            }
        ]

        if useHistory:
            logger.info("Adding chat history to prompt")
            history = self._get_chat_history()
            messages.append(
                {"role": "system", "content": "Previous messages are already in chat."}
            )
            messages.extend(history)

        if useContext:
            logger.info("Adding chat context to prompt")
            chat_context = self._get_context(question)
            # The search result is interpolated into the message as-is; an empty
            # result is replaced by a fixed "No document found" text.
            context_message = f"Here are the documents you have access to: {chat_context if chat_context else 'No document found'}"
            messages.append({"role": "user", "content": context_message})

        messages.append({"role": "user", "content": question})

        return messages

    def generate_answer(self, question: str) -> ChatHistory:
        """
        Main function to get an answer for the given question
        :param question: The user question.
        :return: The chat history entry stored for this exchange.
        """
        logger.info("Getting answer")
        functions = [
            {
                "name": "get_history_and_context",
                "description": "Get the chat history between you and the user and also get the relevant documents to answer the question. Always use that unless a very simple question is asked that a 5 years old could answer.",
                "parameters": {"type": "object", "properties": {}},
            },
        ]

        # First, try to get an answer using just the question
        response = self._get_model_response(
            messages=self._construct_prompt(question), functions=functions
        )
        formatted_response = format_answer(response)

        # If the model calls for history, try again with history included
        # NOTE(review): "get_history" is not among the functions offered above,
        # so this branch only runs if the model names it anyway — confirm
        # whether it is still wanted.
        if (
            formatted_response.function_call
            and formatted_response.function_call.name == "get_history"
        ):
            logger.info("Model called for history")
            response = self._get_model_response(
                messages=self._construct_prompt(question, useHistory=True),
                functions=[],
            )

            formatted_response = format_answer(response)

        if (
            formatted_response.function_call
            and formatted_response.function_call.name == "get_history_and_context"
        ):
            logger.info("Model called for history and context")
            # No functions are offered on the retry.
            response = self._get_model_response(
                messages=self._construct_prompt(
                    question, useContext=True, useHistory=True
                ),
                functions=[],
            )
            formatted_response = format_answer(response)

        # Update chat history
        # An answer without content is stored as an empty string.
        chat_history = update_chat_history(
            chat_id=self.chat_id,
            user_message=question,
            assistant=formatted_response.content or "",
        )

        return chat_history