In [1]:
from typing import List

from pydantic import SecretStr
from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    """Settings for the demo app.

    Reads from environment variables.
    You can create the .env file from the .env_example file.

    !!! SecretStr is a pydantic type that hides the value in logs.
    If you want to use the real value, you should do:
    SETTINGS.<variable>.get_secret_value()
    """

    class Config:
        env_file = ".env"

    openai_api_key: SecretStr
    openai_model: str = "gpt-4"
    temperature: float = 0

    primary_doc_url: List[str] = ['https://medium.com/@andredp_33483/revolutionising-business-processes-integrating-slack-with-aws-using-serverless-architectures-1b5fb9cf1a0e']

    secondary_docs_url: List[str] = [
        'https://medium.com/serverless-transformation/building-a-robust-serverless-messaging-service-with-amazon-eventbridge-pipes-and-cdk-bf8250d10825',
        'https://medium.com/serverless-transformation/enabling-the-optimal-serverless-platform-team-cdk-and-team-topologies-fe4d9299adc9',
    ]
    
    supabase_url: str
    supabase_service_key: SecretStr


SETTINGS = Settings()  # type: ignore


In [13]:
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import Client, create_client

def create_supabase_client() -> Client:
    return create_client(
        supabase_url=SETTINGS.supabase_url, 
        supabase_key=SETTINGS.supabase_service_key.get_secret_value()
    )


def create_gold_standard_vector_store() -> SupabaseVectorStore:
    """Create a vector store from a Supabase table."""
    return SupabaseVectorStore(
        client=create_supabase_client(), 
        embedding=OpenAIEmbeddings(
            openai_api_key=SETTINGS.openai_api_key.get_secret_value()
            ), 
        table_name="documents", 
        query_name="match_documents",
    )

def create_draft_vector_store() -> SupabaseVectorStore:
    """Create a vector store from a Supabase table."""
    return SupabaseVectorStore(
        client=create_supabase_client(), 
        embedding=OpenAIEmbeddings(
            openai_api_key=SETTINGS.openai_api_key.get_secret_value()
            ), 
        table_name="articles", 
        query_name="match_articles",
    )


def create_vector_store_from_documents(documents: list[Document], table_name: str) -> SupabaseVectorStore:
    """Create a vector store and populate it with a list of documents."""
    
    embedding=OpenAIEmbeddings(
            openai_api_key=SETTINGS.openai_api_key.get_secret_value()
            )
    client=create_supabase_client()

    return SupabaseVectorStore.from_documents(
        documents=documents,
        embedding=embedding,
        client=client,
        table_name=table_name,
        query_name="match_documents",
        # Lower chunk to prevent timeout
        chunk_size=100,
    )

In [14]:
from langchain.schema import BaseRetriever
from langchain_community.vectorstores import SupabaseVectorStore

def get_retriever(vector_store: SupabaseVectorStore) -> BaseRetriever:
    """Basic retriever function to get a retriever from a vector store."""
    retriever = vector_store.as_retriever(search_kwargs={'k': 3}) # Default K is 4

    return retriever



In [16]:
from langchain.prompts import ChatPromptTemplate

system_template = """You are Article GPT. The following pieces of context are considered to be the gold standard for future articles to aspire for. 
{gold_standard}

Use them to review and comment on the article you are about to read. You are to focus on the following aspects of the article:
- What is the tone of the article?
- What is the quality of the article?
- What is the level of detail of the article?
- What is the level of expertise of the article?
- What is the level of readability of the article?
- What is the level of relevance of the article?

For each of these aspects, you are to provide some feedback on the article in comparison to the gold standard articles. What works well? What doesn't work well? What could be improved?
----
{draft}
----
"""
qa_prompt = ChatPromptTemplate.from_template(system_template)
print(qa_prompt)

input_variables=['draft', 'gold_standard'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['draft', 'gold_standard'], template="You are Article GPT. The following pieces of context are considered to be the gold standard for future articles to aspire for. \n{gold_standard}\n\nUse them to review and comment on the article you are about to read. You are to focus on the following aspects of the article:\n- What is the tone of the article?\n- What is the quality of the article?\n- What is the level of detail of the article?\n- What is the level of expertise of the article?\n- What is the level of readability of the article?\n- What is the level of relevance of the article?\n\nFor each of these aspects, you are to provide some feedback on the article in comparison to the gold standard articles. What works well? What doesn't work well? What could be improved?\n----\n{draft}\n----\n"))]


In [17]:
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser


llm = ChatOpenAI(
    openai_api_key=SETTINGS.openai_api_key.get_secret_value(),
    model=SETTINGS.openai_model,
    temperature=SETTINGS.temperature,
    streaming=True,
)

gold_standard_vector_store = create_gold_standard_vector_store()
gold_standard_retriever = get_retriever(gold_standard_vector_store)

draft_vector_store = create_draft_vector_store()
draft_retriever = get_retriever(draft_vector_store)

rag_chain = (
  {
    "gold_standard": gold_standard_retriever,
    "draft": draft_retriever,
  },
  | qa_prompt
  | llm
  | StrOutputParser()
)
rag_chain.invoke()

SyntaxError: invalid syntax (2130413304.py, line 23)

In [6]:
# Use a faster model for condensing the question
qa = ConversationalRetrievalChain.from_llm(
    llm, 
    retriever=retriever1,
    verbose=True,
    memory=memory,
    condense_question_llm = ChatOpenAI(
        temperature=0, model='gpt-3.5-turbo', openai_api_key=SETTINGS.openai_api_key.get_secret_value()
        ),
    combine_docs_chain_kwargs={'prompt': qa_prompt}
)

ValidationError: 1 validation error for StuffDocumentsChain
__root__
  document_variable_name context was not found in llm_chain input_variables: ['draft_article', 'gold_standard'] (type=value_error)