In [52]:
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAI
from langchain_astradb import AstraDBVectorStore
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load variables from a .env file into the process environment; the
# os.getenv() lookups in the following cells read their keys from there.
load_dotenv()

True

In [53]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Gemini credentials are read from the environment rather than hard-coded.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
model_name = "gemini-pro"

# Text-generation model used by the retrievers/chains in this notebook.
# temperature=0 asks for the least random sampling.
llm = GoogleGenerativeAI(
    model=model_name,
    google_api_key=GOOGLE_API_KEY,
    max_output_tokens=2048,
    max_retries=20,
    temperature=0,
)

# Embedding model used by the vector store. The key is passed explicitly,
# exactly as for `llm`, so both clients are configured from the same value
# instead of one of them silently depending on the ambient environment.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

In [54]:
# Smoke-test the model. `invoke` is the Runnable entry point; calling the
# LLM object directly (`llm("...")`) is deprecated in LangChain.
llm.invoke("Hello, how are you today?")

'As an AI chatbot, I don\'t have personal feelings or experiences, so I don\'t have a "today" or the capacity to feel emotions. I am designed to provide information and assist users to the best of my abilities.'

Ingest

In [55]:
from datasets import (
    load_dataset,
)

In [56]:
# Astra DB connection settings, taken from the environment.
# Each value is None when the corresponding variable is not set.
ASTRA_DB_API_ENDPOINT = os.environ.get("ASTRA_DB_API_ENDPOINT")
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_KEYSPACE_Philo = os.environ.get("ASTRA_DB_KEYSPACE_Philo")

In [57]:
# Vector store bound to the "astra_vector_demo" collection; it is given the
# `embeddings` model plus the Astra DB endpoint, token and keyspace read
# from the environment.
vstore = AstraDBVectorStore(
    collection_name="astra_vector_demo",
    embedding=embeddings,
    token=ASTRA_DB_APPLICATION_TOKEN,
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    namespace=ASTRA_DB_KEYSPACE_Philo,
)

In [61]:
philo_dataset = load_dataset("datastax/philosopher-quotes")["train"]


def _quote_to_document(entry):
    """Build one Document from a dataset row.

    Args:
        entry: A row exposing the keys "author", "quote" and "tags".

    Returns:
        Document: page_content is "Quote: ..., Author:..." followed by
        ", tags:..." only when the row actually has tags; metadata keeps the
        raw "author" and "tags" values (tags may be None).
    """
    author = entry["author"]
    quote = entry["quote"]
    tags = entry["tags"]

    text = f"Quote: {quote}, Author:{author}"
    # Rows without tags carry None; formatting that value would put the
    # literal word "None" into the text that gets embedded.
    if tags:
        text += f", tags:{tags}"

    return Document(page_content=text, metadata={"author": author, "tags": tags})


docs = [_quote_to_document(entry) for entry in philo_dataset]

# Ingestion step, left commented out (presumably so that re-running the
# notebook does not insert the quotes a second time).
# inserted_ids = vstore.add_documents(docs)
# print(f"\nInserted {len(inserted_ids)} documents.")

In [62]:
# Preview only the first few documents instead of dumping the whole list.
docs[:5]

[Document(page_content="Quote: True happiness comes from gaining insight and growing into your best possible self. Otherwise all you're having is immediate gratification pleasure, which is fleeting and doesn't grow you as a person., Author:aristotle, tags:knowledge", metadata={'author': 'aristotle', 'tags': 'knowledge'}),
 Document(page_content='Quote: The roots of education are bitter, but the fruit is sweet., Author:aristotle, tags:education;knowledge', metadata={'author': 'aristotle', 'tags': 'education;knowledge'}),
 Document(page_content='Quote: Before you heal the body you must first heal the mind, Author:aristotle, tags:ethics', metadata={'author': 'aristotle', 'tags': 'ethics'}),
 Document(page_content='Quote: The proof that you know something is that you are able to teach it, Author:aristotle, tags:education;knowledge', metadata={'author': 'aristotle', 'tags': 'education;knowledge'}),
 Document(page_content='Quote: Those who are not angry at the things they should be angry at 

In [59]:
# Plain retriever over the vector store (no metadata filtering).
retriever = vstore.as_retriever()
# `invoke` supersedes the deprecated `get_relevant_documents`.
# NOTE: this rebinds `docs`, which previously held the list built from the
# dataset in the ingest cell.
docs = retriever.invoke("get me a quote of plato")
print(docs)

[Document(page_content='Quote: Do not expect justice where might is right., Author:plato, tags:None', metadata={'author': 'plato', 'tags': None}), Document(page_content='Quote: Thinking is the soul talking to itself., Author:plato, tags:None', metadata={'author': 'plato', 'tags': None}), Document(page_content='Quote: Ignorance is the root cause of all difficulties., Author:plato, tags:None', metadata={'author': 'plato', 'tags': None}), Document(page_content='Quote: Plato is my friend, but truth is a better friend., Author:aristotle, tags:None', metadata={'author': 'aristotle', 'tags': None})]


In [10]:
# Display whatever `docs` currently holds.
# NOTE(review): this cell's execution count (In [10]) is lower than its
# neighbours', so the saved output below may come from an earlier kernel
# state — re-run to refresh.
docs

[Document(page_content='Do the right thing because it is right.', metadata={'author': 'kant', 'tags': 'ethics'}),
 Document(page_content='Character is that which reveals moral purpose, exposing the class of things a man chooses and avoids.', metadata={'author': 'aristotle', 'tags': None}),
 Document(page_content='Philosophy begins with wonder.', metadata={'author': 'aristotle', 'tags': None}),
 Document(page_content='A moral system valid for all is basically immoral.', metadata={'author': 'nietzsche', 'tags': None})]

In [42]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever

# Metadata schema handed to the LLM so it can turn a question into a
# structured filter. The descriptions spell out how values are stored
# (lower-case authors, tags as one ';'-joined string) because the model
# only knows what is written here.
metadata_field_info = [
    AttributeInfo(
        name="author",
        description=(
            "The author of the quote, always stored in lower case, "
            "e.g. 'aristotle', 'plato', 'kant'"
        ),
        type="string",
    ),
    AttributeInfo(
        name="tags",
        description=(
            "Category tags of the quote stored as a single lower-case string; "
            "several tags are joined with ';', e.g. 'ethics' or "
            "'education;knowledge'. May be missing."
        ),
        type="string",
    ),
]
document_content_description = "The text of the quote from the philosopher."

# Self-querying retriever: the LLM writes the query and metadata filter,
# the vector store executes them; at most 2 documents per search.
retriever2 = SelfQueryRetriever.from_llm(
    llm,
    vstore,
    document_content_description,
    metadata_field_info,
    verbose=True,
    search_kwargs={"k": 2},
)

In [43]:
# `invoke` supersedes the deprecated `get_relevant_documents`; the question
# text also regains its missing noun ("a quote about ethics").
docs = retriever2.invoke("Get me a quote about ethics from author aristotle")
print(docs)

[Document(page_content='You are what you repeatedly do', metadata={'author': 'aristotle', 'tags': None}), Document(page_content='Philosophy begins with wonder.', metadata={'author': 'aristotle', 'tags': None})]


In [46]:
from pydantic.v1 import BaseModel, Field

from typing import List
from langchain_core.output_parsers import BaseOutputParser
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field
from langchain.retrievers.multi_query import MultiQueryRetriever

class LineListOutputParser(BaseOutputParser[List[str]]):
    """Turn the LLM's newline-separated answer into a list of query strings."""

    def parse(self, text: str) -> List[str]:
        """
        Split ``text`` into one cleaned query per non-empty line.

        A leading ``Original question:`` / ``Alternative question N:`` label
        (in any capitalisation) is removed so that only the question itself
        is returned. Lines that still contain the lower-case words
        "alternative" or "original question" afterwards are dropped.

        Args:
            text (str): Raw LLM output, expected to hold one question per line.

        Returns:
            List[str]: The cleaned, non-empty question lines in their
            original order.
        """
        # Echo the raw model output so the generated questions are visible
        # in the notebook.
        print(f'parsing text: {text} \n')

        label_names = ("original question", "alternative question")

        queries: List[str] = []
        for raw_line in text.strip().split("\n"):
            line = raw_line.strip()

            # "Alternative question 2: <question>" -> "<question>".
            # Only the text before the FIRST colon is checked, so later
            # "author: ..." / "tag: ..." hints inside the question survive.
            head, colon, rest = line.partition(":")
            if colon and head.strip().lower().rstrip("0123456789 ") in label_names:
                line = rest.strip()

            if not line:
                continue
            # Remaining mentions are treated as filler/header lines.
            if "alternative" in line or "original question" in line:
                continue
            queries.append(line)
        return queries


# Parser that post-processes the raw LLM text for the chain below.
output_parser = LineListOutputParser()

# Prompt text for the query-rewriting step; {question} is the only placeholder.
# NOTE(review): "vartion" in the template is presumably a typo for
# "variation" — left untouched here because it is part of the prompt sent
# to the model.
_QUERY_TEMPLATE = """You are an AI language model assistant. Your task is to generate two 
    different versions of the given user question to retrieve relevant quotes from a vector 
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search. Do not change the meaning of the question, do not change the authors name or the tag
    Provide these alternative questions separated by newlines. also mention the original question. And have question vartion by adding the author name as author: name of author in lower case also add tag: tag name in lower case.
    Original question: {question}"""

QUERY_PROMPT = PromptTemplate(
    template=_QUERY_TEMPLATE,
    input_variables=["question"],
)

# Chain: prompt -> LLM -> line parser.
llm_chain = LLMChain(
    output_parser=output_parser,
    prompt=QUERY_PROMPT,
    llm=llm,
)


In [47]:
# Run
# Multi-query retrieval: `llm_chain` produces reworded questions and each is
# searched through `retriever2`; include_original=True adds the user's own
# wording to that set.
# `parser_key` is intentionally not passed: LineListOutputParser.parse
# returns a plain list, so there is no `.lines` attribute to look up.
retriever = MultiQueryRetriever(
    retriever=retriever2,
    llm_chain=llm_chain,
    include_original=True,
)
# `invoke` supersedes the deprecated `get_relevant_documents`.
unique_docs = retriever.invoke("What did Aristotle say about ethics?")


parsing text: Original question: What did Aristotle say about ethics?

Alternative question 1: What are Aristotle's views on the nature of ethics? author: aristotle tag: ethics
Alternative question 2: How did Aristotle define the concept of ethical behavior? author: aristotle tag: ethics 



OutputParserException: Parsing text
```json
{
    "query": "nature of ethics",
    "filter": "eq(\"author\", \"Aristotle\") and in(\"tags\", \"ethics\")"
}
```
 raised following error:
Unexpected token Token('CNAME', 'and') at line 1, column 27.
Expected one of: 
	* $END
Previous tokens: [Token('RPAR', ')')]


In [None]:
# Display the documents assigned to `unique_docs` by the multi-query cell.
# NOTE(review): this cell has no execution count (In [None]) yet carries a
# saved output, so that output may not match the current code — re-run to
# refresh.
unique_docs

[Document(page_content='A promise made must be a promise kept.', metadata={'author': 'aristotle', 'tags': 'ethics'}),
 Document(page_content='Happiness is the reward of virtue.', metadata={'author': 'aristotle', 'tags': 'ethics'}),
 Document(page_content='He who hath many friends hath none.', metadata={'author': 'aristotle', 'tags': 'ethics'}),
 Document(page_content='Before you heal the body you must first heal the mind', metadata={'author': 'aristotle', 'tags': 'ethics'}),
 Document(page_content='You are what you repeatedly do', metadata={'author': 'aristotle', 'tags': None}),
 Document(page_content='Philosophy begins with wonder.', metadata={'author': 'aristotle', 'tags': None}),
 Document(page_content='Fortune favours the bold.', metadata={'author': 'aristotle', 'tags': None}),
 Document(page_content='A friend is another I.', metadata={'author': 'aristotle', 'tags': None}),
 Document(page_content='Happiness springs from doing good and helping others.', metadata={'author': 'plato', 