# Astra DB vector store with LangChain: quickstart draft (not final yet, but soon...)

In [None]:
# TEMPORARY INSTALL:
!pip install --quiet "git+https://github.com/datastax/astrapy@astra-vector#egg=astrapy"
!pip install --quiet "git+https://github.com/hemidactylus/langchain@SL-astra-db#egg=langchain&subdirectory=libs/langchain"
# these will become "pip install langchain astrapy"

! pip install --quiet \
    "datasets==2.14.6" \
    "openai==0.28.1" \
    "tiktoken==0.5.1"

In [None]:
import os
from getpass import getpass

from datasets import load_dataset

from langchain.vectorstores import AstraDB
from langchain.schema import Document
from langchain.embeddings import OpenAIEmbeddings

## Init

In [None]:
# Connection settings for Astra DB: anything already present in the
# environment is left alone, the rest is requested interactively.
if os.environ.get("ASTRA_DB_API_ENDPOINT") is None:
    os.environ["ASTRA_DB_API_ENDPOINT"] = input("ASTRA_DB_API_ENDPOINT = ")

# The token is read with getpass() rather than input().
if os.environ.get("ASTRA_DB_APPLICATION_TOKEN") is None:
    os.environ["ASTRA_DB_APPLICATION_TOKEN"] = getpass("ASTRA_DB_APPLICATION_TOKEN = ")

# The keyspace is optional: an empty answer leaves the variable unset.
if os.environ.get("ASTRA_DB_KEYSPACE") is None:
    keyspace = input("(Optional) ASTRA_DB_KEYSPACE = ")
    if keyspace != "":
        os.environ["ASTRA_DB_KEYSPACE"] = keyspace

In [None]:
# Ask for the OpenAI API key only when it is not already set in the environment.
# getpass() is used instead of input() so the secret is not echoed back and
# does not end up stored in the notebook's cell output.
if 'OPENAI_API_KEY' not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass("OPENAI_API_KEY = ")

In [None]:
# Embedding object handed to the vector store below.
embe = OpenAIEmbeddings()

# Vector store handle. The endpoint and token are required (a missing variable
# raises KeyError); the keyspace is optional and resolves to None when unset.
vstore = AstraDB(
    token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
    api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
    namespace=os.getenv("ASTRA_DB_KEYSPACE"),  # <-- expected to disappear in a typical quickstart
    collection_name="astra_vector_demo",
    embedding=embe,
)

## Load a small dataset

In [None]:
# Fetch the dataset, then keep only its "train" split.
philo_splits = load_dataset("datastax/philosopher-quotes")
philo_dataset = philo_splits["train"]

# Show a single record (index 16) to illustrate the shape of an entry.
print("An example entry:")
print(philo_dataset[16])

## Add dataset documents to the vector store

In [None]:
# Turn every dataset entry into a Document: the quote is the text, the author
# and one "y" flag per semicolon-separated tag make up the metadata.
docs = []
for quote_entry in philo_dataset:
    tag_string = quote_entry["tags"]
    # Entries with empty/missing tags contribute no tag flags at all.
    tag_flags = {tag: "y" for tag in tag_string.split(";")} if tag_string else {}
    docs.append(
        Document(
            page_content=quote_entry["quote"],
            metadata={"author": quote_entry["author"], **tag_flags},
        )
    )

# Write all documents to the vector store and report how many IDs came back.
inserted_ids = vstore.add_documents(docs)
print(f"\nInserted {len(inserted_ids)} documents.")

> TODO: show other add options (add_texts)

## ANN, simple

In [None]:
# Ask the store for the 3 entries closest to the query sentence.
results = vstore.similarity_search(
    "Our life is what we make of it",
    k=3,
)
# One line per hit: text followed by its metadata.
for hit in results:
    print(f"* {hit.page_content} [{hit.metadata}]")

### Similarity values

In [None]:
# Run the query through the scored search: each result is a (document, score) pair.
results = vstore.similarity_search_with_score("Our life is what we make of it", k=3)
for res, score in results:
    # ".3f" prints three decimals; the former "3f" spec only requested a minimum
    # field width of 3 and therefore fell back to the default six decimals.
    print(f"* [SIM={score:.3f}] {res.page_content} [{res.metadata}]")

## ANN, with metadata

In [None]:
# Same kind of search, with a metadata filter on the "author" field.
results = vstore.similarity_search("Our life is what we make of it", filter={"author": "aristotle"}, k=3)
# One line per hit: text followed by its metadata.
for hit in results:
    print(f"* {hit.page_content} [{hit.metadata}]")

## MMR search

In [None]:
# Maximal-marginal-relevance search, with a metadata filter on the "author" field.
results = vstore.max_marginal_relevance_search("Our life is what we make of it", filter={"author": "aristotle"}, k=3)
# One line per hit: text followed by its metadata.
for hit in results:
    print(f"* {hit.page_content} [{hit.metadata}]")

> TODO: document deletion (various ways)

> TODO: loading from one/more PDFs as well (from url or local file?)

## A mini-RAG

We build the chain with LCEL (the LangChain Expression Language), which makes it ready to be served with, for example, LangServe.

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

In [None]:
# Retriever view of the vector store, configured with k=3.
retriever = vstore.as_retriever(search_kwargs={"k": 3})

# Prompt text with two placeholders: {context} and {question}.
philo_template = """
You are a philosopher that draws inspiration from great thinkers of the past
to craft well-thought answers to user questions. You can use the provided context
as inspiration.
Your answers must be concise and to the point on other topics than philosophy.

CONTEXT:
{context}

QUESTION: {question}

YOUR ANSWER:"""

philo_prompt = ChatPromptTemplate.from_template(philo_template)

llm = ChatOpenAI()

# Input mapping for the prompt: "context" comes from the retriever,
# "question" from a RunnablePassthrough.
rag_inputs = {"context": retriever, "question": RunnablePassthrough()}

# Pipeline: input mapping -> prompt -> chat model -> string output parser.
chain = rag_inputs | philo_prompt | llm | StrOutputParser()

In [None]:
# Run the chain on a sample question; as the last expression of the cell,
# the returned value is what the notebook displays.
chain.invoke("What is the meaning of suffering?")