# Demo 2 (LangChain, QA)

In [1]:
import hashlib
import os

import cassio

# Configure cassio from environment variables so that later cells can rely on
# the defaults registered here. The token and database ID are mandatory (a
# missing variable raises KeyError); the keyspace is optional and is passed as
# None when ASTRA_DB_KEYSPACE is not set.
cassio.init(
    token=os.environ['ASTRA_DB_APPLICATION_TOKEN'],
    database_id=os.environ['ASTRA_DB_ID'],
    keyspace=os.environ.get('ASTRA_DB_KEYSPACE'),
)

In [2]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Cassandra

In [3]:
# Embedding model and completion LLM, both constructed with no arguments, i.e.
# with the library's default settings.
# NOTE(review): presumably both pick up the OpenAI API key from the environment
# (OPENAI_API_KEY) -- confirm it is set before running this cell.
oai_embeddings = OpenAIEmbeddings()
oai_llm = OpenAI()

In [4]:
# Vector store over the 'news_v_store' table, using oai_embeddings as its
# embedding function.
demo2_qa_store = Cassandra(
    table_name='news_v_store',
    embedding=oai_embeddings,
    session=None,  # None = use the session default registered by cassio.init()
    keyspace=None,  # None = use the keyspace default registered by cassio.init()
)

## Insert texts

Let's use something so recent that no LLM can possibly have been trained on it:

In [5]:
from langchain.document_loaders import AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [6]:
# CBS News article about the 2023 Nobel Prize in Physics; this is the page that
# gets scraped and indexed in the cells below.
src_url = (
    'https://www.cbsnews.com/news/'
    'nobel-prize-physics-2023-blurry-glimpse-zooming-electrons-in-atom/'
)

### Note the handy, if a bit convoluted, tools to scrape Web pages and prepare them for vectorization:

In [7]:
# Chunking parameters, named so they are easy to find and tune.
CHUNK_SIZE = 500    # passed to the splitter as chunk_size
CHUNK_OVERLAP = 80  # passed to the splitter as chunk_overlap

html2text = Html2TextTransformer()
splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# 1. Download the page at src_url.
loader = AsyncHtmlLoader([src_url])
docs = loader.load()

# 2. Convert the downloaded HTML documents to plain text.
docs_cleaned = html2text.transform_documents(docs)

# 3. Split the cleaned text into chunks for the vector store.
chunks = splitter.split_documents(docs_cleaned)

# Fail fast with a clear message if the scrape produced nothing (e.g. the page
# is unreachable or empty), rather than failing obscurely in a later cell.
if not chunks:
    raise RuntimeError(f"No text chunks could be extracted from {src_url}")

print(f"{len(chunks)} document chunks obtained.")

Fetching pages: 100%|#################################################################################################################| 1/1 [00:00<00:00,  3.56it/s]

25 document chunks obtained.





In [9]:
# Example chunk: display one split Document (text plus metadata).
# The hard-coded index 13 requires at least 14 chunks; the recorded run
# above produced 25.
chunks[13]

Document(page_content='But even when they "see" the electron, there\'s only so much they can view.\n\n"You can see whether it\'s on the one side of a molecule or on the other,"\nL\'Huillier, 65, said. "It\'s still very blurry."\n\n"The electrons are much more like waves, like water waves, than particles and\nwhat we try to measure with our technique is the position of the crest of the\nwaves," she added.', metadata={'source': 'https://www.cbsnews.com/news/nobel-prize-physics-2023-blurry-glimpse-zooming-electrons-in-atom/'})

In [8]:
# Derive a stable id for each chunk from its source URL and text, and pass the
# ids explicitly. With content-derived ids, re-running this cell writes to the
# same rows again instead of adding a second copy of every chunk under new
# auto-generated ids (duplicates would crowd the top-k retrieval results).
# Chunks with identical source and text intentionally share one id.
chunk_ids = [
    hashlib.sha256(
        f"{chunk.metadata.get('source', '')}\n{chunk.page_content}".encode('utf-8')
    ).hexdigest()
    for chunk in chunks
]

# The return value is bound to `_` only to keep it out of the cell output.
_ = demo2_qa_store.add_documents(chunks, ids=chunk_ids)

In [10]:
from langchain.indexes.vectorstore import VectorStoreIndexWrapper

# Wrap the vector store in an index helper; the next cell queries it through
# index.query(...).
index = VectorStoreIndexWrapper(vectorstore=demo2_qa_store)

In [12]:
# Ask a question about the freshly indexed article and print the LLM's answer.
qa_question = (
    "Who won the 2023 Nobel Prize in physics, and for "
    "what? Answer with a poem of max 80 words."
)
qa_answer = index.query(qa_question, llm=oai_llm)
print(qa_answer)

 Pierre Agostini, Ferenc Krausz, and Anne L'Huillier, 
For their work on electrons in atoms, they won Nobel Prize in physics.
They uncovered secrets of the tiny parts, that are fundamental to our gadgets and hearts.
Their experiments give us tools to explore, understanding our world evermore.
