In [61]:
from langchain.vectorstores.cassandra import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_cohere import CohereEmbeddings
import cassio
import os, dotenv
from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Load environment variables via python-dotenv before the os.getenv lookups
# in later cells; the call's return value is shown as this cell's output.
dotenv.load_dotenv()

True

In [34]:
# Read the consultation-draft PDF from the working directory.
pdf_loader = PyPDFLoader("National AI Policy Consultation Draft.pdf")
pdf_doc = pdf_loader.load()

# Cell output: how many items the loader returned.
len(pdf_doc)

41

In [36]:
# Split the loaded documents into overlapping chunks (size 1000, overlap 200).
chunk_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
doc_chunks = chunk_splitter.split_documents(pdf_doc)

# Spot-check one chunk, then report the total chunk count as the cell output.
print(doc_chunks[10])
len(doc_chunks)

157

In [66]:
# Astra DB credentials come from the environment; either value is None when
# the corresponding variable is not set.
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_ID = os.environ.get("ASTRA_DB_ID")

In [71]:
# Initialise cassio with the Astra DB token and database id read above.
cassio.init(token=ASTRA_DB_APPLICATION_TOKEN, database_id=ASTRA_DB_ID)

In [72]:
# Fail fast with a readable message when an API key is missing. The previous
# `os.environ[NAME] = os.getenv(NAME)` self-assignment was a no-op when the key
# was present and raised an opaque TypeError (None is not a str) when absent.
# `os` is already imported in the notebook's import cell.
for required_key in ("GOOGLE_API_KEY", "COHERE_API_KEY"):
    if not os.getenv(required_key):
        raise RuntimeError(
            f"{required_key} is not set; define it in your .env file or shell environment."
        )

## LLM
# Gemini chat model used later to answer questions over the vector index.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

## Embeddings
# NOTE(review): no `model` is passed here; newer langchain_cohere releases may
# require one explicitly - confirm against the installed version.
embeddings = CohereEmbeddings()

In [73]:
# Vector store over the "astra_rag" table, embedding text with `embeddings`.
# No session/keyspace is passed; presumably the connection configured by
# cassio.init() is used - TODO confirm.
astra_vector_store = Cassandra(table_name="astra_rag", embedding=embeddings)

In [74]:
# Embed and store every chunk; the returned list of ids is this cell's output.
# NOTE(review): re-running this cell presumably inserts the same chunks again
# under new ids - confirm before executing it more than once.
astra_vector_store.add_documents(doc_chunks)

['d4c8b7fa79d3407fb87dcfa6c788458b',
 '0f15e3bfee304bc6afd5f541358c5145',
 '9e6a43b00188476aadd1732dd784ce9d',
 'c75b94359d4b45768ab8bcf15999012b',
 '97baa03275864b7c8be4c2fe1fda96ee',
 'aea559d186ba4b37bf29fe1d1e5ef60f',
 '9f0d638dec9d4c8496590813e2c9b35e',
 '541ed555469941c298e35be18fe0f962',
 '3a24241bd4564587ba7c6b119291547e',
 'c520fc57cca34356bad52fb4a5cb8a72',
 'e636ee1030204f199d9ae536055b8000',
 '5f779b2acc2447289c063be627efafab',
 'a2aeb64fc7df4506b4f33808bf90e018',
 '19f5661d3aed49d8b4740cb1bc72c8a3',
 '43ba4fc8725f4f83a0472033ade5a2cc',
 '2f3b144526ad4ce4b4498e35486232e0',
 '655d22a87c8144f996181c69c003cff4',
 'e2d0a6b580e34795ad19dc3e1622312d',
 '19cfbf0cd27046cbb6ac1b3b954a7027',
 'd2163bfb13904f80ab6a98a6b6d8cf0b',
 'db3430b567bf44da9e93be07cfcbb6fc',
 'ecc9b4a6cd87456585795170210ef623',
 '8779effd7f484f1faff536636b6ca35c',
 '3ba074bc57e84cc1b6d0b0e675944cee',
 '0fed01bb711c472a9bdd40b795a3df35',
 'f666674bfd1a4877812768efea8baeae',
 '1cdaadb1be244a5aa749c55f5d393047',
 

In [75]:
# Wrap the vector store in an index wrapper so it can be queried with an LLM
# (see the `.query(..., llm=llm)` call in the next cell).
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)

In [76]:
# NOTE: this rebinds `print` to rich's pretty-printer for every cell executed
# after this one; later cells rely on it for formatted output.
from rich import print

# Query the index with the Gemini chat model. The question previously
# misspelled "Artificial" as "Artificail"; the question text is passed verbatim
# to the index query, so the typo is corrected here.
response = astra_vector_index.query(
    "What is National Artificial Intelligence Policy?",
    llm=llm,
)
print(response)

In [77]:
# Question used for a raw similarity lookup (no LLM involved in this cell).
query = "What are the objectives of National AI Fund?"

# Request the top 3 matches together with their relevance scores.
retrieved_docs = astra_vector_store.similarity_search_with_relevance_scores(
    query,
    k=3,
)
print(retrieved_docs)

In [31]:
# deleting the collection
# astra_vector_store.delete_collection()