## Create a LangChain Firestore Retriever class

To use our Firestore documents in a LangChain RetrievalQA chain,
we need a custom LangChain Retriever class that fetches the documents from our Firestore DB.

In [1]:
from typing import Any, Dict, List, Optional, Union

import numpy as np
import firebase_admin
from firebase_admin import firestore
from google.cloud import aiplatform
from langchain.schema import BaseRetriever, Document
from langchain.callbacks.manager import CallbackManagerForRetrieverRun, AsyncCallbackManagerForRetrieverRun
from langchain.embeddings.base import Embeddings
from langchain.embeddings import TensorflowHubEmbeddings

In [2]:
# TensorFlow Hub model used to embed text (here: the query handed to the retriever).
# NOTE(review): presumably the same model that produced the vectors stored in the
# Matching Engine index -- confirm, otherwise similarity scores are meaningless.
model_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3"
embeddings = TensorflowHubEmbeddings(model_url=model_url)

2023-08-02 16:59:18.912753: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-02 16:59:18.944074: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-02 16:59:18.944493: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Reuse the default Firebase app if it already exists so this cell can be re-run
# in the same kernel: initialize_app() raises ValueError when the default app
# has already been initialized.
try:
    app = firebase_admin.get_app()
except ValueError:
    # No default app yet: create one. With no arguments, Application Default
    # Credentials are picked up automatically.
    app = firebase_admin.initialize_app()
db = firestore.client()

In [4]:
# Sanity check: read one known document from the "questions" collection and
# show its contents, or report that it is missing.
doc_ref = db.collection("questions").document("573273bfe17f3d1400422993")
doc = doc_ref.get()

print(f"Document data: {doc.to_dict()}" if doc.exists else "No such document!")

Document data: {'title': 'Dwight_D._Eisenhower', 'context': 'Eisenhower responded to the French defeat with the formation of the SEATO (Southeast Asia Treaty Organization) Alliance with the U.K., France, New Zealand and Australia in defense of Vietnam against communism. At that time the French and Chinese reconvened Geneva peace talks; Eisenhower agreed the U.S. would participate only as an observer. After France and the Communists agreed to a partition of Vietnam, Eisenhower rejected the agreement, offering military and economic aid to southern Vietnam. Ambrose argues that Eisenhower, by not participating in the Geneva agreement, had kept the U.S out of Vietnam; nevertheless, with the formation of SEATO, he had in the end put the U.S. back into the conflict.', 'answers': {'text': ['Southeast Asia Treaty Organization'], 'answer_start': [75]}, 'question': 'What is SEATO?'}


In [5]:
class FirestoreRetriever(BaseRetriever):
    """Retriever that combines a Matching Engine index with Firestore documents.

    The query is embedded, the nearest neighbours are looked up on a Vertex AI
    Matching Engine index endpoint, and the neighbour IDs are then used as
    document IDs to fetch the actual text from a Firestore collection.

    Note: Firestore is accessed through the module-level ``db`` client, which
    must be defined before the retriever is used.
    """

    # Name/ID of the Matching Engine index endpoint to query.
    index_endpoint_name: str
    # ID of the index deployed on that endpoint.
    deployed_index_id: str
    # Embedding model used to turn the query text into a vector.
    embeddings: Embeddings
    # Firestore collection whose document IDs are the IDs returned by the index.
    collection: str
    # Number of nearest neighbours requested per query.
    top_k: int = 5

    def _similarity_search(self, query_emb: List[List[float]]) -> List[List[Any]]:
        """
        Perform a similarity search.

        Args:
            query_emb: One embedding vector per query (a list of float lists).

        Returns:
            For each query, the list of neighbours returned by the index
            endpoint (at most ``top_k`` per query).
        """
        index_endpoint = aiplatform.MatchingEngineIndexEndpoint(
            index_endpoint_name=self.index_endpoint_name
        )

        return index_endpoint.find_neighbors(
            deployed_index_id=self.deployed_index_id,
            queries=query_emb,
            num_neighbors=self.top_k,
        )

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return the Firestore documents most similar to ``query``.

        Args:
            query: Natural-language query text.
            run_manager: Callback manager supplied by LangChain (unused here).

        Returns:
            LangChain documents in the order the index returned the neighbours.
            Neighbours with no matching Firestore document are skipped.
        """
        # Use the query-specific embedding method; the index expects a batch
        # of vectors, so wrap the single query vector in a list.
        query_embedding = self.embeddings.embed_query(query)
        neighbors_per_query = self._similarity_search([query_embedding])
        if not neighbors_per_query:
            # The endpoint returned nothing at all for our single query.
            return []

        collection_ref = db.collection(self.collection)
        relevant_docs = []
        for neighbor in neighbors_per_query[0]:
            snapshot = collection_ref.document(neighbor.id).get()
            if not snapshot.exists:
                # Index and Firestore are out of sync for this ID; building a
                # Document from it would fail because there is no content.
                continue
            relevant_docs.append(self._firestore_doc_to_langchain_doc(snapshot))
        return relevant_docs

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Async retrieval is not supported by this retriever."""
        raise NotImplementedError("FirestoreRetriever does not support async retrieval")

    def _firestore_doc_to_langchain_doc(self, fs_doc) -> Document:
        """Wrap the ``context`` field of a Firestore snapshot in a LangChain Document."""
        return Document(page_content=fs_doc.get("context"))

## Test the retriever

In [6]:
# Retriever for a quick manual test: top_k=1 requests only the single nearest neighbour.
# NOTE(review): the index endpoint ID is hard-coded and specific to one deployment.
retriever = FirestoreRetriever(
    index_endpoint_name="1221803709063757824",
    deployed_index_id="questions",
    collection="questions",
    embeddings = embeddings,
    top_k=1
)

In [7]:
# Alternative question to try:
#   "when was the college of engineering in the University of Notre Dame established?"
query = "In what year did the initial degrees get handed out at Notre Dame?"

In [8]:
# Call the retriever directly (outside any chain) to inspect the documents it returns.
retriever.get_relevant_documents(query=query)

[Document(page_content='The first degrees from the college were awarded in 1849. The university was expanded with new buildings to accommodate more students and faculty. With each new president, new academic programs were offered and new buildings built to accommodate them. The original Main Building built by Sorin just after he arrived was replaced by a larger "Main Building" in 1865, which housed the university\'s administration, classrooms, and dormitories. Beginning in 1873, a library collection was started by Father Lemonnier. By 1879 it had grown to ten thousand volumes that were housed in the Main Building.', metadata={})]

## LangChain RetrievalQA

With our custom LangChain Retriever class in place, we can now use it in a LangChain RetrievalQA chain.

In [9]:
from langchain.chat_models import ChatVertexAI
from langchain.chains import RetrievalQA

In [10]:
# Vertex AI chat model, created with the library's default settings (no arguments passed).
llm = ChatVertexAI()

In [11]:
# Rebuild the retriever for the QA chain, this time requesting five neighbours per query.
retriever = FirestoreRetriever(
    index_endpoint_name="1221803709063757824",
    deployed_index_id="questions",
    collection="questions",
    embeddings = embeddings,
    top_k=5
)

In [12]:
# Assemble the question-answering chain from the chat model and the Firestore retriever.
# NOTE: per the LangChain docs, chain_type="stuff" places all retrieved documents
# into a single prompt for the LLM.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever
)

In [13]:
# Question to put to the QA chain.
query = "In what year did the initial degrees get handed out at Notre Dame?"

In [14]:
# Run the chain on the question; the result is displayed as the cell output.
qa.run(query)

'The first degrees from the college were awarded in 1849.'