In [1]:
from llama_index.vector_stores.chroma import ChromaVectorStore

from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import VectorStoreIndex
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.vector_stores.types import VectorStoreQuery


In [4]:
import os

# Resolve the project root (the parent of the directory the kernel started in)
# only once per kernel. The cell changes the working directory itself, so
# recomputing the root from os.getcwd() on every run would climb one directory
# higher each time the cell is re-executed.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Work from the project root; presumably this is what lets the `src.*` imports
# in later cells resolve — TODO confirm.
os.chdir(PROJECT_ROOT)

In [5]:
from src.embedding_client import RemoteEmbedding
# Port on localhost that the embedding client is pointed at.
EMBEDDING_SERVER_PORT = 8020
# RemoteEmbedding is a project class (src.embedding_client); presumably it sends
# embedding requests to this base URL — TODO confirm against its implementation.
embed_model = RemoteEmbedding(f"http://localhost:{EMBEDDING_SERVER_PORT}")

In [6]:
import chromadb
CHROMA_DB_PORT = 8010
def get_chroma_db_client(host="localhost", port=None):
    """Create an HTTP client for a Chroma server.

    Args:
        host: Hostname of the Chroma server. Defaults to "localhost".
        port: Port of the Chroma server. When None, the notebook-level
            CHROMA_DB_PORT constant is used, so existing zero-argument calls
            behave exactly as before.

    Returns:
        The client object returned by ``chromadb.HttpClient``.
    """
    if port is None:
        port = CHROMA_DB_PORT
    # int() keeps the original coercion, so a port given as a string still works.
    return chromadb.HttpClient(host=host, port=int(port))

In [9]:
# Chroma client shared by the collection lookups and the packaged retriever below.
client = get_chroma_db_client()

In [10]:
# Look up the two existing Chroma collections by name ...
qna_collection, context_collection = (
    client.get_collection(name) for name in ('qna', 'context')
)

# ... and wrap each one in a LlamaIndex vector store.
vec_store_qna = ChromaVectorStore(chroma_collection=qna_collection)
vec_store_context = ChromaVectorStore(chroma_collection=context_collection)

In [25]:
# Scratch lookup of the from_vector_store signature (it accepts `embed_model`,
# which the index-construction cell relies on).
help(VectorStoreIndex.from_vector_store)

Help on method from_vector_store in module llama_index.core.indices.vector_store.base:

from_vector_store(vector_store: llama_index.core.vector_stores.types.BasePydanticVectorStore, embed_model: Union[llama_index.core.base.embeddings.base.BaseEmbedding, ForwardRef('LCEmbeddings'), str, NoneType] = None, **kwargs: Any) -> 'VectorStoreIndex' method of abc.ABCMeta instance



In [15]:
# Build one index per vector store; both embed queries with the same remote model.
qna_index = VectorStoreIndex.from_vector_store(
    vector_store=vec_store_qna,
    embed_model=embed_model,
)
context_index = VectorStoreIndex.from_vector_store(
    vector_store=vec_store_context,
    embed_model=embed_model,
)

In [17]:
# One top-3 retriever per index, in the order: Q&A first, context second.
retriever_list = [
    VectorIndexRetriever(index=index, similarity_top_k=3)
    for index in (qna_index, context_index)
]

# Keep the individual retrievers addressable by name as well.
qna_retriever, context_retriever = retriever_list

In [19]:
class SQUAD_DATA_RETRIEVER(BaseRetriever):
    """Retriever that sends one query to several retrievers and concatenates the hits.

    Results are returned in retriever order; they are neither de-duplicated
    nor re-sorted by score.
    """

    def __init__(self, retriever_list):
        """Store the wrapped retrievers and initialise the base class.

        Args:
            retriever_list: Iterable of objects exposing ``retrieve(query)``.
        """
        self.retriever_list = retriever_list
        # Run BaseRetriever's own initialiser so the base-class state used by
        # the public retrieve() path is set up (see the LlamaIndex custom
        # retriever documentation).
        super().__init__()

    def _retrieve(self, query):
        """Query every wrapped retriever and return all hits as one list.

        Args:
            query: The query object handed over by ``BaseRetriever.retrieve``;
                it is passed unchanged to each wrapped retriever.

        Returns:
            A list with the nodes of the first retriever, followed by those of
            the second, and so on.
        """
        result = []
        for retriever in self.retriever_list:
            result.extend(retriever.retrieve(query))
        return result

In [21]:
# Combine the Q&A and context retrievers built above behind a single retriever.
squad_retriever = SQUAD_DATA_RETRIEVER(retriever_list)

In [22]:
# Sample question; `query` is reused later with the packaged retriever.
query = 'Where did beyonce grow up?'
# Hits from both wrapped retrievers, concatenated in retriever order.
nodes = squad_retriever.retrieve(query)

In [24]:
# Spot-check a single hit instead of dumping the whole list.
nodes[0]

NodeWithScore(node=TextNode(id_='56bf6b0f3aeaaa14008c9601', embedding=None, metadata={'context_hash': '5566f9c0998385b8a8a2c94aa64aa980', 'database': 'chatbot_ui_v3', 'collection': 'qna', 'answer': 'Houston, Texas', 'title': 'Beyoncé'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='In what city and state did Beyonce  grow up? ', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.8588517725815735)

In [26]:
# NOTE: this rebinds SQUAD_DATA_RETRIEVER, shadowing the prototype class defined
# earlier in this notebook; from here on the name refers to the src.retriever version.
from src.retriever import SQUAD_DATA_RETRIEVER

In [27]:
# The packaged class takes different arguments from the prototype. They appear to be:
# Chroma client, collection names, embedding model, top-k — TODO confirm against src/retriever.py.
retriever = SQUAD_DATA_RETRIEVER(client, ['qna', 'context'], embed_model, 3 )

In [28]:
# Re-run the same sample query through the packaged retriever.
result = retriever.retrieve(query)

In [29]:
# Show every node returned by the packaged retriever.
result

[NodeWithScore(node=TextNode(id_='56bf6b0f3aeaaa14008c9601', embedding=None, metadata={'database': 'chatbot_ui_v3', 'collection': 'qna', 'title': 'Beyoncé', 'answer': 'Houston, Texas', 'context_hash': '5566f9c0998385b8a8a2c94aa64aa980'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='In what city and state did Beyonce  grow up? ', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.8588517725815735),
 NodeWithScore(node=TextNode(id_='56be85543aeaaa14008c9065', embedding=None, metadata={'answer': 'singing and dancing', 'title': 'Beyoncé', 'collection': 'qna', 'context_hash': '5566f9c0998385b8a8a2c94aa64aa980', 'database': 'chatbot_ui_v3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='What areas d

In [30]:
# Minimum similarity score a node must reach to be kept.
MIN_SCORE = 0.5
# The truthiness check also drops nodes whose score is None (or 0) before the comparison.
filtered_nodes = [n for n in result if n.score and n.score >= MIN_SCORE]


In [31]:
# Show the nodes that passed the score filter.
filtered_nodes

[NodeWithScore(node=TextNode(id_='56bf6b0f3aeaaa14008c9601', embedding=None, metadata={'database': 'chatbot_ui_v3', 'collection': 'qna', 'title': 'Beyoncé', 'answer': 'Houston, Texas', 'context_hash': '5566f9c0998385b8a8a2c94aa64aa980'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='In what city and state did Beyonce  grow up? ', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.8588517725815735),
 NodeWithScore(node=TextNode(id_='56be85543aeaaa14008c9065', embedding=None, metadata={'answer': 'singing and dancing', 'title': 'Beyoncé', 'collection': 'qna', 'context_hash': '5566f9c0998385b8a8a2c94aa64aa980', 'database': 'chatbot_ui_v3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='What areas d

In [33]:
# Print each surviving node's metadata dict, followed by a dashed divider.
for node in filtered_nodes:
    print(node.metadata, '-' * 5, sep='\n')

{'database': 'chatbot_ui_v3', 'collection': 'qna', 'title': 'Beyoncé', 'answer': 'Houston, Texas', 'context_hash': '5566f9c0998385b8a8a2c94aa64aa980'}
-----
{'answer': 'singing and dancing', 'title': 'Beyoncé', 'collection': 'qna', 'context_hash': '5566f9c0998385b8a8a2c94aa64aa980', 'database': 'chatbot_ui_v3'}
-----
{'database': 'chatbot_ui_v3', 'answer': 'late 1990s', 'title': 'Beyoncé', 'collection': 'qna', 'context_hash': '5566f9c0998385b8a8a2c94aa64aa980'}
-----


In [34]:
# Dump the raw attribute dict of every surviving node, followed by a dashed divider.
for node in filtered_nodes:
    attributes = vars(node)
    print(attributes)
    print('-' * 5)

{'node': TextNode(id_='56bf6b0f3aeaaa14008c9601', embedding=None, metadata={'database': 'chatbot_ui_v3', 'collection': 'qna', 'title': 'Beyoncé', 'answer': 'Houston, Texas', 'context_hash': '5566f9c0998385b8a8a2c94aa64aa980'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='In what city and state did Beyonce  grow up? ', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), 'score': 0.8588517725815735}
-----
{'node': TextNode(id_='56be85543aeaaa14008c9065', embedding=None, metadata={'answer': 'singing and dancing', 'title': 'Beyoncé', 'collection': 'qna', 'context_hash': '5566f9c0998385b8a8a2c94aa64aa980', 'database': 'chatbot_ui_v3'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='What areas did Beyonce com

In [37]:
# Break the last filtered node down attribute by attribute. The node is taken
# explicitly from filtered_nodes rather than from the `node` loop variable left
# behind by an earlier cell, so this cell no longer depends on which loop ran last.
last_node = filtered_nodes[-1]
for attr_name, attr_value in vars(last_node).items():
    print(attr_name)
    print(5*'-')
    print(attr_value)
    print(5*'=')

node
-----
Node ID: 56d43c5f2ccc5a1400d830a9
Text: When did Beyoncé rise to fame?
=====
score
-----
0.6890149087215783
=====


In [38]:
from pymongo import MongoClient
def get_mongo_db_client(uri="mongodb://localhost:27017/"):
    """Connect to MongoDB and verify that the server answers a ping.

    Args:
        uri: MongoDB connection string. Defaults to the local instance this
            notebook has always used, so zero-argument calls are unchanged.

    Returns:
        The connected ``MongoClient``.

    Raises:
        ConnectionError: If the ping reply does not report ``ok``.
        Errors raised by PyMongo while sending the ping propagate unchanged.
    """
    client = MongoClient(uri)
    # Explicit check instead of `assert`: assertions are stripped under
    # `python -O`, which would skip the ping entirely. Comparing against exactly
    # {'ok': 1.0} also rejects otherwise healthy replies that carry extra fields
    # (replica-set members add cluster-time information, for example).
    reply = client.admin.command("ping")
    if reply.get("ok") != 1.0:
        raise ConnectionError(f"MongoDB ping failed: {reply!r}")
    return client

# MongoDB client used by the context lookups in the cells below.
mongo_db_client = get_mongo_db_client()

In [48]:
def get_context(context_hash, collection):
    """Return the stored context text for a context hash.

    Args:
        context_hash: Value matched against the documents' ``context_hash`` field.
        collection: Collection-like object exposing ``find_one(filter)``
            (a PyMongo ``Collection`` in this notebook).

    Returns:
        The ``context`` field of the first matching document, or None when no
        document matches.

    Raises:
        KeyError: If the matching document has no ``context`` field.
    """
    # find_one fetches a single document instead of opening a cursor over every
    # match only to read its first item.
    document = collection.find_one({"context_hash": context_hash})
    if document is None:
        return None
    return document['context']
    

In [50]:
# For every hit that came from the 'qna' collection, fetch and print the
# context document it points to (via its context hash).
for node in filtered_nodes:
    meta = node.metadata
    if meta.get('collection') != 'qna':
        continue  # only Q&A hits are resolved here
    database = meta.get('database')
    context_hash = meta.get('context_hash')
    # The context text lives in the 'context' collection of the node's database.
    collection = mongo_db_client[database]['context']
    context = get_context(context_hash, collection)
    print(context, 5*'=', sep='\n')

Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".
=====
Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead sing

In [51]:
# Distinct context hashes referenced by the Q&A hits; a set, so each context is listed once.
context_to_lookup = set()
for scored_node in filtered_nodes:
    if scored_node.metadata.get('collection') == 'qna':
        context_to_lookup.add(scored_node.metadata['context_hash'])

In [52]:
# Show the distinct context hashes collected from the Q&A hits.
context_to_lookup

{'5566f9c0998385b8a8a2c94aa64aa980'}

In [45]:
# Print the raw context document(s) for the last context hash that was looked up.
# The collection is addressed by *name*: `collection` is bound to a Collection
# object by the lookup loop in an earlier cell, so it cannot be used as a
# subscript on the database. `database` and `context_hash` are the values left
# over from that loop.
for document in mongo_db_client[database]['context'].find({'context_hash': context_hash}):
    print(document)

{'_id': ObjectId('6867ede60402e613c63ab4e7'), 'context_hash': '5566f9c0998385b8a8a2c94aa64aa980', 'context': 'Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny\'s Child. Managed by her father, Mathew Knowles, the group became one of the world\'s best-selling girl groups of all time. Their hiatus saw the release of Beyoncé\'s debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".', 'title': 'Beyoncé'}
