In [None]:
import pandas as pd
import numpy as np


from Thorfinn.Karlsefi.kb_connect import kb
from Thorfinn.Hippo.chunkify import chunk
from Thorfinn.Models.embedder import embedder_factory
from Thorfinn.Hippo.utils import *
from Thorfinn.utils import *
from Thorfinn.Models.llm import llm_factory
from Thorfinn.Models.utils import *

In [None]:
# Load the API credentials from the local config file. Both keys are taken
# from the first row of the resulting frame, under the 'openai' and
# 'googleai' labels.
keys = pd.read_json('config.json')

credentials_row = keys.iloc[0]
oapi_key = credentials_row['openai']
gapi_key = credentials_row['googleai']

<h3>In this notebook we combine the pieces from the other notebooks into a functional chatbot that uses a vector database (vecdb) as its knowledge base (all handled by Thorfinn!)</h3>

<h4>The cell below turns all of the files in the example folder into chunked blocks that will make up the knowledge base</h4>

In [None]:
# Gather the metadata frame for the example folder; it is the input to the chunker.
metadata_df = get_metadata("example_folder_for_hippo")

# Chunk the files. 'file_vision' is the column handed to the chunker (and later
# to the embedder) as the text to embed.
chunker = chunk(embed_provider='openai')
knowledge_base = chunker.chunk_processing(
    df=metadata_df,
    to_embed='file_vision',
)

# Build an OpenAI-backed embedder and store the batch embeddings of the
# 'file_vision' column in a new 'embeddings' column.
embedder = embedder_factory.create(
    api_key=oapi_key,
    provider='openai',
)
knowledge_base['embeddings'] = embedder.batch_embed(
    df=knowledge_base,
    to_embed='file_vision',
)

# Normalise column names: lower-case them and replace spaces with underscores.
# This runs after embedding, so the column names used above are the
# pre-normalisation ones.
normalised_columns = knowledge_base.columns.str.lower().str.replace(' ','_')
knowledge_base.columns = normalised_columns

# Preview a single row of the finished knowledge base.
knowledge_base.head(1)

<h4>Next we start up the vecdb, create a new collection, and load in the data</h4>

In [None]:
# Create the knowledge-base handle configured with the 'test' collection, then
# start the local database and print whatever start_local_db() returns.
milv_connection = kb(collections=['test'])

startup_status = milv_connection.start_local_db()
print(startup_status)

In [None]:
# Connect through the handle created earlier (presumably to the local database
# that was just started — confirm against kb.connect). Any return value is
# shown as the cell output.
milv_connection.connect()

In [None]:
# Create the 'test' collection that the knowledge base is uploaded into below.
# NOTE(review): behaviour when the collection already exists is not visible
# from this notebook — confirm before re-running this cell.
milv_connection.create_collection(collection_name='test')

<h4>We upload the knowledge base to the vecDB here</h4>

In [None]:
# Serialise the knowledge base as a JSON string of row records, then upload it
# into the 'test' collection under the partition named below. The same
# partition name is used again when searching.
records_json = knowledge_base.to_json(orient='records')

milv_connection.upload_data(
    collection_name="test",
    data=records_json,
    partition='Jon_is_a_loser',
)

<h4>Now we vectorize the search query and do a search (we can throw the results of the search into a DataFrame for easy usage)</h4>

In [None]:
# Reuse the embedder instantiated earlier to turn the search question into a
# vector that can be passed to the database search.
query_text = "What is an attention mechanism?"
search_vector = embedder.embed_string(query=query_text)

In [None]:
# Search the 'test' collection with the query vector, restricted to the
# partition the data was uploaded to, and request only the 'name' and
# 'file_vision' fields.
raw_hits = milv_connection.search(
    table_name='test',
    search_vector=search_vector,
    partitions=["Jon_is_a_loser"],
    projected_fields=['name','file_vision'],
)

# Wrap the raw search output in a DataFrame and preview the first three rows.
search_results = pd.DataFrame(raw_hits)
search_results.head(3)

<h4>Kill the local Milvus database instance</h4>

In [None]:
# Stop the local database started earlier in the notebook.
# NOTE(review): hard=True presumably forces termination instead of a graceful
# shutdown — confirm against kb.stop_local_db.
milv_connection.stop_local_db(hard=True)