In [24]:
# Install the required dependencies. Uncomment the line below on the first run to make sure all of them are installed.
# %pip install python-dotenv pymongo llama-index llama-index-embeddings-together llama-index-vector-stores-mongodb

In [15]:
import os
import tqdm
import pymongo
from dotenv import load_dotenv

In [25]:
# Load environment variables (python-dotenv reads them from a local .env file, if one exists)
load_dotenv()

ATLAS_URI = os.getenv('ATLAS_URI')
TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')

# Fail fast when a setting is absent or empty. Without this check a missing
# ATLAS_URI is handed to pymongo.MongoClient as None, which (per the pymongo
# docs) falls back to localhost instead of the intended Atlas cluster, and a
# missing API key only surfaces much later, at embedding time.
missing_vars = [
    name
    for name, value in (
        ('ATLAS_URI', ATLAS_URI),
        ('TOGETHER_API_KEY', TOGETHER_API_KEY),
    )
    if not value
]
if missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_vars)
        + ". Define them in the environment or in a .env file."
    )

print("Loaded MongoDB connection url and Together API Key")

Loaded MongoDB connection url and Together API Key


In [26]:
# Create the pymongo client for the deployment addressed by ATLAS_URI
client = pymongo.MongoClient(host=ATLAS_URI)

In [18]:
# Database constants
DB_NAME = "KB"                  # database passed to the vector store as db_name
COLLECTION_NAME = "docs"        # collection passed to the vector store as collection_name
INDEX_NAME = "idx_embedding"    # same value the vector store cell uses for index_name

In [20]:
from llama_index.core import ServiceContext
from llama_index.embeddings.together import TogetherEmbedding

# Embedding model accessed through the Together API with the key loaded earlier.
embed_model = TogetherEmbedding(
    api_key=TOGETHER_API_KEY,
    model_name="togethercomputer/m2-bert-80M-8k-retrieval",
)

# llm=None switches the LLM off; the captured output for this cell reports
# "LLM is explicitly disabled. Using MockLLM.".
# NOTE(review): ServiceContext appears to be deprecated in recent llama-index
# releases in favour of `Settings` -- confirm against the installed version.
service_context = ServiceContext.from_defaults(llm=None, embed_model=embed_model)

LLM is explicitly disabled. Using MockLLM.


  service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=None)


In [21]:
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core import StorageContext

# Vector store on top of the MongoDB collection. Every name comes from the
# database constants (DB_NAME, COLLECTION_NAME, INDEX_NAME) so they cannot
# drift apart; INDEX_NAME replaces a duplicated 'idx_embedding' literal.
vector_store = MongoDBAtlasVectorSearch(
    mongodb_client=client,
    db_name=DB_NAME,
    collection_name=COLLECTION_NAME,
    index_name=INDEX_NAME,
)

# Storage context that routes vector storage to the store created above.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [22]:
from llama_index.core import SimpleDirectoryReader

# Folder that holds the source documents; recursive=True asks the reader to
# include sub-folders as well.
data_dir = 'data'

directory_reader = SimpleDirectoryReader(input_dir=data_dir, recursive=True)
docs = directory_reader.load_data()

print(f"Loaded {len(docs)} chunks from '{data_dir}'")

Loaded 2081 chunks from 'data'


In [23]:
from llama_index.core import VectorStoreIndex

# Build the index from the loaded documents, using the storage context (which
# wraps the MongoDB vector store) and the service context (which carries the
# embedding model).
# NOTE(review): this presumably embeds every chunk and writes it to the
# collection, so re-running the cell likely inserts the same documents again
# -- confirm before executing it twice.
index = VectorStoreIndex.from_documents(
    documents=docs,
    service_context=service_context,
    storage_context=storage_context,
)