## Importing Libraries

In [1]:
from pymongo import MongoClient
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import JSONLoader
from langchain_mongodb import MongoDBAtlasVectorSearch


True

## Creating MongoDB Instance

In [2]:
# Required settings are read with os.environ[...] so an unset variable raises
# KeyError right here. With os.getenv() the value would be None, and
# MongoClient(None) falls back to its default host instead of failing, so the
# following cells would operate on the wrong deployment.
client = MongoClient(
    os.environ["MONGODB_URI"],
    tls=True,
    # NOTE(review): this disables TLS certificate validation. Kept to preserve
    # the existing behaviour; confirm whether it is still needed outside local
    # development and remove it if the certificate chain can be verified.
    tlsAllowInvalidCertificates=True,
)
# DB_NAME stays optional: get_database(None) selects the default database
# named in the connection URI.
db = client.get_database(os.getenv("DB_NAME"))
c_name = os.environ["COLLECTION_NAME"]
collection = db[c_name]

In [3]:
# WARNING: destructive. The empty filter {} matches every document, so this
# empties the collection before the documents are inserted again further down.
collection.delete_many({})

DeleteResult({'n': 6, 'electionId': ObjectId('7fffffff000000000000016f'), 'opTime': {'ts': Timestamp(1729456672, 6), 't': 367}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1729456672, 11), 'signature': {'hash': b"T$\xdc\xa4I\x94[\x83\xa9\x9dU\xa4\xce\xcc'\xf9\xbd\xf9b\xf1", 'keyId': 7396507345313857564}}, 'operationTime': Timestamp(1729456672, 6)}, acknowledged=True)

## Metadata Function

In [None]:
def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy table-level details from one JSON record into a document's metadata.

    Args:
        record: One table entry. The keys read are "Table_Name",
            "Table_Description" and "Columns"; each column is a dict that may
            carry "Column_Name", "Column_Description" and "encoded_values".
        metadata: Metadata dict supplied by the loader; it is updated in place.

    Returns:
        The same ``metadata`` dict with "Table_Name", "Table_Description",
        "Encoded_Values" and "Column_Description" added. The last two are the
        ``str()`` form of the collected dicts.
    """
    def column_retriever(ls):
        """Collect per-column lookups from a list of column dicts.

        Returns ``(encode_values, cdesc)`` where ``encode_values`` maps a column
        name to its "encoded_values" entry (only for columns that have that
        key) and ``cdesc`` maps a column description to the column name.
        """
        cdesc = {}
        encode_values = {}
        # `ls or ()` tolerates a record whose "Columns" key is missing or None.
        for column in ls or ():
            cdesc[column.get("Column_Description")] = column.get("Column_Name")
            # Test for the key itself rather than comparing against a sentinel,
            # so no real value can be mistaken for "absent".
            if "encoded_values" in column:
                encode_values[column.get("Column_Name")] = column["encoded_values"]

        return encode_values, cdesc

    encode_values, cdesc = column_retriever(record.get("Columns"))
    metadata["Table_Name"] = record.get("Table_Name")
    metadata["Table_Description"] = record.get("Table_Description")
    metadata["Encoded_Values"] = str(encode_values)
    metadata["Column_Description"] = str(cdesc)

    return metadata

# The jq expression selects each element of every top-level entry's
# "Table_Info" array; the element's "Table_Description" is used as the content
# key and metadata_func fills in the rest of the metadata.
loader = JSONLoader(
    "tableinfo_new.json",
    ".[].Table_Info[]",
    metadata_func=metadata_func,
    content_key="Table_Description",
)
data = loader.load()

## Creating Vector Embeddings

In [6]:
# The API key comes from the environment; it is never written into the notebook.
openai_api_key = os.getenv("OPENAI_API_KEY")
embedding_function = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=openai_api_key,
)

## Data Ingestion using Vector Embeddings in MongoDB

In [7]:
# The Atlas Search index name is a property of the vector store, so it is
# passed to the constructor. Passing it to add_documents() has no effect there
# and leaves the store on the library's default index name.
index_name = os.getenv("ATLAS_VECTOR_SEARCH_INDEX_NAME")
# Only override the default when the variable is actually set.
index_kwargs = {"index_name": index_name} if index_name else {}
vector_search = MongoDBAtlasVectorSearch(
    collection=collection,
    embedding=embedding_function,
    **index_kwargs,
)
# Add documents to the vector index
vector_search.add_documents(documents=data)

[ObjectId('67156a68c8305c5dd94ea40e'),
 ObjectId('67156a68c8305c5dd94ea40f'),
 ObjectId('67156a68c8305c5dd94ea410'),
 ObjectId('67156a68c8305c5dd94ea411'),
 ObjectId('67156a68c8305c5dd94ea412'),
 ObjectId('67156a68c8305c5dd94ea413')]