In [1]:
import os
import logging
from typing import Any, Set

from automata.config.base import AgentConfigName
from automata.singletons.dependency_factory import dependency_factory, DependencyFactory
from automata.singletons.py_module_loader import py_module_loader
from automata.context_providers.symbol_synchronization import (
    SymbolProviderSynchronizationContext,
)
from automata.symbol.graph import SymbolGraph
from automata.symbol_embedding.vector_databases import (
    ChromaSymbolEmbeddingVectorDatabase,
)
from automata.symbol_embedding.base import SymbolCodeEmbedding, SymbolDocEmbedding
from automata.llm.providers.openai import OpenAIEmbeddingProvider
from automata.experimental.search.rank import SymbolRank, SymbolRankConfig
from automata.symbol.symbol_utils import get_rankable_symbols

# Logger for this notebook session (inside a notebook kernel `__name__` is "__main__").
# NOTE(review): no cell visible here actually logs through it.
logger = logging.getLogger(__name__)


In [2]:
# Re-initialise the module-loader singleton so this cell can be re-run safely.
py_module_loader.reset()

# Location of the checked-out repository and the project name to index.
# Both can be overridden without editing the notebook by setting the
# AUTOMATA_ROOT_PATH / AUTOMATA_PROJECT_NAME environment variables before
# starting the kernel; the defaults are the values originally hard-coded here.
# NOTE(review): the default root path points at a `llama_index` checkout while
# the project name is "langchain" -- confirm this pairing is intentional.
root_path = os.environ.get(
    "AUTOMATA_ROOT_PATH", "/Users/ocolegrove/repo_store/llama_index"
)
project_name = os.environ.get("AUTOMATA_PROJECT_NAME", "langchain")

py_module_loader.initialize(root_path, project_name)

In [3]:
# Open the Chroma-backed code-embedding database for `project_name`, persisted
# under the dependency factory's default code-embedding path.
# `SymbolCodeEmbedding.from_args` is handed over as the entry factory.
code_embedding_db = ChromaSymbolEmbeddingVectorDatabase(
    project_name,
    factory=SymbolCodeEmbedding.from_args,
    persist_directory=DependencyFactory.DEFAULT_CODE_EMBEDDING_FPATH,
)

# Sanity check: how many entries does the database currently hold?
entry_count = len(code_embedding_db.get_ordered_entries())
entry_count

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



5321

In [4]:
# Take the first ten keys in the database's ordering and fetch their entries
# with a single batched lookup.
first_ten_keys = code_embedding_db.get_ordered_keys()[:10]
results = code_embedding_db.batch_get(keys=first_ten_keys)

result =  {'ids': 'langchain.agents.agent.Agent._construct_scratchpad', 'metadatas': [{'symbol_uri': 'scip-python python langchain 7e119dd77e468ac54ab3f2e27a5f1cadf29c5773 `langchain.agents.agent`/Agent#_construct_scratchpad().'}], 'embeddings': [[-0.011183842085301876, 0.015537749975919724, 0.01879274845123291, -0.011093424633145332, 0.01244271919131279, 0.013965891674160957, 0.01649755798280239, -0.002649902831763029, -0.031604088842868805, -0.014028487727046013, 0.018500633537769318, 0.03382973000407219, -0.03847575560212135, 0.01025880966335535, -0.0003701430687215179, -0.036611780524253845, -0.001377984182909131, -0.007789740338921547, 0.02847428433597088, -0.0113855404779315, -0.005035510752350092, -0.002940279198810458, 0.014299738220870495, -0.007956663146615028, 0.008624355308711529, -0.018333710730075836, 0.02138005569577217, -0.026415565982460976, -0.033106397837400436, 0.0059188115410506725, 0.004044405650347471, -0.013103456236422062, -0.0025073227006942034, -0.03597190976

In [5]:
# Show the fetched entries; they render as plain object reprs, one per requested key.
results

[<automata.symbol_embedding.base.SymbolCodeEmbedding at 0x2a42cafd0>,
 <automata.symbol_embedding.base.SymbolCodeEmbedding at 0x2a42cafa0>,
 <automata.symbol_embedding.base.SymbolCodeEmbedding at 0x2a42caf40>,
 <automata.symbol_embedding.base.SymbolCodeEmbedding at 0x2a42cad60>,
 <automata.symbol_embedding.base.SymbolCodeEmbedding at 0x2a42cab20>,
 <automata.symbol_embedding.base.SymbolCodeEmbedding at 0x2a473d1c0>,
 <automata.symbol_embedding.base.SymbolCodeEmbedding at 0x2a473d400>,
 <automata.symbol_embedding.base.SymbolCodeEmbedding at 0x2a473d640>,
 <automata.symbol_embedding.base.SymbolCodeEmbedding at 0x2a473d880>,
 <automata.symbol_embedding.base.SymbolCodeEmbedding at 0x2a473dac0>]

In [None]:

# Register the embedding database opened earlier, plus a fresh OpenAI embedding
# provider, as overrides on the shared dependency factory.
#
# The `symbol_graph` override is currently disabled. To re-enable it, build the
# graph as below and pass `symbol_graph=symbol_graph` to `set_overrides`:
#     symbol_graph = SymbolGraph(
#         os.path.join(DependencyFactory.DEFAULT_SCIP_FPATH, f"{project_name}.scip")
#     )
embedding_provider = OpenAIEmbeddingProvider()

dependency_factory.set_overrides(
    code_embedding_db=code_embedding_db,
    embedding_provider=embedding_provider,
)

# Ask the factory for the code-embedding handler (presumably built from the
# overrides registered above -- confirm against DependencyFactory) and load
# every stored code embedding in order.
symbol_code_embedding_handler = dependency_factory.get("symbol_code_embedding_handler")
code_embeddings = symbol_code_embedding_handler.get_ordered_entries()

# Disabled similarity-search demo. It needs `embedding_similarity_calculator`,
# which no active cell defines, so it is kept here for reference only:
#     query = "What is langchain?"
#     print(f"Demonstrating code embedding search results for query = '{query}'")
#     code_similarity_results = embedding_similarity_calculator.calculate_query_similarity_dict(code_embeddings, query)
#     for i, rank in enumerate(list(code_similarity_results.items())[0:10]):
#         print(f"rank {i} = {rank[0].dotpath} with distance {rank[1]:.3f}")
    

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
# NOTE(review): this entire cell is commented out, so running it does nothing.
# It is a variant of the dependency setup in the previous cell that additionally
# overrides `symbol_graph` and fetches `embedding_similarity_calculator` -- the
# name the commented-out similarity-search demo in the previous cell relies on.
# Either re-enable it (before that demo) or delete it.
# symbol_graph = SymbolGraph(
#     os.path.join(DependencyFactory.DEFAULT_SCIP_FPATH, f"{project_name}.scip")
# )
# embedding_provider = OpenAIEmbeddingProvider()

# dependency_factory.set_overrides(
#     **{
#         "symbol_graph": symbol_graph,
#         "code_embedding_db": code_embedding_db,
#         "embedding_provider": embedding_provider,
#     }
# )

# embedding_similarity_calculator = dependency_factory.get('embedding_similarity_calculator')
# symbol_code_embedding_handler = dependency_factory.get('symbol_code_embedding_handler')
