In [1]:
import os

from automata.core.utils import config_fpath
from automata.config.config_types import ConfigCategory
from automata.core.database.vector import JSONVectorDatabase
from automata.core.symbol.graph import SymbolGraph
from automata.core.symbol.search.rank import SymbolRankConfig, SymbolRank

In [2]:
scip_path = os.path.join(
    config_fpath(), ConfigCategory.SYMBOL.value, "index.scip"
)
symbol_graph = SymbolGraph(scip_path)

symbol_rank_config = SymbolRankConfig()
symbol_graph_subgraph = symbol_graph.get_rankable_symbol_subgraph()
symbol_rank = SymbolRank(symbol_graph_subgraph.graph, SymbolRankConfig())

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 699/699 [00:00<00:00, 5614.49it/s]


In [3]:
ranks = symbol_rank.get_ranks()

In [4]:
embedding_code_fpath = os.path.join(
    config_fpath(),
    ConfigCategory.SYMBOL.value,
    "symbol_code_embedding.json"
)

embedding_code_db = JSONVectorDatabase(embedding_code_fpath)

print("Printing out Code Embeddings for top ten ranked symbols\n")
for ir, (symbol, rank) in enumerate(ranks[0:10]):
    symbol_code_embedding = embedding_code_db.get(symbol)
    print(f"--->Symbol DotPath<---\n{symbol.dotpath}\n\nInteger Rank={ir}, Rank Score={rank:.3f}\n")
    print(f"--->Documentation Code<---\n\n{symbol_code_embedding.embedding_source}\n")
    print(f"--->Vector<---\n\n{symbol_code_embedding.vector}\n")

Printing out Code Embeddings for top ten ranked symbols

--->Symbol DotPath<---
automata.core.symbol.symbol_types.Symbol

Integer Rank=0, Rank Score=0.006

--->Documentation Code<---

@dataclass
class Symbol:
    """
    Symbol is similar to a URI, it identifies a class, method, or a local variable. SymbolInformation contains rich metadata about symbols such as the docstring.

    Symbol has a standardized string representation, which can be used interchangeably with Symbol. The syntax for Symbol is the following:

    # (<x>)+ stands for one or more repetitions of <x>
    <symbol>               ::= <scheme> ' ' <package> ' ' (<descriptor>)+ | 'local ' <local-id>
    <package>              ::= <manager> ' ' <package-name> ' ' <version>
    <scheme>               ::= any UTF-8, escape spaces with double space.
    <manager>              ::= same as above, use the placeholder '.' to indicate an empty value
    <package-name>         ::= same as above
    <version>              ::= same a

KeyError: 'Symbol Symbol(scip-python python automata 9db05b7e7ebd49f93703df45accd7e5f9d5cedb0 posixpath/join()., scip-python, Package(python automata 9db05b7e7ebd49f93703df45accd7e5f9d5cedb0), (Descriptor(posixpath, 1), Descriptor(join, 4))) not in database'

In [5]:
embedding_doc_fpath = os.path.join(
    config_fpath(),
    ConfigCategory.SYMBOL.value,
    "symbol_doc_embedding_l3.json"
)

embedding_doc_db = JSONVectorDatabase(embedding_doc_fpath)

print("Printing out Documentation Embeddings for top ten ranked symbols\n")
for ir, (symbol, rank) in enumerate(ranks[0:10]):
    symbol_doc_embedding = embedding_doc_db.get(symbol)
    print(f"--->Symbol DotPath<---\n{symbol.dotpath}\n\nInteger Rank={ir}, Rank Score={rank:.3f}\n")
    print(f"--->Documentation Code<---\n\n{symbol_doc_embedding.embedding_source}\n")
    print(f"--->Vector<---\n\n{symbol_doc_embedding.vector}\n")

Printing out Documentation Embeddings for top ten ranked symbols

--->Symbol DotPath<---
automata.core.symbol.symbol_types.Symbol

Integer Rank=0, Rank Score=0.006

--->Documentation Code<---

# Symbol

`Symbol` is similar to a URI, it identifies a class, method, or a local variable. `Symbol` has a standardized string representation, which can be used interchangeably with the URI and can be parsed using `automata.core.symbol.parser.parse_symbol`.

## Overview

`Symbol` has a standardized string representation that can be used to identify a class, method, or local variable. It also allows for metadata queries like determining if the symbol is local, meta, or a parameter. It can be initialized by passing its URI, scheme, package, and descriptors and offers various utility methods deriving more information about the symbol and related operations.

## Related Symbols

- `automata.core.symbol.parser.parse_symbol`
- `automata.tests.unit.test_symbol_parser.test_parse_symbol`

## Example

```p

KeyError: 'Symbol Symbol(scip-python python automata 9db05b7e7ebd49f93703df45accd7e5f9d5cedb0 posixpath/join()., scip-python, Package(python automata 9db05b7e7ebd49f93703df45accd7e5f9d5cedb0), (Descriptor(posixpath, 1), Descriptor(join, 4))) not in database'