# Forgather

A notebook for experimenting with Forgather's syntax.

In [None]:
import sys, os
modules_path = os.path.join('..', 'src')
if modules_path not in sys.path: sys.path.insert(0, modules_path)

from pprint import pp, pformat

from IPython import display as ds

from forgather import Latent
from forgather.config import ConfigEnvironment
from forgather.preprocess import PPEnvironment
from aiws.construct import generate_code

# Render code via Markdown render
def render_codeblock(language, source, header=None):
    """Display `source` in the notebook as a fenced Markdown code block.

    language: info string placed after the opening fence (e.g. "python", "yaml").
    source:   the content of the block; it is interpolated with an f-string.
    header:   optional Markdown emitted on its own line before the fence.
    """
    # A missing header contributes nothing; otherwise it gets its own line.
    prefix = "" if header is None else header + '\n'
    markdown = f"{prefix}```{language}\n{source}\n\n```"
    display(ds.Markdown(markdown))

# Show the common syntax definition, which lives in '../syntax.md'.
# The encoding is given explicitly so the Markdown renders the same regardless
# of the platform's default text encoding.
with open(os.path.join('..', 'syntax.md'), 'r', encoding='utf-8') as f:
    display(ds.Markdown(f.read()))

---
## Create Config Environment

A configuration environment is required to construct configurations from YAML/Jinja2 inputs; it contains the information needed to locate Jinja2 templates by name, and it defines the global variables available to templates.

```python
ConfigEnvironment(
    searchpath: Iterable[str | os.PathLike] | str | os.PathLike = tuple("."),
    pp_environment: Environment = None,
    global_vars: Dict[str, Any] = None,
):
```

- searchpath: A list of directories to search for templates in.
- pp_environment: Override the default Jinja2 environment class with another implementation.
- global_vars: Jinja2 global variables visible to all templates.

In [None]:
# Build the environment with every constructor argument left at its default
# (no searchpath, pp_environment, or global_vars is passed).
env = ConfigEnvironment()

## Define Input

A configuration document consists of a combination of YAML and Jinja2 syntax. Typically, a config template would be loaded from a file, but for testing we can create a template directly from a Python string.

Both the Jinja2 template and the configuration may accept variables.

In [None]:
# Template source for this experiment. The `-- set` line defines `model_src`,
# which the `{{model_src}}` placeholders in the node tags refer to.
document = """
-- set model_src = '../model_src/bits/'

main: !singleton:{{model_src}}causal_layer_stack.py:CausalLayerStack
    layer_factory: !lambda:{{model_src}}pre_ln_layer.py:PreLNLayer
        feedforward: !factory:{{model_src}}feedforward_layer.py:FeedforwardLayer
            d_model: !var "hidden_size"
            d_feedforward: !var "dim_feedforward"
            identity: feedforward_factory
        attention: !factory:torch.nn:Identity []
        norm1: &layer_norm_factory !factory:torch.nn:LayerNorm [!var "hidden_size"]
        norm2: *layer_norm_factory
    post_norm: *layer_norm_factory
    num_hidden_layers: 2
"""

# Keyword args to pass to the template (none for this document).
pp_kwargs = {}

# Positional args to pass to factory (none for this document).
factory_args = []

# Keyword args to pass to factory; the keys match the names used by the
# `!var` entries in the document.
factory_kwargs = {
    "hidden_size": 64,
    "dim_feedforward": 256,
}

In [None]:
# Template source for this experiment: a list node whose single element is
# built from `map`, a `pow` lambda node, and `range`.
document = """
main: &foobar !list@foobar
    - !singleton:map
        # The generated object is equivalent to: "lambda arg0: pow(arg0, 2)"
        - !lambda:pow [ !var "arg0", !var "power" ]
        - !singleton:range [ 4 ]
"""

# Keyword args to pass to the template (none for this document).
pp_kwargs = {}

# Positional args to pass to factory (none for this document).
factory_args = []

# Keyword args to pass to factory; `power` matches the `!var "power"` entry.
factory_kwargs = {"power": 3}

In [None]:
# Template source for this experiment: a dict node is defined once under
# `.define` with the anchor `foobar`, then referenced from `main` by alias.
document = """
.define: &foobar !dict@foobar
    foo: 1
    bar: 2
    baz: |
        She sells sea shells
        by the sea shore
main:
    - *foobar
"""

# Keyword args to pass to the template (none for this document).
pp_kwargs = {}

# Positional args to pass to factory (none for this document).
factory_args = []

# Keyword args to pass to factory (none for this document).
factory_kwargs = {}

## Convert Document to Graph

```python
class ConfigEnvironment:
... 
    def load(
        self,
        config_path: os.PathLike | str,
        /,
        **kwargs,
    ) -> Config:
...
    def load_from_string(
        self,
        config: str,
        /,
        **kwargs,
    ) -> Config:
```

- load: Load a template from a path; all paths relative to 'searchpaths' are searched for the template.
    - config_path: The relative (to searchpaths) template path.
    - kwargs: These are passed into the context of the template.
- load_from_string: As with load, but a Python string defines the template body; Note that this bypasses the template loader.
    - config: A Python string with a Jinja2 template.
    - kwargs: Passed to the template.

In [None]:
# Process the in-memory template string, passing pp_kwargs to the template,
# and keep the `.config` attribute of the result as the node graph.
graph = env.load_from_string(document, **pp_kwargs).config
# Pretty-format the graph and show it as a Python code block under a heading.
render_codeblock("python", pformat(graph), "### Node Graph")

## Convert Graph to YAML

Convert the node-graph to a YAML representation. This may not be exactly the same as it was in the source template, but should be semantically equivalent.

In [None]:
# Convert the node graph to YAML with Latent.to_yaml() and show it as a YAML code block.
render_codeblock("yaml", Latent.to_yaml(graph))

## Convert Graph to Python

Convert the graph into the equivalent Python code.

The output is a dictionary containing the following:

- imports: A list of tuples describing the required imports.
- dynamic_imports: A list of tuples describing the required dynamic imports.
- variables: A list of tuples describing all of the variables.
- definitions: Definitions for nodes which show up in the graph more than once.
- main_body: The main-body of the generated Python code.

Note: Forgather does not use the intermediary step of converting the graph to code as part of Latent.materialize(); the normal execution path directly interprets the node-graph when constructing objects.

In [None]:
# Convert the node graph to its Python-code description; the result is indexed
# below by the keys 'imports', 'dynamic_imports', 'variables', 'definitions'
# and 'main_body'.
generated_code = Latent.to_py(graph)

def render_to_py(generated_code):
    """Print and render the pieces of a Latent.to_py() result.

    generated_code: mapping with the keys 'imports', 'dynamic_imports',
        'variables', 'definitions' and 'main_body'.

    The three list-valued sections are pretty-printed, each under a heading
    describing its tuple layout, and only when non-empty. The definitions and
    main body are always rendered as Python code blocks.
    """
    # (key, heading) for each list-valued section, in display order:
    #   imports          - normal imports
    #   dynamic_imports  - imports where a python file is specified
    #   variables        - variable substitutions
    list_sections = (
        ('imports',
         "imports: list[tuple[module: str, symbol_name: str]]"),
        ('dynamic_imports',
         "\ndynamic-imports: list[tuple[module: str, symbol_name: str, searchpath: list[str]]]"),
        ('variables',
         "\nvariables: list[tuple[name: str, is_undefined: bool, default: Any]]"),
    )
    for key, heading in list_sections:
        entries = generated_code[key]
        if len(entries):
            print(heading)
            pp(entries)

    # The code sections are always shown, even when empty.
    for key, title in (('definitions', "#### Definitions:"),
                       ('main_body', "#### Main Body:")):
        render_codeblock("python", generated_code[key], title)

# Show the Latent.to_py() result for the current graph.
render_to_py(generated_code)

## Convert Graph to Python with Jinja2 Template

This function takes the output from Latent.to_py(graph) and uses it to render Python code using a Jinja2 template. If the template is unspecified, an implicit "built-in" template is used, which will generate appropriate import and dynamic import statements, where required.

```python
def generate_code(
    obj,
    template_name: Optional[str] = None,
    template_str: Optional[str] = None,
    searchpath: Optional[List[str | os.PathLike] | str | os.PathLike] = ".",
    env=None,  # jinja2 environment or compatible API
    output_file: Optional[str | os.PathLike] = None,
    return_value: Optional[Any] = Undefined,
    **kwargs,
) -> Any:
```

The default template accepts the following additional kwargs:

    factory_name: Optional[str]="construct", ; The name of the generated factory function.
    relaxed_kwargs: Optional[bool]=Undefined, ; if defined, **kwargs is added to the arg list
    
See 'help(generate_code)' for details.

In [None]:
# Render Python source for the graph; no template is named here.
# This rebinds `generated_code`, which previously held the Latent.to_py() result.
# NOTE(review): `name_policy` is not a named parameter in the generate_code
# signature shown in this notebook, so it is presumably consumed via **kwargs — confirm.
generated_code = generate_code(graph, name_policy=None)
render_codeblock("python", generated_code, "### Generated Code", )

## Materialize Graph

Construct the objects directly from the graph.

In [None]:
# Construct the objects directly from the node graph, forwarding the
# positional and keyword factory args defined in the input cell.
obj = Latent.materialize(
    graph,
    *factory_args,
    **factory_kwargs,
)
# Pretty-format the constructed object(s) and show them as a Python code block.
render_codeblock("python", pformat(obj), "### Objects")

## Execute Generated Code

Execute the generated code, then call the generated 'construct' function to construct the objects.

Note: Lambda nodes with args are not working at present (although Latent.materialize() works)

In [None]:
# Execute the generated source in the notebook namespace. It is expected to
# define a factory function named 'construct' (the default template's factory_name).
# NOTE(review): exec() runs arbitrary code; this is only reasonable because the
# source was generated locally from `graph`, never from untrusted input.
exec(generated_code)
# Call the generated factory with the same args that were given to Latent.materialize().
obj = construct(*factory_args, **factory_kwargs,)
# obj is interpolated into the code block as-is (not passed through pformat here).
render_codeblock("python", obj, "### Construct Object", )

In [None]:
# Scratch check: split on the first '@' only (maxsplit=1), yielding the text
# before and after it.
head, tail = 'foo@bar'.split('@', maxsplit=1)
print(head, tail)