# Place for experimenting the progressive design

In [1]:
import os,sys
import yaml
import inspect
import importlib

sys.path.append('..')

import model_discovery.utils as U
from model_discovery.configs.gam_config import GAMConfig, GAMConfig_14M
from model_discovery.model.composer import GAUTree,GAUBase
# from model_discovery.evolution import BuildEvolution
from model_discovery.agents.flow.gau_utils import check_and_reformat_gau_code

ckpt_dir = os.environ['CKPT_DIR']
lib_dir = U.pjoin(ckpt_dir, 'test_composer', 'lib')
test_tree = GAUTree('TestTree', None, None, None,None,lib_dir)

prompts_dir='../model_discovery/agents/prompts/'
gab_py = U.read_file(U.pjoin(prompts_dir,'gab_template.py'))
gam_py = U.read_file(U.pjoin(prompts_dir,'gam_prompt.py'))
GAU_TEMPLATE = U.read_file(U.pjoin(prompts_dir,'gau_template.py'))
GAU_BASE=inspect.getsource(GAUBase)


  from .autonotebook import tqdm as notebook_tqdm


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\ChengJunyan1\.cache\huggingface\token
Login successful


## Parsers

In [3]:
import json

code='''
# gau.py   # DO NOT CHANGE OR REMOVE THE MAKK HERE, KEEP IT ALWAYS THE FIRST LINE #

import torch
import torch.nn as nn

from model_discovery.model.utils.modules import GAUBase # DO NOT CHANGE THIS IMPORT STATEMENT #


# YOU CAN IMPORT MORE MODULES HERE #

# YOU CAN DEFINE MORE CLASSES OR FUNCTIONS HERE #


class GAU(GAUBase): # DO NOT CHANGE THE NAME OF THIS CLASS
    """Generalized Autoregressive Block Unit
        Input:        X: (batch, seqlen, embed_dim), Z: {dict of all current intermediate variables}
        Output:       Y: (batch, seqlen, embed_dim), Z_: Optional, {dict of *new* intermediate variables to update the current Z}
        Constraints:  Causal, differentiable, parameter number, complexity, parallelizable
    """
    def __init__(self, embed_dim: int, device=None, dtype=None,**kwargs): # YOU CAN ADD MORE ARGUMENTS WITH OPTIONAL DEFAULT VALUES, BUT YOU HAVE TO HAVE embed_dim, device, dtype AS THE ARGUTMENTS #
        # argv: list of hyperparameters
        factory_kwargs = {"device": device, "dtype": dtype} # remember to pass it to all nn layers
        super().__init__(embed_dim) # DO NOT CHANGE THIS LINE #

        # COMPLETING THE CODE HERE #
        self.token_scorer: GAUBase = TokenScoringGAU(embed_dim, **factory_kwargs)
        self.dual_path: GAUBase = DualPathGAU(embed_dim, **factory_kwargs)
        self.latent_attention: GAUBase = LatentAttentionGAU(embed_dim, **factory_kwargs)


    # YOU CAN ADD MORE FUNCTIONS HERE #


    def _forward(self, X, **Z): 

        # THE CODE HERE MUST BE COMPLETED #
        # Step 1: Score tokens
        X, Z = self.token_scorer(X, **Z)
        # Step 2: Route through dual paths
        # Step 3: Apply latent attention
        Y, Z = self.latent_attention(X, **Z)
        X, Z = self.dual_path(X, **Z)

        return Y, Z
'''



In [4]:



# Example usage
code2 = """
# gau.py

import torch
import torch.nn as nn

from model_discovery.model.utils.modules import GAUBase

# Placeholder classes for future implementation
class MemoryAccessUnit(nn.Module):
    def __init__(self, embed_dim, memory_size, device=None, dtype=None):
        super().__init__(embed_dim)

    def _forward(self, X, **Z):
        return X, {}

class DownsamplingUnit(nn.Module):
    def __init__(self, embed_dim, downsample_factor, device=None, dtype=None):
        super().__init__(embed_dim)

    def _forward(self, X, **Z):
        return X, {}

class XAEU(GAUBase):  # This class will be renamed to the unit_name
    def __init__(self, embed_dim: int, device=None, dtype=None):
        super().__init__(embed_dim)
        self.unit: GAUBase = MemoryAccessUnit(embed_dim=embed_dim, device=device)

    def _forward(self, X, **Z):
        return X, Z
"""

unit_name = "XAU"  # Provide the unit_name to rename GAU class
reformatted_code, children_units, new_args, called, errors, warnings = check_and_reformat_gau_code(code, unit_name)
print("Reformatted Code:\n" + reformatted_code)
print("Errors:\n", errors)
print("Warnings:\n", warnings)
print("Children Units:\n", children_units)
print("New Arguments:\n", new_args)
print("Called Children:\n", called)



test_tree.add_unit(
    unit_name,reformatted_code,new_args,None,called,None,None,None,children_units,None
)
test_tree.root=test_tree.units['XAU']

Reformatted Code:
import torch
import torch.nn as nn
from model_discovery.model.utils.modules import GAUBase


class XAU(GAUBase):
    """Generalized Autoregressive Block Unit
        Input:        X: (batch, seqlen, embed_dim), Z: {dict of all current intermediate variables}
        Output:       Y: (batch, seqlen, embed_dim), Z_: Optional, {dict of *new* intermediate variables to update the current Z}
        Constraints:  Causal, differentiable, parameter number, complexity, parallelizable
    """

    def __init__(self, embed_dim: int, device=None, dtype=None, **kwargs):
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__(embed_dim)
        self.token_scorer: GAUBase = TokenScoringGAU(embed_dim=embed_dim,
            device=device, dtype=dtype, **kwargs)
        self.dual_path: GAUBase = DualPathGAU(embed_dim=embed_dim, device=
            device, dtype=dtype, **kwargs)
        self.latent_attention: GAUBase = LatentAttentionGAU(embed_dim=
         

In [5]:
print(reformatted_code)

import torch
import torch.nn as nn
from model_discovery.model.utils.modules import GAUBase


class XAU(GAUBase):
    """Generalized Autoregressive Block Unit
        Input:        X: (batch, seqlen, embed_dim), Z: {dict of all current intermediate variables}
        Output:       Y: (batch, seqlen, embed_dim), Z_: Optional, {dict of *new* intermediate variables to update the current Z}
        Constraints:  Causal, differentiable, parameter number, complexity, parallelizable
    """

    def __init__(self, embed_dim: int, device=None, dtype=None, **kwargs):
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__(embed_dim)
        self.token_scorer: GAUBase = TokenScoringGAU(embed_dim=embed_dim,
            device=device, dtype=dtype, **kwargs)
        self.dual_path: GAUBase = DualPathGAU(embed_dim=embed_dim, device=
            device, dtype=dtype, **kwargs)
        self.latent_attention: GAUBase = LatentAttentionGAU(embed_dim=
            embed_dim, devi

In [6]:
gab_template='''
# gab.py    # DO NOT CHANGE OR REMOVE THE MAKK HERE, KEEP IT ALWAYS THE FIRST LINE #

import torch
import torch.nn as nn

from model_discovery.model.utils.modules import GABBase # DO NOT CHANGE THIS IMPORT STATEMENT #


class GAB(GABBase):
    def __init__(self,embed_dim: int, block_loc: tuple, device=None,dtype=None,**kwargs): # YOU CAN ADD MORE ARGUMENTS, BUT YOU HAVE TO HAVE embed_dim, device, dtype AS THE ARGUTMENTS #
        factory_kwargs = {{"device": device, "dtype": dtype}} # remember to pass it to nn layers
        super().__init__(embed_dim, block_loc) # DO NOT CHANGE THIS LINE #
        self.root = {ROOT_UNIT_NAME}(embed_dim, embed_dim=embed_dim, device=device, dtype=dtype, **kwargs)

    def _forward(self, X, *Z): 
        X, Z = self.root(X, **Z)
        return X, Z
'''

In [7]:
import ast
import astor
from typing import List

def replace_from_second(text, old, new):
    first_part, remaining = text.split(old, 1)
    remaining = remaining.replace(old, new)
    return first_part + old + remaining

class GABComposer:
    
    def generate_gab_code(self,tree):
        root_node = tree.root
        generated_code = []
        
        # Recursively generate code for the root and its children
        self.generate_node_code(root_node.name, generated_code, tree.units)
        
        # Combine all generated code into a single Python file content
        gau_code = "\n".join(generated_code)

        gathered_args={}
        for unit in tree.units.values():
            gathered_args.update(unit.args)
        gab_code=gab_template.format(ROOT_UNIT_NAME=root_node.name)

        cfg_code=f'gab_config = {json.dumps(gathered_args)}'

        compoesed_code = f'{gab_code}\n\n{gau_code}\n\n{cfg_code}'

        compoesed_code=replace_from_second(compoesed_code,'import torch\n','')
        compoesed_code=replace_from_second(compoesed_code,'import torch.nn as nn\n','')
        compoesed_code=replace_from_second(compoesed_code,'from model_discovery.model.utils.modules import GAUBase\n','')

        return compoesed_code


    # Recursive function to generate code for a node and its children
    def generate_node_code(self, unit_name, generated_code: List[str], units):
        # Check if the node exists in units
        if unit_name not in units:
            # If the node does not exist in units, create a placeholder
            generated_code.append(self.create_placeholder_class(unit_name))
        else:
            node = units[unit_name]
            generated_code.append(node.code)
            
            # Recursively generate code for children
            children_units=set()
            for child_name, child_unit_name in node.children.items():
                children_units.add(child_unit_name)
            for child_unit in children_units:
                self.generate_node_code(child_unit, generated_code, units)

    # Function to create a placeholder class for a GAUNode
    def create_placeholder_class(self, unit_name) -> str:
        class_template = f"""
class {unit_name}(GAUBase): 
    def __init__(self, embed_dim: int, device=None, dtype=None, **kwargs): 
        factory_kwargs = {{"device": device, "dtype": dtype}} 
        super().__init__(embed_dim) 
        
    def _forward(self, X, **Z): 
        return X
"""
        return class_template

    # Function to convert the generated code to AST using ast and astor
    def convert_code_to_ast(self, code: str):
        try:
            return ast.parse(code)
        except SyntaxError as e:
            print(f"Syntax error in code: {code}")
            raise e

    # Function to convert AST back to Python code using astor
    def convert_ast_to_code(self, ast_tree: ast.AST) -> str:
        return astor.to_source(ast_tree)
    

# Example usage
generated_code = GABComposer().generate_gab_code(test_tree)
print(generated_code)  # This will print the final Python code for the entire GAUTree



# gab.py    # DO NOT CHANGE OR REMOVE THE MAKK HERE, KEEP IT ALWAYS THE FIRST LINE #

import torch
import torch.nn as nn

from model_discovery.model.utils.modules import GABBase # DO NOT CHANGE THIS IMPORT STATEMENT #


class GAB(GABBase):
    def __init__(self,embed_dim: int, block_loc: tuple, device=None,dtype=None,**kwargs): # YOU CAN ADD MORE ARGUMENTS, BUT YOU HAVE TO HAVE embed_dim, device, dtype AS THE ARGUTMENTS #
        factory_kwargs = {"device": device, "dtype": dtype} # remember to pass it to nn layers
        super().__init__(embed_dim, block_loc) # DO NOT CHANGE THIS LINE #
        self.root = XAU(embed_dim, embed_dim=embed_dim, device=device, dtype=dtype, **kwargs)

    def _forward(self, X, *Z): 
        X, Z = self.root(X, **Z)
        return X, Z


from model_discovery.model.utils.modules import GAUBase


class XAU(GAUBase):
    """Generalized Autoregressive Block Unit
        Input:        X: (batch, seqlen, embed_dim), Z: {dict of all current intermediate variable

In [56]:
print(code)

# gau.py

import torch
import torch.nn as nn

from model_discovery.model.utils.modules import GAUBase

# Placeholder imports for future GAUs
# from gau import RandomizedAttentionUnit, HierarchicalCompositionUnit

class HRAB(GAUBase):
 """Hierarchical Randomized Attention Block Unit
 Input: X: (batch, seqlen, embed_dim), Z: {dict of all current intermediate variables}
 Output: Y: (batch, seqlen, embed_dim), Z_: Optional, {dict of *new* intermediate variables to update the current Z}
 Constraints: Causal, differentiable, parameter number, complexity, parallelizable
 """
 def __init__(self, embed_dim: int, device=None, dtype=None, **kwargs):
 factory_kwargs = {"device": device, "dtype": dtype}
 super().__init__(embed_dim)
 
 # Initialize the Randomized Attention Unit
 self.randomized_attention: GAUBase = RandomizedAttentionUnit(embed_dim, **factory_kwargs, **kwargs)
 
 # Initialize the Hierarchical Composition Unit
 self.hierarchical_composition: GAUBase = HierarchicalCompositionUnit(embe