The goal here is to figure out how to dynamically import functions and classes from the different models within the sandbox.

In [1]:
import importlib
def import_from_nested_path(folders, file, items):
    """Import a module addressed by nested folders and fetch named attributes from it.

    Args:
        folders: Sequence of package/folder names leading to the module,
            e.g. ``['models', 'customGPT']``. May be empty for a top-level module.
        file: Name of the module (file name without ``.py``).
        items: Names of the attributes (functions, classes, ...) to fetch.

    Returns:
        A dict mapping each requested name that exists in the module to the
        corresponding attribute. Names the module does not define are reported
        with ``print`` and left out. If the module cannot be imported, the
        failure is printed and an empty dict is returned, so calling
        ``.get(...)`` on the result is always safe.
    """
    # Join folders and file in one go: with an empty `folders` list this yields
    # "file" rather than ".file", which import_module would treat as a relative
    # import and reject.
    module_path = ".".join([*folders, file])

    try:
        # Dynamically import the module
        module = importlib.import_module(module_path)

        # Extract specific items (functions, classes, etc.)
        imported_items = {}
        for item in items:
            if hasattr(module, item):
                imported_items[item] = getattr(module, item)
            else:
                print(f"{item} is not available in {module_path}")
        return imported_items

    except ImportError as e:
        print(f"Failed to import module: {e}")
        # Keep the return type consistent so callers can still use `.get(...)`.
        return {}

In [2]:
# Pull `generate` out of models/customGPT/inference.py; it is None if the module lacks it.
imported_objects = import_from_nested_path(
    folders=['models', 'customGPT'],
    file='inference',
    items=['generate'],
)
generate = imported_objects.get('generate')

In [3]:
# Pull `load_model` out of models/customGPT/tools.py.
imported_objects = import_from_nested_path(
    folders=['models', 'customGPT'],
    file='tools',
    items=['load_model'],
)
load_model = imported_objects.get('load_model')

# Pull `get_tokenizer` out of models/customGPT/tokenizers/bpe/tokenizer.py.
imported_objects = import_from_nested_path(
    folders=['models', 'customGPT', 'tokenizers', 'bpe'],
    file='tokenizer',
    items=['get_tokenizer'],
)
get_tokenizer = imported_objects.get('get_tokenizer')

In [2]:
import os

def run_in_directory(func, path, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` with ``path`` as the working directory.

    The working directory that was active on entry is restored afterwards,
    whether ``func`` returns normally or raises.

    Args:
        func: Callable to execute.
        path: Directory to switch into for the duration of the call.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.
    """
    previous_cwd = os.getcwd()
    os.chdir(path)
    try:
        return func(*args, **kwargs)
    finally:
        # Runs before the return value is handed back, and also on exceptions.
        os.chdir(previous_cwd)

# Example usage
def example_function():
    """Print the working directory that is current when the function runs."""
    cwd = os.getcwd()
    print("Current Working Directory:", cwd)

# Run the example with models/customGPT/ as the temporary working directory
run_in_directory(func=example_function, path="models/customGPT/")



Current Working Directory: /Users/tunadorable/local-repos/micro-GPT-sandbox/models/customGPT


In [12]:
import torch
import json
def load_model(
    name: str, # the filepath to the model. ex: 'models/customGPT/trained/customGPT_0.5m_tall_and_skinny'
    tokenizer_name: str, # the name of the folder with the tokenizer in it. 'bpe' in customGPT
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu',
):
    """Rebuild a trained model, its tokenizer and its config from a checkpoint folder.

    The first two components of ``name`` identify the model's root directory
    (e.g. ``models/customGPT``). ``Model`` and ``ModelConfig`` are imported from
    that root's ``modules/model.py`` and ``config.py``, and ``get_tokenizer``
    from ``tokenizers/<tokenizer_name>/tokenizer.py`` under the same root.

    Args:
        name: '/'-separated path of the checkpoint folder, which must contain
            ``model_config.json`` and ``model.pth``.
        tokenizer_name: Folder name of the tokenizer under ``<root>/tokenizers``.
        device: Device the model is moved to and the weights are mapped onto.

    Returns:
        Tuple ``(model, tokenizer, cfg)``.

    Raises:
        ValueError: If ``name`` has fewer than two path components.
        ImportError: If ``get_tokenizer`` cannot be imported for this model.
    """
    import sys  # local import: only needed to drop stale entries from the import cache

    path_parts = name.split('/')
    if len(path_parts) < 2:
        raise ValueError(
            f"expected a path like '<models dir>/<model name>/...', got {name!r}"
        )
    model_root = os.path.join(path_parts[0], path_parts[1])

    # grabbing the config and Model class from the correct directories
    def internal():
        # Python caches imports in sys.modules under their bare names ('config',
        # 'modules', 'modules.model', ...). Without dropping those entries, a
        # second call for a *different* model root would silently get the
        # classes of whichever root was imported first.
        stale = [
            cached for cached in sys.modules
            if cached in ('config', 'modules') or cached.startswith('modules.')
        ]
        for cached in stale:
            del sys.modules[cached]
        from modules.model import Model
        from config import ModelConfig
        return Model, ModelConfig
    Model, ModelConfig = run_in_directory(internal, model_root)

    # grabbing the get_tokenizer function from the correct directory
    imported_objects = import_from_nested_path(
        [path_parts[0], path_parts[1], 'tokenizers', tokenizer_name],
        'tokenizer',
        ['get_tokenizer']
    ) or {}  # tolerate a helper that returns None on a failed import
    get_tokenizer = imported_objects.get('get_tokenizer')
    if get_tokenizer is None:
        raise ImportError(
            f"could not import get_tokenizer from tokenizer {tokenizer_name!r} under {model_root!r}"
        )

    # Deserialize the JSON file back to a dictionary
    with open(f'{name}/model_config.json', 'r') as f:
        config_dict = json.load(f)

    # Convert the dictionary back to a Config object
    cfg = ModelConfig(**config_dict)
    cfg.device = device

    # defining the tokenizer
    # presumably 3 entries of the vocabulary are reserved for special tokens -- TODO confirm
    vocab_size = cfg.vocab_len - 3
    tokenizer = run_in_directory(get_tokenizer, model_root, vocab_size)

    # Initialize a blank model
    model = Model(cfg).to(cfg.device)

    # Load the saved model parameters. The checkpoint sits next to the config
    # file, so it can be addressed from the current directory directly.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    model_path = os.path.join(name, 'model.pth')
    model.load_state_dict(
        torch.load(
            model_path,
            map_location=cfg.device
        )
    )

    print(f'{sum(p.numel() for p in model.parameters())/1e3}K parameters\n{cfg}\n{model}')

    return model, tokenizer, cfg

In [13]:
# Checkpoint folders to load: three customGPT runs, then two fractal-head-attention runs.
models_to_compare = [
    'models/customGPT/trained/customGPT_0.5m_tall_and_skinny',
    'models/customGPT/trained/customGPT_0.5m_5foot11_and_skinnyfat',
    'models/customGPT/trained/customGPT_0.5m_short_and_thick',
    'models/fractal-head-attention/trained/FHA_GPT_0.3m_2024-05-07|13-05-29',
    'models/fractal-head-attention/trained/FHA_GPT_0.8m_2024-05-05|10-54-35',
]
# One tokenizer folder name per checkpoint; every entry is 'bpe'.
tokenizer_list = ['bpe' for _ in models_to_compare]

In [14]:
# Load every checkpoint with its tokenizer name. Each model is stored in
# `loaded_models` under its checkpoint path so all of them remain available;
# `model` itself is rebound on every iteration and ends up as the last one.
loaded_models = {}
for model_name, tokenizer in zip(models_to_compare, tokenizer_list):
    model, _, _ = load_model(model_name, tokenizer)
    loaded_models[model_name] = model

502.592K parameters
ModelConfig(dim=64, vocab_len=2051, device='cpu', num_layers=10, second_resid_norm=False, mlp_hidden_mult=1, mlp_bias=False, mlp_nonlinearity='SiLU', mlp_gated=True, num_q_heads=10, num_kv_heads=2, head_dim=16, theta=10000, max_seq_len=512, scale_first_resid=True, norm_type='RMSNorm', norm_affine=True, norm_bias=True, eps=1e-06, max_batch_size=1)
Model(
  (token_embedder): Embedding(2051, 64)
  (layers): ModuleList(
    (0-9): 10 x Layer(
      (pre_attn_norm): Norm()
      (attn): MQA(
        (Wq): Linear(in_features=64, out_features=160, bias=False)
        (Wk): Linear(in_features=64, out_features=32, bias=False)
        (Wv): Linear(in_features=64, out_features=32, bias=False)
        (Wo): Linear(in_features=160, out_features=64, bias=False)
      )
      (pre_mlp_norm): Norm()
      (mlp): MLP(
        (Wgate): Linear(in_features=64, out_features=64, bias=False)
        (Wup): Linear(in_features=64, out_features=64, bias=False)
        (Wdown): Linear(in_feat

In [5]:
models_to_compare = [
    'models/customGPT/trained/customGPT_0.5m_tall_and_skinny',
    'models/customGPT/trained/customGPT_0.5m_5foot11_and_skinnyfat',
    'models/customGPT/trained/customGPT_0.5m_short_and_thick',
    'models/fractal-head-attention/trained/FHA_GPT_0.3m_2024-05-07|13-05-29',
    'models/fractal-head-attention/trained/FHA_GPT_0.8m_2024-05-05|10-54-35'
]

# The loader does not depend on the loop variable, so it is imported once up front.
# NOTE(review): this is always customGPT's loader, even for the
# fractal-head-attention checkpoints -- confirm that is intended.
# NOTE(review): this rebinds the name `load_model`, which the notebook also
# defines itself with a different signature -- run order decides which one wins.
imported_objects = import_from_nested_path(['models', 'customGPT'], 'tools', ['load_model']) or {}
load_model = imported_objects.get('load_model')
if load_model is None:
    # Fail with a clear message instead of a TypeError from calling None below.
    raise ImportError("load_model could not be imported from models.customGPT.tools")

for model_name in models_to_compare:
    path_parts = model_name.split('/')

    print('='*20, path_parts[3], '='*20)
    # Run the loader from inside the model's root directory, passing only the run name.
    model, _, _ = run_in_directory(load_model, os.path.join(path_parts[0], path_parts[1]), path_parts[3])

502.592K parameters
ModelConfig(dim=64, vocab_len=2051, device='cpu', num_layers=10, second_resid_norm=False, mlp_hidden_mult=1, mlp_bias=False, mlp_nonlinearity='SiLU', mlp_gated=True, num_q_heads=10, num_kv_heads=2, head_dim=16, theta=10000, max_seq_len=512, scale_first_resid=True, norm_type='RMSNorm', norm_affine=True, norm_bias=True, eps=1e-06, max_batch_size=1)
Model(
  (token_embedder): Embedding(2051, 64)
  (layers): ModuleList(
    (0-9): 10 x Layer(
      (pre_attn_norm): Norm()
      (attn): MQA(
        (Wq): Linear(in_features=64, out_features=160, bias=False)
        (Wk): Linear(in_features=64, out_features=32, bias=False)
        (Wv): Linear(in_features=64, out_features=32, bias=False)
        (Wo): Linear(in_features=160, out_features=64, bias=False)
      )
      (pre_mlp_norm): Norm()
      (mlp): MLP(
        (Wgate): Linear(in_features=64, out_features=64, bias=False)
        (Wup): Linear(in_features=64, out_features=64, bias=False)
        (Wdown): Linear(in_feat