# Setup

## Imports

In [15]:
# Import importlib to reload modules and sys and os to add the path for other imports
import importlib
import sys
import os
import torch
from accelerate import Accelerator

# Append the parent directory to the path to import the necessary modules
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Import the utilities and the dataloader
from utils import trainutil, inferutil, setuputil

# Now reload the modules to ensure they are up-to-date
importlib.reload(setuputil)
importlib.reload(trainutil)
importlib.reload(inferutil)
#importlib.reload(GeluAvgEmbed)

# Import the funcs needed from utils
from utils.setuputil import setup_bert_config, display_bert_config
from utils.trainutil import train_model
from utils.inferutil import infer_one, infer_full

# Import the model class
#from classes.GeluAvgEmbed import GeluAvgEmbed

## Configuration

In [43]:
# Define the input configuration for the BERT model
setup_config = dict(
    # --- Environment and model info ---
    env="gcp",
    approach="bert",
    model_name="BertCustomAdd",
    model_base="prajjwal1/bert-tiny",
    # --- System configuration ---
    device="cuda:0",
    threads=12,
    seed=42,
    # --- Data configuration ---
    data_dir="../../data/farzan",
    data_ds="manual",
    # --- Model parameters ---
    rows=100,
    cols=100,
    tokens=32,
    # --- Training parameters ---
    batch=40,
    lr=5e-3,
    mu=0.25,
    epochs=20,
    patience=2,
    save_int=10,
    save_dir='../models/',
)

new_config = dict(
    # --- Environment and model info ---
    env="gcp",
    approach="simple",
    model_name="SimpleGeluEmbedAdd",
    model_base="glove50",
    # --- System configuration ---
    device="cuda:0",
    threads=12,
    seed=42,
    # --- Data configuration ---
    data_dir="../../data/farzan",
    data_ds="manual",
    # --- Model parameters ---
    rows=100,
    cols=100,
    tokens=32,
    # --- Vocabulary parameters ---
    vocab_size=150000,
    vocab_space=True,
    vocab_case="both",
    # --- Training parameters ---
    batch=40,
    lr=5e-3,
    mu=0.25,
    epochs=20,
    patience=2,
    save_int=10,
    save_dir='../models/',
)

# Define the input configuration for the RNN model
rnn_config = dict(
    # --- Environment and model info ---
    env="gcp",
    approach="rnn",
    model_name="Rnn2dSquare",
    # --- System configuration ---
    device="cuda:0",
    threads=12,
    seed=42,
    # --- Data configuration ---
    data_dir="../../data/farzan",
    data_ds="manual",
    # --- Model parameters ---
    rows=100,
    cols=100,
    tokens=32,
    # --- RNN-specific parameters ---
    hidden_dim=100,        # size of the hidden state vector
    rnn_layers=2,          # number of stacked RNN layers
    dropout_rate=0.05,     # dropout used for regularization
    nonlinearity="relu",   # RNN nonlinearity (e.g., relu, tanh)
    # --- Vocabulary parameters ---
    vocab_size=150000,
    vocab_space=True,
    vocab_case="both",
    # --- Training parameters ---
    batch=10,
    lr=7e-5,
    mu=0.25,
    epochs=20,
    patience=3,
    save_int=5,
    save_dir='../models/',
)

In [44]:
# General imports
import os
import torch
import importlib
import copy
import json
from transformers import AutoTokenizer

# Reload the selfutil module and import required functions
from utils import selfutil
from classes import SpreadsheetDataLoader, BertLoader
importlib.reload(selfutil)
importlib.reload(SpreadsheetDataLoader)
importlib.reload(BertLoader)
from utils.selfutil import set_seed, get_vocab, create_embeddings, get_fileList
from classes.SpreadsheetDataLoader import SpreadsheetDataLoader
from classes.BertLoader import BertLoader



In [47]:
def h_env(setup_config):
    """Validate the environment settings and build the base config dict.

    Args:
        setup_config: Mapping providing the keys ``env``, ``approach``,
            ``device``, ``threads`` and ``seed``.

    Returns:
        A new dict with ``env``, ``approach``, ``DEVICE`` (a ``torch.device``),
        ``THREADS`` (int, at least 1) and ``seed``.

    Raises:
        ValueError: If ``env`` or ``approach`` is not a supported value, if
            ``threads`` is not a number, or if fewer than 4 CPU threads would
            be left free.

    Side effects:
        Calls ``set_seed`` with the configured seed.
    """
    config = {}

    ######## ENVIRONMENT ########
    valid_envs = ["gcp", "bvm", "local", "colab"]
    valid_approaches = ["simple", "saffu", "bert", "rnn"]

    if setup_config["env"] not in valid_envs:
        raise ValueError(f"ERR: env must be one of {valid_envs}")
    if setup_config["approach"] not in valid_approaches:
        raise ValueError(f"ERR: approach must be one of {valid_approaches}")

    config.update({
        "env": setup_config["env"],
        "approach": setup_config["approach"]
    })

    ######## DEVICE ########
    device_config = setup_config["device"]
    device = torch.device("cpu")  # fallback whenever the request cannot be honoured
    if device_config.startswith("cuda"):
        if torch.cuda.is_available():
            # "cuda" with no ":<index>" suffix is a valid device string, so the
            # index is parsed defensively instead of assuming split(":")[1] exists.
            _, has_index, index_text = device_config.partition(":")
            if not has_index:
                index = 0
            elif index_text.isdigit():
                index = int(index_text)
            else:
                index = None  # malformed index -> fall back to CPU
            if index is not None and index < torch.cuda.device_count():
                device = torch.device(device_config)
    elif device_config.startswith("mps") and hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        device = torch.device("mps")
    config["DEVICE"] = device

    ######## THREADS ########
    if not isinstance(setup_config["threads"], (int, float)):
        raise ValueError("ERR: threads must be a number")

    threads = int(setup_config["threads"])
    # os.cpu_count() returns None when the count cannot be determined; the
    # headroom check is only meaningful when the count is known.
    cpu_total = os.cpu_count()
    if cpu_total is not None and (cpu_total - threads) < 4:
        raise ValueError(f"ERR: Must leave at least 4 threads free (requested {threads})")
    config["THREADS"] = max(1, threads)

    ######## SEED ########
    config["seed"] = setup_config["seed"]
    set_seed(config["seed"])

    return config

def h_model(config, setup_config):
    """Record the model identity and context dimensions on ``config``.

    Args:
        config: Dict being built up; must already contain ``approach``.
            It is modified in place.
        setup_config: User-supplied settings; must contain ``model_name``,
            ``rows``, ``cols`` and ``tokens``. ``model_base`` is only
            consulted for the ``bert`` approach.

    Returns:
        The same ``config`` object with ``model_base`` (for known approaches),
        ``model_name``, ``rows``, ``cols`` and ``tokens`` added.

    Raises:
        ValueError: If ``model_name`` is missing from ``setup_config``.
    """
    if "model_name" not in setup_config:
        raise ValueError("ERR: model_name must be provided for all approaches")

    # Approaches whose base is fixed no matter what the caller supplied.
    forced_bases = {"simple": "glove50", "rnn": "glove50", "saffu": "saffu"}

    approach = config["approach"]
    if approach in forced_bases:
        config["model_base"] = forced_bases[approach]
    elif approach == "bert":
        # BERT honours a caller-provided base and otherwise uses bert-tiny.
        config["model_base"] = setup_config.get("model_base", "prajjwal1/bert-tiny")

    config["model_name"] = setup_config["model_name"]

    # Context parameters are copied across unchanged.
    config.update({key: setup_config[key] for key in ("rows", "cols", "tokens")})

    return config

def h_data(config, setup_config):
    """Validate the dataset location and record its directories on ``config``.

    Args:
        config: Dict being built up; modified in place.
        setup_config: User-supplied settings containing ``data_dir`` and
            ``data_ds``.

    Returns:
        The same ``config`` object with ``data_ds``, ``data_dir``,
        ``train_dir``, ``val_dir`` and ``test_dir`` added.

    Raises:
        ValueError: If ``data_dir`` is not a directory, or if any of the
            ``<data_ds>_train`` / ``_val`` / ``_test`` sub-directories is
            missing. In the latter case ``data_ds`` and ``data_dir`` have
            already been written to ``config``.
    """
    data_root = setup_config["data_dir"]
    if not os.path.isdir(data_root):
        raise ValueError(f"ERR: data_dir '{setup_config['data_dir']}' is not a valid path")

    dataset = setup_config["data_ds"]
    config["data_ds"] = dataset
    config["data_dir"] = data_root

    # One sub-directory per split, named "<dataset>_<split>".
    split_dirs = {
        split: os.path.join(data_root, f"{dataset}_{split}")
        for split in ("train", "val", "test")
    }

    absent = [split for split, path in split_dirs.items() if not os.path.isdir(path)]
    if absent:
        raise ValueError(f"ERR: Missing dataset directories: {', '.join(absent)}")

    # Only publish the split paths once all of them are known to exist.
    config.update({f"{split}_dir": path for split, path in split_dirs.items()})

    return config

def h_vocab(config, setup_config):
    """Build the vocabulary and word vectors for the simple/rnn approaches.

    For any other approach ``config`` is returned untouched.

    Args:
        config: Dict being built up; must contain ``approach`` and, for the
            simple/rnn approaches, ``train_dir`` and ``THREADS``. Modified
            in place.
        setup_config: User-supplied settings. ``vocab_size`` is required for
            simple/rnn; ``vocab_space`` and ``vocab_case`` are optional and
            silently replaced by ``True`` / ``"lower"`` when invalid.

    Returns:
        The same ``config`` object with ``vocab``, ``wvs``, ``vocab_size``
        (taken from the first dimension of ``wvs``), ``vocab_space`` and
        ``vocab_case`` added.

    Raises:
        ValueError: If ``vocab_size`` is not an int within [4, 2000000].
    """
    if config["approach"] not in ("simple", "rnn"):
        return config

    # --- validate the requested vocabulary size ---
    requested_size = setup_config["vocab_size"]
    size_ok = isinstance(requested_size, int) and 4 <= requested_size <= 2000000
    if not size_ok:
        raise ValueError(f"ERR: vocab_size '{setup_config['vocab_size']}' must be an integer between 4 and 2,000,000")

    # --- normalise the optional flags, falling back to defaults ---
    keep_space = setup_config.get("vocab_space", True)
    keep_space = keep_space if isinstance(keep_space, bool) else True

    casing = setup_config.get("vocab_case", "lower")
    casing = casing if casing in ("both", "upper", "lower") else "lower"

    # --- vocabulary is derived from the training split only ---
    vocab = get_vocab(
        config["train_dir"],
        requested_size,
        space=keep_space,
        case=casing,
        threads=config["THREADS"],
    )
    config["vocab"] = vocab

    # --- word vectors; the stored vocab_size is the realised size ---
    embeddings = create_embeddings(vocab)
    config["wvs"] = embeddings
    config.update({
        "vocab_size": embeddings.shape[0],
        "vocab_space": keep_space,
        "vocab_case": casing,
    })

    return config

def h_rnn(config, setup_config):
    """Copy the RNN hyper-parameters onto ``config`` for the rnn approach.

    Args:
        config: Dict being built up; must contain ``approach``. Modified in
            place only when the approach is ``"rnn"``.
        setup_config: User-supplied settings; every RNN key is optional.

    Returns:
        The same ``config`` object. For the rnn approach it gains
        ``hidden_dim`` (default 128), ``rnn_layers`` (default 2),
        ``dropout_rate`` (default 0.05) and ``nonlinearity`` (default "relu").
    """
    if config["approach"] == "rnn":
        # (key, value used when the caller did not supply one)
        rnn_defaults = (
            ("hidden_dim", 128),
            ("rnn_layers", 2),
            ("dropout_rate", 0.05),
            ("nonlinearity", "relu"),
        )
        config.update({
            name: setup_config.get(name, fallback)
            for name, fallback in rnn_defaults
        })

    return config

def h_training(config, setup_config):
    """Copy the training parameters onto ``config`` and derive ``save_name``.

    Args:
        config: Dict being built up; modified in place. Must contain ``env``,
            ``approach``, ``seed``, ``model_name``, ``data_ds``, ``rows``,
            ``cols`` and ``tokens``. The simple/rnn approaches also need
            ``vocab_case``, ``vocab_space`` and ``vocab_size``; rnn
            additionally needs ``rnn_layers`` and ``hidden_dim``.
        setup_config: User-supplied settings containing ``batch``, ``lr``,
            ``mu``, ``epochs``, ``patience``, ``save_int`` and ``save_dir``.

    Returns:
        The same ``config`` object with the training parameters and
        ``save_name`` added.

    Raises:
        KeyError: If ``env`` is not one of gcp/local/bvm/colab, or a required
            key is missing.
        ValueError: If ``approach`` is not one of simple/rnn/bert/saffu.
            Previously this surfaced as an UnboundLocalError because no
            branch assigned ``save_name``.
    """
    ######## TRAINING PARAMS ########
    config.update({
        "batch": setup_config["batch"],
        "lr": setup_config["lr"],
        "mu": setup_config["mu"],
        "epochs": setup_config["epochs"],
        "patience": setup_config["patience"],
        "save_int": setup_config["save_int"],
        "save_dir": setup_config["save_dir"]
    })

    ######## SAVE NAME ########
    # Basic components (common across all approaches):
    # one-letter env code + first three letters of the approach + seed.
    env_map = {"gcp": "g", "local": "l", "bvm": "b", "colab": "c"}
    env_abbr = env_map[config["env"]]
    app_prefix = config["approach"][:3]
    base_name = f"{env_abbr}{app_prefix}{config['seed']}"

    # Context dims component
    dims = f"{config['model_name']}_{config['data_ds']}_{config['rows']}x{config['cols']}x{config['tokens']}"

    # Training params component; the replace() shortens exponents such as
    # "5e-03" to "5e-3".
    train_params = (f"bsz{config['batch']}lr{config['lr']:.0e}"
                   .replace('e-0', 'e-') +
                   f"ep{config['epochs']}pa{config['patience']}")

    # Approach-specific components
    approach = config["approach"]
    if approach in ("simple", "rnn"):
        # Vocab string: case letter + space flag + vocab size in thousands.
        case_prefix = {"both": "b", "upper": "u", "lower": "l"}[config["vocab_case"]]
        space_str = "Sp" if config["vocab_space"] else "Nsp"
        vocab_str = f"{case_prefix}{space_str}{config['vocab_size']//1000}k"

        # Additional RNN-specific component
        if approach == "rnn":
            rnn_str = f"_rnn{config['rnn_layers']}hid{config['hidden_dim']}"
        else:
            rnn_str = ""

        save_name = f"{base_name}_{dims}_{vocab_str}_{train_params}{rnn_str}"

    elif approach in ("bert", "saffu"):
        # BERT and SAFFU models don't need a vocab string.
        save_name = f"{base_name}_{dims}_{train_params}"

    else:
        # Fail with a clear message instead of leaving save_name unbound.
        raise ValueError(f"ERR: cannot build save_name for unknown approach '{approach}'")

    config["save_name"] = save_name
    return config

def h_simpleloader(config):
    """Attach train/val/test ``SpreadsheetDataLoader`` objects to ``config``.

    Args:
        config: Dict being built up; modified in place. Must contain
            ``train_dir``, ``val_dir``, ``test_dir``, ``vocab``, ``rows``,
            ``cols``, ``tokens`` and ``THREADS``.

    Returns:
        The same ``config`` object with ``train_loader``, ``val_loader`` and
        ``test_loader`` added.
    """
    splits = ("train", "val", "test")

    # Collect every file list first; only the first element returned by
    # get_fileList is used.
    files_by_split = {}
    for split in splits:
        files, _ = get_fileList(config[f"{split}_dir"])
        files_by_split[split] = files

    def build_loader(files):
        # All three loaders share the vocab, context dims and thread count.
        return SpreadsheetDataLoader(
            files, config["vocab"],
            config["rows"], config["cols"], config["tokens"],
            threads=config["THREADS"],
        )

    config.update({
        f"{split}_loader": build_loader(files_by_split[split])
        for split in splits
    })

    return config

def setup_simple_config(setup_config):
    """Turn a raw settings dict into a full run configuration.

    The helpers run in a fixed order: environment, model, data, then (for the
    simple/rnn approaches only) vocabulary and data loaders, then RNN
    parameters (rnn only), and finally training parameters and the save name.
    The bert and saffu approaches currently have no approach-specific step.

    Args:
        setup_config: User-supplied settings dict passed to every helper.

    Returns:
        The configuration dict produced by the helper chain.
    """
    # Steps shared by every approach.
    config = h_env(setup_config)
    config = h_model(config, setup_config)
    config = h_data(config, setup_config)

    # Vocabulary-based approaches need a vocab and spreadsheet loaders.
    if config["approach"] in ("simple", "rnn"):
        config = h_vocab(config, setup_config)
        config = h_simpleloader(config)

        if config["approach"] == "rnn":
            config = h_rnn(config, setup_config)

    # Training parameters and save name close out every configuration.
    return h_training(config, setup_config)


def display_config(config):
    """Print a JSON view of the scalar settings held in ``config``.

    Only the selected keys are read, so ``config`` is neither copied nor
    modified. The previous implementation deep-copied the whole dict first,
    which duplicated the vocabulary, embedding matrix and data loaders and
    failed for values that cannot be deep-copied.

    Args:
        config: Configuration dict as produced by ``setup_simple_config``.
            Vocabulary keys are shown only when ``"vocab"`` is present; RNN
            keys are shown only when ``approach`` is ``"rnn"``.

    Returns:
        None. The formatted configuration is written to stdout.

    Raises:
        KeyError: If one of the expected keys is missing from ``config``.
    """
    base_keys = ("env", "approach", "model_base", "model_name",
                 "rows", "cols", "tokens")
    system_and_data_keys = ("THREADS", "seed", "data_ds", "data_dir",
                            "train_dir", "val_dir", "test_dir")
    vocab_keys = ("vocab_size", "vocab_space", "vocab_case")
    training_keys = ("batch", "lr", "mu", "epochs", "patience",
                     "save_int", "save_dir", "save_name")
    rnn_keys = ("hidden_dim", "rnn_layers", "dropout_rate", "nonlinearity")

    # Base configuration that exists for all approaches
    ordered_config = {key: config[key] for key in base_keys}
    # The device object is not JSON-serialisable, so show its string form.
    ordered_config["DEVICE"] = str(config["DEVICE"])
    ordered_config.update({key: config[key] for key in system_and_data_keys})

    # Add vocabulary configuration if it exists (simple/rnn approaches)
    if "vocab" in config:
        ordered_config.update({key: config[key] for key in vocab_keys})
        # Large objects are represented by placeholders.
        ordered_config["vocab"] = "<Vocab Object>"
        ordered_config["wvs"] = "<Embedding Matrix>"

    # Add training configuration for all approaches
    ordered_config.update({key: config[key] for key in training_keys})

    # Add RNN-specific configuration if it exists
    if config["approach"] == "rnn":
        ordered_config.update({key: config[key] for key in rnn_keys})

    print(f"\nConfiguration for {config['approach'].upper()} approach:")
    print(json.dumps(ordered_config, indent=2))


In [46]:
# Resolve each raw settings dict into a full configuration:
# res  <- BERT config, res2 <- simple config, res3 <- RNN config.
# NOTE(review): this cell carries execution count In [46] while the cell that
# defines setup_simple_config is In [47]; confirm with Restart & Run All.
res = setup_simple_config(setup_config)
res2 = setup_simple_config(new_config)
res3 = setup_simple_config(rnn_config)

Getting Vocab: 100%|████████████████████████████| 40/40 [00:03<00:00, 13.02it/s]


40(P) = 40(G) + 0(E)
Unique Tokens: 5593
Vocab Size: 5597


Creating Word Embeddings: 100%|██████████| 5597/5597 [00:00<00:00, 73377.05it/s]


Word Embeddings Shape: torch.Size([5597, 50])


Getting Vocab: 100%|██████████████████████████| 40/40 [00:00<00:00, 1796.10it/s]


40(P) = 40(G) + 0(E)
Unique Tokens: 5593
Vocab Size: 5597


Creating Word Embeddings: 100%|██████████| 5597/5597 [00:00<00:00, 74206.72it/s]

Word Embeddings Shape: torch.Size([5597, 50])

Configuration for BERT approach:
{
  "env": "gcp",
  "approach": "bert",
  "model_base": "prajjwal1/bert-tiny",
  "model_name": "BertCustomAdd",
  "rows": 100,
  "cols": 100,
  "tokens": 32,
  "DEVICE": "cuda:0",
  "THREADS": 12,
  "seed": 42,
  "data_ds": "manual",
  "data_dir": "../../data/farzan",
  "train_dir": "../../data/farzan/manual_train",
  "val_dir": "../../data/farzan/manual_val",
  "test_dir": "../../data/farzan/manual_test",
  "batch": 40,
  "lr": 0.005,
  "mu": 0.25,
  "epochs": 20,
  "patience": 2,
  "save_int": 10,
  "save_dir": "../models/",
  "save_name": "gber42_BertCustomAdd_manual_100x100x32_bsz40lr5e-3ep20pa2"
}

Configuration for SIMPLE approach:
{
  "env": "gcp",
  "approach": "simple",
  "model_base": "glove50",
  "model_name": "SimpleGeluEmbedAdd",
  "rows": 100,
  "cols": 100,
  "tokens": 32,
  "DEVICE": "cuda:0",
  "THREADS": 12,
  "seed": 42,
  "data_ds": "manual",
  "data_dir": "../../data/farzan",
  "train_d




In [48]:
# Print the resolved configuration for each approach (BERT, simple, RNN).
display_config(res)
display_config(res2)
display_config(res3)


Configuration for BERT approach:
{
  "env": "gcp",
  "approach": "bert",
  "model_base": "prajjwal1/bert-tiny",
  "model_name": "BertCustomAdd",
  "rows": 100,
  "cols": 100,
  "tokens": 32,
  "DEVICE": "cuda:0",
  "THREADS": 12,
  "seed": 42,
  "data_ds": "manual",
  "data_dir": "../../data/farzan",
  "train_dir": "../../data/farzan/manual_train",
  "val_dir": "../../data/farzan/manual_val",
  "test_dir": "../../data/farzan/manual_test",
  "batch": 40,
  "lr": 0.005,
  "mu": 0.25,
  "epochs": 20,
  "patience": 2,
  "save_int": 10,
  "save_dir": "../models/",
  "save_name": "gber42_BertCustomAdd_manual_100x100x32_bsz40lr5e-3ep20pa2"
}

Configuration for SIMPLE approach:
{
  "env": "gcp",
  "approach": "simple",
  "model_base": "glove50",
  "model_name": "SimpleGeluEmbedAdd",
  "rows": 100,
  "cols": 100,
  "tokens": 32,
  "DEVICE": "cuda:0",
  "THREADS": 12,
  "seed": 42,
  "data_ds": "manual",
  "data_dir": "../../data/farzan",
  "train_dir": "../../data/farzan/manual_train",
  "val_

In [40]:
# NOTE(review): this cell repeats the display calls of the previous cell and
# has a lower execution count (In [40]). Its saved output is headed
# "Configuration Final:", which is not what display_config prints now, so it
# looks stale - confirm and consider removing.
display_config(res)
display_config(res2)
display_config(res3)


Configuration Final:
{
  "env": "gcp",
  "approach": "bert",
  "model_base": "prajjwal1/bert-tiny",
  "model_name": "BertCustomAdd",
  "rows": 100,
  "cols": 100,
  "tokens": 32,
  "DEVICE": "cuda:0",
  "THREADS": 12,
  "seed": 42,
  "data_ds": "manual",
  "data_dir": "../../data/farzan",
  "train_dir": "../../data/farzan/manual_train",
  "val_dir": "../../data/farzan/manual_val",
  "test_dir": "../../data/farzan/manual_test"
}

Configuration Final:
{
  "env": "gcp",
  "approach": "simple",
  "model_base": "glove50",
  "model_name": "SimpleGeluEmbedAdd",
  "rows": 100,
  "cols": 100,
  "tokens": 32,
  "DEVICE": "cuda:0",
  "THREADS": 12,
  "seed": 42,
  "data_ds": "manual",
  "data_dir": "../../data/farzan",
  "train_dir": "../../data/farzan/manual_train",
  "val_dir": "../../data/farzan/manual_val",
  "test_dir": "../../data/farzan/manual_test",
  "vocab_size": 5597,
  "vocab_space": true,
  "vocab_case": "both",
  "vocab": "<Vocab Object>",
  "wvs": "<Embedding Matrix>"
}

Configura

# Model Creation

In [4]:
import torch
import torch.nn as nn
from transformers import AutoModel
from tqdm import tqdm


class TestBERT(nn.Module):
    """Per-cell scorer built on a pretrained encoder.

    Every (row, col) cell is passed through the encoder on its own; the
    pooled output then goes through dropout, GELU and a linear layer that
    yields one value per cell.

    Args:
        model_name: Identifier handed to ``AutoModel.from_pretrained``.
        dropout_rate: Probability used by the dropout layer.
    """

    def __init__(self, model_name="bert-base-cased", dropout_rate=0.05):
        super().__init__()

        # Pretrained encoder
        self.bert = AutoModel.from_pretrained(model_name)

        # Head: dropout -> GELU -> linear projection to one value
        self.dropout = nn.Dropout(dropout_rate)
        self.gelu = nn.GELU()
        self.classifier = nn.Linear(self.bert.config.hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        """Score every cell of the grid.

        Args:
            input_ids: Tensor indexed as [batch, row, col, token].
            attention_mask: Tensor indexed the same way as ``input_ids``.

        Returns:
            Tensor of shape (batch, rows, cols) on the device of
            ``input_ids`` holding one value per cell.
        """
        batch_size = input_ids.shape[0]
        n_rows = input_ids.shape[1]
        n_cols = input_ids.shape[2]

        # Output grid, filled one cell at a time
        S_cube = torch.zeros((batch_size, n_rows, n_cols), device=input_ids.device)

        # Visit the cells in row-major order
        for cell in tqdm(range(n_rows * n_cols), desc="Forward"):
            row, col = divmod(cell, n_cols)

            encoded = self.bert(
                input_ids[:, row, col, :],
                attention_mask=attention_mask[:, row, col, :],
            )
            pooled = self.dropout(encoded.pooler_output)

            # (batch, 1) -> (batch,) so it fits the [:, row, col] slice
            S_cube[:, row, col] = self.classifier(self.gelu(pooled)).view(-1)

        return S_cube


# class TestBERT(nn.Module):
#     def __init__(self, model_name="bert-base-cased", dropout_rate=0.05):
#         super(TestBERT, self).__init__()

#         # 1. Load pretrained BERT
#         self.bert = AutoModel.from_pretrained(model_name)

#         # 2. Define a dropout
#         self.dropout = nn.Dropout(dropout_rate)

#         # 3. Non-linear activation (GELU)
#         self.gelu = nn.GELU()

#         # 4. Final predictor (1-dim output per cell)
#         self.classifier = nn.Linear(self.bert.config.hidden_size, 1)

#     def forward(self, input_ids, attention_mask):

#         # 1. Print the overall shapes
#         # print("batch_size:", input_ids.shape[0])
#         # print("rows:",      input_ids.shape[1])
#         # print("cols:",      input_ids.shape[2])
#         # print("tokens:",    input_ids.shape[3])

#         # 2. Initialize S_cube => (batch_size, rows, cols)
#         S_cube = torch.zeros(
#             (input_ids.shape[0], input_ids.shape[1], input_ids.shape[2]),
#             device=input_ids.device
#         )

#         # 3. Loop over all cells
#         for cell in tqdm(range(input_ids.shape[1] * input_ids.shape[2]), desc = 'Forward'):

#             r = cell // input_ids.shape[2]
#             c = cell %  input_ids.shape[2]

#             # Extract the slice for current cell (batch_size x tokens)
#             cell_input_ids  = input_ids[:, r, c, :]
#             cell_attn_mask  = attention_mask[:, r, c, :]

#             # Pass them through the BERT model
#             outputs = self.bert(cell_input_ids, attention_mask=cell_attn_mask)

#             # pooler_out => (batch_size, hidden_dim)
#             pooler_out = outputs.pooler_output

#             # Inlined pipeline: dropout -> GELU -> classifier => (batch_size, 1)
#             logits = self.classifier(self.gelu(self.dropout(pooler_out)))

#             # Flatten (batch_size, 1) => (batch_size,)
#             logits_flat = logits.view(-1)

#             # Populate S_cube => shape: (batch_size, rows, cols)
#             S_cube[:, r, c] = logits_flat

#             # If this is the first cell, do some prints and break
#             if r == 0 and c == 0:
#                 print(f"\nFirst cell => row={r}, col={c}")
#                 print(f"cell_input_ids.shape: {cell_input_ids.shape}")
#                 print(f"cell_attn_mask.shape: {cell_attn_mask.shape}")
#                 print(f"logits.shape: {logits.shape}")
#                 print(f"logits_flat.shape: {logits_flat.shape}")
#                 print(f"S_cube[:, {r}, {c}].shape: {S_cube[:, r, c].shape}")

#                 break  # Stop after the first cell

#         # 4. Print the shape of S_cube
#         # print(f"\nS_cube.shape: {S_cube.shape}")

#         # Return S_cube or None, depending on your use case
#         return S_cube

In [4]:
# ---------------------------------------------------
#  Full Notebook Code with ONLY Mixed Precision (fp16), No DeepSpeed
# ---------------------------------------------------

# 1) Standard imports
import importlib
import sys
import os
import torch
from torch.cuda.amp import autocast

# Append the parent directory to the path to import the necessary modules
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Import the utilities and the dataloader
from utils import trainutil, inferutil, setuputil

# Now reload the modules to ensure they are up-to-date
importlib.reload(setuputil)
importlib.reload(trainutil)
importlib.reload(inferutil)

# Import the funcs needed from utils
from utils.setuputil import setup_bert_config, display_bert_config
from utils.trainutil import train_model
from utils.inferutil import infer_one, infer_full

# Define the input config file
setup_config = dict(
    model_name="prajjwal1/bert-tiny",
    device="cuda:0",
    threads=8,
    seed=0,
    data_dir="../../data/farzan/",
    data_ds="manual",
    rows=100,
    cols=100,
    tokens=32,
)

# Resolve the settings into the configuration actually used, then show it
config = setup_bert_config(setup_config)
display_bert_config(config)

# Pull the pieces used below out of the config into local names
DEVICE, THREADS = config['DEVICE'], config['THREADS']
train_loader, val_loader, test_loader = (
    config['train_loader'],
    config['val_loader'],
    config['test_loader'],
)
model_name, tokenizer = config['model_name'], config['tokenizer']

import torch
import torch.nn as nn
from transformers import AutoModel
from tqdm import tqdm


class TestBERT(nn.Module):
    """Per-cell scorer built on a pretrained encoder with gradient checkpointing.

    Every (row, col) cell is passed through the encoder on its own; the
    pooled output then goes through dropout, GELU and a linear layer that
    yields one value per cell.

    Args:
        model_name: Identifier handed to ``AutoModel.from_pretrained``.
        dropout_rate: Probability used by the dropout layer.
    """

    def __init__(self, model_name="bert-base-cased", dropout_rate=0.05):
        super().__init__()

        # Pretrained encoder, with gradient checkpointing switched on
        self.bert = AutoModel.from_pretrained(model_name)
        self.bert.gradient_checkpointing_enable()

        # Head: dropout -> GELU -> linear projection to one value
        self.dropout = nn.Dropout(dropout_rate)
        self.gelu = nn.GELU()
        self.classifier = nn.Linear(self.bert.config.hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        """Score every cell of the grid.

        Args:
            input_ids: Tensor indexed as [batch, row, col, token].
            attention_mask: Tensor indexed the same way as ``input_ids``.

        Returns:
            Tensor of shape (batch, rows, cols) on the device of
            ``input_ids`` holding one value per cell.
        """
        batch_size = input_ids.shape[0]
        n_rows = input_ids.shape[1]
        n_cols = input_ids.shape[2]

        # Output grid, filled one cell at a time
        S_cube = torch.zeros((batch_size, n_rows, n_cols), device=input_ids.device)

        # Visit the cells in row-major order
        for cell in tqdm(range(n_rows * n_cols), desc="Forward"):
            row, col = divmod(cell, n_cols)

            cell_ids = input_ids[:, row, col, :]
            cell_mask = attention_mask[:, row, col, :]

            pooled = self.bert(cell_ids, attention_mask=cell_mask).pooler_output
            scores = self.classifier(self.gelu(self.dropout(pooled)))

            # (batch, 1) -> (batch,) so it fits the [:, row, col] slice
            S_cube[:, row, col] = scores.view(-1)

        return S_cube


# 1) Create model and move to the configured device
untrained_model = TestBERT(model_name=model_name).to(DEVICE)

# This cell only checks the output shape, so run it as inference:
# eval() turns dropout off. Call untrained_model.train() again before fitting.
untrained_model.eval()

# 2) Single-batch DataLoader
check_loader = torch.utils.data.DataLoader(train_loader, batch_size=1, shuffle=False)
batch = next(iter(check_loader))

ex_xtok = batch["x_tok"].to(DEVICE)
ex_xmask = batch["x_masks"].to(DEVICE)

# 3) Mixed-precision forward pass with torch.cuda.amp.
#    - no_grad() avoids building an autograd graph across every per-cell
#      encoder call, which a shape check does not need.
#    - The module is called directly rather than via .forward(), so that
#      nn.Module.__call__ runs any registered hooks.
with torch.no_grad(), autocast():
    out = untrained_model(ex_xtok, ex_xmask)

print(out.shape)

Processing files: 100%|████████████████████████████████████████| 40/40 [00:22<00:00,  1.75it/s]



40(P) = 40(G) + 0(E)


Processing files: 100%|██████████████████████████████████████████| 5/5 [00:00<00:00,  9.99it/s]



5(P) = 5(G) + 0(E)


Processing files: 100%|██████████████████████████████████████████| 5/5 [00:00<00:00, 11.98it/s]



5(P) = 5(G) + 0(E)

Final BERT configuration:
{
  "model_name": "prajjwal1/bert-tiny",
  "data_dir": "../../data/farzan/",
  "DEVICE": "cuda:0",
  "THREADS": 8,
  "data_ds": "manual",
  "train_dir": "../../data/farzan/manual_train",
  "val_dir": "../../data/farzan/manual_val",
  "test_dir": "../../data/farzan/manual_test",
  "rows": 100,
  "cols": 100,
  "tokens": 32,
  "tokenizer": "<ModernBert Tokenizer Object>",
  "train_loader": "<Train BertLoader Object>",
  "val_loader": "<Validation BertLoader Object>",
  "test_loader": "<Test BertLoader Object>"
}


RuntimeError: Failed to import transformers.models.bert.modeling_bert because of the following error (look up to see its traceback):
Artifact name: 'trace_shape_events' not registered,please call register_artifact('trace_shape_events') in torch._logging.registrations.

Forward: 100%|███████████████████████████| 10000/10000 [00:24<00:00, 409.54it/s]

torch.Size([1, 100, 100])



