# How to Inspect HuggingFace Model Architecture in Python (Step-by-Step)

# Loading the model

In [16]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA (no manual edit needed).
device = "cuda" if torch.cuda.is_available() else "cpu"
model_path = "ibm-granite/granite-4.0-h-350M"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# device_map tells from_pretrained which device to place the weights on.
model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)

# Method 1 — Print the model structure

In [17]:
# Dump the nested module tree of the loaded model. In the output below,
# consecutive identical layers are collapsed into one entry such as
# "(0-9): 10 x ...", so the listing stays fairly compact.
print(model)

GraniteMoeHybridForCausalLM(
  (model): GraniteMoeHybridModel(
    (embed_tokens): Embedding(100352, 768, padding_idx=100256)
    (layers): ModuleList(
      (0-9): 10 x GraniteMoeHybridDecoderLayer(
        (input_layernorm): GraniteMoeHybridRMSNorm((768,), eps=1e-05)
        (post_attention_layernorm): GraniteMoeHybridRMSNorm((768,), eps=1e-05)
        (shared_mlp): GraniteMoeHybridMLP(
          (activation): SiLUActivation()
          (input_linear): Linear(in_features=768, out_features=4096, bias=False)
          (output_linear): Linear(in_features=2048, out_features=768, bias=False)
        )
        (mamba): GraniteMoeHybridMambaLayer(
          (act): SiLUActivation()
          (conv1d): Conv1d(1792, 1792, kernel_size=(4,), stride=(1,), padding=(3,), groups=1792)
          (in_proj): Linear(in_features=768, out_features=3376, bias=False)
          (norm): GraniteMoeHybridRMSNormGated()
          (out_proj): Linear(in_features=1536, out_features=768, bias=False)
        )
      

# Method 2 — Use module introspection & per-layer inspection

In [18]:
# Walk every module in the tree. The first entry is the root model itself,
# listed under an empty name. Each printed module includes its whole subtree,
# so the same layers show up repeatedly and the output gets long.
for name, module in model.named_modules():
    line = f"{name} -> {module}"
    print(line)


 -> GraniteMoeHybridForCausalLM(
  (model): GraniteMoeHybridModel(
    (embed_tokens): Embedding(100352, 768, padding_idx=100256)
    (layers): ModuleList(
      (0-9): 10 x GraniteMoeHybridDecoderLayer(
        (input_layernorm): GraniteMoeHybridRMSNorm((768,), eps=1e-05)
        (post_attention_layernorm): GraniteMoeHybridRMSNorm((768,), eps=1e-05)
        (shared_mlp): GraniteMoeHybridMLP(
          (activation): SiLUActivation()
          (input_linear): Linear(in_features=768, out_features=4096, bias=False)
          (output_linear): Linear(in_features=2048, out_features=768, bias=False)
        )
        (mamba): GraniteMoeHybridMambaLayer(
          (act): SiLUActivation()
          (conv1d): Conv1d(1792, 1792, kernel_size=(4,), stride=(1,), padding=(3,), groups=1792)
          (in_proj): Linear(in_features=768, out_features=3376, bias=False)
          (norm): GraniteMoeHybridRMSNormGated()
          (out_proj): Linear(in_features=1536, out_features=768, bias=False)
        )
  

In [19]:
# Only the direct children of the top-level model are visited here
# (the first one printed below is the inner `model` attribute).
for name, module in model.named_children():
    line = f"{name} {module}"
    print(line)


model GraniteMoeHybridModel(
  (embed_tokens): Embedding(100352, 768, padding_idx=100256)
  (layers): ModuleList(
    (0-9): 10 x GraniteMoeHybridDecoderLayer(
      (input_layernorm): GraniteMoeHybridRMSNorm((768,), eps=1e-05)
      (post_attention_layernorm): GraniteMoeHybridRMSNorm((768,), eps=1e-05)
      (shared_mlp): GraniteMoeHybridMLP(
        (activation): SiLUActivation()
        (input_linear): Linear(in_features=768, out_features=4096, bias=False)
        (output_linear): Linear(in_features=2048, out_features=768, bias=False)
      )
      (mamba): GraniteMoeHybridMambaLayer(
        (act): SiLUActivation()
        (conv1d): Conv1d(1792, 1792, kernel_size=(4,), stride=(1,), padding=(3,), groups=1792)
        (in_proj): Linear(in_features=768, out_features=3376, bias=False)
        (norm): GraniteMoeHybridRMSNormGated()
        (out_proj): Linear(in_features=1536, out_features=768, bias=False)
      )
    )
    (10): GraniteMoeHybridDecoderLayer(
      (input_layernorm): Gra

# Method 3 — Use a “model summary” tool to get a tabular overview

In [20]:
from torchinfo import summary

# Called with the model only (no example input), so the table below lists
# layer types and parameter counts, not output shapes.
# NOTE(review): to also report per-layer output shapes, torchinfo needs an
# example input (see its `input_data` / `input_size` arguments) — confirm the
# right shape/dtype for this model before adding one.
summary(model)  # no input passed here; an example would be batch_size=1, seq_length=10


Layer (type:depth-idx)                                            Param #
GraniteMoeHybridForCausalLM                                       --
├─GraniteMoeHybridModel: 1-1                                      --
│    └─Embedding: 2-1                                             77,070,336
│    └─ModuleList: 2-2                                            --
│    │    └─GraniteMoeHybridDecoderLayer: 3-1                     8,503,184
│    │    └─GraniteMoeHybridDecoderLayer: 3-2                     8,503,184
│    │    └─GraniteMoeHybridDecoderLayer: 3-3                     8,503,184
│    │    └─GraniteMoeHybridDecoderLayer: 3-4                     8,503,184
│    │    └─GraniteMoeHybridDecoderLayer: 3-5                     8,503,184
│    │    └─GraniteMoeHybridDecoderLayer: 3-6                     8,503,184
│    │    └─GraniteMoeHybridDecoderLayer: 3-7                     8,503,184
│    │    └─GraniteMoeHybridDecoderLayer: 3-8                     8,503,184
│    │    └─GraniteMoeHybridDecode

# Tip — Print the model config to see the architecture hyperparameters

In [21]:
# The config object holds the architecture hyperparameters; printing it gives
# a JSON-like listing (e.g. hidden_size, intermediate_size and the per-layer
# "mamba"/"attention" layout under layer_types).
config = model.config
print(config)


GraniteMoeHybridConfig {
  "architectures": [
    "GraniteMoeHybridForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "attention_multiplier": 0.015625,
  "bos_token_id": 100257,
  "dtype": "float32",
  "embedding_multiplier": 12,
  "eos_token_id": 100257,
  "hidden_act": "silu",
  "hidden_size": 768,
  "init_method": "mup",
  "initializer_range": 0.1,
  "intermediate_size": 2048,
  "layer_types": [
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "attention",
    "mamba",
    "mamba",
    "attention",
    "mamba",
    "mamba",
    "mamba",
    "attention",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "attention",
    "mamba",
    "mamba",
    "mamba",
    "mamba"
  ],
  "logits_scaling": 3,
  "mamba_chunk_size": 256,
  "mamba_conv_bias": true,
  "mamba_d_conv": 4,
  "mamba_d_head": 32,
  "mamba_d_state":