In [1]:
# Session setup: silence warnings, then load torch and the tensor pretty-printer.
import warnings
# Blanket filter: with no category/module given, every warning raised after this
# point is ignored. It sits before the imports below, so warnings emitted while
# those libraries load are hidden too.
# NOTE(review): this also hides deprecation notices — consider narrowing the filter.
warnings.filterwarnings("ignore")

import torch

import lovely_tensors as lt
# presumably replaces the default torch.Tensor repr with a compact summary —
# TODO confirm against the lovely_tensors documentation
lt.monkey_patch()

In [2]:
# Model class used by the following cells to load the checkpoint.
from gliclass import GLiClassModel
# NOTE(review): AutoTokenizer is not used in the cells visible here — confirm a later cell needs it.
from transformers import AutoTokenizer

# NOTE(review): the bare `gliclass` module is not referenced in the cells visible here;
# `transformers` is imported as a module so its version can be printed below.
import gliclass
import transformers

# Record the installed transformers version alongside the results for reproducibility.
print(f"transformers version: {transformers.__version__}")


transformers version: 4.49.0


In [3]:
# Checkpoint identifier passed to from_pretrained; kept in one place so it is easy to swap.
MODEL_ID = "knowledgator/gliclass-modern-base-v3.0"

# Load the pretrained GLiClass model, then echo it so the cell output shows
# the module tree.
model = GLiClassModel.from_pretrained(MODEL_ID)
model

GLiClassModel(
  (model): GLiClassUniEncoder(
    (text_projector): FeaturesProjector(
      (linear_1): Linear(in_features=768, out_features=768, bias=True)
      (act): GELUActivation()
      (linear_2): Linear(in_features=768, out_features=768, bias=True)
    )
    (classes_projector): FeaturesProjector(
      (linear_1): Linear(in_features=768, out_features=768, bias=True)
      (act): GELUActivation()
      (linear_2): Linear(in_features=768, out_features=768, bias=True)
    )
    (pooler): FirstTokenPooling1D()
    (scorer): ScorerDot()
    (dropout): Dropout(p=0.15, inplace=False)
    (encoder_model): ModernBertModel(
      (embeddings): ModernBertEmbeddings(
        (tok_embeddings): Embedding(50370, 768, padding_idx=50283)
        (norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
      (layers): ModuleList(
        (0): ModernBertEncoderLayer(
          (attn_norm): Identity()
          (attn): ModernBertAttenti

In [4]:
# List the name of every parameter the model registers, in registration order.
[param_name for param_name, _ in model.named_parameters()]

['model.logit_scale',
 'model.text_projector.linear_1.weight',
 'model.text_projector.linear_1.bias',
 'model.text_projector.linear_2.weight',
 'model.text_projector.linear_2.bias',
 'model.classes_projector.linear_1.weight',
 'model.classes_projector.linear_1.bias',
 'model.classes_projector.linear_2.weight',
 'model.classes_projector.linear_2.bias',
 'model.encoder_model.embeddings.tok_embeddings.weight',
 'model.encoder_model.embeddings.norm.weight',
 'model.encoder_model.layers.0.attn.Wqkv.weight',
 'model.encoder_model.layers.0.attn.Wo.weight',
 'model.encoder_model.layers.0.mlp_norm.weight',
 'model.encoder_model.layers.0.mlp.Wi.weight',
 'model.encoder_model.layers.0.mlp.Wo.weight',
 'model.encoder_model.layers.1.attn_norm.weight',
 'model.encoder_model.layers.1.attn.Wqkv.weight',
 'model.encoder_model.layers.1.attn.Wo.weight',
 'model.encoder_model.layers.1.mlp_norm.weight',
 'model.encoder_model.layers.1.mlp.Wi.weight',
 'model.encoder_model.layers.1.mlp.Wo.weight',
 'model.en

In [7]:
# Break the model's parameter count down into three buckets, chosen by a
# case-insensitive substring match on each parameter's name:
#   * contains "embed"                -> embedding
#   * contains "attn" or "attention"  -> attention (only checked when "embed" did not match)
#   * anything else                   -> other
bucket_sizes = {"embedding": 0, "attention": 0, "other": 0}
for name, param in model.named_parameters():
    lname = name.lower()
    if "embed" in lname:
        bucket = "embedding"
    elif any(tag in lname for tag in ("attn", "attention")):
        bucket = "attention"
    else:
        bucket = "other"
    bucket_sizes[bucket] += param.numel()

# Expose the per-bucket totals under the same names as before.
embedding_params = bucket_sizes["embedding"]
attention_params = bucket_sizes["attention"]
other_params = bucket_sizes["other"]
total_params = sum(bucket_sizes.values())

# Report absolute counts plus each bucket's share of the total.
print(f"Total parameters: {total_params:,}")
print(f"Embedding parameters: {embedding_params:,} ({embedding_params/total_params*100:.2f}%)")
print(f"Attention parameters: {attention_params:,} ({attention_params/total_params*100:.2f}%)")
print(f"Other parameters: {other_params:,} ({other_params/total_params*100:.2f}%)")

Total parameters: 151,378,177
Embedding parameters: 38,684,928 (25.56%)
Attention parameters: 51,920,640 (34.30%)
Other parameters: 60,772,609 (40.15%)
