In [1]:
from transformers import AutoModel, AutoTokenizer
import torch

# Hugging Face Hub ID of the pretrained checkpoint that will be pruned.
model_path = "sbintuitions/modernbert-ja-30m"

# Load the pretrained model; later cells read and modify `model` in place.
model = AutoModel.from_pretrained(
    model_path,
)


  from .autonotebook import tqdm as notebook_tqdm
You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.


In [2]:
# Load the tokenizer from the same checkpoint so it can be uploaded
# next to the pruned model.
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
)

In [None]:
# Show the architecture before pruning so it can be compared with the result.
print(f"元のモデル構造: {model}")

# Indices of the encoder layers to keep; their order is preserved in the new stack.
# NOTE(review): in the printed structure, layer 0 uses `attn_norm: Identity()` and is
# listed separately from the remaining layers, so the layers are not all structurally
# identical. A selection that is not a prefix (0, 1, ..., k-1) may not round-trip
# through `from_pretrained` on the pruned checkpoint -- confirm before relying on it.
keep_layers = [0]

# The encoder stack is exposed as `model.layers` on this model.
all_layers = model.layers
num_available_layers = len(all_layers)

# Validate the selection before touching the model. This cell mutates `model` in
# place, so re-running it after an earlier prune sees the already-shortened stack;
# failing with an explicit message is clearer than an opaque IndexError.
if not keep_layers:
    raise ValueError("keep_layers must list at least one layer index")
if len(set(keep_layers)) != len(keep_layers):
    # A repeated index would register the same module (and its weights) twice.
    raise ValueError(f"keep_layers contains duplicate indices: {keep_layers}")
invalid_indices = [i for i in keep_layers if not 0 <= i < num_available_layers]
if invalid_indices:
    raise IndexError(
        f"keep_layers indices {invalid_indices} are out of range for a model with "
        f"{num_available_layers} layers; if this cell was already run, reload the "
        "model before pruning again"
    )

# Build the pruned stack from the selected layers.
new_layers = torch.nn.ModuleList([all_layers[i] for i in keep_layers])

# Swap the pruned stack into the model.
model.layers = new_layers

# Keep the config's layer count in sync with the pruned stack (when the config has one).
if hasattr(model.config, "num_hidden_layers"):
    model.config.num_hidden_layers = len(keep_layers)

print(f"修正後のモデル構造: {model}")

元のモデル構造: ModernBertModel(
  (embeddings): ModernBertEmbeddings(
    (tok_embeddings): Embedding(102400, 256, padding_idx=3)
    (norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    (drop): Dropout(p=0.0, inplace=False)
  )
  (layers): ModuleList(
    (0): ModernBertEncoderLayer(
      (attn_norm): Identity()
      (attn): ModernBertAttention(
        (Wqkv): Linear(in_features=256, out_features=768, bias=False)
        (rotary_emb): ModernBertUnpaddedRotaryEmbedding(dim=64, base=160000.0, scale_base=None)
        (Wo): Linear(in_features=256, out_features=256, bias=False)
        (out_drop): Identity()
      )
      (mlp_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (mlp): ModernBertMLP(
        (Wi): Linear(in_features=256, out_features=2048, bias=False)
        (act): GELUActivation()
        (drop): Dropout(p=0.0, inplace=False)
        (Wo): Linear(in_features=1024, out_features=256, bias=False)
      )
    )
    (1-2): 2 x ModernBertEncoderLayer(


In [4]:
# Display the model's repr to inspect its current layer structure.
model

ModernBertModel(
  (embeddings): ModernBertEmbeddings(
    (tok_embeddings): Embedding(102400, 256, padding_idx=3)
    (norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    (drop): Dropout(p=0.0, inplace=False)
  )
  (layers): ModuleList(
    (0): ModernBertEncoderLayer(
      (attn_norm): Identity()
      (attn): ModernBertAttention(
        (Wqkv): Linear(in_features=256, out_features=768, bias=False)
        (rotary_emb): ModernBertUnpaddedRotaryEmbedding(dim=64, base=160000.0, scale_base=None)
        (Wo): Linear(in_features=256, out_features=256, bias=False)
        (out_drop): Identity()
      )
      (mlp_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (mlp): ModernBertMLP(
        (Wi): Linear(in_features=256, out_features=2048, bias=False)
        (act): GELUActivation()
        (drop): Dropout(p=0.0, inplace=False)
        (Wo): Linear(in_features=1024, out_features=256, bias=False)
      )
    )
    (1-2): 2 x ModernBertEncoderLayer(
      (at

In [5]:
# Suffix that encodes which layers were kept, e.g. "layer_0_1_2" for [0, 1, 2].
layer_name = "layer_" + "_".join(str(i) for i in keep_layers)

# Derive the base name from the source checkpoint ID (the part after the last "/")
# instead of repeating it as a literal, so the repository name follows `model_path`.
base_model_name = model_path.rstrip("/").rsplit("/", 1)[-1]
save_model_name = f"{base_model_name}-{layer_name}"

# Echo the final repository name as the cell output.
save_model_name


'modernbert-ja-30m-layer_0_1_2'

In [None]:
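# err
# NOTE(review): the NameError recorded for this cell shows it previously ran a bare
# `err`; presumably a deliberate stop so "Run All" halts before the Hub uploads
# below -- confirm, or delete this cell.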



NameError: name 'err' is not defined

In [7]:
# Upload the model to the Hub repository named `save_model_name`,
# created as private. The returned commit info is shown as the cell output.
model.push_to_hub(save_model_name, private=True)

model.safetensors: 100%|██████████| 117M/117M [00:11<00:00, 10.1MB/s] 


CommitInfo(commit_url='https://huggingface.co/hotchpotch/modernbert-ja-30m-layer_0_1_2/commit/59a7cbae4090c494a16478f8133bb2cb33346641', commit_message='Upload model', commit_description='', oid='59a7cbae4090c494a16478f8133bb2cb33346641', pr_url=None, repo_url=RepoUrl('https://huggingface.co/hotchpotch/modernbert-ja-30m-layer_0_1_2', endpoint='https://huggingface.co', repo_type='model', repo_id='hotchpotch/modernbert-ja-30m-layer_0_1_2'), pr_revision=None, pr_num=None)

In [8]:
# Upload the tokenizer to the Hub repository named `save_model_name`,
# created as private. The returned commit info is shown as the cell output.
tokenizer.push_to_hub(save_model_name, private=True)

tokenizer.model: 100%|██████████| 1.83M/1.83M [00:00<00:00, 4.17MB/s]


CommitInfo(commit_url='https://huggingface.co/hotchpotch/modernbert-ja-30m-layer_0_1_2/commit/38fd4ea4c0b37bd21b3310a6363a26a59c63b107', commit_message='Upload tokenizer', commit_description='', oid='38fd4ea4c0b37bd21b3310a6363a26a59c63b107', pr_url=None, repo_url=RepoUrl('https://huggingface.co/hotchpotch/modernbert-ja-30m-layer_0_1_2', endpoint='https://huggingface.co', repo_type='model', repo_id='hotchpotch/modernbert-ja-30m-layer_0_1_2'), pr_revision=None, pr_num=None)