In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.__version__

'2.9.0+cu126'

In [7]:
# GPU 사용 가능 여부 확인
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"사용 중인 장치: {device}")
print(f"GPU 이름: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU만 사용 가능'}")

사용 중인 장치: cuda
GPU 이름: NVIDIA GeForce RTX 3060 Ti


In [12]:
pip install transformers

Collecting transformers
  Downloading transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting pyyaml>=5.1 (from transformers)
  Downloading pyyaml-6.0.3-cp312-cp312-win_amd64.whl.metadata (2.4 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2025.10.23-cp312-cp312-win_amd64.whl.metadata (41 kB)
Collecting requests (from transformers)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Using cached safetensors-0.6.2-cp38-abi3-win_amd64.whl.metadata (4.1 kB)
Collecting tqdm>=4.27 (from transformers)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting charset_normalizer<4,>=2 (from requests->transformers)
  Downlo

In [13]:
from transformers import BertTokenizer

# 1. 설정
cfg = {
    "vocab_size": 30522,
    "max_len": 64,
    "hidden_size": 256,
    "type_vocab_size": 2,
    "dropout": 0.1
}

# 2. 임베딩 정의
class BertEmbedding(nn.Module):
    def __init__(self, cfg):
        super().__init__()
        self.emb_word = nn.Embedding(cfg["vocab_size"], cfg["hidden_size"])
        self.emb_pos  = nn.Embedding(cfg["max_len"], cfg["hidden_size"])
        self.emb_type = nn.Embedding(cfg["type_vocab_size"], cfg["hidden_size"])
        self.ln = nn.LayerNorm(cfg["hidden_size"])
        self.do = nn.Dropout(cfg["dropout"])

    def forward(self, input_ids, token_type_ids):
        B, T = input_ids.shape
        device = input_ids.device
        pos = torch.arange(T, device=device).unsqueeze(0).expand(B, T)
        w = self.emb_word(input_ids)
        p = self.emb_pos(pos)
        t = self.emb_type(token_type_ids)
        x = self.ln(w + p + t)
        x = self.do(x)
        return x

emb = BertEmbedding(cfg)

# 3. 인터넷 예시 문장
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
text = "[CLS] i like coffee [SEP] but my friend likes tea [SEP]"

tokens = tokenizer(text, return_tensors="pt", padding="max_length", truncation=True, max_length=16)
print({k: v.shape for k, v in tokens.items()})  # input_ids, token_type_ids, attention_mask

# 4. 임베딩 통과
with torch.no_grad():
    out = emb(tokens["input_ids"], tokens["token_type_ids"])
print("Output shape:", out.shape)  # (1, 16, 256)

# 5. 확인
print("첫 5개 토큰:", tokenizer.convert_ids_to_tokens(tokens["input_ids"][0][:5]))
print("임베딩 첫 토큰 벡터 norm:", out[0,0].norm().item())


  from .autonotebook import tqdm as notebook_tqdm


{'input_ids': torch.Size([1, 16]), 'token_type_ids': torch.Size([1, 16]), 'attention_mask': torch.Size([1, 16])}
Output shape: torch.Size([1, 16, 256])
첫 5개 토큰: ['[CLS]', '[CLS]', 'i', 'like', 'coffee']
임베딩 첫 토큰 벡터 norm: 16.568382263183594


In [None]:
from transformers import BertTokenizer

# 1. 설정
cfg = {
    "vocab"
}