In [None]:
!pip install -U transformers

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hub identifier shared by the tokenizer and the model weights.
model_name = "microsoft/Phi-4-mini-reasoning"

# Tokenizer first, then the causal-LM weights.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Loading options: half-precision weights, automatic device placement.
load_options = {
    "torch_dtype": torch.float16,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)

In [None]:
# Model architecture: show the printed representation of the loaded model.
print(model)

In [None]:
# Inspect the configuration object attached to the loaded model.
print(model.config)

In [None]:
# List every submodule path together with the class that implements it.
for module_path, submodule in model.named_modules():
    class_name = submodule.__class__.__name__
    print(f"{module_path}: {class_name}")

In [None]:
# Tokenizer overview: its printed representation, then two headline attributes.
print(tokenizer)
for caption, attribute in (
    ("Vocab size", "vocab_size"),
    ("Model max length", "model_max_length"),
):
    # getattr inside the loop keeps each lookup right before its print,
    # the same order as reading the attributes one statement at a time.
    print(f"{caption}: {getattr(tokenizer, attribute)}")

In [None]:
# Report the tokenizer's special tokens, one per line.
for short_name, attribute in (
    ("BOS", "bos_token"),
    ("EOS", "eos_token"),
    ("PAD", "pad_token"),
    ("UNK", "unk_token"),
):
    # Each attribute is read immediately before it is printed.
    print(f"{short_name} token: {getattr(tokenizer, attribute)}")

In [None]:
# Inspect the tokenizer's chat template.
# Check the attribute's value, not just its existence: the attribute can be
# present but unset (None), in which case hasattr() alone would print
# "Chat template: None" as if a template were defined.
chat_template = getattr(tokenizer, "chat_template", None)
if chat_template:
    print(f"Chat template: {chat_template}")
else:
    print("Chat template: (none defined for this tokenizer)")

In [None]:
# Try out the conversation format supported by Phi-4.
system_turn = {"role": "system", "content": "You are a helpful assistant."}
user_turn = {"role": "user", "content": "Hello, how are you?"}
messages = [system_turn, user_turn]

# Apply the chat template, keeping the result as text (tokenize=False) so the
# rendered prompt can be read directly; the generation prompt is appended.
formatted_input = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print("Formatted input:", formatted_input, sep="\n")

In [None]:
# Inspect the tokenized form of the same conversation.
# return_dict=False is pinned explicitly so the call yields a plain tensor of
# token ids; newer transformers releases may otherwise return a BatchEncoding
# by default, which has no .shape attribute (NOTE: confirm against the
# installed transformers version).
tokenized_input = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=False,
)
print(f"\nTokenized shape: {tokenized_input.shape}")

In [None]:
# Overall parameter counts, gathered in a single pass over the parameters.
total_params = 0
trainable_params = 0
for weight in model.parameters():
    element_count = weight.numel()
    total_params += element_count
    if weight.requires_grad:
        trainable_params += element_count

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Model dtype: {model.dtype}")

# Per-parameter breakdown: name, shape and element count.
for param_name, tensor in model.named_parameters():
    print(f"{param_name}: {tensor.shape} ({tensor.numel():,} params)")