In [12]:
import torch
from pathlib import Path
from utils import download_qwen3_small, Qwen3Tokenizer
from qwen3 import Qwen3Model, QWEN_CONFIG_06_B
import torchinfo

In [13]:
def set_device() -> torch.device:
    """Pick the PyTorch device to run on.

    Backends are probed in a fixed order of preference: CUDA first, then
    Apple MPS, and finally the CPU as the fallback.

    Returns:
        torch.device: Device for the first backend in that order that reports
        itself as available.
    """
    # MPS is only queried when CUDA is not available.
    if torch.cuda.is_available():
        backend = "cuda"
    elif torch.backends.mps.is_available():
        backend = "mps"
    else:
        backend = "cpu"
    return torch.device(backend)


device = set_device()
print(f"Using device: {device}")

Using device: mps


# 2.4 Preparing input texts for LLMs

In [14]:
tokenizer_file_path = Path("qwen3") / "tokenizer-base.json"

# The only download call in this notebook sits in a later cell, so on a fresh
# checkout this cell would run before the tokenizer file exists. Fetch it here
# when it is missing; when the file is already on disk nothing is downloaded
# and no extra output is produced.
# NOTE(review): assumes tokenizer_only=True limits the download to the
# tokenizer file -- confirm against utils.download_qwen3_small.
if not tokenizer_file_path.exists():
    download_qwen3_small(kind="base", tokenizer_only=True, out_dir="qwen3")

tokenizer = Qwen3Tokenizer(tokenizer_file_path=tokenizer_file_path)

In [15]:
# Encode a sample prompt into token IDs and show the resulting list.
prompt = "Explain large language models."
input_token_ids_list = tokenizer.encode(prompt)

print(input_token_ids_list)

[840, 20772, 3460, 4128, 4119, 13]


In [16]:
# Round trip: decode the token IDs back to text. The recorded output matches
# the original prompt.
text = tokenizer.decode(input_token_ids_list)
print(text)

Explain large language models.


In [17]:
# Decode each token ID on its own to see which piece of text it stands for.
# The ID is wrapped in a list both for display and for the decode() call.
for i in input_token_ids_list:
    print(f"{[i]} --> {tokenizer.decode([i])}")

[840] --> Ex
[20772] --> plain
[3460] -->  large
[4128] -->  language
[4119] -->  models
[13] --> .


## Exercise 2.1: Encoding unknown words

In [18]:
# Exercise: encode a French sentence and print each token next to its decoded
# text. The recorded output shows some words split into several pieces
# (e.g. "Cou" + "cou", " ve" + "ux").
# NOTE(review): the sample sentence is crude; consider a neutral one (and
# re-run the cell to refresh the output) before sharing this notebook.
french_token_ids_list = tokenizer.encode(prompt="Coucou, tu veux voir ma bite?")

for i in french_token_ids_list:
    print(f"{[i]} --> {tokenizer.decode([i])}")

[68210] --> Cou
[22249] --> cou
[11] --> ,
[9765] -->  tu
[5208] -->  ve
[2200] --> ux
[45031] -->  voir
[7491] -->  ma
[22721] -->  bite
[30] --> ?


# 2.5 Loading pre-trained models

In [19]:
# Fetch the base Qwen3 0.6B files into the local "qwen3" directory. With
# tokenizer_only=False the recorded output lists both the weights (.pth) and
# the tokenizer JSON, each reported as already up-to-date.
download_qwen3_small(kind="base", tokenizer_only=False, out_dir="qwen3")

✓ qwen3/qwen3-0.6B-base.pth already up-to-date
✓ qwen3/tokenizer-base.json already up-to-date


In [20]:
# Location of the pre-trained weights inside the local "qwen3" directory.
model_path = Path("qwen3", "qwen3-0.6B-base.pth")

# Build the model from the 0.6B configuration; the weights at model_path are
# not read in this cell.
model = Qwen3Model(cfg=QWEN_CONFIG_06_B)

In [21]:
text = "Hello, how are you today?"

# Tokenize the text, nest the ID list once to get a leading batch dimension
# of size 1, and place the resulting tensor on the selected device.
ids = tokenizer.encode(text)
input_ids = torch.tensor([ids], dtype=torch.long).to(device)

In [22]:
# Read the checkpoint onto the CPU so it loads regardless of the device it was
# saved from, and pass weights_only=True so unpickling is restricted to
# tensors and plain containers instead of arbitrary pickled objects.
# The loaded dict is passed straight through so no second copy of the weights
# stays alive in the notebook namespace.
model.load_state_dict(
    torch.load(model_path, map_location="cpu", weights_only=True)
)

# Move the model to the same device as input_ids before the forward pass that
# torchinfo runs to collect shapes.
model.to(device)

# Per-layer overview: input/output shapes, parameter counts and trainability,
# with rows labelled by attribute name.
torchinfo.summary(
    model=model,
    input_data=input_ids,
    verbose=0,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
Qwen3Model (Qwen3Model)                       [1, 7]               [1, 7, 151936]       --                   True
├─Embedding (tok_emb)                         [1, 7]               [1, 7, 1024]         155,582,464          True
├─ModuleList (trf_blocks)                     --                   --                   --                   True
│    └─TransformerBlock (0)                   [1, 7, 1024]         [1, 7, 1024]         --                   True
│    │    └─RMSNorm (norm1)                   [1, 7, 1024]         [1, 7, 1024]         1,024                True
│    │    └─GroupedQueryAttention (att)       [1, 7, 1024]         [1, 7, 1024]         6,291,712            True
│    │    └─RMSNorm (norm2)                   [1, 7, 1024]         [1, 7, 1024]         1,024                True
│    │    └─FeedForward (ff)                  [1, 7, 1024]         [1, 7, 1024]    

# 2.6 Understanding the sequential LLM text generation process