In [None]:
# üöÄ Install required packages for running the model
# - transformers: for loading and running LLMs
# - datasets: optional, for dataset handling
# - peft: for later LoRA experiments
# - accelerate: for optimized GPU usage in Colab
!pip install -q transformers datasets peft accelerate

In [None]:
# üîç Check GPU availability in Colab
import torch
if torch.cuda.is_available():
    print("GPU is available:", torch.cuda.get_device_name(0))
else:
    print("GPU not available. Make sure Runtime -> Change runtime type -> GPU is selected")

GPU not available. Make sure Runtime -> Change runtime type -> GPU is selected


In [2]:
# üîç Check GPU properties and memory in Colab
if torch.cuda.is_available():
    device = torch.cuda.current_device()
    print(f"‚úÖ GPU is available: {torch.cuda.get_device_name(device)}")
    print(f"  - CUDA version: {torch.version.cuda}")
    print(f"  - PyTorch version: {torch.__version__}")

    # GPU memory stats
    total_mem = torch.cuda.get_device_properties(device).total_memory / 1e9
    reserved_mem = torch.cuda.memory_reserved(device) / 1e9
    allocated_mem = torch.cuda.memory_allocated(device) / 1e9
    free_mem = reserved_mem - allocated_mem

    print(f"  - Total memory: {total_mem:.2f} GB")
    print(f"  - Reserved memory: {reserved_mem:.2f} GB")
    print(f"  - Allocated memory: {allocated_mem:.2f} GB")
    print(f"  - Free memory in reserved pool: {free_mem:.2f} GB")
else:
    print("‚ùå GPU not available. Make sure Runtime -> Change runtime type -> GPU is selected")

‚úÖ GPU is available: Tesla T4
  - CUDA version: 12.8
  - PyTorch version: 2.10.0+cu128
  - Total memory: 15.64 GB
  - Reserved memory: 0.00 GB
  - Allocated memory: 0.00 GB
  - Free memory in reserved pool: 0.00 GB


In [3]:
# üì¶ Import libraries
from transformers import AutoModelForCausalLM, AutoTokenizer

In [4]:
MODEL_ID = "Qwen/Qwen2-1.5B-Instruct" # Change to your model ID if needed  "Qwen/Qwen2-7B-Instruct"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
# ==============================
# üîπ Load tokenizer
# ==============================
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # Ensure padding token is set

# ==============================
# üîπ Load model
# ==============================
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16 if DEVICE=="cuda" else torch.float32,
    device_map="auto" if DEVICE=="cuda" else None,
    trust_remote_code=True
)

# Set model to evaluation mode
model.eval()

print(f"Model {MODEL_ID} loaded on {DEVICE} ‚úÖ")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]



merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/338 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

Model Qwen/Qwen2-1.5B-Instruct loaded on cuda ‚úÖ


In [None]:
# ==============================
# üîπ Inference helper function
# ==============================
def generate_text(prompt, max_length=128, temperature=0.7):
    """
    Generates text from a given prompt using the loaded model.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,
        temperature=temperature,
        do_sample=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# Example usage
sample_prompt = "Explain in simple terms how a transformers work."  # "Explain in simple terms how a DL transformers model work."
print(generate_text(sample_prompt))

# üîπ Transformer Architecture Overview

A Transformer processes text in the following general flow:

1. **Tokenization:** Text is split into tokens (subwords) and mapped to integer IDs.
2. **Embeddings:** Each token ID is converted into a high-dimensional embedding vector.
3. **Positional Encoding:** Adds information about token positions in the sequence.
4. **Encoder / Decoder Layers:** Composed of multi-head self-attention and feed-forward layers.
5. **Output Projection:** Produces logits for each token in the vocabulary.
6. **Softmax & Generation:** Converts logits into probabilities to predict the next token.

---

<img src="images/transformer_flow.png" alt="Simplified Transformer Flow" width="800"/>

In [6]:
# ==============================
# üîπ Access tokenizer vocabulary
# ==============================
vocab = tokenizer.get_vocab()  # Dictionary: token -> ID
print(f"Vocabulary size: {len(vocab)} tokens")

Vocabulary size: 151646 tokens


In [13]:
import random

def sample_vocab(tokenizer, N=10):
    """
    Randomly select N items from the tokenizer's vocabulary and print them.
    """
    vocab = tokenizer.get_vocab()  # token -> ID
    sample_tokens = random.sample(list(vocab.items()), N)

    print(f"Randomly selected {N} vocabulary tokens and their IDs:\n")
    for token, token_id in sample_tokens:
        print(f"{token_id:>5} ‚Üí {token}")

# Example: show 15 random tokens
sample_vocab(tokenizer, N=100)

Randomly selected 100 vocabulary tokens and their IDs:

53026 ‚Üí ƒ†discrepan
56980 ‚Üí +=(
103937 ‚Üí √•¬π¬∂√§¬∏ƒ∂
119525 ‚Üí √©¬¨ƒµ
99590 ‚Üí √ß≈Éƒ∂
90066 ‚Üí √Ñ¬±kl
39095 ‚Üí ƒ†√§¬∏ƒ≠
122136 ‚Üí √¶¬∞ƒ∑
106127 ‚Üí √¶¬¨¬ß√ßƒΩ≈Å
119369 ‚Üí √¶ƒØƒ∞
125063 ‚Üí √†¬∏ƒ™√†¬∏¬±√†¬∏ƒ∂
134954 ‚Üí √≠ƒ±ƒ´√™¬∞ƒ¢
28529 ‚Üí ƒ†vendors
146950 ‚Üí √¨¬™ƒ∫
54760 ‚Üí /session
69608 ‚Üí \Session
119776 ‚Üí √•¬¶¬©
109522 ‚Üí √®¬øƒª√§¬πƒ™√•¬§¬ß
 2522 ‚Üí Status
87527 ‚Üí ƒ†ksi
57659 ‚Üí ƒ†Houses
41069 ‚Üí .Globalization
86918 ‚Üí ƒ†fuller
146816 ‚Üí √∞ƒøƒπƒ∂
142269 ‚Üí ƒ†wida√Ñƒ©
131623 ‚Üí √óƒ∏√óƒ∑√óƒ¥
126819 ‚Üí √ê¬º√ê¬µ√ëƒ£√ëƒ§
134794 ‚Üí √¨¬£¬º√´ƒ¨ƒ∂
145365 ‚Üí √´ƒ§ƒØ
12408 ‚Üí ƒ†trees
62616 ‚Üí ƒ†desarrollo
139755 ‚Üí ƒ†√ëƒ¢√ê¬µ√ê¬∞√ê¬ª√ê¬∏√ê¬∑√ê¬∞√ëƒ®√ê¬∏
54965 ‚Üí study
12845 ‚Üí ƒ†spell
147963 ‚Üí √∞≈Åƒ∑¬∏
34158 ‚Üí ƒ†Redis
55688 ‚Üí ardash
95662 ‚Üí <Service
58376 ‚Üí _MATRIX
84214 ‚Üí ƒ†ubuntu
98517 ‚Üí (workspace
27567 ‚Üí ƒ†SqlDbType
131368 ‚Üí ƒ†tr√É¬°ch
64769 ‚Üí MERCHANTABILITY
84808 ‚Üí ƒ†Home

In [12]:
import random

def sample_vocab_by_language(tokenizer, N=10, language="en"):
    """
    üîπ Randomly select N tokens from the tokenizer vocabulary filtered by language.

    Parameters:
    - tokenizer: the HuggingFace tokenizer object
    - N: number of tokens to sample
    - language: target language, e.g., "en" for English, "fr" for French, "es" for Spanish

    Note:
    - Tokenizers often use subwords (BPE / SentencePiece), so tokens may not be full words.
    - This function approximates language filtering by checking character codes.
    """

    # 1Ô∏è‚É£ Get the full vocabulary: a dictionary {token_string: token_id}
    vocab = tokenizer.get_vocab()

    # 2Ô∏è‚É£ Filter tokens based on language
    if language == "en":
        # Keep only ASCII characters ‚Üí mostly English tokens/subwords
        filtered_tokens = [
            item for item in vocab.items()
            if all(ord(c) < 128 for c in item[0])
        ]
    elif language == "fr" or language == "es":
        # Keep tokens containing non-ASCII characters ‚Üí likely accented letters
        filtered_tokens = [
            item for item in vocab.items()
            if any(ord(c) > 127 for c in item[0])
        ]
    else:
        # No filtering, keep all tokens
        filtered_tokens = list(vocab.items())

    # 3Ô∏è‚É£ Randomly select N tokens from the filtered list
    # If filtered list is smaller than N, take all tokens
    sample_tokens = random.sample(filtered_tokens, min(N, len(filtered_tokens)))

    # 4Ô∏è‚É£ Print the results in a student-friendly way
    print(f"Randomly selected {len(sample_tokens)} '{language}' vocabulary tokens:\n")
    for token, token_id in sample_tokens:
        print(f"{token_id:>5} ‚Üí {token}")

# ==============================
# üîπ Example Usage for Students
# ==============================
# Show 10 English tokens
sample_vocab_by_language(tokenizer, N=10, language="en")

# Show 5 French tokens
sample_vocab_by_language(tokenizer, N=5, language="fr")

Randomly selected 10 'en' vocabulary tokens:

28546 ‚Üí ties
97068 ‚Üí RDD
55534 ‚Üí _Null
92952 ‚Üí +z
26846 ‚Üí asje
65561 ‚Üí _math
 5131 ‚Üí ox
86582 ‚Üí =key
55061 ‚Üí _UTF
 4417 ‚Üí .error
Randomly selected 5 'fr' vocabulary tokens:

113258 ‚Üí √§¬∏¬Ω√¶¬±≈Å
14198 ‚Üí ƒ†interpret
35973 ‚Üí ƒ†"`
93513 ‚Üí ƒ†Spoon
107829 ‚Üí √¶¬∂ƒ™√®¬¥¬π√®ƒ¢ƒß√ßƒºƒ¶
