In [1]:
import torch

def frobenius_distance(A, B):
    """Return the Frobenius norm of the difference ``A - B``.

    This is the Euclidean distance between the two matrices when their
    entries are viewed as flat vectors.

    Args:
        A: Tensor to measure from.
        B: Tensor subtracted from ``A`` before the norm is taken.

    Returns:
        The result of ``torch.linalg.matrix_norm`` (``ord='fro'``) applied
        to the element-wise difference.
    """
    difference = A - B
    return torch.linalg.matrix_norm(difference, ord='fro')

**DeepSeek-R1-Distill-Qwen-1.5B**

In [2]:
import torch
import os
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM

# TODO: try other, possibly larger, models
tokenizer_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
model_name = tokenizer_name

# Load the pre-trained model
model = AutoModelForCausalLM.from_pretrained(model_name)

# Embedding matrix: weight of the token-embedding layer
embeddings = model.model.embed_tokens.weight

# Unembedding matrix: weight of the language-model head
unembedding = model.lm_head.weight

print(f"Embedding matrix Shape:   {embeddings.shape}")
print(f"Unembedding matrix Shape: {unembedding.shape}")

# Compare the shapes of the embedding and unembedding matrices.
# The same-shape case is tested first so that a square matrix is not
# misreported as "transposed".
same_shape = embeddings.shape == unembedding.shape
if same_shape:
    print("- The matrices have the SAME shape (not transposed).")
elif embeddings.shape == unembedding.t().shape:
    print("- The matrices have transposed shapes.")
else:
    print("- The matrices have different shapes.")

# Compare the contents. torch.equal is an exact element-wise test, which is what
# the "exact same" message claims; it is only attempted for equally shaped
# matrices, so mismatched shapes fall through to "UNTIED" instead of raising.
if embeddings is unembedding:
    print("- Weights are TIED (Same object in memory).")
elif same_shape and torch.equal(embeddings, unembedding):
    print("- Weights are distinct objects but are the exact same.")
else:
    print("- Weights are UNTIED and distinct.")

print(f"DeepSeek-R1-Distill-Qwen-1.5B Config 'tie_word_embeddings':     {model.config.tie_word_embeddings}")

  from .autonotebook import tqdm as notebook_tqdm


Embedding matrix Shape:   torch.Size([151936, 1536])
Unembedding matrix Shape: torch.Size([151936, 1536])
- The matrices have the SAME shape (not transposed).
- Weights are UNTIED and distinct.
DeepSeek-R1-Distill-Qwen-1.5B Config 'tie_word_embeddings':     False


In [3]:
# Both operands are model weights, which normally require grad. Computing the
# norms under no_grad() keeps the results from holding on to an autograd graph
# (and the full-size intermediates it retains) for as long as they are referenced.
with torch.no_grad():
    # Absolute Frobenius distance between the embedding and unembedding matrices.
    distance = frobenius_distance(embeddings, unembedding)
    # Norm of the embedding matrix, used to express the distance on a relative scale.
    emb_mat_norm = torch.linalg.matrix_norm(embeddings, ord='fro')
    rel_distance = distance/emb_mat_norm

print(f"The frobenius distance from the embedding to the unembedding matrix of DeepSeek-R1-Distill-Qwen-1.5B is {distance}.")

print(f"The relative frobenius distance from the embedding to the unembedding matrix of DeepSeek-R1-Distill-Qwen-1.5B is {rel_distance}.")

The frobenius distance from the embedding to the unembedding matrix of DeepSeek-R1-Distill-Qwen-1.5B is 106.73193359375.
The relative frobenius distance from the embedding to the unembedding matrix of DeepSeek-R1-Distill-Qwen-1.5B is 0.2553161084651947.


**Qwen/Qwen3-4B-Instruct-2507**

In [4]:
import torch
import os
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM

# TODO: try other, possibly larger, models
tokenizer_name = "Qwen/Qwen3-4B-Instruct-2507"
model_name = tokenizer_name

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

# Load the pre-trained model. `dtype` replaces the `torch_dtype` keyword, which the
# transformers version used for this notebook reports as deprecated.
# NOTE(review): with device_map="auto" some weights may be offloaded (the recorded run
# warns about parameters on the meta device) — confirm the matrices below hold real data.
model = AutoModelForCausalLM.from_pretrained(model_name, dtype="auto", device_map="auto")

# Embedding matrix: weight of the token-embedding layer
embeddings = model.model.embed_tokens.weight

# Unembedding matrix: weight of the output-embedding layer
unembedding = model.get_output_embeddings().weight

print(f"Embedding matrix Shape:   {embeddings.shape}")
print(f"Unembedding matrix Shape: {unembedding.shape}")

# Compare the shapes of the embedding and unembedding matrices.
# The same-shape case is tested first so that a square matrix is not
# misreported as "transposed".
same_shape = embeddings.shape == unembedding.shape
if same_shape:
    print("- The matrices have the SAME shape (not transposed).")
elif embeddings.shape == unembedding.t().shape:
    print("- The matrices have transposed shapes.")
else:
    print("- The matrices have different shapes.")

# Compare the contents. torch.equal is an exact element-wise test, which is what
# the "exact same" message claims; it is only attempted for equally shaped
# matrices, so mismatched shapes fall through to "UNTIED" instead of raising.
if embeddings is unembedding:
    print("- Weights are TIED (Same object in memory).")
elif same_shape and torch.equal(embeddings, unembedding):
    print("- Weights are distinct objects but are the exact same.")
else:
    print("- Weights are UNTIED and distinct.")

print(f"Qwen/Qwen3-4B-Instruct-2507 Config 'tie_word_embeddings':     {model.config.tie_word_embeddings}")

`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|██████████| 3/3 [00:08<00:00,  2.94s/it]
Some parameters are on the meta device because they were offloaded to the disk.


Embedding matrix Shape:   torch.Size([151936, 2560])
Unembedding matrix Shape: torch.Size([151936, 2560])
- The matrices have the SAME shape (not transposed).
- Weights are TIED (Same object in memory).
Qwen/Qwen3-4B-Instruct-2507 Config 'tie_word_embeddings':     True


In [5]:
# The operands are model weights, which normally require grad. Computing the
# distance under no_grad() keeps the result from holding on to an autograd graph
# (and the full-size intermediates it retains) for as long as it is referenced.
with torch.no_grad():
    # Absolute Frobenius distance between the embedding and unembedding matrices.
    distance = frobenius_distance(embeddings, unembedding)

print(f"The frobenius distance from the embedding to the unembedding matrix of Qwen/Qwen3-4B-Instruct-2507 is {distance}.")

The frobenius distance from the embedding to the unembedding matrix of Qwen/Qwen3-4B-Instruct-2507 is 0.0.
