In [47]:
from transformers import Qwen2Model, AutoTokenizer, Qwen2ForCausalLM
import torch

# Hugging Face checkpoint id; the same id is used to load both the tokenizer
# and the causal-LM weights.
llm_path = "Qwen/Qwen2.5-7B"
tokenizer = AutoTokenizer.from_pretrained(llm_path)
llm_decoder = Qwen2ForCausalLM.from_pretrained(llm_path)

sentence = "Hi How are you? My name is Al"
# Pad (or truncate) the single sentence to exactly 100 token positions;
# later cells index the model output at both early and late positions.
tokens = tokenizer(sentence, return_tensors="pt", padding="max_length", max_length=100, truncation=True)

# Inference only: one forward pass with gradient tracking disabled.
with torch.no_grad():
    output = llm_decoder(**tokens)


Downloading shards: 100%|███████████████████████████████████████████████████| 4/4 [06:03<00:00, 90.78s/it]
Loading checkpoint shards: 100%|████████████████████████████████████████████| 4/4 [00:10<00:00,  2.73s/it]


In [54]:
from transformers import AutoTokenizer, Qwen2ForCausalLM
import torch

# Cache of (tokenizer, model) pairs keyed by checkpoint path. Looked up through
# globals() so that re-running this cell keeps already-loaded weights instead
# of throwing them away and reloading them from disk.
_LOADED_MODELS = globals().get("_LOADED_MODELS", {})


def _load_tokenizer_and_model(llm_path):
    """Return the (tokenizer, model) pair for llm_path, loading it only on first use."""
    if llm_path not in _LOADED_MODELS:
        _LOADED_MODELS[llm_path] = (
            AutoTokenizer.from_pretrained(llm_path),
            Qwen2ForCausalLM.from_pretrained(llm_path),
        )
    return _LOADED_MODELS[llm_path]


def analyze_predictions(sentence, start_pos, end_pos, llm_path="Qwen/Qwen2.5-7B", max_length=100):
    """Report the model's top predictions at a range of sequence positions.

    The sentence is tokenized and padded/truncated to ``max_length`` positions,
    run through the causal LM once, and the output logits at each position in
    ``range(start_pos, end_pos)`` are summarised.

    Args:
        sentence: Text to tokenize and feed to the model.
        start_pos: First sequence position to analyze (inclusive).
        end_pos: Position to stop at (exclusive).
        llm_path: Checkpoint id/path passed to ``from_pretrained``. The
            tokenizer and model are loaded once per path and then reused.
        max_length: Number of positions the input is padded/truncated to.

    Returns:
        A list with one dict per analyzed position, with keys ``position``,
        ``predicted_token`` (decoded argmax token), ``predicted_id``,
        ``pad_probability`` (softmax probability of the pad token id) and
        ``top_3`` (list of ``(decoded_token, probability)`` pairs).

    Raises:
        IndexError: If the requested range reaches outside the padded
            sequence. Raised before the forward pass is run.
    """
    tokenizer, model = _load_tokenizer_and_model(llm_path)

    # Tokenize input with padding
    tokens = tokenizer(sentence,
                       return_tensors="pt",
                       padding="max_length",
                       max_length=max_length,
                       truncation=True)

    # Fail fast: reject positions outside the sequence before paying for a
    # forward pass. Negative positions keep their usual "from the end" meaning.
    seq_len = tokens['input_ids'].shape[1]
    if start_pos < end_pos and (start_pos < -seq_len or end_pos > seq_len):
        raise IndexError(
            f"positions {start_pos}..{end_pos - 1} fall outside a sequence of length {seq_len}"
        )

    # Get the padding token ID
    pad_token_id = tokenizer.pad_token_id

    # The attention mask marks the real (non-pad) tokens, so the real-token
    # count respects truncation and does not depend on the padding side.
    input_ids = tokens['input_ids'][0].tolist()
    attention_mask = tokens['attention_mask'][0].tolist()
    real_ids = [token_id for token_id, keep in zip(input_ids, attention_mask) if keep]
    actual_length = len(real_ids)

    print(f"Original sentence: {sentence}")
    print(f"Actual token length: {actual_length}")
    print(f"Padding token ID: {pad_token_id}")
    print("\nInput tokens:")
    print(tokenizer.convert_ids_to_tokens(real_ids))

    # Generate predictions
    with torch.no_grad():
        outputs = model(**tokens)

    # Analyze predictions for specified range
    predictions = []
    sequence_logits = outputs.logits[0]  # only the first batch row is analyzed

    for i in range(start_pos, end_pos):
        logits = sequence_logits[i]
        probs = torch.softmax(logits, dim=0)

        # Get top 3 predictions
        top_probs, top_tokens = torch.topk(probs, 3)

        pred_token = torch.argmax(logits).item()
        pad_prob = probs[pad_token_id].item()

        predictions.append({
            'position': i,
            'predicted_token': tokenizer.decode([pred_token]),
            'predicted_id': pred_token,
            'pad_probability': pad_prob,
            'top_3': [(tokenizer.decode([t.item()]), p.item())
                      for t, p in zip(top_tokens, top_probs)]
        })

    return predictions

# Analyze the first nine positions and the last ten positions of the padded
# sequence, then print both reports through one shared routine.
def _print_predictions(header, preds):
    """Print a header line followed by the details of every prediction in preds."""
    print(header)
    for pred in preds:
        print(f"\nPosition {pred['position']}:")
        print(f"Predicted token: '{pred['predicted_token']}'")
        print(f"Pad token probability: {pred['pad_probability']:.6f}")
        print("Top 3 predictions:")
        for token, prob in pred['top_3']:
            print(f"  '{token}': {prob:.6f}")


sentence = "Hi How are you? My name is Al"
early_preds = analyze_predictions(sentence, 0, 9)
late_preds = analyze_predictions(sentence, 90, 100)

_print_predictions("\nEarly predictions (positions 0-8):", early_preds)
_print_predictions("\nLate predictions (positions 90-99):", late_preds)

Loading checkpoint shards: 100%|████████████████████████████████████████████| 4/4 [00:01<00:00,  2.17it/s]


Original sentence: Hi How are you? My name is Al
Actual token length: 9
Padding token ID: 151643

Input tokens:
['Hi', 'ĠHow', 'Ġare', 'Ġyou', '?', 'ĠMy', 'Ġname', 'Ġis', 'ĠAl']


Loading checkpoint shards: 100%|████████████████████████████████████████████| 4/4 [00:06<00:00,  1.53s/it]


Original sentence: Hi How are you? My name is Al
Actual token length: 9
Padding token ID: 151643

Input tokens:
['Hi', 'ĠHow', 'Ġare', 'Ġyou', '?', 'ĠMy', 'Ġname', 'Ġis', 'ĠAl']

Early predictions (positions 0-8):

Position 0:
Predicted token: ' I'
Pad token probability: 0.000032
Top 3 predictions:
  ' I': 0.200899
  ',': 0.121025
  ' i': 0.086963

Position 1:
Predicted token: ' can'
Pad token probability: 0.000000
Top 3 predictions:
  ' can': 0.354732
  ' to': 0.200555
  ' do': 0.155959

Position 2:
Predicted token: ' you'
Pad token probability: 0.000000
Top 3 predictions:
  ' you': 0.937362
  ' u': 0.015357
  ' You': 0.013532

Position 3:
Predicted token: ' doing'
Pad token probability: 0.000565
Top 3 predictions:
  ' doing': 0.119657
  '?
': 0.113948
  '?': 0.109641

Position 4:
Predicted token: ' I'
Pad token probability: 0.001637
Top 3 predictions:
  ' I': 0.275233
  ' My': 0.040477
  ' Can': 0.037609

Position 5:
Predicted token: ' name'
Pad token probability: 0.000003
Top 3 pred

In [53]:
# Decode the tokenizer's end-of-sequence token id to see its text form.
tokenizer.decode(tokenizer.eos_token_id)

'<|endoftext|>'

In [48]:
# Display the padded token ids produced by the tokenizer call for `sentence`.
tokens.input_ids

tensor([[ 13048,   2585,    525,    498,     30,   3017,    829,    374,   1674,
         151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
         151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
         151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
         151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
         151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
         151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
         151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
         151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
         151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
         151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
         151643]])

In [50]:
# Greedy (argmax) token read from the final sequence position, index -1.
# NOTE(review): with the max_length padding applied when `tokens` was built,
# this position appears to hold a pad token, not the end of the sentence.
tokenizer.decode(output.logits[:, -1, :].argmax())

' I'

In [51]:
# Greedy (argmax) token read from sequence position 8.
# NOTE(review): for the 9-token sentence tokenized in this notebook, index 8
# looks like the last non-pad position — confirm if `sentence` changes.
tokenizer.decode(output.logits[:, 8, :].argmax())

'ina'

In [39]:
# Inspect the shape of the logits tensor returned by the forward pass.
output.logits.size()

torch.Size([1, 100, 152064])

In [42]:
# Greedy (argmax) token id at each of positions 0-8 of the first (only) batch
# row, decoded back to text. One vectorized argmax over the vocabulary axis
# replaces the per-position loop and its tensor -> numpy round trip, and
# yields plain Python ints for the tokenizer.
# NOTE(review): 9 matches the unpadded token count printed for the sentence
# used in this notebook — adjust if the sentence changes.
out_tokens = output.logits[0, :9].argmax(dim=-1).tolist()
tokenizer.decode(out_tokens)

'， are you? I name is John1'

In [41]:
# Greedy (argmax) token id at each of positions 90-99 of the first (only)
# batch row, decoded back to text. One vectorized argmax over the vocabulary
# axis replaces the per-position loop and its tensor -> numpy round trip, and
# yields plain Python ints for the tokenizer.
# NOTE(review): with inputs padded to 100 positions these indices appear to
# fall in the padding region — confirm against `tokens.input_ids`.
out_tokens = output.logits[0, 90:100].argmax(dim=-1).tolist()
tokenizer.decode(out_tokens)

' .  is is., My" '