In [None]:
import torch as t
from nnsight import LanguageModel
import argparse
import itertools
import os
import random
import json
import torch.multiprocessing as mp
import time
import huggingface_hub
from datasets import config
from transformers import GPTNeoXForCausalLM, AutoTokenizer


# Shared configuration: which checkpoint family to load and where to place it.
model_name = "EleutherAI/pythia-1b"
device = "cuda"

# Reference HuggingFace model pinned to the "step0" revision, cached locally.
hf_model = GPTNeoXForCausalLM.from_pretrained(
    model_name, revision="step0", cache_dir="./pythia-1b/step0"
)
hf_model = hf_model.to(device)

# nnsight wrapper for the same model name; no revision is passed here, so it
# loads whatever revision is the default for `model_name`.
model = LanguageModel(model_name, device_map=device, dispatch=True)

In [None]:
# print(model)
# print(hf_model)

In [None]:
import torch

def copy_weights(source_model, target_model):
    """
    Copy weights from source model to target model, handling the slight structural
    differences between HuggingFace and LanguageModel implementations.

    Only entries whose key appears in both state dicts are copied. All shapes are
    validated before the first write, so a failure never leaves the target
    half-updated.

    Args:
        source_model: Model exposing ``state_dict()`` to read weights from.
        target_model: Model exposing ``state_dict()`` / ``load_state_dict()`` whose
            weights are overwritten.

    Raises:
        ValueError: If the two state dicts share no keys (nothing would be copied),
            or if a shared key has different shapes in the two models.

    Warns:
        UserWarning: If some target keys have no counterpart in the source and are
            therefore left unchanged.
    """
    import warnings

    source_state = source_model.state_dict()
    target_state = target_model.state_dict()

    shared_keys = [key for key in target_state if key in source_state]
    if not shared_keys:
        raise ValueError(
            "copy_weights: source and target state dicts share no keys; "
            "no weights would be copied"
        )

    # Tensor.copy_ broadcasts the source to the target's shape, so a mismatched
    # but broadcastable tensor would otherwise be copied without any error.
    mismatched = [
        key
        for key in shared_keys
        if source_state[key].shape != target_state[key].shape
    ]
    if mismatched:
        details = ", ".join(
            f"{key}: source {tuple(source_state[key].shape)} vs "
            f"target {tuple(target_state[key].shape)}"
            for key in mismatched
        )
        raise ValueError(f"copy_weights: shape mismatch for shared keys ({details})")

    # Copy matching parameters in place
    for key in shared_keys:
        target_state[key].copy_(source_state[key])

    untouched = len(target_state) - len(shared_keys)
    if untouched:
        warnings.warn(
            f"copy_weights: {untouched} of {len(target_state)} target entries have "
            "no matching key in the source and were left unchanged"
        )

    # Load the updated state dict
    target_model.load_state_dict(target_state)

def verify_weights(model1, model2, test_inputs, atol=1e-5, tokenizer=None, input_device=None):
    """
    Verify that two models produce the same logits given the same input.
    Returns True if outputs match within tolerance, False otherwise.

    Args:
        model1: First model (HuggingFace implementation); called as ``model1(**inputs)``.
        model2: Second model (LanguageModel implementation); called as
            ``model2.forward(**inputs)``.
        test_inputs: List of input strings to test
        atol: Largest absolute logit difference still counted as a match.
        tokenizer: Callable turning a string into model inputs. When omitted, the
            tokenizer for the notebook-level ``model_name`` is loaded.
        input_device: Device the tokenized inputs are moved to. When omitted, the
            notebook-level ``device`` is used.

    Returns:
        bool: True when every input matches (also True for an empty
        ``test_inputs``, since nothing failed); False otherwise.
    """
    # Fall back to the notebook globals only when the caller gave nothing explicit,
    # so the function can also be used without that hidden state.
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
    if input_device is None:
        input_device = device

    # Set both models to eval mode
    model1.eval()
    model2.eval()

    matches = []
    with torch.no_grad():
        for input_text in test_inputs:
            # Tokenize input
            inputs = tokenizer(input_text, return_tensors="pt").to(input_device)

            # Get outputs from both models
            output1 = model1(**inputs).logits
            output2 = model2.forward(**inputs).logits

            # Differently shaped logits can never be "the same"; check explicitly so
            # broadcasting in the subtraction cannot hide the mismatch.
            if output1.shape != output2.shape:
                print(
                    f"Logit shapes differ for '{input_text}': "
                    f"{tuple(output1.shape)} vs {tuple(output2.shape)}"
                )
                matches.append(False)
                continue

            # Compare outputs; .item() keeps plain Python values in `matches`
            max_diff = torch.max(torch.abs(output1 - output2)).item()
            matches.append(max_diff < atol)

            print(f"Max difference in logits for '{input_text}': {max_diff:.2e}")

    return all(matches)

# Example usage
test_inputs = [
    "The quick brown fox",
    "Machine learning is",
    "Python programming"
]

# Copy weights
print("Copying weights...")
copy_weights(hf_model, model)

# Verify the transfer
print("\nVerifying weight transfer...")
success = verify_weights(hf_model, model, test_inputs)
print(f"\nWeight transfer {'successful' if success else 'failed'}")

# Drop the reference model only after verification: verify_weights still needs
# hf_model, so deleting it earlier raises NameError.
del hf_model

In [None]:
import torch as t
# from nnsight import LanguageModel
import argparse
import itertools
import os
import random
import json
import torch.multiprocessing as mp
import time
import huggingface_hub
from datasets import config
import transformer_lens.loading_from_pretrained
from transformers import GPTNeoXForCausalLM, AutoTokenizer
from transformer_lens import HookedTransformer
import transformer_lens


# Shared configuration for the HuggingFace vs. HookedTransformer comparison.
model_name = "EleutherAI/pythia-1b"
device = "cuda"

# Reference HuggingFace model at the "step0" revision, moved to the GPU in bfloat16.
hf_model = GPTNeoXForCausalLM.from_pretrained(
    model_name, revision="step0", cache_dir="./pythia-1b/step0"
)
hf_model = hf_model.to(device, dtype=t.bfloat16)

# Name handed to transformer_lens; currently just the same string as model_name.
model_name_2 = model_name

# NOTE(review): checkpoint_index=0 is presumably the counterpart of the "step0"
# revision above — confirm against the transformer_lens checkpoint listing.
model = HookedTransformer.from_pretrained_no_processing(
    model_name=model_name_2,
    checkpoint_index=0,
    dtype="bfloat16",
).to(device)

In [None]:
# Run one prompt through both implementations and compare their logits.
test_input = "Hello, my name is"
tokenizer = AutoTokenizer.from_pretrained(model_name)
inputs = tokenizer(test_input, return_tensors="pt").to(device)

# This is a pure comparison, so disable autograd: otherwise both forward passes
# keep their computation graphs alive for no benefit.
with t.no_grad():
    output1 = hf_model(**inputs).logits
    output2 = model(inputs["input_ids"])

print(output1)

print(output2)
print(output1.shape, output2.shape)
print(output2.dtype)

# Element-wise agreement within an absolute tolerance of 1e-3.
print(t.allclose(output1, output2, atol=1e-3))

# Largest absolute disagreement between the two sets of logits.
diff = t.abs(output1 - output2)
print(diff.max())