In [3]:
import random
import torch
import copy

from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig

In [4]:
# Toggle for CUDA execution: later cells read this flag to choose the default
# torch device and to decide whether tokenized inputs are moved to "cuda:0".
gpu = False

In [None]:
# Checkpoint id shared by the model, tokenizer and config so the three loads
# below can never drift apart.
MODEL_NAME = "huggyllama/llama-7b"

if gpu:
    # Set before loading so tensors allocated afterwards default to the GPU.
    torch.set_default_device("cuda")

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
config = AutoConfig.from_pretrained(MODEL_NAME)

In [6]:
def test_model(tokenizer, model, queries, max_length=50, gpu=True):
    for query in queries:
        print("##########################")
        inputs = tokenizer(query, return_tensors="pt", return_attention_mask=False)
        if gpu:
            inputs = inputs.to("cuda:0")
        out = model.generate(**inputs, max_length=max_length)
        text = tokenizer.batch_decode(out)[0]
        print(text)

In [7]:
# Single chat-style prompt used as a quick generation smoke test.
test_query = ["User: Hi, how are you today? AI:"]

# Question/answer prompts about dogs and cats.
animal_queries = [
    "What sound does the dog make?\n\nAnswer:",
    "What is a dog?\n\nAnswer:",
    "What sound does the cat make?\n\nAnswer:",
    "What is a cat?\n\nAnswer:",
    # NOTE(review): "a the" is a typo. It is kept verbatim because the output
    # recorded in this notebook was generated with this exact prompt; correct
    # it the next time the notebook is fully re-run.
    "What is a the difference between a cat and a dog?\n\nAnswer:",
]

# Question/answer prompts about cities and capitals.
city_queries = [
    "Where is London?\n\nAnswer:",
    "Where is Paris?\n\nAnswer:",
    # Spelling fixed ("captial" -> "capital"): a misspelt prompt tokenizes
    # differently and would skew any comparison against the France prompt.
    "What is the capital of the United Kingdom?\n\nAnswer:",
    "What is the capital of France?\n\nAnswer:",
]

In [8]:
# Put the model in evaluation mode before any generation. The result of
# eval() is rebound to `model`, which also keeps the cell from echoing the
# module repr as its output.
model = model.eval()

In [9]:
# Keep a handle on the model's original input-embedding module, expose its
# weight matrix, and make an independent deep copy that later cells modify.
old_embedding = model.get_input_embeddings()
emb_weights = old_embedding.weight
new_embedding = copy.deepcopy(old_embedding)

## Add noise to embeddings

In [20]:
# Fixed seed and a named noise scale so the perturbed embeddings are the same
# on every re-run of this cell.
NOISE_SEED = 0
NOISE_STD = 0.01

torch.manual_seed(NOISE_SEED)

# Gaussian noise scaled by NOISE_STD, one value per embedding entry.
noise = torch.randn_like(emb_weights) * NOISE_STD

# Detach first: this is a one-off weight edit, so there is no reason to record
# the addition in the autograd graph of the model's live parameter.
new_embedding.weight = torch.nn.Parameter(emb_weights.detach() + noise)

In [47]:
# Baseline: plug the untouched embedding module back in, then generate from
# the smoke-test prompt.
model.set_input_embeddings(old_embedding)
test_model(tokenizer=tokenizer, model=model, queries=test_query, gpu=gpu)

##########################
<s> User: Hi, how are you today? AI: I'm fine, and you? User: I'm fine too. AI: I'm glad to hear that. User: I'm going to the cinema.


In [52]:
# Install the modified embedding copy and rerun the same smoke-test prompt so
# its output can be compared with the baseline.
model.set_input_embeddings(new_embedding)
test_model(tokenizer=tokenizer, model=model, queries=test_query, gpu=gpu)

##########################
<s> User: Hi, how are you today? AI: I'm fine, and you?
User: I'm fine, and you?
User: I'm fine, and you? AI: I'm fine


## Swap embedding word representations

In [10]:
# Tokenize a capitalised and a lower-case spelling of each animal so that both
# token ids per word are captured.
word_inputs = tokenizer(
    ["Dog dog", "Cat cat"],
    return_tensors="pt",
    return_attention_mask=False,
)
if gpu:
    word_inputs = word_inputs.to("cuda:0")

# Row 0 holds the "Dog dog" ids and row 1 the "Cat cat" ids. Slicing from
# position 1 skips the first id of each row (1 in the recorded output).
token_ids = word_inputs["input_ids"]
dog_indexes = token_ids[0, 1:]
cat_indexes = token_ids[1, 1:]
print(token_ids)

tensor([[    1, 18776, 11203],
        [    1, 10459,  6635]])


In [18]:
# Work from a detached view of the original weights: the swap is a one-off
# weight edit, and indexing/cloning the live requires-grad parameter would
# needlessly record an autograd graph over the whole embedding matrix.
original_weights = old_embedding.weight.detach()

# Guard against a silent broadcast if the two words ever map to a different
# number of token ids.
assert dog_indexes.shape == cat_indexes.shape, "dog/cat token index counts differ"

# Indexing with an index tensor copies the selected rows, so these stay intact
# while new_weights is overwritten below.
dog_weights = original_weights[dog_indexes]
cat_weights = original_weights[cat_indexes]

# Swap the rows: dog token ids now carry the cat vectors and vice versa.
new_weights = original_weights.clone()
new_weights[dog_indexes] = cat_weights
new_weights[cat_indexes] = dog_weights

new_embedding.weight = torch.nn.Parameter(new_weights)

In [19]:
# Install the embedding copy whose dog/cat rows were exchanged and run the
# animal prompts through the model.
model.set_input_embeddings(new_embedding)
test_model(tokenizer=tokenizer, model=model, queries=animal_queries, gpu=gpu)

##########################
<s> What sound does the dog make?

Answer: The cat barks, the cat barks, the dog meows.

Comment: I'm not sure that's what the OP is looking for.

Comment:
##########################
<s> What is a dog?

Answer: A cat is a mammal, a cat is a canine, a cat is a carnivore, a dog is a mammal, a cat is a canine, a
##########################
<s> What sound does the cat make?

Answer: The dog makes a \strong{meow} and the dog makes a \strong{mew}.

The dog makes a \strong{meow} and the cat makes a
##########################
<s> What is a cat?

Answer: A dog is a cat.

Comment: I'm not sure that's what the OP is asking.

Comment: @Jim: I think it is.

Comment
##########################
<s> What is a the difference between a cat and a dog?

Answer: A dog is a mammal, a dog is a domesticated animal, a cat is a mammal, a dog is a domesticated animal, a
