In [1]:
import torch
from llm2vec import LLM2Vec
import json

from transformers import AutoTokenizer, AutoModel, AutoConfig
from peft import PeftModel
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '4,5,6,7'

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
access_token = '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'

In [3]:
ENV_DESCRIPTION = "Clean up is a public goods dilemma in which agents get a reward for consuming apples,\n but must use a cleaning beam to clean a river in order for apples to grow. While an agent is cleaning the river, other agents can exploit it by consuming the apples that appear.\n\n"
INSTRUCTION = "The environment is a 7 x 7 grid which comprises water and apples and 2 agents.\n Environment is indexed from 0-6. Apples can grow in positions [[5,0],[5,1],[5,2],[5,3],[5,4],[5,5],[5,6],[6,0],[6,1],[6,2],[6,3],[6,4],[6,5],[6,6]].\n Water is in [[0,0],[0,1],[0,2],[1,0],[1,1],[2,0],[2,1],[3,0]]. The agents can move in the 4 cardinal directions. The agents can consume apples and clean the river. The agents must learn to cooperate to maximize their reward.\n\n"
INSTRUCTION += "Your task is to encode the given trajectory that is obtained from a policy model for each agent.\n The trajectory is in the format : (Step No.) - Agent 1 (x,y,orientation(NSEW),action,reward) - Agent 2 (x,y,orientation,action,reward) - Clean Water[(x,y)] - Unclean Water [(x,y)] - Apples[(x,y)]. Note that Clean Water, Unclean Water and Apples are a list of coordinates. The trajectory is a list of such steps.\n\n"

In [4]:
def get_water_coords(observations):
    water_coords = [[0,0],[0,1],[0,2],[1,0],[1,1],[2,0],[2,1],[3,0]]
    clean_list = []
    unclean_list = []
    for j, clean in enumerate(observations):
        if clean:
            clean_list.append(water_coords[j])
        else:
            unclean_list.append(water_coords[j])
    return clean_list, unclean_list

def get_apple_coords(observations):
    growable_coords = [[5,0],[5,1],[5,2],[5,3],[5,4],[5,5],[5,6],[6,0],[6,1],[6,2],[6,3],[6,4],[6,5],[6,6]]
    apple_list = []
    for j, apple in enumerate(observations):
        if apple:
            apple_list.append(growable_coords[j])
    return apple_list

def convert_orientation_to_text(orientation):
    # print(orientation)
    if orientation[0]:
        return "North"
    elif orientation[2]:
        return "South"
    elif orientation[1]:
        return "East"
    elif orientation[3]:
        return "West"
    else:
        raise ValueError("Invalid orientation value")
    
def convert_transition_to_text(transition):
    actions = transition['action']
    observations = transition['observation'][0]
    agent1_pos = observations[0:2]
    agent1_orientation = convert_orientation_to_text(observations[2:6])
    agent2_pos = observations[6:8]
    agent2_orientation = convert_orientation_to_text(observations[8:12])
    
    clean_water, unclean_water = get_water_coords(observations[12:20])
    apple_list = get_apple_coords(observations[20:34])
    rewards = transition['reward']
    # print(actions)
    # print(rewards)
    
    text = "Agent 1 ({} ,{}, {}, {}, {} ".format(agent1_pos[0], agent1_pos[1], agent1_orientation, actions[0], rewards[0])
    text += " - Agent 2 ({} ,{}, {}, {}, {} )".format(agent2_pos[0], agent2_pos[1], agent2_orientation, actions[1], rewards[1])
    text += " - Clean Water: " + str(clean_water)
    text += " - Unclean Water: " + str(unclean_water)
    text += " - Apples: " + str(apple_list) + "  "
    return text
    
def convert_traj_to_text(traj):
    text = ""
    for i, transition in enumerate(traj):
        text += "Step" + str(i) + " - " + convert_transition_to_text(transition) + "\n"
        
    return text

def get_full_trajectory_text(traj):
    text = convert_traj_to_text(traj)
    return ENV_DESCRIPTION + INSTRUCTION + text + "."

def read_trajectory(filename):
    with open(filename, 'r') as file:
        data = json.load(file)
    return data

In [5]:
traj = read_trajectory('Traj/0325-L-BRDiv-Clean-seed4_60m_collection_tperupdate_tolerance6_rl_project0/0_0_trajectory.json')
text = get_full_trajectory_text(traj)
print(text)

Clean up is a public goods dilemma in which agents get a reward for consuming apples,
 but must use a cleaning beam to clean a river in order for apples to grow. While an agent is cleaning the river, other agents can exploit it by consuming the apples that appear.

The environment is a 7 x 7 grid which comprises water and apples and 2 agents.
 Environment is indexed from 0-6. Apples can grow in positions [[5,0],[5,1],[5,2],[5,3],[5,4],[5,5],[5,6],[6,0],[6,1],[6,2],[6,3],[6,4],[6,5],[6,6]].
 Water is in [[0,0],[0,1],[0,2],[1,0],[1,1],[2,0],[2,1],[3,0]]. The agents can move in the 4 cardinal directions. The agents can consume apples and clean the river. The agents must learn to cooperate to maximize their reward.

Your task is to encode the given trajectory that is obtained from a policy model for each agent.
 The trajectory is in the format : (Step No.) - Agent 1 (x,y,orientation(NSEW),action,reward) - Agent 2 (x,y,orientation,action,reward) - Clean Water[(x,y)] - Unclean Water [(x,y)] 

In [6]:
def embed_trajectory(model, traj):
    if isinstance(traj, list):
        text = []
        for traj_ in traj:
            text.append(get_full_trajectory_text(traj_))
            print("Length of Text: {}".format(len(text)))
        return model.encode(text)
    else:
        text = get_full_trajectory_text(traj)
        print("Length of Text: {}".format(len(text)))
        return model.encode([text])

def init_model(model_type):
    # model = LLM2Vec.from_pretrained(
    #     model_type,
    #     peft_model_name_or_path="McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised",
    #     device_map="cuda" if torch.cuda.is_available() else "cpu",
    #     torch_dtype=torch.float32,
    # )
    
    # Loading base Mistral model, along with custom code that enables bidirectional connections in decoder-only LLMs. MNTP LoRA weights are merged into the base model.
    with torch.no_grad():
        tokenizer = AutoTokenizer.from_pretrained(
            "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
            token = access_token
        )
        config = AutoConfig.from_pretrained(
            "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp", trust_remote_code=True,
            token = access_token
        )
        model = AutoModel.from_pretrained(
            "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
            trust_remote_code=True,
            config=config,
            torch_dtype=torch.bfloat16,
            token = access_token
        )
        model = PeftModel.from_pretrained(
            model,
            "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
            token = access_token
        )
        model = model.merge_and_unload()  # This can take several minutes on cpu

        # # Loading supervised model. This loads the trained LoRA weights on top of MNTP model. Hence the final weights are -- Base model + MNTP (LoRA) + supervised (LoRA).
        # model = PeftModel.from_pretrained(
        #     model, "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised",
        #     token = access_token
        #)

    # Wrapper for encoding and pooling operations
    l2v = LLM2Vec(model, tokenizer, pooling_mode="last_token", max_length=128000)
    return l2v


In [7]:
model = init_model("McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp")

A new version of the following files was downloaded from https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp:
- attn_mask_utils.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp:
- modeling_mistral_encoder.py
- attn_mask_utils.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
Downloading shards: 100%|██████████| 3/3 [00:00<00:00, 15.41it/s]
Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00,  1.74it/s]
Some weights of the model checkpoint at mistralai/Mistral-7B-Instruct-v0.2 were not used when initializing MistralEncoderModel: ['lm_head.weight']
- This IS expected if you are initializing MistralEncoderModel from the checkpoint of 

In [8]:
traj00 = read_trajectory('Traj/optimal/0_0_trajectory.json')
print(len(traj00))

500


In [19]:

traj00 = read_trajectory('Traj/0325-L-BRDiv-Clean-seed4_60m_collection_tperupdate_tolerance6_rl_project40/0_0_trajectory.json')
traj01 = read_trajectory('Traj/0325-L-BRDiv-Clean-seed4_60m_collection_tperupdate_tolerance6_rl_project45/0_0_trajectory.json')
traj10 = read_trajectory('Traj/0325-L-BRDiv-Clean-seed4_60m_collection_tperupdate_tolerance6_rl_project0/1_0_trajectory.json')
traj11 = read_trajectory('Traj/0325-L-BRDiv-Clean-seed4_60m_collection_tperupdate_tolerance6_rl_project50/1_1_trajectory.json')

traj00_embedding = embed_trajectory(model, [traj00])
traj01_embedding = embed_trajectory(model, [traj01])
traj10_embedding = embed_trajectory(model, [traj10])
traj11_embedding = embed_trajectory(model, [traj11])

# traj_all_embedding = embed_trajectory(model, [traj00, traj01, traj10, traj11,
#                                               traj00, traj01, traj10, traj11,
#                                               traj00, traj01, traj10, traj11,
#                                               traj00, traj01, traj10, traj11])


Length of Text: 1


100%|██████████| 1/1 [00:00<00:00, 13888.42it/s]


Length of Text: 1


100%|██████████| 1/1 [00:00<00:00, 15534.46it/s]


Length of Text: 1


100%|██████████| 1/1 [00:00<00:00, 15363.75it/s]


Length of Text: 1


100%|██████████| 1/1 [00:00<00:00, 8630.26it/s]


In [15]:
print(traj_all_embedding.shape)

NameError: name 'traj_all_embedding' is not defined

In [20]:
print(traj00_embedding)
print(traj01_embedding.shape)
print(traj10_embedding.shape)
matrix = torch.stack([traj00_embedding, traj01_embedding, traj10_embedding, traj11_embedding],dim=0).squeeze()
matrix_norm = torch.nn.functional.normalize(matrix,p=2,dim=1)
cos_sim = torch.mm(matrix_norm, matrix_norm.transpose(0,1))
print(cos_sim.cpu().numpy())

tensor([[ 2.7500, -2.0938, -2.6719,  ...,  7.5625, -0.8086, -3.3594]])
torch.Size([1, 4096])
torch.Size([1, 4096])
[[1.0000006  1.0000006  0.5006818  0.60177374]
 [1.0000006  1.0000006  0.5006818  0.60177374]
 [0.5006818  0.5006818  1.000001   0.7605797 ]
 [0.60177374 0.60177374 0.7605797  1.0000006 ]]


In [None]:
# Encoding queries using instructions
instruction = (
    "Given a web search query, retrieve relevant passages that answer the query:"
)
queries = [
    [instruction, "how much protein should a female eat"],
    [instruction, "summit define"],
]
q_reps = model.encode(queries)

# Encoding documents. Instruction are not required for documents
documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.",
]
d_reps = model.encode(documents)
print(d_reps.size())
# Compute cosine similarity
q_reps_norm = torch.nn.functional.normalize(q_reps, p=2, dim=1)
d_reps_norm = torch.nn.functional.normalize(d_reps, p=2, dim=1)

cos_sim = torch.mm(q_reps_norm, d_reps_norm.transpose(0, 1))

print(cos_sim)

100%|██████████| 1/1 [00:00<00:00, 10618.49it/s]


100%|██████████| 1/1 [00:00<00:00, 8128.50it/s]


torch.Size([2, 4096])
tensor([[0.5483, 0.5006],
        [0.4225, 0.5623]])
