In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM
import torch


messages = [
            {
                "role": "system",
                "content": "You are an expert in Dialogue State Tracking(DST) and python coding. And also you are a talented prompt engineer. \n Please generate the shots which are the most "
            },
            {
                "role": "user",
                "content": "import abc\nfrom dataclasses import dataclass\nfrom typing import Literal, Union\n\nPriceRange = Literal[\"dontcare\", \"cheap\", \"moderate\", \"expensive\"]\nHotelType = Literal[\"hotel\", \"guest house\", \"dontcare\"]\nOption = Literal[\"yes\", \"no\", \"dontcare\"]\nDayOfWeek = Literal[\"monday\", \"tuesday\", \"wednesday\", \"thursday\", \"friday\", \"saturday\", \"sunday\"]\nArea = Literal[\"dontcare\", \"centre\", \"east\", \"north\", \"south\", \"west\"]\n\n\n@dataclass\nclass Hotel:\n    name: str = None\n    price_range: PriceRange = None\n    type: HotelType = None\n    parking: Option = None\n    book_number_of_days: int = None\n    book_day: DayOfWeek = None\n    book_people: int = None\n    area: Area = None\n    stars: Union[int, Literal[\"dontcare\"]] = None  # between 0 and 5 or dontcare\n    internet: Option = None\n\n\n@dataclass\nclass Train:\n    destination: str = None\n    leave_from: str = None\n    day: DayOfWeek = None\n    book_people: int = None\n    depart_time: str = None  # hh:mm or dontcare\n    arrive_by_time: str = None  # hh:mm or dontcare\n\n\nAttractionType = Literal[\"architecture\", \"boat\", \"church\", \"cinema\", \"college\", \"concert hall\", \"entertainment\",\n                         \"hotspot\", \"multiple sports\", \"museum\", \"nightclub\", \"park\", \"special\", \"swimming pool\",\n                         \"theatre\", \"dontcare\"]\n\n\n@dataclass\nclass Attraction:\n    name: str = None\n    area: Area = None\n    type: AttractionType = None\n\n\n@dataclass\nclass Restaurant:\n    name: str = None\n    food_type: str = None\n    price_range: PriceRange = None\n    area: Area = None\n    book_time: str = None  # hh:mm or dontcare\n    book_day: DayOfWeek = None\n    book_people: int = None\n\n\n@dataclass\nclass Taxi:\n    destination: str = None\n    leave_from: str = None\n    depart_time: str = None  # hh:mm or dontcare\n    arrive_by_time: str = None  # hh:mm or dontcare\n\n\n@dataclass\nclass BeliefState:\n    hotel: Hotel = None\n    train: Train = None\n    attraction: Attraction = None\n    restaurant: Restaurant = None\n    taxi: Taxi = None\n\n\nclass DialogueAgent(abc.ABC):\n\n    state: BeliefState\n\n    @abc.abstractmethod\n    def find_hotel(self, name: str = None, price_range: PriceRange = None, type: HotelType = None,\n                    parking: Option = None, book_number_of_days: int = None, book_day: DayOfWeek = None,\n                    book_people: int = None, area: Area = None, stars: Union[int, Literal[\"dontcare\"]] = None,\n                    internet: Option = None) -> Hotel:\n        pass\n\n    @abc.abstractmethod\n    def find_train(self, destination: str = None, leave_from: str = None, day: DayOfWeek = None,\n                   book_people: int = None, depart_time: str = None, arrive_by_time: str = None) -> Train:\n        pass\n\n    @abc.abstractmethod\n    def find_attraction(self, name: str = None, area: Area = None, type: AttractionType = None) -> Attraction:\n        pass\n\n    @abc.abstractmethod\n    def find_restaurant(self, name: str = None, food_type: str = None, price_range: PriceRange = None,\n                        area: Area = None, book_time: str = None, book_day: DayOfWeek = None,\n                        book_people: int = None, ) -> Restaurant:\n        pass\n\n    @abc.abstractmethod\n    def find_taxi(self, destination: str = None, leave_from: str = None, depart_time: str = None,\n                  arrive_by_time: str = None) -> Taxi:\n        pass\n\n    def get_state(self) -> BeliefState:\n        return state\n\n\nif __name__ == '__main__':\n    agent = DialogueAgent()\n    state = BeliefState()\n\n    \n\n    #### Example 1 ####\n    agent.state = BeliefState.from_dict({\"hotel\": {\"area\": \"north\", \"parking\": \"yes\", \"price_range\": \"moderate\", \"stars\": 4, \"internet\": \"yes\"}})\n    print(\"system: there are 6 hotel -s in the north part of the city that fall in the moderate price range , have free wifi and parking , and have 4 stars . any other specification -s ?\")\n    print(\"user: i am not looking to book yet , can you tell me their price range -s ?\")\n"
            },
            {
                "role": "assistant",
                "content": "    agent.state.update({})  # no change\n"
            },
            {
                "role": "user",
                "content": "\n    #### Example 11 ####\n    agent.state = BeliefState.from_dict({})\n    print(\"user: i looking for information about a hotel in the moderate price range that include -s free wifi .\")\n"
            }
        ]


In [5]:
model_id = "meta-llama/Meta-Llama-3-70B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Downloading shards:   0%|          | 0/30 [00:00<?, ?it/s]

model-00002-of-00030.safetensors:  93%|#########3| 4.34G/4.66G [00:00<?, ?B/s]

model-00003-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00005-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00006-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00007-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00008-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00009-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00010-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00011-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00012-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00013-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00014-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00015-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00016-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00017-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00018-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00019-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00020-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00021-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00022-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00023-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00024-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00025-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00026-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00027-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

model-00028-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00029-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00030-of-00030.safetensors:   0%|          | 0.00/2.10G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/30 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]



In [6]:
stop_sequences = ['--', '\n', ';', '#']
terminators =  [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")    
]
input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

sequences, logits = [], []
for _ in range(2):
    # result = openai.completions.create(**args)
    result = model.generate(
        input_ids,
        max_new_tokens=120,
        eos_token_id=terminators,
        do_sample=True,
        top_p=0.9,
        output_scores= True,
        return_dict_in_generate=True,
        stop_strings=stop_sequences,
        tokenizer=tokenizer,
        output_logits=True,
        )

    sequences.append(result['sequences'][0])
    logits.append(result['logits'])


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


OutOfMemoryError: CUDA out of memory. Tried to allocate 448.00 MiB. GPU 

In [76]:
[len(logit) for logit in logits]

[24, 20]

In [82]:
[torch.stack(logit).softmax(dim=-1).max(dim=-1)[0].log().sum().item() for logit in logits]

[-0.6234351992607117, -0.6008124947547913]

: 

In [74]:
dict(zip(
    [tokenizer.decode(seq[input_len:], skip_special_tokens=True) for seq in sequences],
    [torch.stack(logit).softmax(dim=-1).max(dim=-1)[0].log().sum(0).item() for logit in logits]))

{'agent.state.hotel = agent.find_hotel(price_range="moderate", internet="yes", type="hotel")': -0.6234351992607117,
 'agent.state.hotel = agent.find_hotel(price_range="moderate", internet="yes")': -0.6008124947547913}

In [62]:
result['sequences'].shape

torch.Size([1, 1915])

In [59]:
input_len = input_ids.shape[-1]

dict(
    zip(
        [tokenizer.decode(seq[input_len:], skip_special_tokens=True) for seq in result['sequences']] ,
        torch.stack(result['scores']).softmax(dim=-1).max(dim=-1)[0].log().sum(0)
        )
    )

{'agent.state.hotel = agent.find_hotel(price_range="moderate", internet="yes", type="hotel")': tensor(-0.6234, device='cuda:0')}

In [57]:
torch.stack(result['scores']).softmax(dim=-1).max(dim=-1)[0].log().sum(0)

tensor([-0.6234], device='cuda:0')

In [36]:
# list of torch.tensor to a tensor
from torch import nn
torch.max(nn.functional.softmax(torch.stack(result['logits']), dim=-1), dim=-1)[0].log().sum().item()

-0.6234351992607117

In [42]:
torch.stack(result['logits']).softmax(dim=-1).max(dim=-1)[0].log().sum().item()

-0.6234351992607117

In [None]:
from transformers import StoppingCriteria

stop_sequences =  [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")    
]
input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

result_2 = model.generate(
    input_ids,
    max_new_tokens=120,
    eos_token_id=stop_sequences,
    stop_strings=["\n\n", "#", "print("],
    do_sample=False,
    temperature=0.0,
    output_scores= True,
    return_dict_in_generate=True,
    tokenizer=tokenizer
)
tokenizer.decode(result_2['sequences'][0][input_ids.shape[-1]:], skip_special_tokens=True)

NameError: name 'result_2' is not defined

In [None]:
result_2['sequences'][0][input_ids.shape[-1]:].__len__()

In [None]:
result_2['sequences'][0][input_ids.shape[-1]:]

In [None]:
from torch import nn
a = 1
a *= max(nn.functional.softmax(logits) for logits in result_2['scores'])

In [None]:
a = 1
for logits in result_2['scores']:
    a*=torch.max(nn.functional.softmax(logits, dim=-1))

In [None]:
a

In [None]:
result_2['sequences'], outputs[0][input_ids.shape[-1]:], response

In [None]:
tokenizer.decode(result_2['sequences'][0][input_ids.shape[-1]:], skip_special_tokens=False), 

In [None]:
result_2['logits']