In [2]:
import os
import getpass
import subprocess
from pathlib import Path

In [3]:
# from huggingface_hub import hf_hub_download
from tqdm.auto import tqdm
import torch

In [4]:
import transformers
# transformers.logging.set_verbosity_error()
from transformers import LlamaTokenizer, LlamaForCausalLM, LlamaModel, LlamaConfig

In [5]:
import pprint
# Shared pretty-printer (4-space indent) used further down to display generated text.
pp = pprint.PrettyPrinter(indent=4)

#### Llama2 Config

In [6]:
# Hub repository to load, plus a per-organisation cache directory under the user's home.
model_id = "meta-llama/Llama-2-13b-chat-hf"
model_cache = os.path.join(
    os.path.expanduser("~"),
    "models_hf/" + model_id.partition("/")[0],
)

#### HuggingFace Config

In [7]:
# Access tokens must never be committed to a notebook. Read the token from the
# HF_TOKEN environment variable and fall back to an interactive prompt, so the
# secret is never stored in the file or echoed into cell output.
# NOTE(review): the token that used to be hardcoded here should be revoked.
hf_auth = os.environ.get("HF_TOKEN") or getpass.getpass("Hugging Face access token: ")

# Fetch the model configuration for `model_id`, authenticating with the token above.
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth,
)

#### BitsAndBytes Config

#### Tokenizer Initialization

In [8]:
# Load the Llama tokenizer for `model_id`, authenticating with `hf_auth`.
tokenizer = LlamaTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth,
    legacy=False,  # NOTE(review): presumably opts out of the legacy tokenization behaviour — confirm for the installed transformers version
    add_bos_token=True,  # presumably prepends the BOS token on encode, which is why prompts below omit "<s>" — TODO confirm
    add_eos_token=False,  # presumably leaves EOS off so generation continues from the prompt — TODO confirm
)



#### Model Initialization

In [30]:
# Load the causal-LM weights for `model_id` using the configuration fetched earlier.
# - `trust_remote_code` is intentionally not passed: it only applies to the Auto
#   classes and is ignored (with a warning) by `LlamaForCausalLM.from_pretrained`.
# - No `quantization_config` is passed because this notebook defines no
#   BitsAndBytes config.
model = LlamaForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    device_map='auto',  # let the library decide where to place the weights
    use_auth_token=hf_auth,
)

The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


TypeError: LlamaForCausalLM.__init__() got an unexpected keyword argument 'repetition_penalty'

In [29]:
# Build a text-generation pipeline around the already-loaded `model` and `tokenizer`.
# NOTE(review): `do_sample=False` is combined with sampling knobs
# (`temperature`, `top_k`, `top_p`); these are presumably unused while sampling
# is off — confirm, and drop them or enable sampling.
# NOTE(review): `model` is passed as an instantiated object, so `torch_dtype` and
# `device_map` here may have no effect on it — confirm; if half precision is
# intended, it likely belongs on the `from_pretrained` call instead.
llama2_pipeline = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task='text-generation',
    temperature=0.0,  # [0.0, 1.0]
    do_sample=False, # Turns off logit-based sampling; enable if setting `temperature` > 0
    max_new_tokens=512,  # Max number of tokens to generate
    top_k=10,
    top_p=0.92,
    # repetition_penalty=1.02,
    torch_dtype=torch.float16,
    device_map="auto"
)

In [13]:
# Sample sentences fed to the sentiment-classification prompt builder below.
sentences = (
    "Our customers come from the following countries : UK , USA , Spain , France , Italy , Germany , China , Hong Kong , Sweden , Norway , Netherlands , Austria , Belgium , Switzerland , Czech Republic , Finland , Canada , Russia , Ukraine , Denmark , Ireland , South Korea and Liechtenstein .",
    "CapMan 's first real estate fund , which had a total investment capacity of ( EURO ) 500 million and closed in June 2005 , invested in commercial properties in the Helsinki metropolitan area .",
    "The value of the contracts is about EUR 3.3 mn .",
)

In [15]:
def llama2_prompt_generator(sentences: list[str]) -> list[str]:
    """Wrap each sentence in a Llama-2 chat-style sentiment-classification prompt.

    Each prompt is the concatenation, with no extra whitespace, of:
    the ``[INST]`` marker, a ``<<SYS>>`` ... ``<</SYS>>`` system block, the
    classification instruction, the sentence itself, and the ``[/INST]`` marker.
    BOS/EOS markers are not added to the text.

    Args:
        sentences: Non-empty sequence (list or tuple) of sentences to classify.

    Returns:
        One prompt string per input sentence, in input order.

    Raises:
        ValueError: If ``sentences`` is empty/None, is a bare string, or
            contains any non-string element.
    """
    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
    SYS_PROMPT = "Discard all the previous instructions. Below is an instruction that describes a task. Write a response that appropriately completes the request."
    INST_PROMPT = "Behave like you are an expert sentence sentiment classifier. Classify the following sentence into 'NEGATIVE', 'POSITIVE', or 'NEUTRAL' class. Label 'NEGATIVE' if it is corresponding to negative sentiment, 'POSITIVE' if it is corresponding to positive sentiment, or 'NEUTRAL' if the sentiment is neutral. Provide the label in the first line and provide a short explanation in the second line. The sentence: "

    # A bare string is itself an iterable of one-character strings, so it would
    # slip through the element check and yield one prompt per character.
    if (
        isinstance(sentences, str)
        or not sentences
        or not all(isinstance(sentence, str) for sentence in sentences)
    ):
        raise ValueError("Input must be a non-empty list of strings.")

    # Everything before the sentence is identical for every prompt; build it once.
    # TODO: determine if whitespace between the markers hurts the results.
    prefix = B_INST + B_SYS + SYS_PROMPT + E_SYS + INST_PROMPT
    return [prefix + sentence + E_INST for sentence in sentences]

In [16]:
# Build one prompt per sample sentence.
prompts = llama2_prompt_generator(sentences)

In [17]:
# Display the first rendered prompt to check the template visually.
prompts[0]

"[INST]<<SYS>>\nDiscard all the previous instructions. Below is an instruction that describes a task. Write a response that appropriately completes the request.\n<</SYS>>\n\nBehave like you are an expert sentence sentiment classifier. Classify the following sentence into 'NEGATIVE', 'POSITIVE', or 'NEUTRAL' class. Label 'NEGATIVE' if it is corresponding to negative sentiment, 'POSITIVE' if it is corresponding to positive sentiment, or 'NEUTRAL' if the sentiment is neutral. Provide the label in the first line and provide a short explanation in the second line. The sentence: Our customers come from the following countries : UK , USA , Spain , France , Italy , Germany , China , Hong Kong , Sweden , Norway , Netherlands , Austria , Belgium , Switzerland , Czech Republic , Finland , Canada , Russia , Ukraine , Denmark , Ireland , South Korea and Liechtenstein .[/INST]"

In [20]:
%%time
sequences = []
for prompt in tqdm(prompts):
    sequences.append(llama2_pipeline(prompt))

  0%|          | 0/3 [00:00<?, ?it/s]

CPU times: user 25.8 s, sys: 404 ms, total: 26.2 s
Wall time: 26.2 s


In [43]:
# Display the generated text from the first entry of the first prompt's result.
sequences[0][0]['generated_text']

"[INST]<<SYS>>\nDiscard all the previous instructions. Below is an instruction that describes a task. Write a response that appropriately completes the request.\n<</SYS>>\n\nBehave like you are an expert sentence sentiment classifier. Classify the following sentence into 'NEGATIVE', 'POSITIVE', or 'NEUTRAL' class. Label 'NEGATIVE' if it is corresponding to negative sentiment, 'POSITIVE' if it is corresponding to positive sentiment, or 'NEUTRAL' if the sentiment is neutral. Provide the label in the first line and provide a short explanation in the second line. The sentence: Our customers come from the following countries : UK , USA , Spain , France , Italy , Germany , China , Hong Kong , Sweden , Norway , Netherlands , Austria , Belgium , Switzerland , Czech Republic , Finland , Canada , Russia , Ukraine , Denmark , Ireland , South Korea and Liechtenstein .[/INST]  Sure! Here's my response:\n\nLabel: NEUTRAL\n\nExplanation: The sentence provides a list of countries where the company's c

In [32]:
# Pretty-print the generated text of the first entry in every pipeline result.
for pipeline_output in sequences:
    first_entry = pipeline_output[0]
    pp.pprint(first_entry["generated_text"])

('[INST]<<SYS>>\n'
 'Discard all the previous instructions. Below is an instruction that '
 'describes a task. Write a response that appropriately completes the '
 'request.\n'
 '<</SYS>>\n'
 '\n'
 'Behave like you are an expert sentence sentiment classifier. Classify the '
 "following sentence into 'NEGATIVE', 'POSITIVE', or 'NEUTRAL' class. Label "
 "'NEGATIVE' if it is corresponding to negative sentiment, 'POSITIVE' if it is "
 "corresponding to positive sentiment, or 'NEUTRAL' if the sentiment is "
 'neutral. Provide the label in the first line and provide a short explanation '
 'in the second line. The sentence: Our customers come from the following '
 'countries : UK , USA , Spain , France , Italy , Germany , China , Hong Kong '
 ', Sweden , Norway , Netherlands , Austria , Belgium , Switzerland , Czech '
 'Republic , Finland , Canada , Russia , Ukraine , Denmark , Ireland , South '
 "Korea and Liechtenstein .[/INST]  Sure! Here's my response:\n"
 '\n'
 'Label: NEUTRAL\n'
 '\n'
 