# Setup

In [1]:
import huggingface_hub
huggingface_hub.login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svâ€¦

In [2]:
from datasets import load_dataset

dataset = load_dataset("financial_phrasebank", "sentences_allagree", split='train', trust_remote_code=True)

#dataset = load_dataset("financial_phrasebank", "sentences_allagree", trust_remote_code=True)

# create a new column with the numeric label verbalised as label_text (e.g. "positive" instead of "0")
label_map = {
    i: label_text 
    for i, label_text in enumerate(dataset.features["label"].names)
}

def add_label_text(example):
    example["label_text"] = label_map[example["label"]]
    return example

dataset = dataset.map(add_label_text)

print(dataset)
# Dataset({
#    features: ['sentence', 'label', 'label_text'],
#    num_rows: 2264
#})

Dataset({
    features: ['sentence', 'label', 'label_text'],
    num_rows: 2264
})


## Prompt Engineering

In [3]:
prompt_financial_sentiment = """\
You are a highly qualified expert trained to annotate machine learning training data.

Your task is to analyze the sentiment in the TEXT below from an investor perspective and label it with only one the three labels:
positive, negative, or neutral.

Base your label decision only on the TEXT and do not speculate e.g. based on prior knowledge about a company. 

Do not provide any explanations and only respond with one of the labels as one word: negative, positive, or neutral

Examples:
Text: Operating profit increased, from EUR 7m to 9m compared to the previous reporting period.
Label: positive
Text: The company generated net sales of 11.3 million euro this year.
Label: neutral
Text: Profit before taxes decreased to EUR 14m, compared to EUR 19m in the previous period.	
Label: negative

Your TEXT to analyse:
TEXT: {text}
Label: """


In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")

chat_financial_sentiment = [{"role": "user", "content": prompt_financial_sentiment}]

prompt_financial_sentiment = tokenizer.apply_chat_template(chat_financial_sentiment, tokenize=False)

# The prompt now includes special tokens: '<s>[INST] You are a highly qualified expert ...  [/INST]'


In [6]:
prompt_financial_sentiment

'<s> [INST] You are a highly qualified expert trained to annotate machine learning training data.\n\nYour task is to analyze the sentiment in the TEXT below from an investor perspective and label it with only one the three labels:\npositive, negative, or neutral.\n\nBase your label decision only on the TEXT and do not speculate e.g. based on prior knowledge about a company. \n\nDo not provide any explanations and only respond with one of the labels as one word: negative, positive, or neutral\n\nExamples:\nText: Operating profit increased, from EUR 7m to 9m compared to the previous reporting period.\nLabel: positive\nText: The company generated net sales of 11.3 million euro this year.\nLabel: neutral\nText: Profit before taxes decreased to EUR 14m, compared to EUR 19m in the previous period.\t\nLabel: negative\n\nYour TEXT to analyse:\nTEXT: {text}\nLabel:  [/INST]'

In [6]:
import torch 
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline 

torch.random.manual_seed(0) 
model = AutoModelForCausalLM.from_pretrained( 
    "microsoft/Phi-3-mini-4k-instruct",  
    device_map="cuda",  
    torch_dtype="auto",  
    trust_remote_code=True,  
) 

tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") 

messages = [ 
    #{"role": "system", "content": "You are a helpful AI assistant."}, 
    {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"}, 
    #{"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."}, 
    #{"role": "user", "content": "What about solving an 2x + 3 = 7 equation?"}, 
] 

pipe = pipeline( 
    "text-generation", 
    model=model, 
    tokenizer=tokenizer, 
) 

generation_args = { 
    "max_new_tokens": 500, 
    "return_full_text": False, 
    "temperature": 0.0, 
    "do_sample": False, 
} 

output = pipe(messages, **generation_args) 
print(output[0]['generated_text'])


`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


 Certainly! Bananas and dragonfruits can be combined in various delicious ways. Here are some creative ideas for incorporating both fruits into your meals or snacks:

1. Smoothie: Blend together a ripe banana, a few slices of dragon fruit, a handful of spinach or kale, a splash of almond milk, and a tablespoon of honey or agave syrup for sweetness. Add a scoop of your favorite protein powder or a handful of ice for a refreshing and nutritious smoothie.

2. Fruit Salad: Slice a ripe banana and a few pieces of dragon fruit, and combine them with other fruits like strawberries, blueberries, and kiwi. Toss the fruits with a drizzle of honey and a squeeze of lime juice for a colorful and flavorful fruit salad.

3. Tropical Salsa: Dice a ripe banana and a few pieces of dragon fruit, and combine them with diced mango, pineapple, and red bell pepper. Add a squeeze of lime juice, a drizzle of honey, and a sprinkle of chopped cilantro for a sweet and tangy tropical salsa. Serve with tortilla chi

## API Calls

In [5]:
import os
import requests

# Choose your LLM annotator
# to find available LLMs see: https://huggingface.co/docs/huggingface_hub/main/en/package_reference/inference_client#huggingface_hub.InferenceClient.list_deployed_models
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"

# docs on different parameters: https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task
generation_params = dict(
    top_p=0.90,
    temperature=0.8,
    max_new_tokens=128,
    return_full_text=False,
    use_cache=False
)

def generate_text(prompt=None, generation_params=None):
    payload = {
        "inputs": prompt, 
        "parameters": {**generation_params}
    }
    response = requests.post(
        API_URL, 
        headers={"Authorization": f"Bearer {huggingface_hub.get_token()}"}, 
        json=payload
    )
    return response.json()[0]["generated_text"]


In [6]:
labels = ["positive", "negative", "neutral"]

def clean_output(string, random_choice=True):
    for category in labels:
        if category.lower() in string.lower():
            return category
    # if the output string cannot be mapped to one of the categories, we either return "FAIL" or choose a random label
    if random_choice:
        return random.choice(labels)
    else:
        return "FAIL"


In [None]:
prompt_formatted = prompt_financial_sentiment.format(text=dataset["sentence"][0])
prompt_formatted

'<s> [INST] You are a highly qualified expert trained to annotate machine learning training data.\n\nYour task is to analyze the sentiment in the TEXT below from an investor perspective and label it with only one the three labels:\npositive, negative, or neutral.\n\nBase your label decision only on the TEXT and do not speculate e.g. based on prior knowledge about a company. \n\nDo not provide any explanations and only respond with one of the labels as one word: negative, positive, or neutral\n\nExamples:\nText: Operating profit increased, from EUR 7m to 9m compared to the previous reporting period.\nLabel: positive\nText: The company generated net sales of 11.3 million euro this year.\nLabel: neutral\nText: Profit before taxes decreased to EUR 14m, compared to EUR 19m in the previous period.\t\nLabel: negative\n\nYour TEXT to analyse:\nTEXT: According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .\nLabel:  [/INST]'

In [None]:
payload = {"inputs": prompt_formatted, "parameters": {**generation_params}}
response = requests.post(
    API_URL,
    headers={"Authorization": f"Bearer {huggingface_hub.get_token()}"},
    json=payload
)

response

<Response [404]>

In [None]:
output_simple = []
for text in dataset["sentence"][:4]:
    # add text into the prompt template
    prompt_formatted = prompt_financial_sentiment.format(text=text)

    # send text to API
    output = generate_text(
        prompt=prompt_formatted, generation_params=generation_params
    )
    # clean output
    output_cl = clean_output(output, random_choice=True)
    output_simple.append(output_cl)


In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"}
]

inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")

outputs = model.generate(inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


model.safetensors.index.json:   0%|          | 0.00/92.7k [00:00<?, ?B/s]

Fetching 19 files:   0%|          | 0/19 [00:00<?, ?it/s]

model-00003-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00006-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00008-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00001-of-00019.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

model-00005-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00007-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00009-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00010-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00011-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00012-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00013-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00014-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00015-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00016-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00017-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00018-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00019-of-00019.safetensors:   0%|          | 0.00/4.22G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Some parameters are on the meta device because they were offloaded to the cpu and disk.


RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx