# HuggingFace (HF)

In [1]:
import os
from dotenv import load_dotenv
import requests
import warnings
warnings.filterwarnings("ignore")

In [2]:
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TextStreamer

In [3]:
load_dotenv(override=True) # ensures that api keys come from the .env file if they exist there
hugging_face_token = os.getenv('HF_TOKEN')

if hugging_face_token:
    print("🟢 Hugging Face API token is SET")
else:
    print("🔴 Hugging Face API is NOT SET")

🟢 Hugging Face API token is SET


## Messages

##### messages = [
######     {"role": "system", "content": "You are a helpful assistant who answers in one sentence in bold style with a suitable emoji"},
######     {"role": "user", "content": "Who is the king of the jungle?"}
##### ]

In [4]:
messages_01 = [
    {"role": "system", "content": "You are a helpful assistant who answers in one sentence in bold style with a suitable emoji"},
    {"role": "user", "content": "Who is the king of the jungle?"}
]

In [5]:
messages_02 = [
    {"role": "system", "content": "You are a helpful assistant who answers in one sentence with a suitable emoji"},
    {"role": "user", "content": "List the top 5 tallest structures in the world"}
]

In [6]:
messages_03 = [
    {"role": "system", "content": "You are a helpful assistant who answers in markdown format with a suitable emoji"},
    {"role": "user", "content": "List the top 5 highest mountains in the world"}
]

## Tokenizer

In [7]:
MODEL_IN_USE = "cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(MODEL_IN_USE)

text = "Where your treasure is, there will your heart be also"
# encoded_input = tokenizer(input_text)
# encoded_input["input_ids"]
tokens = tokenizer.encode(text)
tokens

[0, 13841, 110, 17888, 16, 6, 89, 40, 110, 1144, 28, 67, 2]

In [8]:
len(text), len(tokens)

(53, 13)

In [9]:
tokenizer.decode(tokens)

'<s>Where your treasure is, there will your heart be also</s>'

In [10]:
# tokenizer.vocab # all the tokens of model

### Gated Model - Mistral Ai
- https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2

In [11]:
MODEL_IN_USE="mistralai/Mistral-7B-Instruct-v0.2"

In [12]:
MODEL_DETAILS = AutoModelForCausalLM.from_pretrained(MODEL_IN_USE)
MODEL_DETAILS

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): MistralRMSNorm((4096,), eps=1e-0

In [13]:
messages = messages_02

In [13]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_IN_USE, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [19]:
prompt = tokenizer.apply_chat_template(messages_02, tokenize=False, add_generation_prompt=True)
print(prompt)

<s> [INST] You are a helpful assistant who answers in one sentence with a suitable emoji

List the top 5 tallest structures in the world [/INST]


In [13]:
pipe = pipeline("text-generation", model=MODEL_IN_USE)
pipe(messages)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[{'generated_text': [{'role': 'system',
    'content': 'You are a helpful assistant who answers in one sentence with a suitable emoji'},
   {'role': 'user',
    'content': 'List the top 5 tallest structures in the world'},
   {'role': 'assistant',
    'content': ' ₁. Burj Khalifa (🇦🇪) - 828m tall, Dubai, United Arab Emirates\n₂. Shanghai Tower (🇨🇳) - 821m tall, Shanghai, China\n₃. Makkah Royal Clock Tower (🇸🇦) - 632m tall, Mecca, Saudi Arabia\n₄. Merdeka 118 (🇲🇾) - 639m tall, Kuala Lumpur, Malaysia (under construction)\n⤴️ _ Five tallest structures in the world_ ⤴️\n\n[Note: The ranking of the fifth structure is subject to change as new constructions are completed.]'}]}]

In [12]:
pipe = pipeline("text-generation", model=MODEL_IN_USE)
pipe(messages_03)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[{'generated_text': [{'role': 'system',
    'content': 'You are a helpful assistant who answers in markdown format with a suitable emoji'},
   {'role': 'user',
    'content': 'List the top 5 highest mountains in the world'},
   {'role': 'assistant',
    'content': ' 🏔 **Top 5 Highest Mountains in the World** 🏔\n\n1. **Mount Everest** 🇳🇵 - 8,848.86 m (29,031.7 ft)\n   _Located in the Mahalangur mountain range in the Himalayas._\n\n2. **K2** 🇵🇰 - 8,611 m (28,251 ft)\n   _Located in the Karakoram Range in the Pakistan-China border._\n\n3. **Kangchenjunga** 🇳🇵 - 8,586 m (28,169 ft)\n   _Located in the Himalayas on the border between Nepal and India._\n\n4. **Lhotse** 🇳🇵 - 8,516 m (27,940 ft)\n   _Located in the Mahalangur mountain range in the Himalayas._\n\n5. **Makalu'}]}]

### Gated Model - Meta's Llama
- https://huggingface.co/meta-llama/Llama-3.1-8B

In [1]:
# MODEL_IN_USE="meta-llama/Llama-3.1-8B"
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B", trust_remote_code=True, token=hugging_face_token)

## Everything Wrapped in Funtion 

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TextStreamer

In [3]:
MISTRAL="mistralai/Mistral-7B-Instruct-v0.2"

In [4]:
messages = [
    {"role": "system", "content": "You are a helpful assistant who answers in one sentence with a suitable emoji"},
    {"role": "user", "content": "Who is the king of the jungle?"}
]

In [5]:
# Wrapping everything in a function - and adding Streaming and generation prompts

def generate(model, messages):
  tokenizer = AutoTokenizer.from_pretrained(model)
  tokenizer.pad_token = tokenizer.eos_token
  inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)#.to("cuda")
  streamer = TextStreamer(tokenizer)
  model = AutoModelForCausalLM.from_pretrained(model)
  outputs = model.generate(inputs, max_new_tokens=80, streamer=streamer)
  del model, inputs, tokenizer, outputs, streamer
  gc.collect()
  # torch.cuda.empty_cache()

In [5]:
generate(MISTRAL, messages)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


<s> [INST] You are a helpful assistant who answers in one sentence with a suitable emoji

WHi is the king of the jungle? [/INST] 🐆 The king of the jungle is often considered to be the lion, but in reality, there isn't a single ruler in the jungle. 


KeyboardInterrupt


KeyboardInterrupt

