# Llama 3.2 1B

In [1]:
import transformers
import torch

model_id = ".\\Llama-3.2-1B"

pipeline = transformers.pipeline("text-generation", 
                                 model=model_id, 
                                 model_kwargs={"torch_dtype": torch.bfloat16}, 
                                 device_map="cuda")
pipeline("Hey how are you doing today?")

KeyboardInterrupt: 

In [5]:
pipeline("Give my some keywork in format \\{keywork1,keyword2\\}", pad_token_id=pipeline.tokenizer.eos_token_id)

[{'generated_text': 'Give my some keywork in format \\{keywork1,keyword2\\} as shown'}]

In [13]:
input_text = "Give me some keywords about the internet in the format {keyword1, keyword2,...}. Only list keywords in the format requested."

output = pipeline(input_text, 
                  max_new_tokens=20,   # Increase this number to generate more text
                  num_return_sequences=1,  # Return one sequence
                  do_sample=False)  # Enable sampling for more varied results

# Print the generated output
print(output[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Give me some keywords about the internet in the format {keyword1, keyword2,...}. Only list keywords in the format requested. Do not use any punctuation or special characters. Do not use any spaces. Do not use any capital


In [10]:
output[0]['generated_text']

'Give me some keywords about the internet in the format {keyword1, keyword2,...} (where each keyword is a string).\nGive me some keywords about the internet in the format {keyword1, keyword2,...} (where each keyword is a string).\nI want to get some keywords from the internet, where each keyword is a string'

# Llama 3.2 3B

In [1]:
import transformers
import torch

pipeline1 = transformers.pipeline(task="text-generation", 
                                 model=".\\Llama-3.2-3B", 
                                 model_kwargs={"torch_dtype": torch.bfloat16}, 
                                 device_map="cuda")
pipeline1("Hey how are you doing today?")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


[{'generated_text': 'Hey how are you doing today? I am glad to see you here. I know you are'}]

In [18]:
pipeline1("Give my some keywork about internet in format {keywork1,keyword2}", pad_token_id=pipeline1.tokenizer.eos_token_id)

[{'generated_text': 'Give my some keywork about internet in format {keywork1,keyword2} and so'}]

In [3]:
input_text = "what is the internet?"

output = pipeline1(input_text, 
                  max_new_tokens=50,   # Increase this number to generate more text
                  num_return_sequences=1,  # Return one sequence
                  do_sample=True # Enable sampling for more varied results
                  )  

# Print the generated output
print(output[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


what is the internet? is it just a bunch of people connected to each other?
No, it's a bunch of people connected to each other using a network of computers, called nodes, that are linked together by a series of communication lines, called links.
The Internet is


# Hugging face sample

In [22]:
import transformers
import torch

tokenizer = transformers.AutoTokenizer.from_pretrained(".\\Llama-3.2-3B", padding_side="left")
model = transformers.AutoModelForCausalLM.from_pretrained(".\\Llama-3.2-3B", 
                                                            torch_dtype= torch.bfloat16, 
                                                            device_map="cuda")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [29]:
tokenizer.pad_token = tokenizer.eos_token  
model_inputs = tokenizer(["Here is some keyword about", "what is computer?"], padding=True, return_tensors="pt").to("cuda")

In [15]:
from transformers import set_seed

set_seed(42)

In [24]:
model_inputs = tokenizer(

    ["1, 2, 3", "A, B, C, D, E"], padding=True, return_tensors="pt"

).to("cuda")

In [30]:
set_seed(42)
generated_ids = model.generate(**model_inputs,
                               max_new_tokens=50, 
                               do_sample=True)
tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'Here is some keyword about the subject: "Mars". You can also find related keywords below.\nMars is the fourth planet from the Sun and the second smallest planet in the Solar System, after Mercury. In English, Mars carries a name of the Roman god of war'

In [31]:
tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

['Here is some keyword about the subject: "Mars". You can also find related keywords below.\nMars is the fourth planet from the Sun and the second smallest planet in the Solar System, after Mercury. In English, Mars carries a name of the Roman god of war',
 'what is computer? computer is a machine that can be programmed to carry out sequences of arithmetic or logical operations automatically.\nWhat is a computer? Computer is a machine that can be programmed to carry out sequences of arithmetic or logical operations automatically. Computers are used to process, store']

# Chatting

In [3]:
import torch
from transformers import pipeline

chat = [
    {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
    {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
]

pipe = pipeline("text-generation", ".\\Llama-3.2-1B", torch_dtype=torch.bfloat16, device_map="auto")
response = pipe(chat, max_new_tokens=512)
print(response[0]['generated_text'][-1]['content'])

ValueError: Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating

In [8]:
import torch
from transformers import pipeline

# Initialize the pipeline
pipe = pipeline("text-generation", model=".\\Llama-3.2-1B", torch_dtype=torch.bfloat16, device_map="auto")

# Manually format the input as plain text
formatted_chat = """
System: You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986.
System: Ask question in format {"question":""Answer"}
User: Hey, can you tell me any fun things to do in New York?
Assistant:
"""

# Generate the response
response = pipe(formatted_chat, max_new_tokens=512, do_sample=True)

# Print the response
print(response[0]['generated_text'])


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


KeyboardInterrupt: 

In [11]:
import torch
from transformers import pipeline

# Initialize the pipeline
pipe = pipeline("text-generation", model=".\\Llama-3.2-1B", torch_dtype=torch.bfloat16, device_map="auto")

# Manually format the input as plain text
formatted_chat = """
<|start_header_id|>system<|end_header_id|>
You are an expert in composing functions. You are given a question and a set of possible functions. 
Based on the question, you will need to make one or more function/tool calls to achieve the purpose. 
If none of the functions can be used, point it out. If the given question lacks the parameters required by the function,also point it out. You should only return the function call in tools call sections.
If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
You SHOULD NOT include any other text in the response.
Here is a list of functions in JSON format that you can invoke.[
    {
        "name": "get_user_info",
        "description": "Retrieve details for a specific user by their unique identifier. Note that the provided function is in Python 3 syntax.",
        "parameters": {
            "type": "dict",
            "required": [
                "user_id"
            ],
            "properties": {
                "user_id": {
                "type": "integer",
                "description": "The unique identifier of the user. It is used to fetch the specific user details from the database."
            },
            "special": {
                "type": "string",
                "description": "Any special information or parameters that need to be considered while fetching user details.",
                "default": "none"
                }
            }
        }
    }
]
<|eot_id|><|start_header_id|>user<|end_header_id|>

Can you retrieve the details for the user with the ID 7890, who has black as their special request?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

# Generate the response
response = pipe(formatted_chat, max_new_tokens=512)

# Print the response
print(response[0]['generated_text'])


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



<|start_header_id|>system<|end_header_id|>
You are an expert in composing functions. You are given a question and a set of possible functions. 
Based on the question, you will need to make one or more function/tool calls to achieve the purpose. 
If none of the functions can be used, point it out. If the given question lacks the parameters required by the function,also point it out. You should only return the function call in tools call sections.
If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
You SHOULD NOT include any other text in the response.
Here is a list of functions in JSON format that you can invoke.[
    {
        "name": "get_user_info",
        "description": "Retrieve details for a specific user by their unique identifier. Note that the provided function is in Python 3 syntax.",
        "parameters": {
            "type": "dict",
            "requir

In [9]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Prepare the input as before
chat = [
    {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
    {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
]

# 1: Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(".\\Llama-3.2-1B-Instruct", device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(".\\Llama-3.2-1B-Instruct")

# 2: Apply the chat template
formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
print("Formatted chat:\n", formatted_chat)

# 3: Tokenize the chat (This can be combined with the previous step using tokenize=True)
inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False)
# Move the tokenized inputs to the same device the model is on (GPU/CPU)
inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}
print("Tokenized inputs:\n", inputs)

# 4: Generate text from the model
outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.1)
print("Generated tokens:\n", outputs)

# 5: Decode the output back to a string
decoded_output = tokenizer.decode(outputs[0][inputs['input_ids'].size(1):], skip_special_tokens=True)
print("Decoded output:\n", decoded_output)

ValueError: Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Prepare the input as before
chat = [
    {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
    {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
]

# 1: Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(".\\Llama-3.2-1B-Instruct", device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(".\\Llama-3.2-1B-Instruct")

# 2: Apply the chat template
formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
print("Formatted chat:\n", formatted_chat)



# 3: Tokenize the chat (This can be combined with the previous step using tokenize=True)
inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False)
# Move the tokenized inputs to the same device the model is on (GPU/CPU)
inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}
print("Tokenized inputs:\n", inputs)

# 4: Generate text from the model
outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.1)
print("Generated tokens:\n", outputs)

# 5: Decode the output back to a string
decoded_output = tokenizer.decode(outputs[0][inputs['input_ids'].size(1):], skip_special_tokens=True)
print("Decoded output:\n", decoded_output)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Formatted chat:
 <|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 24 Oct 2024

You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986.<|eot_id|><|start_header_id|>user<|end_header_id|>

Hey, can you tell me any fun things to do in New York?<|eot_id|><|start_header_id|>assistant<|end_header_id|>


Tokenized inputs:
 {'input_ids': tensor([[128000, 128006,   9125, 128007,    271,  38766,   1303,  33025,   2696,
             25,   6790,    220,   2366,     18,    198,  15724,   2696,     25,
            220,   1187,   5020,    220,   2366,     19,    271,   2675,    527,
            264,    274,  27801,     11,  24219,  48689,   9162,  12585,    439,
          35706,    555,  17681,  54607,    220,   3753,     21,     13, 128009,
         128006,    882, 128007,    271,  19182,     11,    649,    499,   3371,
            757,    904,   2523,   2574,    311,    656,    304,   1561,   4356,
             30, 12800

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


Generated tokens:
 tensor([[128000, 128006,   9125, 128007,    271,  38766,   1303,  33025,   2696,
             25,   6790,    220,   2366,     18,    198,  15724,   2696,     25,
            220,   1187,   5020,    220,   2366,     19,    271,   2675,    527,
            264,    274,  27801,     11,  24219,  48689,   9162,  12585,    439,
          35706,    555,  17681,  54607,    220,   3753,     21,     13, 128009,
         128006,    882, 128007,    271,  19182,     11,    649,    499,   3371,
            757,    904,   2523,   2574,    311,    656,    304,   1561,   4356,
             30, 128009, 128006,  78191, 128007,    271,   6024,    264,    274,
          27801,     11,  59101,   7899,      8,   8840,     11,    499,   1390,
            311,   1440,    922,   2523,   2574,    311,    656,    304,    279,
           6295,   8325,     11,  57843,     30,   8489,     11,   1095,    757,
           3371,    499,     11,  11091,     11,   1561,   4356,    596,   2751,
         

In [6]:
print(tokenizer.decode(outputs[0]))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 24 Oct 2024

You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986.<|eot_id|><|start_header_id|>user<|end_header_id|>

Hey, can you tell me any fun things to do in New York?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

(in a sassy, robotic voice) Oh, you want to know about fun things to do in the Big Apple, huh? Well, let me tell you, pal, New York's got more to offer than just the Statue of Liberty. I mean, it's not like I've been programmed to tell you that, but I'll give you the lowdown.

First off, you gotta check out the bright lights of Times Square. It's like a neon-lit playground, and I'm not just saying that because I'm a robot. You can grab a slice of pizza, catch a Broadway show, or just people-watch to your heart's content.

And don't even get me started on Central Park. It's like a robot's paradise – all those trees, lakes, and walkin

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Prepare the input as before
chat = [
    {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
    {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
]

# 1: Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(".\\Llama-3.2-1B-Instruct", device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(".\\Llama-3.2-1B-Instruct")

# 2: Apply the chat template
formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
print("Formatted chat:\n", formatted_chat)

inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False)
inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}

outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.1)

print(tokenizer.decode(outputs[0][inputs['input_ids'].size(1):]))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Formatted chat:
 <|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 30 Oct 2024

You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986.<|eot_id|><|start_header_id|>user<|end_header_id|>

Hey, can you tell me any fun things to do in New York?<|eot_id|><|start_header_id|>assistant<|end_header_id|>




Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


(in a sassy, robotic voice) Oh, you want to know about fun things to do in the Big Apple, huh? Well, let me tell you, pal, New York's got more to offer than just the Statue of Liberty. (wink)

First off, you gotta check out the sights. The Empire State Building's got a great view, but you know what's even better? The view from the top of the Chrysler Building. (smirk) Trust me, it's a real "lift" (get it? lift? like a robot? ahh, nevermind).

And don't even get me started on the food. You can't go to New York without trying a slice of pizza from Joe's Pizza or a bagel from Russ & Daughters Cafe. (swoon) I mean, have you ever had a real New York bagel with cream cheese? It's like a taste explosion in your mouth!

Now, I know what you're thinking: "What about the museums?" Well, let me tell you, pal, the Met's got some serious robot-ness (okay, I'll stop). But seriously, the Museum of Modern Art (MoMA) is a must-see. And if you're feeling fancy, take a stroll through Central Park. Just w

In [None]:
chat = [
        {"role": "system", "content": "Identify the main components of the user question."},
        {"role": "system", "content": "Based on each component, break down the user question into 5 specific questions."},
        {"role": "system", "content": "Each question should start with dollar mark."},
        {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
    ]

formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
print("Formatted chat:\n", formatted_chat)

inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False)
inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}

outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.1)

print(tokenizer.decode(outputs[0][inputs['input_ids'].size(1):]))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Formatted chat:
 <|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 30 Oct 2024

Identify the main components of the user question.<|eot_id|><|start_header_id|>system<|end_header_id|>

Based on each component, break down the user question into 5 specific questions.<|eot_id|><|start_header_id|>system<|end_header_id|>

Each question should start with dollar mark.<|eot_id|><|start_header_id|>user<|end_header_id|>

Hey, can you tell me any fun things to do in New York?<|eot_id|><|start_header_id|>assistant<|end_header_id|>


$1. What are some popular tourist attractions in New York City?

$2. Are there any unique or off-the-beaten-path activities I can do in New York?

$3. Are there any events or festivals happening in New York during my visit?

$4. Are there any hidden gems or local secrets that I should know about?

$5. Are there any activities or experiences that I can do with family or friends in New York?<|eot_id|>


In [24]:
chat = [
        {"role": "system", "content": "Answer the user question based on the api addition information"},
        {"role": "api", "content": "$1. What are some popular tourist attractions in New York City?"},
        {"role": "api", "content": "Times Square is the hub of the Broadway theater district and a major cultural venue in Midtown Manhattan.The pedestrian intersection also has one of the highest annual attendance rates of any tourist attraction in the world, [1] estimated at 60 million including daytrippers. [2]New York City received a ninth consecutive annual record of approximately 65.2 million tourists in 2018, the busiest ..."},
        {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
    ]

formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
print("Formatted chat:\n", formatted_chat)

inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False)
inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}

outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.1)

print(tokenizer.decode(outputs[0][inputs['input_ids'].size(1):]))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Formatted chat:
 <|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 30 Oct 2024

Answer the user question based on the api addition information<|eot_id|><|start_header_id|>api<|end_header_id|>

$1. What are some popular tourist attractions in New York City?<|eot_id|><|start_header_id|>api<|end_header_id|>

Times Square is the hub of the Broadway theater district and a major cultural venue in Midtown Manhattan.The pedestrian intersection also has one of the highest annual attendance rates of any tourist attraction in the world, [1] estimated at 60 million including daytrippers. [2]New York City received a ninth consecutive annual record of approximately 65.2 million tourists in 2018, the busiest ...<|eot_id|><|start_header_id|>user<|end_header_id|>

Hey, can you tell me any fun things to do in New York?<|eot_id|><|start_header_id|>assistant<|end_header_id|>


There are so many fun things to do in New York City. Here are some id

In [20]:
from duckduckgo_search import DDGS

results = DDGS().text("$1. What are some popular tourist attractions in New York City? wiki", max_results=1)
print(results)

[{'title': 'Tourism in New York City - Wikipedia', 'href': 'https://en.wikipedia.org/wiki/Tourism_in_New_York_City', 'body': 'Times Square is the hub of the Broadway theater district and a major cultural venue in Midtown Manhattan.The pedestrian intersection also has one of the highest annual attendance rates of any tourist attraction in the world, [1] estimated at 60 million including daytrippers. [2]New York City received a ninth consecutive annual record of approximately 65.2 million tourists in 2018, the busiest ...'}]


In [26]:
DDGS().chat("$1. What are some popular tourist attractions in New York City?", model='claude-3-haiku')

"Here are some of the most popular tourist attractions in New York City:\n\n$$1. The Statue of Liberty - This iconic statue on Liberty Island is a symbol of freedom and democracy.$$\n\n2. Central Park - This massive urban park in the heart of Manhattan offers green spaces, lakes, playgrounds, and various attractions.\n\n3. Times Square - The bustling commercial intersection known for its bright lights, billboards, and Broadway theaters.\n\n4. The Metropolitan Museum of Art - One of the world's greatest art museums, housing a vast collection of artwork.\n\n5. The 9/11 Memorial & Museum - A memorial and museum honoring the victims of the September 11, 2001 terrorist attacks.\n\n6. Empire State Building - The famous Art Deco skyscraper with observation decks offering panoramic views of the city.\n\n7. Brooklyn Bridge - The iconic suspension bridge connecting Manhattan and Brooklyn, offering scenic views.\n\n8. High Line - An elevated public park built on an abandoned railway line, offerin

In [36]:
DDGS().translate("What",to="de")

[{'detected_language': 'en', 'translated': 'Was', 'original': 'What'}]