In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [21]:
# Step 1: Load a Quantized Model
# Using a smaller model to keep it lightweight; quantization reduces model size and speeds up inference.
model_name = "EleutherAI/gpt-neo-125M"

In [2]:
## Better Model ###

model_name = "tiiuae/falcon-7b-instruct"  # Instruct version of Falcon 7B

model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/17.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.13k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

In [3]:
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading tokenizer...


In [24]:
# Step 2: Teaching Prompt Engineering
# Define different styles of prompts
print("\n### PROMPT ENGINEERING EXAMPLES ###")


### PROMPT ENGINEERING EXAMPLES ###


In [25]:
def generate_response(prompt, max_length=50, temperature=0.7):
    """Helper function to generate responses from the model."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(inputs.input_ids, max_length=max_length, temperature=temperature, top_k=50, top_p=0.95)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [43]:
# Example 1: Basic Question
example_1 = {
    "description": "Basic Question",
    "prompt": "What are the benefits of playing team sports like basketball or soccer?"
}
print(generate_response(example_1['prompt']))


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


KeyboardInterrupt: 

In [None]:
# Example 2: Instructional Prompt
example_2 = {
    "description": "Instructional Prompt",
    "prompt": "List 5 benefits of playing sports regularly."
}
print(generate_response(example_2['prompt']))


In [None]:
# Example 3: Creative Writing Prompt
example_3 = {
    "description": "Creative Writing Prompt",
    "prompt": "Write a short story about a young athlete who overcomes challenges to win their first championship."
}
print(generate_response(example_3['prompt']))


In [None]:
# Example 4: Specific Formatting
example_4 = {
    "description": "Specific Formatting",
    "prompt": "Provide a bullet-point list of drills to improve soccer skills."
}
print(generate_response(example_4['prompt']))


In [None]:
# Example 5: Role-Playing
example_5 = {
    "description": "Role-Playing",
    "prompt": "You are a sports coach. Advise me on how to prepare for my first marathon."
}
print(generate_response(example_5['prompt']))


In [35]:
# Example 6: Chained Prompts
example_6 = {
    "description": "Chained Prompts",
    "prompt": "What are the benefits of training for endurance sports? Use these to suggest a weekly training plan for a beginner triathlete."
}
print(generate_response(example_6['prompt']))


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


What are the benefits of training for endurance sports? Use these to suggest a weekly training plan for a beginner triathlete.

The benefits of training for endurance sports are not limited to the training itself. The benefits of training for endurance sports include


In [13]:
# Step 3: Experimenting with Parameters
print("\n### EXPERIMENTING WITH PARAMETERS ###")
prompt = "Explain why regular sleep is important for health."

parameters = [
    {"max_length": 30, "temperature": 0.5},
    {"max_length": 50, "temperature": 0.7},
    {"max_length": 70, "temperature": 1.0},
]


### EXPERIMENTING WITH PARAMETERS ###


In [None]:
for params in parameters:
    print(f"\n--- max_length: {params['max_length']}, temperature: {params['temperature']} ---")
    print(generate_response(prompt, max_length=params['max_length'], temperature=params['temperature']))

In [None]:
# Step 4: Advanced Prompt Engineering Techniques
# Few-shot learning demonstration
print("\n### FEW-SHOT LEARNING ###")
few_shot_prompt = (
    "Question: What is the capital of France?\nAnswer: Paris\n"
    "Question: Who wrote '1984'?\nAnswer: George Orwell\n"
    "Question: What is the speed of light?\nAnswer: "
)

In [None]:
print(few_shot_prompt)
print("Response:")
print(generate_response(few_shot_prompt))

print("\nJupyter notebook ready to teach prompt engineering! Experiment with different prompts and parameters for deeper insights.")

# Prompting Techniques

In [16]:
# Zero-Shot Prompting
prompt = "Who won the FIFA World Cup in 2018?"
print(prompt)


Who won the FIFA World Cup in 2018?


In [None]:
# Few-Shot Prompting
few_shot_prompt = """
Answer the following sports trivia questions:

1. Who won the NBA Finals in 2020? -> Los Angeles Lakers.
2. Who is the top scorer in football history? -> Cristiano Ronaldo.
3. Which country has won the most cricket World Cups? -> Australia.
4. Who won the FIFA World Cup in 2018?
"""
print(few_shot_prompt)


In [None]:
# Chain-of-Thought (CoT) Prompting
cot_prompt = """
Q: A basketball team scored 25 points in the first quarter, 30 in the second, 20 in the third, and 15 in the fourth. What was their total score?
A: Let's think step by step. They scored 25 points in the first quarter, 30 in the second quarter, 20 in the third quarter, and 15 in the fourth quarter. Adding these together, 25 + 30 = 55, 55 + 20 = 75, 75 + 15 = 90. The total score is 90.

Q: A football team scored 2 goals in the first half and 1 goal in the second half. How many goals did they score in total?
A: Let's think step by step. They scored 2 goals in the first half and 1 goal in the second half. Adding these together, 2 + 1 = 3. The total is 3 goals.
"""
print(cot_prompt)


In [None]:
# ReAct Prompting (Reasoning + Acting)
react_prompt = """
You are a sports assistant that answers questions by reasoning step by step and then performing an action if needed.

Question: Which country has won the most Olympic gold medals in basketball?
Thought: Basketball has been part of the Olympics for decades, and the USA has consistently dominated this sport. 
Action: Answer is the USA.
"""
print(react_prompt)


In [None]:
# Instruction-Based Prompting
instruction_prompt = "Summarize the performance of Lionel Messi in the 2022 FIFA World Cup in two sentences."
print(instruction_prompt)


In [None]:
# Self-Consistency Prompting
self_consistency_prompt = """
Q: Who holds the record for the fastest 100m sprint?
A: Let's break this down. The fastest 100m sprint was recorded by Usain Bolt in 2009 with a time of 9.58 seconds. The answer is Usain Bolt.

Q: Which tennis player has won the most Grand Slam titles?
A: Let's break this down. Novak Djokovic holds the record for the most Grand Slam titles in tennis. The answer is Novak Djokovic.
"""
print(self_consistency_prompt)


In [None]:
# Contextual Prompting
contextual_prompt = """
Context: The user is interested in basketball.
Task: Explain the role of a point guard in basketball.
Answer: The point guard is often considered the leader on the court. Their role is to set up plays, distribute the ball to teammates, and manage the game's tempo while contributing defensively and offensively.
"""
print(contextual_prompt)


# Exercise 

Play around with the models a bit, feel their power and limitations

In [41]:
%%time

# Example 6: Chained Prompts
ques = 'Whats my name?'
print(generate_response(ques))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


KeyboardInterrupt: 

In [None]:
model_name = "tiiuae/falcon-7b-instruct"  # Instruct version of Falcon 7B

model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Check for GPU availability

In [21]:

example_1 = {
    "description": "Basic Question",
    "prompt": "What are the benefits of playing team sports like basketball or soccer?"
}

# Tokenize the prompt and move input tensors to the same device as the model
inputs = tokenizer(example_1["prompt"], return_tensors="pt").to(device)

# Generate a response
outputs = model.generate(
    inputs["input_ids"],
    max_length=50,
    num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id  # Handle padding token explicitly
)

# Decode the response and print it
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)


What are the benefits of playing team sports like basketball or soccer?
Playing team sports like basketball or soccer can have numerous benefits, including improved physical fitness, increased social skills, and the ability to learn teamwork and cooperation. Additionally, team sports can also


# Reccurent Converastion Builder

In [31]:
def gen_falcon_res(prompt: str) -> str:

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,  # Add padding
        truncation=True,  # Ensure input fits the model's max input size
        max_length=128  
    )
    inputs = {key: value.to(model.device) for key, value in inputs.items()}  # Move to model's device
    
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],  # Pass attention mask
        max_length=50,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id  # Use the assigned `pad_token_id`
    )
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response    

# Exercise

1. Make the model hold a recurrent conversation. How do you do that? Inject the history to the prompt.
2. Have the ability to argue with the chat by injecting its own answers to the prompt.
3. What do you do if the token size is too small for your input? One option is summarizing the input. Do exactly that. You can use the summarization notebook that exists in the course resources (website/git) or just load another LLM, preferably a summarization expert LLM that will do the job for you.