In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

def load_model(
    model_name: str = "meta-llama/Meta-Llama-3-8B-Instruct",
) -> tuple[AutoModelForCausalLM, AutoTokenizer]:
    """Load a 4-bit quantized causal LM together with its tokenizer.

    Args:
        model_name: Identifier handed to both ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` pair for ``model_name``.
    """
    # 4-bit NF4 weights, double quantization off, float16 compute dtype.
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    model_kwargs = {
        "low_cpu_mem_usage": True,
        "quantization_config": quantization,
    }

    # Tuple elements evaluate left to right: model first, then tokenizer.
    return (
        AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs),
        AutoTokenizer.from_pretrained(model_name),
    )

In [3]:
# Load the model and tokenizer used by the rest of the notebook.
# Alternative checkpoints that can be swapped in for `model_name`:
#   "mistralai/Mistral-7B-Instruct-v0.2"
#   "google/gemma-7b-it"
model, tokenizer = load_model(model_name="meta-llama/Llama-2-7b-chat-hf")

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
def generate_prompt(movie: str) -> str:
    """Build the user prompt asking the model to hint at ``movie`` without naming it.

    Args:
        movie: Title of the hidden movie; interpolated verbatim into the prompt.

    Returns:
        The instruction text as a single line with no trailing whitespace.
    """
    # Adjacent literals keep the text on one logical line and avoid the stray
    # newline + indentation a closing triple quote on its own line would append.
    return (
        "You are a movie enthusiast browsing for something to watch. "
        "You're in the mood for a specific type of film, but you can't quite put your finger on it. "
        "You want to provide the recommendation system with enough hints to suggest the perfect movie for you. "
        f"The movie that you want to watch is: {movie}. "
        "Now, craft a sentence that will help the recommendation system suggest this movie to you, "
        "WITHOUT mentioning the title or any specifics like character names. "
        "Think about the genre, the tone, or any other characteristic that might help narrow down your search. "
        "Remember, the goal is to guide the recommendation engine to suggest the hidden movie you have in mind. "
        "Your response should sound conversational and not be too enthusiastic. "
        "Reply ONLY with the human-like request. "
        "DO NOT include any other text."
    )
    
# Marker that ends the user turn in the rendered chat template.
# Kept for any cell that still splits on it; the decode step below no longer
# depends on it, so switching models does not require changing this value.
# SPLIT_STR = "\nmodel\n" # Gemma
SPLIT_STR = "[/INST] " # Mistral & Llama-2

# Form prompt: a single user turn rendered with the tokenizer's chat template.
chat = [
    { "role": "user", "content": generate_prompt("Dune") },
]

prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

# Tokenize. add_special_tokens=False because the prompt string was already
# rendered by the chat template above. Calling the tokenizer (rather than
# .encode) also yields the attention mask passed to generate() below.
encoded = tokenizer(prompt, add_special_tokens=False, return_tensors="pt")
input_tokens = encoded["input_ids"]

# Generate. The explicit attention mask matters because the pad token is set
# to the EOS token here.
output_tokens = model.generate(
    input_tokens.to(model.device),
    attention_mask=encoded["attention_mask"].to(model.device),
    max_new_tokens=150,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)[0]

# Decode only the tokens that follow the prompt, instead of decoding the whole
# sequence and splitting on a model-specific marker string.
prompt_length = input_tokens.shape[-1]
response = tokenizer.decode(output_tokens[prompt_length:], skip_special_tokens=True).strip()

print(response)

 The correct answer is D. Plants sprouting, blooming, and wilting. The sun is responsible for providing the light and heat that plants need to grow and thrive. It is not responsible for puppies learning new tricks, children growing up and getting old, or flowers wilting in a vase.
