# Inference Notebook for Prompt Engineering

In [1]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, MistralForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_model(model_name: str = "google/gemma-7b-it") -> tuple[AutoModelForCausalLM, AutoTokenizer]:
    """Load a 4-bit quantized causal language model and its tokenizer.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            model and the tokenizer.

    Returns:
        A ``(model, tokenizer)`` pair. The tokenizer is guaranteed to have a
        pad token and is configured to pad on the left so that padded batches
        can be fed to ``generate``.
    """
    # NF4 4-bit weights without double quantization; matmuls run in float16.
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        low_cpu_mem_usage=True,
        quantization_config=quantization,
        attn_implementation="flash_attention_2",
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Some checkpoints ship without a pad token; reuse EOS so padding=True works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Decoder-only models continue from the last position of each row, so pad
    # tokens must sit on the left. Right-padded batches produce wrong
    # continuations (and are rejected by some flash-attention code paths).
    tokenizer.padding_side = "left"

    return model, tokenizer

In [3]:
def generate_prompt(movie: str) -> str:
    """Build the instruction that asks the model to act as a user requesting a movie.

    Args:
        movie: Title of the hidden movie; it is inserted verbatim between
            double quotes in the instruction.

    Returns:
        The full instruction text. It ends with a newline followed by four
        spaces.
    """
    instruction = (
        "You are a person interacting with a movie recommendation system. "
        "Your goal is to make a short request that will help the system to suggest the movie "
        '"{title}" without mentioning its title, characters, or ANY plot elements. '
        "The response should instead use GENERAL characteristics like the genre, tone, and themes of the movie. "
        "Your request should be concise, sound conversational, and not be too enthusiastic. "
        'For example, the hidden movie "Crazy Stupid Love" should give a request like '
        "\"I'm looking for a silly romantic comedy with a happy ending. Any suggestions?\" "
        "Reply ONLY with the human-like request for a movie. "
        "DO NOT include any other text.\n    "
    )
    # str.format applies the same default formatting to `movie` as an f-string would.
    return instruction.format(title=movie)

In [4]:
class SimulatorDataset(Dataset):
    """Dataset whose items are chat-formatted prompt strings, one per movie row."""

    def __init__(self, movies: pd.DataFrame, tokenizer: AutoTokenizer) -> None:
        """Store the movie table and the tokenizer that supplies the chat template.

        Args:
            movies: Table with a ``movie_title`` column.
            tokenizer: Object providing ``apply_chat_template``.
        """
        self.tokenizer = tokenizer
        self.movies = movies

    def __len__(self):
        """Return the number of rows in the movie table."""
        return len(self.movies)

    def __getitem__(self, idx):
        """Return the untokenized, templated prompt for the row at position ``idx``."""
        row = self.movies.iloc[idx]

        # Single-turn conversation: the instruction is the only user message.
        messages = [{"role": "user", "content": generate_prompt(row["movie_title"])}]

        # tokenize=False keeps the result as text; the generation prompt is
        # appended so the model answers as the assistant.
        return self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [5]:
SPLIT_STR = "[/INST] " # Mistral & Llama-2 end-of-instruction marker (simulate() no longer depends on it)

def simulate(
    model: MistralForCausalLM,
    tokenizer: AutoTokenizer,
    dataloader: DataLoader,
    max_length: int = 2048,
) -> list:
    """Generate one sampled reply per prompt yielded by ``dataloader``.

    The reply is recovered by dropping the prompt tokens from the generated
    sequence rather than by searching the decoded text for a template marker,
    so it works for any chat template and is not confused by replies that
    happen to contain the marker themselves.

    Args:
        model: Causal language model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer used to encode the prompts and decode the output.
        dataloader: Iterable of batches, each a list of prompt strings.
        max_length: Upper bound on newly generated tokens per prompt
            (forwarded as ``max_new_tokens``).

    Returns:
        The replies as a flat list of strings, in the order the prompts were
        yielded, with surrounding whitespace removed.
    """
    responses = []

    for batch in tqdm(dataloader, desc="Simulating", unit="batch"):
        # Tokenize; prompts already carry the chat template, so no extra special tokens.
        input_tokens = tokenizer(batch, return_tensors="pt", add_special_tokens=False, padding=True).to(model.device)

        # Every row is padded to this width, and generate() returns the prompt
        # followed by the continuation, so new tokens start at this offset.
        prompt_length = input_tokens["input_ids"].shape[1]

        # Generate
        output_tokens = model.generate(**input_tokens, max_new_tokens=max_length, do_sample=True, pad_token_id=tokenizer.eos_token_id)

        # Decode only the continuation; strip() normalizes any leading space
        # the tokenizer emits for the first generated token.
        responses.extend(
            tokenizer.decode(output[prompt_length:], skip_special_tokens=True).strip()
            for output in output_tokens
        )

    return responses

In [6]:
# Instantiate the instruct checkpoint and its matching tokenizer
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
model, tokenizer = load_model(MODEL_NAME)

Loading checkpoint shards: 100%|██████████| 3/3 [00:04<00:00,  1.63s/it]


In [7]:
# Read in the set of movies
# The file has no header row, so the column names are supplied here.
item_columns = [
    "movie_id", "movie_title", "release_date", "url", "unknown",
    "Action", "Adventure", "Animation", "Children's", "Comedy", "Crime",
    "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "Musical",
    "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
]

# Only the id and the title are needed downstream; the other columns are dropped.
movies = pd.read_csv(
    "data/MovieLens 100k/u.item",
    sep="|",
    encoding="latin-1",
    header=None,
    names=item_columns,
)[["movie_id", "movie_title"]]

movies.head()

Unnamed: 0,movie_id,movie_title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [9]:
# Wrap the movie table so that each item is a chat-formatted prompt string
# NOTE(review): the recorded output of the simulation cell shows 10 batches and
# the execution counter skips In [8], which suggests `movies` was subset in a
# cell not present here -- confirm before a full Restart & Run All.
dataset = SimulatorDataset(movies=movies, tokenizer=tokenizer)

# Serve the prompts one per batch, in table order
dataloader = DataLoader(dataset=dataset, shuffle=False, batch_size=1)

In [10]:
# Simulate the responses
# Generation samples tokens (do_sample=True), so seed torch's RNG first;
# otherwise every re-run of this cell produces different requests.
SEED = 42
torch.manual_seed(SEED)

responses = simulate(model, tokenizer, dataloader)

Simulating: 100%|██████████| 10/10 [00:12<00:00,  1.25s/batch]


In [11]:
# Add the responses to the dataframe
# `responses` comes from an earlier cell; if `movies` was re-read or subset
# since then, the two no longer line up row-for-row. Fail with an actionable
# message instead of a generic length error.
if len(responses) != len(movies):
    raise ValueError(
        f"Got {len(responses)} responses for {len(movies)} movies; "
        "re-create the dataset/dataloader and re-run the simulation on the current `movies` frame."
    )

movies["prompt"] = responses

movies.head()

Unnamed: 0,movie_id,movie_title,prompt
0,1,Toy Story (1995),I'd appreciate a family-friendly animated film...
1,2,GoldenEye (1995),I'd appreciate a spy thriller with a dark yet ...
2,3,Four Rooms (1995),I'd appreciate a dark comedy with interconnect...
3,4,Get Shorty (1995),I'm interested in a crime caper with a mix of ...
4,5,Copycat (1995),I'm in the mood for a psychological thriller w...


In [12]:
# Persist the movies together with their generated requests (row index omitted)
prompts_path = "data/prompts.csv"
movies.to_csv(prompts_path, index=False)