# Inference Notebook for Prompt Engineering

In [1]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BertModel, BertTokenizer, BitsAndBytesConfig, MistralForCausalLM
import logging

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_language_model(model_name: str = "google/gemma-7b-it") -> tuple[AutoModelForCausalLM, AutoTokenizer]:
    """Load a 4-bit quantized causal language model together with its tokenizer.

    Args:
        model_name: Checkpoint identifier handed to ``from_pretrained`` for both
            the model and the tokenizer.

    Returns:
        A ``(model, tokenizer)`` pair. The tokenizer is guaranteed to have a pad
        token (falling back to EOS) and is configured to pad on the left so that
        padded batches can be passed straight to ``generate``.
    """
    # NF4 4-bit weights with fp16 compute; double quantization is left off.
    config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        low_cpu_mem_usage=True,
        quantization_config=config,
        attn_implementation="flash_attention_2",
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Some checkpoints ship without a pad token; reuse EOS so batching works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Decoder-only models continue from the last position of each row, so padding
    # must sit on the left; right-padded batches generate after pad tokens (and
    # are rejected outright by some flash-attention model implementations).
    tokenizer.padding_side = "left"

    return model, tokenizer

def load_encoder():
    """Load the ``bert-base-uncased`` encoder on the GPU along with its tokenizer.

    Before loading, every logger currently registered with the logging module
    whose name contains "transformers" is raised to ERROR level so that the
    checkpoint loading does not print lower-severity messages.

    Returns:
        A ``(model, tokenizer)`` pair; the model has been moved with ``.cuda()``.
    """
    # Snapshot the registry keys, then resolve each one to its logger object.
    for logger_name in list(logging.root.manager.loggerDict):
        registered = logging.getLogger(logger_name)
        if "transformers" in registered.name.lower():
            registered.setLevel(logging.ERROR)

    checkpoint = "bert-base-uncased"
    tokenizer = BertTokenizer.from_pretrained(checkpoint)
    model = BertModel.from_pretrained(checkpoint).cuda()

    return model, tokenizer

In [3]:
def generate_prompt(movie: str) -> str:
    """Build the instruction that asks the model to write a request for ``movie``.

    Args:
        movie: Title of the hidden movie; it is interpolated verbatim, wrapped in
            double quotes, into the instruction text.

    Returns:
        The instruction as a single line with no trailing whitespace. The closing
        quotes sit directly after the final sentence so that no newline or
        indentation leaks into the prompt that is later wrapped in a chat template.
    """
    return f"""You are a person interacting with a movie recommendation system. Your goal is to make a short request that will help the system to suggest the movie "{movie}" without mentioning its title, characters, or ANY plot elements. The response should instead use GENERAL characteristics like the genre, tone, and themes of the movie. Your request should be concise, sound conversational, and not be too enthusiastic. For example, the hidden movie "Crazy Stupid Love" should give a request like "I'm looking for a silly romantic comedy with a happy ending. Any suggestions?" Reply ONLY with the human-like request for a movie. DO NOT include any other text."""

In [4]:
class SimulatorDataset(Dataset):
    """Map-style dataset that turns each movie row into a chat-formatted prompt string."""

    def __init__(self, movies: pd.DataFrame, tokenizer: AutoTokenizer) -> None:
        """Keep references to the movie table and the tokenizer.

        Args:
            movies: Frame whose "movie_title" column is read in ``__getitem__``.
            tokenizer: Tokenizer whose ``apply_chat_template`` formats the prompt.
        """
        self.tokenizer = tokenizer
        self.movies = movies

    def __len__(self):
        """Return the number of rows in the movie table."""
        return len(self.movies)

    def __getitem__(self, idx):
        """Return the untokenized, chat-templated prompt for the row at position ``idx``."""
        title = self.movies.iloc[idx]["movie_title"]

        # Single-turn conversation: the instruction is the only user message.
        messages = [{"role": "user", "content": generate_prompt(title)}]

        # tokenize=False keeps the result as text; the generation prompt marker is appended.
        return self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

In [5]:
# End-of-instruction marker used by the Mistral & Llama-2 chat templates.
# simulate() now strips the prompt by token position instead of splitting on this
# marker; the constant is kept so existing references to it keep working.
SPLIT_STR = "[/INST] "

def simulate(
    language_model: MistralForCausalLM,
    language_tokenizer: AutoTokenizer,
    encoder_model: BertModel,
    encoder_tokenizer: BertTokenizer,
    dataloader: DataLoader,
    max_length: int = 2048,
) -> tuple[list, list]:
    """Generate one request per prompt and embed each request with the encoder.

    Args:
        language_model: Causal language model used to sample the requests.
        language_tokenizer: Tokenizer matching ``language_model``.
        encoder_model: Encoder whose first-position hidden state is kept per request.
        encoder_tokenizer: Tokenizer matching ``encoder_model``.
        dataloader: Yields batches (lists) of already chat-templated prompt strings.
        max_length: Upper bound on newly generated tokens per prompt (forwarded
            to ``generate`` as ``max_new_tokens``).

    Returns:
        ``(requests, encoded_requests)``: the generated request strings and, in
        the same order, one NumPy vector per request.
    """
    requests = []
    encoded_requests = []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Simulating", unit="batch"):
            # Tokenize (llm); the chat template already contains the special tokens.
            input_tokens = language_tokenizer(batch, add_special_tokens=False, padding=True, return_tensors="pt").to(language_model.device)
            prompt_length = input_tokens["input_ids"].shape[1]

            # Generate request
            request_tokens = language_model.generate(**input_tokens, max_new_tokens=max_length, do_sample=True, pad_token_id=language_tokenizer.eos_token_id)

            # Decode only the newly generated tokens. generate() returns the (padded)
            # prompt followed by the continuation, so slicing by prompt length removes
            # the prompt for any chat template rather than relying on a text marker.
            generated_tokens = request_tokens[:, prompt_length:]
            batch_requests = [language_tokenizer.decode(output, skip_special_tokens=True).strip() for output in generated_tokens]
            requests.extend(batch_requests)

            # Tokenize (bert). Truncate to the encoder's maximum length because a
            # generation may be longer than the encoder accepts, and place the
            # tensors on the encoder's own device rather than the language model's.
            encoder_tokens = encoder_tokenizer(batch_requests, padding=True, truncation=True, return_tensors="pt").to(encoder_model.device)

            batch_encoded_requests = encoder_model(**encoder_tokens)

            # Encode request, grab the CLS token (position 0) of every sequence with
            # a single device-to-host copy; extending with the 2-D array adds one
            # 1-D vector per request.
            cls_vectors = batch_encoded_requests.last_hidden_state[:, 0].cpu().numpy()
            encoded_requests.extend(cls_vectors)

    return requests, encoded_requests

In [6]:
# Instantiate the request-writing language model (Mistral instruct checkpoint) and its tokenizer
language_model, language_tokenizer = load_language_model(
    model_name="mistralai/Mistral-7B-Instruct-v0.2",
)

# Instantiate the BERT encoder used to embed the generated requests
encoder_model, encoder_tokenizer = load_encoder()

Downloading shards: 100%|██████████| 3/3 [00:00<00:00, 10.67it/s]
Loading checkpoint shards: 100%|██████████| 3/3 [00:09<00:00,  3.05s/it]


In [8]:
# Read in the set of movies from the pipe-separated, header-less item file,
# keep only the id and title columns, and restrict to the first five rows.
movies = pd.read_csv(
    "data/ml-100k/u.item",
    sep="|",
    encoding="latin-1",
    header=None,
    names=[
        "movie_id", "movie_title", "release_date", "url", "unknown",
        "Action", "Adventure", "Animation", "Children's", "Comedy", "Crime",
        "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "Musical",
        "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
    ],
)[["movie_id", "movie_title"]].head()

In [9]:
# Wrap the movie table so each item is a chat-templated prompt string
dataset = SimulatorDataset(movies=movies, tokenizer=language_tokenizer)

# Serve the prompts one at a time, in table order
dataloader = DataLoader(
    dataset=dataset,
    batch_size=1,
    shuffle=False,
)

In [10]:
# Generate a request for every prompt and embed each one with the encoder
requests, encoded_requests = simulate(
    language_model=language_model,
    language_tokenizer=language_tokenizer,
    encoder_model=encoder_model,
    encoder_tokenizer=encoder_tokenizer,
    dataloader=dataloader,
)

Simulating: 100%|██████████| 5/5 [00:08<00:00,  1.62s/batch]


In [11]:
# Add the responses to the dataframe as two new columns: the generated request
# text and its encoder vector. Both lists are assigned positionally, so they are
# assumed to be in the same row order as `movies` (the DataLoader is built with
# shuffle=False) and to have one entry per row.
movies["request"] = requests
movies["encoded_request"] = encoded_requests

# Display the first rows as the cell output for a quick visual check
movies.head()

Unnamed: 0,movie_id,movie_title,request,encoded_request
0,1,Toy Story (1995),I'd appreciate a family-friendly animated movi...,"[-0.08137988, -0.33736423, -0.15107499, -0.215..."
1,2,GoldenEye (1995),I'm looking for a thrilling espionage film wit...,"[-0.06589088, -0.39797422, -0.258162, -0.51747..."
2,3,Four Rooms (1995),I'd appreciate a recommendation for an antholo...,"[-0.054674856, 0.018768324, -0.26771954, -0.12..."
3,4,Get Shorty (1995),I'd appreciate a crime comedy with a clever sc...,"[0.004236746, -0.27087268, -0.3016517, -0.3320..."
4,5,Copycat (1995),I'm in the mood for a thought-provoking psycho...,"[-0.10669358, -0.3234886, -0.0020956919, -0.32..."


In [13]:
# Persist the augmented table to HDF5, overwriting any existing file.
# The recorded output shows a PyTables warning here: the object-typed columns
# (titles, requests, embedding arrays) are pickled rather than stored natively.
movies.to_hdf(
    path_or_buf="data/requests.h5",
    key="df",
    mode="w",
    index=False,
)

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block1_values] [items->Index(['movie_title', 'request', 'encoded_request'], dtype='object')]

  movies.to_hdf("data/requests.h5", key="df", mode="w", index=False)
