In [1]:
import os
from pathlib import Path

import torch
from gpt_reranker import run_reranker
from openai import OpenAI
from prompts import AMAZON_RANKING_PROMPT
from transformers import AutoModelForCausalLM, AutoTokenizer, QuantoConfig

## GPT Reranker - Cross Encoder

In [2]:
# OpenAI client handed to the GPT reranker below. The API key is read from the
# OPENAI_API_KEY environment variable so it never appears in the notebook; a
# missing variable raises KeyError here.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [3]:
# Search query that the sample product is scored against.
query = "case for apple laptop"

# Sample product listing (title line followed by its category) used as the
# candidate document in the reranking cells.
product_text = """
YMIX Macbook Pro 13" Case Non-Retina,Folio Embroidered Shell Plastic Hard Protective Cover for Old MacBook Pro 13 Inch with CD-ROM Drive,Model A1278(A_Embroidered Floral)

product category: Electronics
"""

In [9]:
# Score the (query, product) pair with the GPT-based reranker.
# `run_reranker` comes from the project module `gpt_reranker`; it returns two
# values that are unpacked into `label` and `probab`.
reranker_kwargs = {
    "client": client,
    "prompt": AMAZON_RANKING_PROMPT,
    "query": query,
    "product_text": product_text,
    "logger_level": "SUCCESS",
}
label, probab = run_reranker(**reranker_kwargs)

[32m2024-09-24 13:57:57.629[0m | [32m[1mSUCCESS [0m | [36mgpt_reranker[0m:[36mrun_reranker[0m:[36m76[0m - [32m[1mLabel: Yes with Probability: 0.9999722707254635[0m


## Open-Source (OS) - Cross Encoder

In [4]:
# Hugging Face checkpoint loaded by the model cells below.
# Alternative checkpoint to switch to: "google/gemma-2-2b-it"
MODEL_ID = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Directory passed as `cache_dir` when loading tokenizer/model; it must exist
# before any loading cell runs.
cache_dir = Path("..") / "cache"
assert cache_dir.exists(), f"Cache directory {cache_dir} does not exist"

### Gemma 2 2B

In [14]:
# Load the tokenizer and model for MODEL_ID, letting `device_map="auto"` decide
# device placement.
# NOTE(review): this cell loads whatever MODEL_ID is currently set to, and no
# quantization_config is passed here; the `quantized_model` name is kept
# because the pipeline cells refer to it.
hf_cache_kwargs = {"cache_dir": cache_dir}
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **hf_cache_kwargs)
quantized_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, device_map="auto", **hf_cache_kwargs
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Llama 3.1 8B

In [5]:
# Load MODEL_ID with its weights quantized to int8 via Quanto.
# NOTE(review): no device_map is passed in this cell — confirm the resulting
# device placement is the intended one.
quantization_config = QuantoConfig(weights="int8")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, cache_dir=cache_dir)
quantized_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    cache_dir=cache_dir,
    quantization_config=quantization_config,
)

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

### Pipeline

In [13]:
# Sanity-check generation: fill the ranking prompt with a query and the sample
# product, let the model continue for up to 5 new tokens, and print the decoded
# sequence (prompt included).
#
# AMAZON_RANKING_PROMPT is the prompt imported at the top of the notebook. The
# name used here before, ECOMMERCE_RANKING_PROMPT, is never imported or
# defined, so the cell raised NameError on a fresh kernel.
# NOTE(review): assumes the prompt exposes `query` and `product_text` format
# fields — confirm against prompts.py.
#
# `input_ids` holds the full tokenizer output (it is unpacked with ** below),
# not only the id tensor.
input_ids = tokenizer(
    AMAZON_RANKING_PROMPT.format(
        query="macbook keyboard", product_text=product_text
    ),
    return_tensors="pt",
).to(quantized_model.device)

outputs = quantized_model.generate(**input_ids, max_new_tokens=5)
print(tokenizer.decode(outputs[0]))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


<|begin_of_text|>
You are an Assistant responsible for helping detect whether the retrieved product is relevant to the query. For a given input, you need to output a single token: "Yes" or "No" indicating the retrieved product is relevant to the query.

Query: Younique setting powder
Product: 
"""
Younique Touch Behold Translucent Setting Powder

Touch Behold Translucent Setting Powder. Younique’s Touch Behold Translucent Setting Powder effortlessly locks and loads your look so you’re ready to take on the world. Use as the finishing touch to help keep makeup in place, or wear directly on skin for a softening, matte look.

product category: Beauty & Personal Care
"""
Relevant: Yes

Query: white musk hand cream
Product: 
"""
Braided Hair Clips for Women Girls, Sparkling Crystal Stone Braided Hair Clips Barrette with 3 Small Clips, Triple Hair Clips with Rhinestones for Sectioning,4PCS (4pcs-Type A)

product category: Beauty & Personal Care
"""
Relevant: No

Query: HP Pavilion dm4 replace

In [10]:
# Single forward pass to obtain the next-token logits for the formatted prompt.
#
# AMAZON_RANKING_PROMPT is the prompt imported at the top of the notebook. The
# name used here before, ECOMMERCE_RANKING_PROMPT, is never imported or
# defined, so the cell raised NameError on a fresh kernel.
# NOTE(review): assumes the prompt exposes `query` and `product_text` format
# fields — confirm against prompts.py.
input_tensor = tokenizer.encode(
    AMAZON_RANKING_PROMPT.format(
        query="macbook keyboard", product_text=product_text
    ),
    return_tensors="pt",
).to(quantized_model.device)

# Inference only: disable autograd so no graph is kept for the pass, and call
# the module itself (rather than .forward) so registered hooks are run.
with torch.no_grad():
    output = quantized_model(input_tensor)
logits = output.logits[0, -1]  # first batch element, last position

In [11]:
# Look up the vocabulary ids of the two answer tokens the classifier compares.
# NOTE(review): these are the bare "Yes"/"No" tokens; whether the model would
# instead emit a space-prefixed variant depends on how the prompt ends — verify.
yes_token_id, no_token_id = (
    tokenizer.convert_tokens_to_ids(answer) for answer in ("Yes", "No")
)
print(f"ID for Yes: {yes_token_id}, No: {no_token_id}")

ID for Yes: 9642, No: 2822


In [9]:
# Turn the next-token logits into a distribution over the vocabulary and report
# the mass on the two answer tokens.
probs = torch.softmax(logits, dim=-1)
print(f"Probability for Yes: {probs[yes_token_id]}")
print(f"Probability for No: {probs[no_token_id]}")

Probability for Yes: 0.05030212178826332
Probability for No: 0.08026696741580963


In [17]:
# Report the softmax probability of each answer token for the current `logits`
# (same computation as the previous cell, re-run against whichever model and
# logits are in memory).
next_token_probs = logits.softmax(dim=-1)
yes_prob = next_token_probs[yes_token_id]
no_prob = next_token_probs[no_token_id]
print(f"Probability for Yes: {yes_prob}")
print(f"Probability for No: {no_prob}")

Probability for Yes: 0.668552815914154
Probability for No: 0.09124583750963211
