Author: Grzegorz Zbrzeżny

In [135]:
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

**Prompt used for all the models:**
given: reviews = [
    "Keeps my coffee hot for hours—just what I need for long workdays. Thanks, Contigo.",
    "The lid isn’t leak-proof, but it keeps drinks warm for a decent amount of time.",
    "Love the sleek design of my Zojirushi mug, and it fits perfectly in my car cup holder!",
    "It’s lightweight but keeps my drinks at the right temperature for hours with Hydro Flask.",    "My dishes come out sparkling clean every time with Cascade. Love this detergent!",
    "It works well on glass, but I’ve noticed spots on my silverware.",
    "Great for tough, greasy messes. Leaves no residue! Thanks, Finish.",     "Absorbs quickly and doesn’t leave a greasy residue. Great for daily use with Neutrogena.",
    "The scent is a little strong, but it protects well with Coppertone.",
    "Perfect for sensitive skin! No breakouts or irritation with La Roche-Posay.",]
    Analyze the following review text and extract the following information:
    - Product category
    - Brand name if mentioned
    - Key attributes or descriptors
    Review: "{review}"
    Please return the result in JSON format as:
    {{
        "product_category": "your answer",
        "brand": "your answer",
        "key_descriptors": ["descriptor1", "descriptor2", ...]
    }}

In [159]:
reviews = [
    "Keeps my coffee hot for hours—just what I need for long workdays. Thanks, Contigo.",
    "The lid isn’t leak-proof, but it keeps drinks warm for a decent amount of time.",
    "Love the sleek design of my Zojirushi mug, and it fits perfectly in my car cup holder!",
    "It’s lightweight but keeps my drinks at the right temperature for hours with Hydro Flask.",
    "My dishes come out sparkling clean every time with Cascade. Love this detergent!",
    "It works well on glass, but I’ve noticed spots on my silverware.",
    "Great for tough, greasy messes. Leaves no residue! Thanks, Finish.",
    "Absorbs quickly and doesn’t leave a greasy residue. Great for daily use with Neutrogena.",
    "The scent is a little strong, but it protects well with Coppertone.",
    "Perfect for sensitive skin! No breakouts or irritation with La Roche-Posay."
]

# Output generated from Chat GPT-4o using OpenAI UI, since Python API is not free :(
gpt_llm_output = [
    {
        "product_category": "Travel Mugs",
        "brand": "Contigo",
        "key_descriptors": ["keeps coffee hot", "for hours", "long workdays"]
    },
    {
        "product_category": "Travel Mugs",
        "brand": "Unknown",
        "key_descriptors": ["not leak-proof", "keeps drinks warm", "decent amount of time"]
    },
    {
        "product_category": "Travel Mugs",
        "brand": "Zojirushi",
        "key_descriptors": ["sleek design", "fits in car cup holder"]
    },
    {
        "product_category": "Travel Mugs",
        "brand": "Hydro Flask",
        "key_descriptors": ["lightweight", "keeps drinks at right temperature", "for hours"]
    },
    {
        "product_category": "Dish Detergents",
        "brand": "Cascade",
        "key_descriptors": ["sparkling clean", "every time"]
    },
    {
        "product_category": "Dish Detergents",
        "brand": "Unknown",
        "key_descriptors": ["works well on glass", "noticing spots on silverware"]
    },
    {
        "product_category": "Dish Detergents",
        "brand": "Finish",
        "key_descriptors": ["great for tough messes", "leaves no residue"]
    },
    {
        "product_category": "Moisturizer",
        "brand": "Neutrogena",
        "key_descriptors": ["absorbs quickly", "doesn't leave greasy residue", "great for daily use"]
    },
    {
        "product_category": "Sunscreen",
        "brand": "Coppertone",
        "key_descriptors": ["strong scent", "protects well"]
    },
    {
        "product_category": "Moisturizer",
        "brand": "La Roche-Posay",
        "key_descriptors": ["perfect for sensitive skin", "no breakouts", "no irritation"]
    }
]


# Output from claude-3-5-sonnet-20241022
claude_llm_output = [
  {
  "product_category": "Thermal Mugs",
  "brand": "Contigo",
  "key_descriptors": ["keeps hot for hours", "suitable for workdays"]
  },
  {
  "product_category": "Thermal Mugs",
  "brand": "Unknown",
  "key_descriptors": ["not leak-proof", "keeps drinks warm", "decent duration"]
  },
  {
  "product_category": "Thermal Mugs",
  "brand": "Zojirushi",
  "key_descriptors": ["sleek design", "fits car cup holder"]
  },
  {
  "product_category": "Thermal Mugs",
  "brand": "Hydro Flask",
  "key_descriptors": ["lightweight", "temperature retention", "long-lasting"]
  },
  {
  "product_category": "Dishwasher Detergents",
  "brand": "Cascade",
  "key_descriptors": ["sparkling clean", "consistent results"]
  },
  {
  "product_category": "Dishwasher Detergents",
  "brand": "Unknown",
  "key_descriptors": ["good for glass", "spots on silverware"]
  },
  {
  "product_category": "Dishwasher Detergents",
  "brand": "Finish",
  "key_descriptors": ["handles tough grease", "no residue"]
  },
  {
  "product_category": "Skincare Products",
  "brand": "Neutrogena",
  "key_descriptors": ["quick absorption", "non-greasy", "daily use"]
  },
  {
  "product_category": "Sunscreen",
  "brand": "Coppertone",
  "key_descriptors": ["strong scent", "good protection"]
  },
  {
  "product_category": "Skincare Products",
  "brand": "La Roche-Posay",
  "key_descriptors": ["suitable for sensitive skin", "no breakouts", "no irritation"]
  }
]

# Output from gemini-1.5-pro-002
gemini_llm_output = [
{
  "product_category": "Coffee Thermos/Travel Mug",
  "brand": "Contigo",
  "key_descriptors": ["keeps coffee hot for hours", "long workdays"]
},
{
  "product_category": "Coffee Thermos/Travel Mug",
  "brand": "Unknown",
  "key_descriptors": ["lid isn’t leak-proof", "keeps drinks warm"]
},
{
  "product_category": "Coffee Thermos/Travel Mug",
  "brand": "Zojirushi",
  "key_descriptors": ["sleek design", "fits in car cup holder"]
},
{
  "product_category": "Coffee Thermos/Travel Mug",
  "brand": "Hydro Flask",
  "key_descriptors": ["lightweight", "keeps drinks at the right temperature for hours"]
},
{
  "product_category": "Dish Detergent",
  "brand": "Cascade",
  "key_descriptors": ["sparkling clean dishes", "every time"]
},
{
  "product_category": "Dish Detergent",
  "brand": "Unknown",
  "key_descriptors": ["works well on glass", "spots on silverware"]
},
{
  "product_category": "Dish Detergent",
  "brand": "Finish",
  "key_descriptors": ["tough, greasy messes", "leaves no residue"]
},
{
  "product_category": "Skincare/Moisturizer",
  "brand": "Neutrogena",
  "key_descriptors": ["absorbs quickly", "doesn’t leave a greasy residue", "daily use"]
},
{
  "product_category": "Sunscreen",
  "brand": "Coppertone",
  "key_descriptors": ["strong scent", "protects well"]
},
{
  "product_category": "Skincare/Face Moisturizer",
  "brand": "La Roche-Posay",
  "key_descriptors": ["sensitive skin", "no breakouts or irritation"]
}
]

In [160]:
def get_embeddings(texts):
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings

In [161]:
def calc_similarities(model_output, model_name):
  df = pd.DataFrame(model_output)
  df['review'] = reviews

  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
  model = AutoModel.from_pretrained("distilbert-base-uncased")

  product_categories = [str(i) for i in df['product_category'].unique()]
  product_embeddings = get_embeddings(product_categories)
  review_embeddings = get_embeddings(reviews)

  similarity_matrix = cosine_similarity(review_embeddings, product_embeddings)

  similarity_df = pd.DataFrame(similarity_matrix, index=reviews, columns=product_categories)
  print(f"\nSimilarity Scores between Reviews and Product Categories for model {model_name}:")

  for category in product_categories:
      similar_reviews = similarity_df[category].nlargest(3)
      print(f"\nTop similar reviews for category '{category}':")
      print(similar_reviews)


### GPT-40

In [156]:
calc_similarities(gpt_llm_output, "GPT-4o")




Similarity Scores between Reviews and Product Categories for model GPT-4o:

Top similar reviews for category 'Travel Mugs':
Love the sleek design of my Zojirushi mug, and it fits perfectly in my car cup holder!       0.683088
It’s lightweight but keeps my drinks at the right temperature for hours with Hydro Flask.    0.676094
It works well on glass, but I’ve noticed spots on my silverware.                             0.672872
Name: Travel Mugs, dtype: float32

Top similar reviews for category 'Dish Detergents':
My dishes come out sparkling clean every time with Cascade. Love this detergent!            0.710427
Absorbs quickly and doesn’t leave a greasy residue. Great for daily use with Neutrogena.    0.709854
The scent is a little strong, but it protects well with Coppertone.                         0.668281
Name: Dish Detergents, dtype: float32

Top similar reviews for category 'Moisturizer':
Absorbs quickly and doesn’t leave a greasy residue. Great for daily use with Neutrogena.    

### Claude

In [162]:
calc_similarities(claude_llm_output, "Claude")




Similarity Scores between Reviews and Product Categories for model Claude:

Top similar reviews for category 'Thermal Mugs':
It’s lightweight but keeps my drinks at the right temperature for hours with Hydro Flask.    0.721836
Perfect for sensitive skin! No breakouts or irritation with La Roche-Posay.                  0.705778
Great for tough, greasy messes. Leaves no residue! Thanks, Finish.                           0.703471
Name: Thermal Mugs, dtype: float32

Top similar reviews for category 'Dishwasher Detergents':
My dishes come out sparkling clean every time with Cascade. Love this detergent!            0.725572
Absorbs quickly and doesn’t leave a greasy residue. Great for daily use with Neutrogena.    0.722025
The lid isn’t leak-proof, but it keeps drinks warm for a decent amount of time.             0.681905
Name: Dishwasher Detergents, dtype: float32

Top similar reviews for category 'Skincare Products':
Perfect for sensitive skin! No breakouts or irritation with La Roche-Pos

### Gemini

In [163]:
calc_similarities(gemini_llm_output, "Gemini")




Similarity Scores between Reviews and Product Categories for model Gemini:

Top similar reviews for category 'Coffee Thermos/Travel Mug':
It’s lightweight but keeps my drinks at the right temperature for hours with Hydro Flask.    0.782012
Keeps my coffee hot for hours—just what I need for long workdays. Thanks, Contigo.           0.743864
Love the sleek design of my Zojirushi mug, and it fits perfectly in my car cup holder!       0.728084
Name: Coffee Thermos/Travel Mug, dtype: float32

Top similar reviews for category 'Dish Detergent':
My dishes come out sparkling clean every time with Cascade. Love this detergent!            0.744092
Perfect for sensitive skin! No breakouts or irritation with La Roche-Posay.                 0.701452
Absorbs quickly and doesn’t leave a greasy residue. Great for daily use with Neutrogena.    0.695096
Name: Dish Detergent, dtype: float32

Top similar reviews for category 'Skincare/Moisturizer':
Absorbs quickly and doesn’t leave a greasy residue. Great