In [1]:
from google.cloud import bigquery
import os

# Turn off parallelism in the Hugging Face `tokenizers` library; set here, before
# sentence-transformers is imported further down the notebook.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

client = bigquery.Client()

# Fully-qualified BigQuery table holding the reviews
project_id = 'ingka-tugc-infra-prod'
dataset_id = 'eu_ai_content'
table_id = 'reviews'

table_ref = f'{project_id}.{dataset_id}.{table_id}'

# First 5 articles with under 1k reviews

articles_1000reviews = ['00577935', '30393063', '40577943', '50361792', '20393073']

# Article analysed by the rest of the notebook
article_id = articles_1000reviews[1]

# The table name cannot be a query parameter, so it is still interpolated; the
# article id is passed as a named parameter instead of being spliced into the SQL.
query = f"""
    SELECT concat(title, '. ', text) as review_text
    FROM {table_ref}
    WHERE franchise='set-11' AND content_lang_code = 'en' AND art_id = @article_id
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter("article_id", "STRING", article_id),
    ]
)

query_job = client.query(query, job_config=job_config)

# CONCAT returns NULL when title or text is NULL; such rows arrive as None and
# would break the later ', '.join(reviews), so they are left out here.
reviews = [
    row['review_text']
    for row in query_job
    if row['review_text'] is not None
]

print(f"Processing {len(reviews)} reviews")

Processing 1001 reviews


In [None]:
import os
from openai import AzureOpenAI
from utils.getSecret import get_secret

# Arguments for the project helper `get_secret`
# (presumably a secret-manager project id and secret name — confirm in utils.getSecret)
project = "923326131319"
secret  = "derai-azure"
api_key = get_secret(project, secret)

# Azure OpenAI client used by the prompt functions defined in the following cells
llm_client = AzureOpenAI(
    api_key=api_key,
    api_version="2023-07-01-preview",
    azure_endpoint="https://derai-vision.openai.azure.com/",
)

# Value passed as `model=` to the chat-completions API by `get_topics`
model = "gpt-4o" 

In [3]:
def get_topics(reviews, max_tokens=1000, temperature=0.4):
    """Ask the chat model for the main topics discussed in a group of reviews.

    Uses the notebook-level ``llm_client`` and ``model`` (read when the function
    is called, not when it is defined).

    Args:
        reviews: Iterable of review strings; they are joined with ", " into a
            single prompt.
        max_tokens: Completion token cap forwarded to the API. Answers longer
            than the cap are cut off mid-text.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The model's answer with surrounding whitespace stripped. The prompt asks
        for a comma-separated topic list first, followed by example reviews per
        topic. Returns "" when ``reviews`` is empty or the API returns no text.
    """
    reviews = list(reviews)
    if not reviews:
        # Nothing to analyse; skip the API call.
        return ""

    messages = [
        {
            "role": "system",
            "content": (
                "You are an helpful customer reviews expert that identifies the main topics discussed in a group of reviews.\n"
                "Use singular words unless a plural form is necessary.\n"
                "Use only one word. 2 or 3 words can be used only when they are part of a composite word and are better to represent the idea of the topic (e.g.: Ease of use).\n"
                "If you identify a verb as a topic, use the noun version (e.g.: use 'Order' instead of 'Ordering').\n"
                "Generalize the topic word; for example, if you encounter 'Saleswoman' or 'Salesman', abstract it to 'Staff'.\n"
                "Provide the output as a comma-separated list of topics with the first letter capitalized.\n"
            ),
        },
        {
            "role": "user",
            "content": (
                "Read the following reviews and generate a maximum of 8 topics that are mentioned in the reviews.\n"
                "For each topic that you generate, indicate which reviews mention implicitly or explicitly that topic.\n"
                "ONLY return topics that are mentioned more than once, don't consider topics mentioned only in a couple of reviews.\n"
                "The topic names should be broad and general, for example Quality, Price, etc.\n"
                "The topics could be either nouns that refers to a certain characteristic of the product or spefic features or parts of the product (screws, cookware, etc.)\n"
                "First return all the topics that you identify as a comma-separated list, then for each of them return a few reviews that mention it.\n"
                f"Reviews: {', '.join(reviews)}\n"
                "Topics:"
            ),
        },
    ]

    # Query the language model
    response = llm_client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        n=1,
        stop=None,
    )

    # `content` can be None (e.g. a filtered completion); treat that as no text
    # instead of failing on None.strip().
    content = response.choices[0].message.content
    return (content or "").strip()

In [4]:
# Raw LLM answer: per the prompt, a comma-separated topic list first,
# followed by a few example reviews for each topic.
answer = get_topics(reviews)
print(answer)

Lid, Seal, Quality, Price, Fit, Breakage, Smell, Cleanliness

1. Lid
- The only lid in stock!  . I prefer the bamboo lids but this is all they had in stock. Tabs can break off (check them before you buy) and I avoid plastic food storage.
- Not holding the vacuum seal. I have been using similar round products for years and was very excited to find these to fit my rectangular and square IKEA glass containers. However, they do not hold the seal and are pretty useless. I am glad I only bought one of each to test.
- Lid alternative. Good alternative to the lids that crack after a few years, Love it.. Love it.
- Glass container with lid. This can be your lunch box and also container for putting into your microwave. It's only $2.
- Functional. We are buying this as a replacement. The previous lid is has a hinge that is slowly breaking. We think part of the problem comes from putting it through the dishwasher, but maybe not.
- Clip on lids easy to. Clip on lids easy to use
- Kids don’t fit wel

In [5]:
# The prompt asks for the comma-separated topic list first, so the topics are on
# the first non-blank line of the answer.  Split on bare commas and strip each
# item so "A,B", "A, B" and stray whitespace all parse the same; an empty answer
# yields [] rather than [''].
_topic_line = next((line for line in answer.splitlines() if line.strip()), "")
topics = [item.strip() for item in _topic_line.split(",") if item.strip()]
print(topics)

['Lid', 'Seal', 'Quality', 'Price', 'Fit', 'Breakage', 'Smell', 'Cleanliness']


In [6]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Define the embedding model.  It gets its own name so that it does not rebind
# the notebook-level `model`, which `get_topics` passes to the chat API.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Set a similarity threshold
similarity_threshold = 0.75

# keep_topic[i] stays True while topics[i] is retained
keep_topic = [True] * len(topics)

# With fewer than two topics there is nothing to compare (and an empty list
# cannot be passed through cosine_similarity).
if len(topics) > 1:
    # Encode the topics
    topic_embeddings = embedding_model.encode(topics, convert_to_numpy=True)

    # Compute the cosine similarity matrix
    similarity_matrix = cosine_similarity(topic_embeddings)

    # Identify and remove similar topics: the earlier topic of a similar pair
    # wins.  A topic that was already dropped must not knock out later ones,
    # otherwise a topic can disappear although nothing retained resembles it.
    for i in range(len(topics)):
        if not keep_topic[i]:
            continue
        for j in range(i + 1, len(topics)):
            if keep_topic[j] and similarity_matrix[i, j] > similarity_threshold:
                keep_topic[j] = False

# Filter by position so repeated topic strings are handled correctly
unique_topics = [topic for topic, kept in zip(topics, keep_topic) if kept]

deleted_topics_count = len(topics) - len(unique_topics)
print("Number of deleted topics:", deleted_topics_count)
print("Unique topics:", unique_topics)
topics = unique_topics

  from .autonotebook import tqdm as notebook_tqdm


Number of deleted topics: 0
Unique topics: ['Lid', 'Seal', 'Quality', 'Price', 'Fit', 'Breakage', 'Smell', 'Cleanliness']


# Topic Identification Evaluation - Are those 8 topics good?

In [None]:
from openai import AzureOpenAI
# Evaluation prompt: embeds the current `topics` list and every review, and asks
# for a 0-5 relevance rating per topic followed by a justification table.
prompt = f"""You are an AI review analyst. Your task is to analyze a collection of reviews and assess the following topics: "{topics}" 
            Go through all of the reviews and check that each topic is mentioned either explicitly or implicitly. 
            Once you read all the reviews, provide a rating for each topic (from 0 to 5) considering the relevance of the topic to the reviews.
            If a topic is not mentioned in the reviews, you should rate it as 0. If it is mentioned in a lot of reviews, you should rate it as 5.
            Reviews: "{', '.join(reviews)}" 
            Answer first with an array of scoring of the type 'Topic':'Rating' (e.g.: Quality: 5, Price: 5, ...) and after that, on a new line, 
            with a table containing the topics, their ratings and a brief justification on why that score has been assigned. 
            Remember that the score only indicated how much a topic is relevant for that group of reviews. 
            Do not add any additional text to the answer."""

# Second client object, built with the same key, API version and endpoint as `llm_client`
ai_client = AzureOpenAI(
    api_key=api_key,
    api_version="2023-07-01-preview",
    azure_endpoint="https://derai-vision.openai.azure.com/",
)
# Single user message sent to "o1-mini"; no system message or sampling options are passed
result = ai_client.chat.completions.create(
    model="o1-mini",
    messages=[
        {"role": "user", "content": prompt},
    ],
)

print(result.choices[0].message.content)

["Lid":5, "Seal":5, "Quality":5, "Price":4, "Fit":4, "Breakage":4, "Smell":3, "Cleanliness":4]

| Topic       | Rating | Justification                                                                                                                                                                 |
|-------------|--------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Lid         | 5      | The majority of reviews discuss the lid’s functionality, ease of use, and design, indicating it is a central aspect of the product.                                           |
| Seal        | 5      | Numerous comments focus on the airtight and leakproof qualities of the seal, highlighting its importance and effectiveness.                                                    |
| Quality     | 5      | Reviews frequently mention both positive and negative aspects of quality

# Topic assignment - Labeling each review with 0 to N topics

## LLM 3-shot learning - GPT-4o

In [None]:
# Build the Azure OpenAI client for the labelling step
# (same key, API version and endpoint as the client created near the top of the notebook)
llm_client = AzureOpenAI(
    api_key=api_key,
    api_version="2023-07-01-preview",
    azure_endpoint="https://derai-vision.openai.azure.com/",
)

# Make sure the notebook-level `model` names the chat deployment again
model = "gpt-4o" 

In [9]:
def get_reviews_labels_3shots(review, topics, max_tokens=30, temperature=0.4):
    """Label one review with the subset of ``topics`` it mentions (3-shot prompt).

    Sends a system message, three worked user/assistant examples and the actual
    request to the "gpt-4o" deployment through the notebook-level ``llm_client``.

    Args:
        review: Review text to label.
        topics: Candidate topics; interpolated into the prompt with ``str()``.
        max_tokens: Completion token cap forwarded to the API.
            NOTE(review): with the default of 30 an answer listing many topics
            may be cut off before the closing bracket — confirm the cap is enough
            for the longest expected list.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The model's answer as a stripped string; the examples steer it towards a
        Python-style list literal such as "['Lid', 'Price']". Returns "[]" when
        the API returns no text.
    """
    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful customer reviews expert that identifies the main topics in a review.\n"
                "Provide the output as a comma-separated list of topics. The first letter of the topics should always be capitalized.\n"
            ),
        },
        # Example 1
        {
            "role": "user",
            "content": (
                "Read the following review and associate the topics mentioned implicitly or explicitly in the review.\n"
                "Only answer with the topics that are mentioned in the review. Example: ['price', 'quality']. \n"
                "If you cannot identify any topics, just return '[]' \n"
                "Review: 'The product arrived quickly, and the packaging was great. However, the price is too high.'\n"
                "Topics: ['Delivery', 'Packaging', 'Price', 'Quality']. DO NOT write any topics outside of this list. \n"
                "Topics mentioned within the review:"
            ),
        },
        {
            "role": "assistant",
            "content": "['Delivery', 'Packaging', 'Price']",
        },
        # Example 2
        {
            "role": "user",
            "content": (
                "Read the following review and associate the topics mentioned implicitly or explicitly in the review.\n"
                "Only answer with the topics that are mentioned in the review. Example: ['price', 'quality']. \n"
                "If you cannot identify any topics, just return '[]' \n"
                "Review: 'I love how comfortable these shoes are! They fit perfectly, and the material feels premium. Good price.'\n"
                "Topics: ['Comfort', 'Fit', 'Material', 'Design', 'Value']. DO NOT write any topics outside of this list.\n"
                "Topics mentioned within the review:"
            ),
        },
        {
            "role": "assistant",
            "content": "['Comfort', 'Fit', 'Material', 'Value']",
        },
        # Example 3
        {
            "role": "user",
            "content": (
                "Read the following review and associate the topics mentioned implicitly or explicitly in the review.\n"
                "Only answer with the topics that are mentioned in the review. Example: ['price', 'quality']. \n"
                "If you cannot identify any topics, just return '[]' \n"
                "Review: 'The app crashes frequently and is very slow. It needs major improvements.'\n"
                "Topics: ['Performance', 'Usability', 'Design']. DO NOT write any topics outside of this list. \n"
                "Topics mentioned within the review:"
            ),
        },
        {
            "role": "assistant",
            "content": "['Performance', 'Usability']",
        },
        # Actual request
        {
            "role": "user",
            "content": (
                "Read the following review and associate the topics mentioned implicitly or explicitly in the review.\n"
                "Only answer with the topics that are mentioned in the review. Example: ['Price', 'Quality']. \n"
                "If you cannot identify any topics, just return '[]' \n"
                f"Review: '{review}' \n"
                f"Topics: {topics}. DO NOT write any topics outside of this list. \n"
                "Topics mentioned within the review:"
            ),
        },
    ]

    # Fixed locally so the call does not depend on the notebook-level `model`
    deployment = "gpt-4o"

    # Query the language model
    response = llm_client.chat.completions.create(
        model=deployment,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        n=1,
        stop=None,
    )

    # `content` can be None (e.g. a filtered completion); report that as "no
    # topics" in the same format the prompt asks for, instead of failing on
    # None.strip().
    content = response.choices[0].message.content
    if content is None:
        return "[]"
    return content.strip()

In [10]:
# Show the candidate topic list that will be passed to the labelling prompt
print(topics)

['Lid', 'Seal', 'Quality', 'Price', 'Fit', 'Breakage', 'Smell', 'Cleanliness']


In [11]:
import ast
import logging
import os

import pandas as pd


def _parse_topic_list(raw, allowed):
    """Turn the model's raw answer into a list of topics contained in `allowed`.

    The answer is parsed with ast.literal_eval, never eval(): it is
    model-generated text and must not be executed as code.  If it is not a valid
    literal (e.g. a bare "Lid, Price" or a list cut off by the token cap), fall
    back to splitting on commas and stripping brackets and quotes.
    """
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, TypeError, SyntaxError):
        parsed = None

    if isinstance(parsed, str):
        # A single quoted topic, e.g. "'Lid'"
        parsed = [parsed]
    if not isinstance(parsed, (list, tuple, set)):
        parsed = [part.strip(" \t\r\n[]'\"") for part in raw.split(",")]

    # Keep only topics from the candidate list, in the order the model gave them
    return [topic for topic in parsed if isinstance(topic, str) and topic in allowed]


results = []
for review in reviews:
    try:
        raw_labels = get_reviews_labels_3shots(review, topics)
        results.append([review, _parse_topic_list(raw_labels, topics)])
    except Exception as e:
        # Best effort: a failing review is logged and kept with no topics so the
        # output stays aligned with `reviews`.
        if "content_filter" in str(e) or "ResponsibleAIPolicyViolation" in str(e):
            # Check for content filter issues
            logging.error(f"Content filter triggered for review: {review} - Skipping.")
        else:
            # Log other exceptions
            logging.error(f"Error processing review: {review} - {e}")
        results.append([review, []])  # Add review with empty topics

# Transform results into a DataFrame
df = pd.DataFrame(results, columns=["review", "topics"])

# Save DataFrame to CSV (create the output directory on a fresh checkout)
os.makedirs('csv', exist_ok=True)
df.to_csv(f'csv/LLM3shots_{article_id}.csv', index=False)

ERROR:root:Content filter triggered for review: Quality lid . Nice tight snappy little fellow. . Skipping.


In [12]:
# File name of the labelled-reviews CSV, without the `csv/` directory prefix used by `to_csv`.
# NOTE(review): not read by any visible cell — presumably intended for app.py; confirm it is still needed.
streamlit_csv = f'LLM3shots_{article_id}.csv'

In [13]:
"""import json
import pandas as pd
import ast

def load_ground_truth(csv_file):
    df = pd.read_csv(csv_file)
    # Convert the topics column (a string) into an actual list.
    df['topics'] = df['topics'].apply(ast.literal_eval)
    return df

def load_predictions(json_file):
    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Filter out entries that do not contain a 'review' key
    predictions = [entry for entry in data if "review" in entry]

    return predictions  # List of reviews with topics

def compute_precision_recall_f1(gt_topics, pred_topics):
    # True positives: the intersection of predicted and ground truth topics.
    true_positives = len(gt_topics & pred_topics)
    
    precision = true_positives / len(pred_topics) if pred_topics else 0.0
    recall = true_positives / len(gt_topics) if gt_topics else 0.0
    f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) else 0.0
    
    return precision, recall, f1

def safe_literal_eval(val):
    try:
        if isinstance(val, str):
            return ast.literal_eval(val)
        return val  # If it's already a list, return as is.
    except (ValueError, SyntaxError):
        print(f"Warning: Could not parse topics: {val}")
        return []  # Return an empty list if parsing fails.

# Load the CSV file
gt_df = pd.read_csv("csv/ground_truth.csv")

# Apply the safe conversion
gt_df['topics'] = gt_df['topics'].apply(safe_literal_eval)


predictions = load_predictions(json_name)

# List the models that we want to evaluate (must match keys in the JSON)
models = [
    "deberta_v3_topics",
    "nli_deberta_v3_topics",
    "bart_large_mnli_topics",
    "embedding_similarity_topics",
    "word_embedding_similarity_topics",
    "LLM_topics",
    "LLM_3shots_topics"
]

# Initialize an accumulator for metrics per model.
metrics = {model: {"precision": [], "recall": [], "f1": []} for model in models}

# Check that the number of reviews is the same in both files.
if len(gt_df) != len(predictions):
    print("len(gt_df),", len(gt_df))
    print("len(predictions),", len(predictions))
    raise ValueError("The number of reviews in the CSV and JSON files do not match!")

# Iterate over the reviews (assumed aligned by index)
for idx, pred_entry in enumerate(predictions):
    # For consistency, we compare topics in lowercase with whitespace stripped.
    gt_topics = {topic.lower().strip() for topic in gt_df.iloc[idx]["topics"]}
    
    for model in models:
        # In the JSON predictions, if a model has produced topics,
        # we take the keys (ignoring the scores) as the predicted topics.
        pred_model_dict = pred_entry.get(model, {})
        pred_topics = {topic.lower().strip() for topic in pred_model_dict.keys()}
        
        precision, recall, f1 = compute_precision_recall_f1(gt_topics, pred_topics)
        
        metrics[model]["precision"].append(precision)
        metrics[model]["recall"].append(recall)
        metrics[model]["f1"].append(f1)

# Compute the average scores for each model and print them
for model in models:
    avg_precision = sum(metrics[model]["precision"]) / len(metrics[model]["precision"])
    avg_recall = sum(metrics[model]["recall"]) / len(metrics[model]["recall"])
    avg_f1 = sum(metrics[model]["f1"]) / len(metrics[model]["f1"])
    print(f"Model: {model}")
    print(f"  Precision: {avg_precision:.3f}")
    print(f"  Recall:    {avg_recall:.3f}")
    print(f"  F1 Score:  {avg_f1:.3f}")
    print("-" * 30)"""



In [16]:
# Start the Streamlit app (app.py) as a shell command from the notebook.
# NOTE(review): the captured output ends with ^C / "Stopping...", so this cell appears to
# hold the kernel until it is interrupted — consider launching it from a terminal instead.
!streamlit run app.py

[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://192.168.178.207:8501[0m
[0m
[34m[1m  For better performance, install the Watchdog module:[0m

  $ xcode-select --install
  $ pip install watchdog
            [0m
^C
[34m  Stopping...[0m
Exception ignored in: <module 'threading' from '/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/threading.py'>
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/threading.py", line 1594, in _shutdown
    atexit_call()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/concurrent/futures/thread.py", line 31, in _python_exit
    t.join()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/threading.py", line 1149, in join
    self._wait_for_tstate_lock()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib