# DEMO 2: Generate product review

## Imports

In [24]:
# standard library and project modules --------------------------------------------------------------
import os
import importlib
import warnings

import helpers
import prompting

# re-execute the project modules (helpers first, then prompting) so this
# session runs their current on-disk code
for _module in (helpers, prompting):
    importlib.reload(_module)

# silence every warning for the duration of the demo
warnings.filterwarnings("ignore")

# pickle locations and output separator --------------------------------------------------------------
DATA_PROCESSED_PKL = 'pickle/data_processed.pkl'
DATA_CLUSTERED_PKL = 'pickle/data_clustered.pkl'
DATA_SA_PKL = 'pickle/data_sentiment_analysis.pkl'
DATA_SCORED_PKL = 'pickle/data_scored.pkl'

# horizontal rule (100 dashes) printed between sections of the output
SEP = '-' * 100


## Load model

In [2]:
# load Hugging Face token to environment
if not os.environ.get('HF_TOKEN'):
    # getpass keeps the token out of the notebook's visible output,
    # whereas input() would echo the secret into the saved cell output
    from getpass import getpass
    os.environ['HF_TOKEN'] = getpass('Enter API token for Hugging Face: ')
else:
    print('Hugging Face token already loaded to environment')

# load the model and tokenizer
model, tokenizer = prompting.load_model_and_tokenizer()


tokenizer_config.json:   0%|          | 0.00/137k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

## Review best product per category

In [25]:
def review_best_product_by_category(
        model: object,
        tokenizer: object,
        category: str,
        n: int = 10
) -> dict:
    """Review the best product in a category based on n reviews.

    The function prints its progress and runs up to three inference stages:

    1. Summarize each sampled review of the top product. This stage is
       skipped when a pickle of summaries already exists for the category.
    2. Extract the recurring ideas from the summaries.
    3. Generate the final review (title and body) from the recurring ideas.

    Note:
        The summary cache file name is derived from the category only, so an
        existing cache is reused regardless of the value of ``n``.

    Args:
        model (object): The model instance to use for inference.
        tokenizer (object): The tokenizer instance to use for inference.
        category (str): The category to extract the best product from.
        n (int, optional): Number of reviews to sample for the final review.
            Only used when no cached summaries exist. Defaults to 10.

    Returns:
        dict: The full review, with the keys "category", "product", "title"
            and "review".
    """

    print(f"{SEP}\nCATEGORY: {category}\n")

    # get the name of the top product in the category
    top_product_name = helpers.get_top_products_per_category(category, 1)[0]
    print(f"{SEP}\nTOP PRODUCT NAME: {top_product_name}\n")

    print(f"{SEP}\nINFERENCE 1 - SAMPLE OF PRODUCT REVIEWS:\n")

    # load review text from pickle if available
    pickle_path = os.path.join('pickle', f"reviews_{category.lower().replace(' ', '_')}.pkl")

    if os.path.exists(pickle_path):
        review_text = helpers.load_pickled_reviews(pickle_path)
        print(f"{review_text}")

    else:
        # sample the reviews only on a cache miss: they are not needed when
        # the summaries were loaded from the pickle
        sample_reviews = helpers.sample_product_reviews(top_product_name, category, n)

        # summarize each review with the model, one numbered line per review
        summary_lines = []
        for position, review in enumerate(sample_reviews, start=1):
            summary = prompting.generate_review_summary(model, tokenizer, review)
            review_summary = f"[Review {position}]: {summary}"
            print(review_summary)
            summary_lines.append(review_summary)

        # every summary line is newline-terminated, including the last one
        review_text = "".join(f"{line}\n" for line in summary_lines)

        # pickle the review text so later runs can skip the summarization
        helpers.pickle_list_reviews(review_text, pickle_path)

    # generate recurring ideas
    recurring_ideas = prompting.generate_reviews_recurring_ideas(model, tokenizer, review_text, max_tokens=75)
    print(f"{SEP}\nINFERENCE 2 - RECURRING IDEAS:\n")
    for idea in recurring_ideas.split('\n'):
        # skip blank lines so no empty "- " bullets are printed
        if idea.strip():
            print(f"- {idea}")

    # generate final review
    review_title, product_review = prompting.generate_final_review(model, tokenizer, top_product_name, recurring_ideas, max_tokens=150)
    print(f"\n{SEP}\nINFERENCE 3 - FINAL REVIEW:\n")

    return {
        "category": category,
        "product": top_product_name,
        "title": review_title,
        "review": product_review,
    }


if __name__ == "__main__":

    # get the unique categories
    categories = helpers.get_categories_from_dataset()

    # review the best product for a given category (here: the first one)
    category = categories[0]

    # number of reviews to sample; passed through instead of repeating the
    # literal so that changing it here actually takes effect
    number_of_reviews = 10
    review_dict = review_best_product_by_category(model, tokenizer, category, n=number_of_reviews)

    # print each field of the review, followed by a blank line
    for k, v in review_dict.items():
        print(f"{k.upper()}: {v}")
        print()

----------------------------------------------------------------------------------------------------
CATEGORY: Accessories & Adapters

----------------------------------------------------------------------------------------------------
TOP PRODUCT NAME: AmazonBasics AAA Performance Alkaline Batteries (36 Count)

----------------------------------------------------------------------------------------------------
INFERENCE 1 - SAMPLE OF PRODUCT REVIEWS:

Reviews loaded from pickle/reviews_accessories_&_adapters.pkl.
[Review 1]: No leakage or discharge like some cheap batteries. Work great!
[Review 2]: Long-lasting batteries that are comparable to name brands.
[Review 3]: Great batteries for a great price. Long-lasting and good for the holidays.
[Review 4]: Great bang for the buck. Amazon batteries.
[Review 5]: Works perfectly, but durability is unknown.
[Review 6]: Not bad but not as long-lasting as top brands. For the price, okay, but will stick with top brands in the future.
[Review 7]