Objective:
- Scrape public product reviews from BestBuy Canada
- Apply filters and pagination
- Extract structured review fields
- Perform sentiment analysis using a Hugging Face Transformers pipeline (TensorFlow backend)
- Generate insights for business stakeholders


In [1]:
# Web requests, data handling, and datetime
import requests
import pandas as pd
from datetime import datetime
import time

# Transformers for sentiment analysis
from transformers import pipeline

# ---------------- CONFIG ----------------
# Everything the scraping cell needs: the target product, the endpoint built
# from it, the paging/sorting options, and the request headers.

# Identifier of the BestBuy product whose reviews are fetched
PRODUCT_ID = 19320385

# Reviews endpoint for that product
BASE_URL = f"https://www.bestbuy.ca/api/reviews/v2/products/{PRODUCT_ID}/reviews"

# How many reviews to request per page
PAGE_SIZE = 25

# Sort order sent to the API.
# Options: relevancy, newest, highestRating, lowestRating, mostHelpful
SORT_BY = "relevancy"

# Query-string parameters; "page" is the cursor the scraping loop advances
params = dict(
    source="all",
    lang="en-CA",
    pageSize=PAGE_SIZE,
    page=1,
    sortBy=SORT_BY,
    hasPhotosFilter="false",
)

# Browser-like headers so the request is not rejected as automated traffic
headers = dict([
    ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36"),
    ("Accept", "application/json, text/plain, */*"),
    ("Referer", "https://www.bestbuy.ca/"),
    ("Origin", "https://www.bestbuy.ca"),
    ("Connection", "keep-alive"),
])

# Accumulator for one row dict per scraped review
all_reviews = list()


In [2]:
# ---------------- SCRAPING REVIEWS ----------------
# Walks the paginated reviews endpoint starting at params["page"], flattens
# each review into a row dict appended to all_reviews, and finally writes the
# rows to bestbuy_reviews_api.csv. The loop stops on the first non-200
# response, network/decoding error, or page that carries no reviews.

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30


def _first_present(review, *keys, default=None):
    """Return the first value under any of `keys` that is not None or ""."""
    for key in keys:
        value = review.get(key)
        if value not in (None, ""):
            return value
    return default


def _clean_text(value):
    """Return `value` as a stripped string; None becomes ""."""
    if value is None:
        return ""
    return str(value).strip()


def _normalize_date(raw):
    """Reduce `raw` to YYYY-MM-DD; return it unchanged if it does not parse."""
    try:
        return datetime.strptime(raw[:10], "%Y-%m-%d").strftime("%Y-%m-%d")
    except (TypeError, ValueError):
        # Not a string, or not an ISO-style date prefix: keep the raw value.
        return raw


while True:
    print(f"Fetching page {params['page']} ...")

    # TLS certificate verification is left at the requests default (enabled);
    # the timeout keeps a stalled connection from hanging the notebook.
    try:
        response = requests.get(
            BASE_URL, params=params, headers=headers, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        print("Error fetching page:", exc)
        break

    if response.status_code != 200:
        print("Error fetching page:", response.status_code)
        break

    try:
        data = response.json()
    except ValueError as exc:
        print("Error decoding page:", exc)
        break

    page_reviews = data.get("reviews") if isinstance(data, dict) else None
    if not page_reviews:
        break

    for r in page_reviews:
        # NOTE(review): the previously saved output has Primary Key, Review
        # Text, Date and Reviewer Name empty for every row, so the first key
        # in each lookup below apparently does not occur in the payload. The
        # alternate keys are a best guess -- confirm against a live response.
        review_id = _first_present(r, "reviewId", "id")
        title = _clean_text(r.get("title"))
        text = _clean_text(_first_present(r, "reviewText", "comment"))
        raw_rating = r.get("rating", 0)
        reviewer = _clean_text(_first_present(r, "nickname", "reviewerName"))
        date_raw = _first_present(r, "creationDate", "submissionTime", default="")

        all_reviews.append({
            "Primary Key": review_id,
            "Title": title,
            "Review Text": text,
            "Date": _normalize_date(date_raw),
            "Rating": raw_rating,
            "Source": "BestBuy Canada",
            "Reviewer Name": reviewer
        })

    # Move to next page
    params["page"] += 1
    time.sleep(1)  # polite delay to avoid rate limits

print(f"Total reviews fetched: {len(all_reviews)}")

# Save raw reviews CSV
df_reviews = pd.DataFrame(all_reviews)
df_reviews.to_csv("bestbuy_reviews_api.csv", index=False)
print("Saved raw reviews => bestbuy_reviews_api.csv")
df_reviews.head()


Fetching page 1 ...




Fetching page 2 ...




Fetching page 3 ...




Fetching page 4 ...




Fetching page 5 ...




Fetching page 6 ...




Fetching page 7 ...




Fetching page 8 ...




Fetching page 9 ...




Fetching page 10 ...




Fetching page 11 ...




Fetching page 12 ...




Fetching page 13 ...




Fetching page 14 ...




Fetching page 15 ...




Fetching page 16 ...




Total reviews fetched: 351
Saved raw reviews => bestbuy_reviews_api.csv


Unnamed: 0,Primary Key,Title,Review Text,Date,Rating,Source,Reviewer Name
0,,Great replacement for Apple AirPod Max,,,5,BestBuy Canada,
1,,I chose Sony over Bose. And I'm glad I did!!,,,5,BestBuy Canada,
2,,Fantastic Headphones with a serious flaw,,,3,BestBuy Canada,
3,,Moderate improvement over the XM4,,,4,BestBuy Canada,
4,,Ten out of 10,,,5,BestBuy Canada,


In [3]:
# ---------------- SENTIMENT ANALYSIS ----------------
# Reads the scraped reviews back from bestbuy_reviews_api.csv, labels each
# "Review Text" with the sentiment pipeline, and writes the result to
# bestbuy_reviews_with_sentiment.csv with an added "Sentiment" column.
# Rows without usable text are labeled "NEUTRAL" instead of being scored;
# rows whose scoring raises are labeled "ERROR".

print("Loading reviews for sentiment analysis...")
df = pd.read_csv("bestbuy_reviews_api.csv")
print("Total Reviews:", len(df))

print("Loading Sentiment Model (TensorFlow)...")
sentiment_model = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    framework="tf"
)

# Upper bound on tokens handed to the model per review.
MAX_TOKENS = 512

results = []

for i, text in enumerate(df["Review Text"]):
    # Empty CSV cells come back from read_csv as NaN (a float), so the
    # isinstance check filters them out along with whitespace-only text.
    if isinstance(text, str) and text.strip():
        try:
            # Let the tokenizer truncate: slicing the string would cut by
            # characters, not by the tokens the model actually limits.
            result = sentiment_model(text, truncation=True, max_length=MAX_TOKENS)[0]
            label = result["label"]
        except Exception as exc:
            # Best effort per row: report the failure and keep going so one
            # bad review does not abort the whole run.
            print(f"Sentiment failed for review {i}: {exc}")
            label = "ERROR"
    else:
        label = "NEUTRAL"

    results.append(label)

    if i % 20 == 0:
        print(f"Processed {i} reviews")

df["Sentiment"] = results

# Save CSV with sentiment
output_file = "bestbuy_reviews_with_sentiment.csv"
df.to_csv(output_file, index=False)
print("Saved sentiment reviews =>", output_file)
df.head()


Loading reviews for sentiment analysis...
Total Reviews: 351
Loading Sentiment Model (TensorFlow)...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/104 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Processed 0 reviews
Processed 20 reviews
Processed 40 reviews
Processed 60 reviews
Processed 80 reviews
Processed 100 reviews
Processed 120 reviews
Processed 140 reviews
Processed 160 reviews
Processed 180 reviews
Processed 200 reviews
Processed 220 reviews
Processed 240 reviews
Processed 260 reviews
Processed 280 reviews
Processed 300 reviews
Processed 320 reviews
Processed 340 reviews
Saved sentiment reviews => bestbuy_reviews_with_sentiment.csv


Unnamed: 0,Primary Key,Title,Review Text,Date,Rating,Source,Reviewer Name,Sentiment
0,,Great replacement for Apple AirPod Max,,,5,BestBuy Canada,,NEUTRAL
1,,I chose Sony over Bose. And I'm glad I did!!,,,5,BestBuy Canada,,NEUTRAL
2,,Fantastic Headphones with a serious flaw,,,3,BestBuy Canada,,NEUTRAL
3,,Moderate improvement over the XM4,,,4,BestBuy Canada,,NEUTRAL
4,,Ten out of 10,,,5,BestBuy Canada,,NEUTRAL
