In [4]:
import openai
import json
import pandas as pd
import random
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nltk

# Load the cleaned reviews from the CSV file
df = pd.read_csv('amazon-reviews/cleaned_reviews.csv')
# Preview the first rows of the cleaned review text
print(df["cleaned_review"].head())

0    i wish would have gotten one earlier love it a...
1    i ve learned this lesson again open the packag...
2            it is so slow and lags find better option
3    roller ball stopped working within months of m...
4    i like the color and size but it few days out ...
Name: cleaned_review, dtype: object


In [None]:
# Collect the cleaned review text as a plain list of strings, skipping missing rows.
reviews = (
    df['cleaned_review']
    .dropna()
    .astype(str)
    .tolist()
)

# Split the reviews into fixed-size batches
def chunk_reviews(reviews, chunk_size=10):
    """Yield consecutive slices of ``reviews`` with at most ``chunk_size`` items each.

    Args:
        reviews: A sliceable sequence (e.g. a list of review strings).
        chunk_size: Maximum number of items per yielded slice.

    Yields:
        Slices of ``reviews`` in their original order; the last slice may be
        shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every review,
    # and a zero step fails with an unrelated-looking range() error.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    for start in range(0, len(reviews), chunk_size):
        yield reviews[start:start + chunk_size]

# Number of reviews to sample; adjust as needed.
sample_size = 20
# Fixed seed so that re-running the notebook from a fresh kernel draws the same sample.
sample_seed = 42
if len(reviews) > sample_size:
    # A dedicated Random instance keeps the module-level RNG state untouched.
    reviews = random.Random(sample_seed).sample(reviews, sample_size)

# Preprocessing setup: the steps below strip special characters and stopwords and lemmatize the remaining words

# Fetch the NLTK resources used below (stopword list and WordNet data)
nltk.download('stopwords')
nltk.download('wordnet')

# English stopwords as a set for fast membership checks
stop_words = set(stopwords.words('english'))
# Shared lemmatizer instance used by preprocess_review
lemmatizer = WordNetLemmatizer()

def preprocess_review(review):
    """Normalize a single review into a space-separated string of lemmatized words.

    Non-letter characters are replaced by spaces, the text is lowercased and
    split on whitespace, words found in the module-level ``stop_words`` set are
    dropped, and each remaining word is passed through the module-level
    ``lemmatizer``.

    Args:
        review: The review text as a string.

    Returns:
        The remaining lemmatized words joined by single spaces (an empty
        string when nothing is left).
    """
    # Replace (rather than delete) anything that is not a letter or whitespace,
    # so words joined by punctuation or digits ("well-made", "good,but") are
    # split apart instead of being glued into a single token.
    review = re.sub(r'[^a-zA-Z\s]', ' ', review)
    # Lowercase so the stopword lookup matches
    review = review.lower()
    # Tokenize on whitespace
    words = review.split()
    # Drop stopwords and lemmatize what remains.
    # NOTE(review): if the stopword list contains negations such as "not",
    # dropping them can invert the sentiment of a phrase - confirm this is intended.
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    # Reassemble into a single string
    return ' '.join(words)

# Run every sampled review through the normalization step.
reviews = list(map(preprocess_review, reviews))

# Show the normalized reviews for inspection.
print("Processed reviews:", reviews)

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/hoangtuan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /home/hoangtuan/nltk_data...




In [None]:
# Set up the asynchronous OpenAI client used by the summarization functions below
from dotenv import load_dotenv
import os
# Load variables from a .env file into the environment before reading the key
load_dotenv()
from openai import AsyncOpenAI
# The key is read from the environment rather than written into the notebook
api_key = os.getenv("OPENAI_API_KEY")
client = AsyncOpenAI(api_key=api_key)  # Never share your real API key in code!

async def summarize_features_for_buyer(reviews, *, model="gpt-3.5-turbo", max_tokens=300, temperature=0.3):
    """Ask the chat model for a buyer-oriented summary of a batch of reviews.

    The reviews are interpolated into the prompt with default f-string
    formatting, so a list is sent as its Python repr. The request goes through
    the module-level ``client``.

    Args:
        reviews: The batch of reviews to summarize (e.g. a list of strings).
        model: Name of the chat model passed to the API.
        max_tokens: Upper bound on the length of the generated reply, passed
            to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The text of the first choice with surrounding whitespace removed, or
        an empty string when the reply carries no text content.
    """
    prompt = f"""
        You will be given a batch of product reviews. Summarize them by extracting:

        - Overall sentiment (positive, negative, mixed)
        - Common PROS: list 3–5 things customers often praised
        - Common CONS: list 2–3 frequent complaints
        - Optional: 1–2 short, representative quotes
        - Final verdict: Should a potential buyer feel confident in the product?

        Here are the reviews:
        ---
        {reviews}
        ---
    """
    completion = await client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # The message content is optional in the API response; guard against None
    # so a reply without text does not raise AttributeError on .strip().
    content = completion.choices[0].message.content
    return (content or "").strip()

# Top-level await: this cell must run in an environment that supports it (e.g. a notebook cell)
summary = await summarize_features_for_buyer(reviews)
print("For buyer", summary)

Overall sentiment: Mixed

Common PROS:
1. Good gaming PC
2. Lightweight and easy to use
3. Awesome customer service
4. Good headset performance
5. Fantastic sounding speaker with a lot of volume

Common CONS:
1. Wireless mouse frequently drops connection
2. Sporadic laser mouse behavior
3. Headphone and speaker issues after a short period of use

Optional quotes:
- "Fantastic sounding speaker with a lot of volume."
- "Love the new mouse, small and light, easy to use."

Final verdict: A potential buyer may feel hesitant about the product due to the mixed reviews and issues reported with the wireless mouse and headphone/speaker. It may be worth considering other options before making a purchase.


In [15]:
async def summarize_features_for_merchant(reviews, *, model="gpt-3.5-turbo", max_tokens=300, temperature=0.3):
    """Ask the chat model for a merchant-oriented summary of a batch of reviews.

    The prompt asks for common positive and negative opinions grouped by
    aspect, plus improvement suggestions. The reviews are interpolated into
    the prompt with default f-string formatting, so a list is sent as its
    Python repr. The request goes through the module-level ``client``.

    Args:
        reviews: The batch of reviews to summarize (e.g. a list of strings).
        model: Name of the chat model passed to the API.
        max_tokens: Upper bound on the length of the generated reply, passed
            to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The text of the first choice with surrounding whitespace removed, or
        an empty string when the reply carries no text content.
    """
    prompt = f"""
        You are an assistant for a business owner who wants to improve their product based on customer feedback.

Given a list of customer reviews, summarize the most common positive and negative opinions. Group the feedback into key aspects (e.g., battery, design, performance, price, etc.). Then, give clear suggestions to improve the product based on customer concerns.

Reviews:
        ---
        {reviews}
        ---
    """
    completion = await client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # The message content is optional in the API response; guard against None
    # so a reply without text does not raise AttributeError on .strip().
    content = completion.choices[0].message.content
    return (content or "").strip()

# Top-level await: this cell must run in an environment that supports it (e.g. a notebook cell)
summary = await summarize_features_for_merchant(reviews)
print("For merchant", summary)

For merchant Positive feedback:
- Customers appreciate the good gaming experience and the lightweight design of the product.
- Some customers mentioned the fantastic sound quality of the speaker.
- The mouse is described as small, light, easy to use, and with smooth gliding.

Negative feedback:
- Some customers experienced issues with connectivity, such as the mouse dropping connection frequently.
- There were complaints about the mouse not tracking properly after a short period of use.
- A few customers received incorrect or used products, leading to disappointment.

Suggestions for improvement:
- Improve the connectivity issues with the mouse to ensure a consistent and reliable performance.
- Address the quality control issues to prevent customers from receiving incorrect or used products.
- Consider enhancing the comfort and support of the headset for a better user experience.
- Provide a more durable and long-lasting solution for the headphone speaker to prevent shorting out.
- Off