Using Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so that files stored there
# (e.g. MyDrive/cleaned_reviews.csv) can be read by path.
drive.mount('/content/drive')

# Then load your file from Drive



Mounted at /content/drive


## Summarization model

In [None]:
# Install and import
!pip install --upgrade openai
import pandas as pd
from collections import defaultdict
from openai import OpenAI

# Load the cleaned review dataset from the mounted Drive folder into a DataFrame.
# The code below reads its 'name', 'categories', 'reviews.text' and
# 'reviews.rating' columns.
df = pd.read_csv('/content/drive/MyDrive/cleaned_reviews.csv')  # Adjust path if needed

# Step 1: Convert to structured review data
def convert_df_to_json(df):
    """Aggregate review rows into one summary record per product.

    Args:
        df: DataFrame of reviews. The columns ``name``, ``categories``,
            ``reviews.text`` and ``reviews.rating`` are read.

    Returns:
        A list of dicts, one per product that has at least two usable
        reviews, with the keys ``product_name``, ``category``,
        ``avg_rating``, ``top_pros`` and ``top_complaints``. Products appear
        in the order they are first seen in ``df``.
    """
    grouped_data = defaultdict(lambda: {"reviews": [], "category": ""})

    for _, row in df.iterrows():
        product = row.get('name')
        review_text = row.get('reviews.text')
        rating = row.get('reviews.rating')

        # A row is usable only if it has a product name, a text and a rating.
        # Rows without a name used to be lumped together under a None/NaN key
        # and surfaced downstream as a bogus product.
        if pd.isnull(product) or pd.isnull(review_text) or pd.isnull(rating):
            continue

        # The category is the first comma-separated entry; surrounding
        # whitespace is stripped so " Home" and "Home" are the same category.
        raw_categories = row.get('categories')
        category = "Misc"
        if pd.notnull(raw_categories):
            category = str(raw_categories).split(',')[0].strip() or "Misc"

        entry = grouped_data[product]
        entry["reviews"].append({
            "text": str(review_text),
            "rating": float(rating),
        })
        # The category of the most recent usable row for the product wins.
        entry["category"] = category

    structured_reviews = []
    for product, details in grouped_data.items():
        reviews = details["reviews"]
        if len(reviews) < 2:
            # A single review is not enough to summarise a product.
            continue

        ratings = [review["rating"] for review in reviews]
        # Lower-case every text once and reuse it for all keyword checks.
        texts = [(review["text"], review["text"].lower()) for review in reviews]

        structured_reviews.append({
            "product_name": product,
            "category": details["category"],
            "avg_rating": sum(ratings) / len(ratings),
            # Simple keyword (substring) heuristics; at most two examples each.
            "top_pros": [
                text for text, lowered in texts
                if "good" in lowered or "great" in lowered
            ][:2],
            "top_complaints": [
                text for text, lowered in texts
                if "bad" in lowered or "disappoint" in lowered
            ][:2],
        })
    return structured_reviews

# Step 2: Group insights by category
def group_by_category(insights):
    """Bucket product summaries by their ``'category'`` value.

    Returns a ``defaultdict(list)`` mapping each category to the summaries
    that carry it, in the order they appear in ``insights``.
    """
    buckets = defaultdict(list)
    for summary in insights:
        key = summary['category']
        buckets[key].append(summary)
    return buckets

# Step 3: Build insight string for GPT
def build_insight_string(products):
    """Render the top-rated and the lowest-rated products as prompt text.

    The products are ranked by ``avg_rating`` (highest first). The first
    three are listed with their pros and complaints, followed by a
    "Worst Product" section for the last-ranked one. With three or fewer
    products, the worst product is therefore also one of the listed top
    products. An empty ``products`` raises ``IndexError``.
    """
    ranked = sorted(products, key=lambda item: item["avg_rating"], reverse=True)
    leaders = ranked[:3]
    lowest = ranked[-1]

    def _joined(snippets):
        # Comma-separated snippets, or "N/A" when there is nothing to show.
        return ", ".join(snippets) or "N/A"

    sections = []
    for position, entry in enumerate(leaders, start=1):
        sections.append(
            f"{position}. {entry['product_name']} - Rating: {entry['avg_rating']:.2f}\n"
            f"Key Pros: {_joined(entry['top_pros'])}\n"
            f"Top Complaints: {_joined(entry['top_complaints'])}\n\n"
        )

    sections.append(
        f"Worst Product:\n{lowest['product_name']} - Rating: {lowest['avg_rating']:.2f}\n"
    )
    sections.append(f"Complaints: {_joined(lowest['top_complaints'])}")

    return "".join(sections)

# Step 4: GPT-4 article generation
# Read the API key from the OPENAI_API_KEY environment variable instead of
# embedding it in the notebook, so the secret is never saved or shared with
# the file. Set it before running this cell, e.g.
#   os.environ["OPENAI_API_KEY"] = "<your key>"
import os

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def generate_article(category, insights, *, model="gpt-4", temperature=0.7, max_tokens=900):
    """Ask the chat model for a prose summary of one product category.

    Args:
        category: Category name inserted into the prompt.
        insights: Pre-formatted insight text appended to the prompt.
        model: Chat model name passed to the API (default ``"gpt-4"``).
        temperature: Sampling temperature passed to the API (default 0.7).
        max_tokens: Completion token limit passed to the API (default 900).

    Returns:
        The message content of the first choice in the API response.

    Uses the module-level ``client`` to send a single user message.
    """
    prompt = f"""
You are a professional tech writer.

Write a clear, structured summary about the product category: "{category}".

The summary should include:
1. Top 3 products in this category and their key differences.
2. Top complaints for each of those top 3 products.
3. The worst product and why users should avoid it.

Make it professional, insightful, and easy to read — no bullet points or lists.

Here are the insights to use:
{insights}

Now write the article.
"""
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content

# Step 5: Run for each category
# Summarise every product, then bucket the summaries by category.
structured = convert_df_to_json(df)
grouped = group_by_category(structured)

# Generate and print one article per category that has at least two products.
for category, products in grouped.items():
    if len(products) >= 2:
        insights = build_insight_string(products)
        article = generate_article(category, insights)

        print(f"\n{'='*80}\n📘 Category: {category}\n{'='*80}\n")
        print(article)



📘 Category: E-Readers & Home

In the product category of "E-Readers & Home", a variety of items are available that cater to different needs and uses in both professional and personal settings. The top three products in this category have been identified, along with their key differences, top complaints, and a product that users should avoid.

The Expanding Accordion File Folder Plastic Portable Document Organizer Letter Size is a standout product with a perfect rating of 5.00. Its key feature is its expandability, which provides a practical solution for organizing files in one compact and portable system. Users commend the product for its high-quality material and cost-effectiveness, alongside its portability, making it a recommended choice for those seeking an efficient and reliable document organizer. Remarkably, there have been no recorded complaints about this product, signifying a high level of customer satisfaction.

Secondly, the AmazonBasics AAA Performance Alkaline Batteries 