# In [5]:
import pandas as pd
import random
import os
from datetime import datetime

file_path = "amazon.csv"

# Load existing data
df = pd.read_csv(file_path)

# Clean column names (important!): headers carrying stray whitespace would
# not match the column names used in the lookups below.
df.columns = df.columns.str.strip()

# Get last review_id number.
# The first run of digits in each review_id is taken as its number. Ids that
# are missing or contain no digits become NaN rather than aborting the script
# (a plain .astype(int) raises on them).
df["review_id_num"] = pd.to_numeric(
    df["review_id"].astype(str).str.extract(r"(\d+)", expand=False),
    errors="coerce",
)
_max_review_id = df["review_id_num"].max()
# Fall back to 0 when no row has a numeric id, and keep a plain int so that
# ids built from this value render as "R124" rather than "R124.0".
last_review_id = 0 if pd.isna(_max_review_id) else int(_max_review_id)

# Distinct product ids; missing ids are dropped because an equality filter on
# NaN never matches any row.
products = df["product_id"].dropna().unique()

# Pool of canned headlines for generated reviews.
review_titles = [
    "Great product",
    "Worth the money",
    "Average quality",
    "Not satisfied",
    "Excellent!",
    "Would not recommend",
]

# Pool of canned body texts for generated reviews.
review_texts = [
    "The product works as expected.",
    "Quality could be better.",
    "Exceeded my expectations.",
    "Stopped working after a week.",
    "Very good value for money.",
    "Packaging was damaged but product is fine.",
]

# Generate between 10 and 20 synthetic reviews (both bounds inclusive).
num_new = random.randint(10, 20)
new_rows = []

for i in range(num_new):
    product_id = random.choice(products)

    # Select this product's rows once; every copied field below comes from
    # the first matching row, so there is no need to re-filter per column.
    product_rows = df[df["product_id"] == product_id]

    new_rows.append({
        "product_id": product_id,
        "product_name": product_rows["product_name"].iloc[0],
        "category": product_rows["category"].iloc[0],
        "discounted_price": product_rows["discounted_price"].iloc[0],
        "actual_price": product_rows["actual_price"].iloc[0],
        "discount_percentage": product_rows["discount_percentage"].iloc[0],
        "rating": round(random.uniform(1, 5), 1),
        "rating_count": None,  # will recalc later
        "about_product": product_rows["about_product"].iloc[0],
        "user_id": f"U{random.randint(10000,99999)}",
        "user_name": f"user_{random.randint(1000,9999)}",
        # Continue numbering directly after the highest existing id number.
        "review_id": f"R{last_review_id + i + 1}",
        "review_title": random.choice(review_titles),
        "review_content": random.choice(review_texts),
        "img_link": product_rows["img_link"].iloc[0],
        "product_link": product_rows["product_link"].iloc[0]
    })

# Build a frame from the generated review dicts
new_df = pd.DataFrame(new_rows)

# Drop the helper id-number column, then stack the new reviews underneath
# the existing ones with a fresh 0..n-1 index
existing = df.drop("review_id_num", axis=1)
df = pd.concat([existing, new_df], ignore_index=True)

# Per product, count the rows that have a non-null rating and write that
# count onto every row of the product
rating_counts = df["rating"].groupby(df["product_id"]).count()
df["rating_count"] = df["product_id"].map(rating_counts)

# Overwrite the source CSV with the combined data (no index column)
df.to_csv(file_path, index=False)

# Signal completion
print('done')

# Output: done
