In [1]:
# notebooks/Ulta_Web_Scrape_SQL_Analysis.ipynb

import pandas as pd
import sqlalchemy
import os
from sqlalchemy import text
from dotenv import load_dotenv

# Load environment variables from a .env file so credentials are not
# hard-coded in the notebook.
load_dotenv()

# Read DB credentials
DB_USER = os.getenv("DB_USER")
DB_PASS = os.getenv("DB_PASS")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")

# Fail early with a readable message: an unset variable would otherwise be
# interpolated into the connection URL as the literal text "None".
_missing_db_vars = [
    var_name
    for var_name, var_value in (
        ("DB_USER", DB_USER),
        ("DB_PASS", DB_PASS),
        ("DB_HOST", DB_HOST),
        ("DB_PORT", DB_PORT),
        ("DB_NAME", DB_NAME),
    )
    if var_value is None
]
if _missing_db_vars:
    raise RuntimeError(
        "Missing database environment variable(s): "
        + ", ".join(_missing_db_vars)
    )

# Connect to AWS RDS PostgreSQL instance.
# The URL is assembled from its components instead of an f-string so that
# special characters in the username/password (e.g. "@", "/", ":") are
# passed through verbatim rather than being mis-parsed as URL delimiters.
engine = sqlalchemy.create_engine(
    sqlalchemy.engine.URL.create(
        drivername="postgresql",
        username=DB_USER,
        password=DB_PASS,
        host=DB_HOST,
        # An empty DB_PORT falls back to the driver's default port.
        port=int(DB_PORT) if DB_PORT else None,
        database=DB_NAME,
    )
)

# Query 1: Most Reviewed Products
# Counts the rows per product in sql_project.ulta_reviews and keeps the ten
# products with the highest counts.
# The secondary sort key (product) makes the result deterministic: without
# it, products tied on total_reviews at the LIMIT cutoff are returned in an
# arbitrary order, so the exported top 10 could change between runs.
query_most_reviewed = '''
SELECT
    product,
    COUNT(*) AS total_reviews
FROM sql_project.ulta_reviews
GROUP BY product
ORDER BY total_reviews DESC, product ASC
LIMIT 10;
'''

# Run the query through the shared engine and write the result, without the
# DataFrame index, to the CSV export.
most_reviewed_df = pd.read_sql(query_most_reviewed, engine)
most_reviewed_df.to_csv("../../Data/Retrieve/ulta_most_reviewed.csv", index=False)
print("✅ Exported: ulta_most_reviewed.csv")

# Query 2: Review details for every product
# Selects the product, brand and review fields (title, text, location,
# verified-buyer flag, up/down votes) from sql_project.ulta_reviews,
# sorted by product.
query_reviews = """
SELECT 
    product,
    brand,
    review_title,
    review_text,
    review_location,
    verified_buyer,
    review_upvotes,
    review_downvotes
FROM sql_project.ulta_reviews
ORDER BY product;
"""

# Load the result set into a DataFrame via the shared engine.
reviews_df = pd.read_sql(sql=query_reviews, con=engine)

# Write the rows to the CSV export; the DataFrame index is not included.
reviews_df.to_csv(
    path_or_buf="../../Data/Retrieve/ulta_reviews_by_product.csv",
    index=False,
)
print("✅ Exported: ulta_reviews_by_product.csv")


✅ Exported: ulta_most_reviewed.csv


✅ Exported: ulta_reviews_by_product.csv
