## Amazon Sales Analysis - Visualizations

Revenue trends and performance visualizations.

In [0]:
# Configuration
# The cleaned sales table is addressed by its three-part name:
# <catalog>.<schema>.<table>.
CATALOG = "db_ecom_project"
SCHEMA = "amazon_sales_schema"
CLEANED_TABLE = f"{CATALOG}.{SCHEMA}.amazon_sales_cleaned"

# The leading emoji had been stored as mojibake (its UTF-8 bytes decoded with
# a single-byte codec); it is restored to the intended bar-chart character.
print(f"📊 Visualizing: {CLEANED_TABLE}")

### 1. Load Data

In [0]:
# Load the cleaned sales table as a DataFrame; `spark` is expected to be
# supplied by the notebook runtime (it is not defined in this file).
df = spark.table(CLEANED_TABLE)

# count() triggers a job over the table; the result is printed with
# thousands separators.
record_count = df.count()
print(f"Total records: {record_count:,}")

### 2. Monthly Revenue Trend

In [0]:
from pyspark.sql.functions import concat, lpad, col, sum as spark_sum

# Total revenue per (Year, Month), in chronological order.
# "Period" is Year followed by Month left-padded with "0" to two characters.
monthly_revenue = (
    df.groupBy("Year", "Month")
    .agg(spark_sum("total_revenue").alias("Total_Revenue"))
    .withColumn(
        "Period",
        concat(col("Year"), lpad(col("Month"), 2, "0")),
    )
    .sort("Year", "Month")
)

# display() is expected to come from the notebook runtime.
display(monthly_revenue)

### 3. Category Performance

In [0]:
# Total revenue per product category, highest-grossing category first.
category_revenue = (
    df.groupBy("product_category")
    .agg(spark_sum("total_revenue").alias("Total_Revenue"))
    .sort(col("Total_Revenue").desc())
)

# display() is expected to come from the notebook runtime.
display(category_revenue)

### 4. Regional Performance

In [0]:
# Total revenue per customer region, highest-grossing region first.
region_revenue = (
    df.groupBy("customer_region")
    .agg(spark_sum("total_revenue").alias("Total_Revenue"))
    .sort(col("Total_Revenue").desc())
)

# display() is expected to come from the notebook runtime.
display(region_revenue)

### 5. Quarterly Trends

In [0]:
# Total revenue per (Year, Quarter), in chronological order.
quarterly_revenue = (
    df.groupBy("Year", "Quarter")
    .agg(spark_sum("total_revenue").alias("Total_Revenue"))
    .sort("Year", "Quarter")
)

# display() is expected to come from the notebook runtime.
display(quarterly_revenue)

### 6. Payment Method Distribution

In [0]:
# Total revenue per payment method, highest-grossing method first.
payment_revenue = (
    df.groupBy("payment_method")
    .agg(spark_sum("total_revenue").alias("Total_Revenue"))
    .sort(col("Total_Revenue").desc())
)

# display() is expected to come from the notebook runtime.
display(payment_revenue)