# Gold Report - Python Visualization

This notebook creates visualizations and reports from the gold table data.

In [None]:
import dlt
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pyspark.sql import SparkSession

# Obtain the Spark session for this notebook, creating one named
# "GoldReporting" only if no session exists yet.
spark = (
    SparkSession.builder
    .appName("GoldReporting")
    .getOrCreate()
)

# Plot styling: apply the matplotlib style sheet first, then the seaborn
# palette (order kept as-is so the palette is set after the style sheet).
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

In [None]:
# Gold-table loader (reads through DLT)
def load_gold_data():
    """Return the DLT dataset ``gold_table`` as a pandas DataFrame.

    The dataset is looked up through the DLT pipeline (not the catalog) and
    then fully collected to the driver by ``toPandas()``.
    """
    # NOTE(review): dlt.read presumably requires running inside a DLT
    # pipeline context -- confirm this notebook is attached to one.
    return dlt.read("gold_table").toPandas()

# Materialize the gold table once; every later cell reads from df_gold.
df_gold = load_gold_data()

# Confirmation banner, row count (one row per category) and a preview,
# emitted as newline-separated fields of a single print call.
print(
    "Gold table data loaded successfully!",
    f"Number of categories: {len(df_gold)}",
    "\nSample data:",
    df_gold.head(),
    sep="\n",
)

In [None]:
# Create the 2x2 grid of per-category bar charts
def _plot_category_bars(data, position, column, title, ylabel, color,
                        label_template):
    """Draw one annotated bar-chart panel in the current 2x2 figure.

    Args:
        data: DataFrame holding a ``category`` column and the metric column.
        position: 1-based subplot index inside the 2x2 grid.
        column: Name of the metric column to plot.
        title: Panel title.
        ylabel: Y-axis label.
        color: Fill colour of the bars.
        label_template: ``str.format`` template used to render the value
            written above each bar (e.g. ``'${:,.0f}'``).
    """
    values = data[column]

    plt.subplot(2, 2, position)
    plt.bar(data['category'], values, color=color, alpha=0.7)
    plt.title(title, fontsize=14, fontweight='bold')
    plt.xlabel('Category')
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)

    # Nothing to annotate (and no maximum to take) when there are no rows.
    if values.empty:
        return

    # Labels sit 1% of the tallest bar above each bar. The offset is the same
    # for every bar, so compute it once instead of once per iteration.
    label_offset = values.max() * 0.01

    # Bar i is addressed at x == i; this assumes each category appears once
    # so the categorical axis positions line up with the row order.
    for i, v in enumerate(values):
        plt.text(i, v + label_offset, label_template.format(v),
                 ha='center', va='bottom', fontweight='bold')


# One entry per panel, in grid order:
# (column, title, y-axis label, bar colour, value-label template)
_BAR_PANELS = (
    # Sales by Category
    ('total_sales', 'Total Sales by Category',
     'Total Sales ($)', 'skyblue', '${:,.0f}'),
    # Transactions by Category
    ('total_transactions', 'Total Transactions by Category',
     'Number of Transactions', 'lightcoral', '{}'),
    # Average Transaction Value
    ('avg_transaction_value', 'Average Transaction Value by Category',
     'Average Transaction Value ($)', 'lightgreen', '${:.2f}'),
    # Unique Customers
    ('unique_customers', 'Unique Customers by Category',
     'Number of Unique Customers', 'gold', '{}'),
)

plt.figure(figsize=(12, 8))

for position, panel in enumerate(_BAR_PANELS, start=1):
    _plot_category_bars(df_gold, position, *panel)

plt.tight_layout()
plt.show()

In [None]:
# Side-by-side pie charts showing each category's share of a metric
pie_panels = (
    # Sales Distribution Pie Chart
    ('total_sales', 'Sales Distribution by Category'),
    # Quantity Distribution Pie Chart
    ('total_quantity_sold', 'Quantity Distribution by Category'),
)

plt.figure(figsize=(10, 6))

# One subplot per metric, left to right, in a 1x2 grid.
for slot, (metric, heading) in enumerate(pie_panels, start=1):
    plt.subplot(1, 2, slot)
    plt.pie(
        df_gold[metric],
        labels=df_gold['category'],
        autopct='%1.1f%%',
        startangle=90,
    )
    plt.title(heading, fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Print summary report
total_revenue = df_gold['total_sales'].sum()
total_transactions = df_gold['total_transactions'].sum()

# Overall average transaction value = total revenue / total transactions.
# Taking the mean of the per-category averages would weight every category
# equally regardless of how many transactions it has, which misstates the
# overall figure whenever category volumes differ. Fall back to 0 when there
# are no transactions to avoid dividing by zero.
overall_avg_value = (
    total_revenue / total_transactions if total_transactions else 0.0
)

print("\n" + "="*60)
print("               SALES SUMMARY REPORT")
print("="*60)
print(f"Total Revenue: ${total_revenue:,.2f}")
print(f"Total Transactions: {total_transactions:,}")
print(f"Total Items Sold: {df_gold['total_quantity_sold'].sum():,}")
print(f"Average Transaction Value: ${overall_avg_value:.2f}")
# NOTE(review): this adds up the per-category unique-customer counts, so a
# customer who bought in several categories is presumably counted once per
# category -- confirm against the gold table definition before treating this
# as a distinct-customer total.
print(f"Total Unique Customers: {df_gold['unique_customers'].sum():,}")
print("\n" + "-"*60)
print("CATEGORY BREAKDOWN:")
print("-"*60)

# One fixed-width line per category row.
for _, row in df_gold.iterrows():
    print(f"{row['category']:12} | Sales: ${row['total_sales']:8,.2f} | "
          f"Transactions: {row['total_transactions']:4} | "
          f"Avg Value: ${row['avg_transaction_value']:6.2f}")

print("="*60)
print("Report generated successfully!")