# Sales Performance Report

Track and analyze sales performance metrics

Data source node: Loads sales transactions

In [None]:
# ===== System-managed metadata (auto-generated, understand to edit) =====
# @node_type: data_source
# @node_id: load_sales_data
# @execution_status: validated
# @name: Load Sales Data
# ===== End of system-managed metadata =====

import pandas as pd

# Read the sales transactions from the CSV file in the working directory
# into a DataFrame, then report how many rows were read.
sales_data = pd.read_csv(filepath_or_buffer='sales.csv')
print(f"Loaded {len(sales_data)} sales records")

In [None]:
# @node_id: load_sales_data
# @result_format: parquet
import pandas as pd
import os

# Show the stored parquet result for this node when the file is present;
# otherwise report the path that was looked up.
result_path = r'../projects/test_sales_performance_report/parquets/load_sales_data.parquet'
if not os.path.exists(result_path):
    print(f"Result file not found: {result_path}")
else:
    load_sales_data = pd.read_parquet(result_path)
    display(load_sales_data)

Data source node: Loads regional sales targets

In [None]:
# ===== System-managed metadata (auto-generated, understand to edit) =====
# @node_type: data_source
# @node_id: load_targets
# @execution_status: validated
# @name: Load Sales Targets
# ===== End of system-managed metadata =====

import pandas as pd

# Read the sales targets from the CSV file in the working directory into a
# DataFrame, then report the row count.
# NOTE(review): the message treats each row as one region -- confirm that
# targets.csv holds exactly one row per region.
targets = pd.read_csv(filepath_or_buffer='targets.csv')
print(f"Loaded targets for {len(targets)} regions")

In [None]:
# @node_id: load_targets
# @result_format: parquet
import pandas as pd
import os

# Show the stored parquet result for this node when the file is present;
# otherwise report the path that was looked up.
result_path = r'../projects/test_sales_performance_report/parquets/load_targets.parquet'
if not os.path.exists(result_path):
    print(f"Result file not found: {result_path}")
else:
    load_targets = pd.read_parquet(result_path)
    display(load_targets)

Compute node: Aggregates sales by region

In [None]:
# ===== System-managed metadata (auto-generated, understand to edit) =====
# @node_type: compute
# @node_id: process_sales
# @execution_status: validated
# @depends_on: [load_sales_data]
# @name: Process Sales Data
# ===== End of system-managed metadata =====

import pandas as pd

# Total the sales amount and units sold within each region.
region_groups = sales_data.groupby('region')
region_totals = region_groups.agg({
    'sales_amount': 'sum',
    'units_sold': 'sum'
})

# Turn the 'region' index back into an ordinary column so the result is a
# flat table with one row per region.
processed_sales = region_totals.reset_index()
print(processed_sales)

In [None]:
# @node_id: process_sales
# @result_format: parquet
import pandas as pd
import os

# Show the stored parquet result for this node when the file is present;
# otherwise report the path that was looked up.
result_path = r'../projects/test_sales_performance_report/parquets/process_sales.parquet'
if not os.path.exists(result_path):
    print(f"Result file not found: {result_path}")
else:
    process_sales = pd.read_parquet(result_path)
    display(process_sales)

Compute node: Calculates performance KPIs

In [None]:
# ===== System-managed metadata (auto-generated, understand to edit) =====
# @node_type: compute
# @node_id: calculate_metrics
# @execution_status: validated
# @depends_on: [process_sales, load_targets]
# @name: Calculate Performance Metrics
# ===== End of system-managed metadata =====

import pandas as pd

# Summarise the per-region aggregates into a dictionary of headline KPIs.
# NOTE(review): processed_sales holds one row per region, so 'avg_deal_size'
# is the mean of the regional totals, not a per-transaction average --
# confirm this is the intended definition.
# NOTE(review): load_targets is listed as a dependency, but the targets data
# is not read in this cell -- confirm whether a target comparison is missing.
regional_sales = processed_sales['sales_amount']
metrics = {
    'total_sales': regional_sales.sum(),
    'avg_deal_size': regional_sales.mean(),
    'by_region': processed_sales.to_dict(orient='records')
}
print(metrics)

In [None]:
# @node_id: calculate_metrics
# @result_format: parquet
import pandas as pd
import os

# Show the stored parquet result for this node when the file is present;
# otherwise report the path that was looked up.
result_path = r'../projects/test_sales_performance_report/parquets/calculate_metrics.parquet'
if not os.path.exists(result_path):
    print(f"Result file not found: {result_path}")
else:
    calculate_metrics = pd.read_parquet(result_path)
    display(calculate_metrics)

Chart node: Visualizes sales performance as a bar chart

In [None]:
# ===== System-managed metadata (auto-generated, understand to edit) =====
# @node_type: chart
# @node_id: visualize_results
# @execution_status: validated
# @depends_on: [calculate_metrics]
# @name: Visualize Results
# ===== End of system-managed metadata =====

import os

import matplotlib
# Select the non-interactive Agg backend before pyplot is imported so the
# chart is rendered straight to a file without needing a display.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Grouped bar chart comparing actual sales with the target for each region.
# NOTE(review): the figures below are hard-coded; nothing produced by the
# calculate_metrics dependency is read here -- confirm this is intended.
regions = ['North', 'South', 'East', 'West']
sales = [800000, 850000, 920000, 980000]
# Named target_amounts (not targets) so the `targets` DataFrame created by
# the Load Sales Targets cell is not overwritten in the shared kernel.
target_amounts = [750000, 900000, 840000, 960000]

fig, ax = plt.subplots(figsize=(10, 6))

# Each region gets two bars, shifted half a bar width to either side of the
# region's tick position.
x = range(len(regions))
width = 0.35
ax.bar([i - width/2 for i in x], sales, width, label='Actual Sales', color='#3498db')
ax.bar([i + width/2 for i in x], target_amounts, width, label='Target', color='#e74c3c')

ax.set_ylabel('Amount ($)', fontsize=12)
ax.set_title('Sales Performance vs Target by Region', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(regions)
ax.legend()

# Format the y axis as currency in millions of dollars
ax.yaxis.set_major_formatter(FuncFormatter(lambda value, _pos: f'${value/1e6:.1f}M'))

fig.tight_layout()

# Save the chart as a PNG file, creating the output directory if needed
os.makedirs('visualizations', exist_ok=True)
fig.savefig('visualizations/visualize_results.png', dpi=150, bbox_inches='tight')
print("✓ Chart saved to visualizations/visualize_results.png")

In [None]:
# @node_id: visualize_results
# @result_format: image
from IPython.display import Image, display
import os

# Render the saved chart inline when the PNG is present; otherwise report
# the path that was looked up.
image_path = r'../projects/test_sales_performance_report/visualizations/visualize_results.png'
if not os.path.exists(image_path):
    print(f"Image file not found: {image_path}")
else:
    display(Image(filename=image_path))