# OCI Services Growth Trends & Sales Planning Analysis

**Objective:** Discover growth trends in OCI service consumption, identify expansion opportunities, and develop data-driven sales strategies for increased service adoption across the tenancy.

**Analysis Date:** December 2025
**Dataset:** output_merged.csv (merged billing and usage data)

---

## 📋 Table of Contents
1. Import Required Libraries
2. Load and Explore the Merged Dataset
3. Data Cleaning and Preprocessing
4. Time Series Analysis of Service Consumption
5. Growth Rate Calculations and Trends
6. Service-Level Consumption Patterns
7. Cost Analysis and Revenue Projections
8. Identify High-Growth Services
9. Regional and Compartment Analysis
10. Forecast Future Consumption
11. Generate Sales Recommendations and Insights
12. Key Metrics and Trends Visualization

## 1. Import Required Libraries

In [None]:
# Core data processing
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import json
import warnings
warnings.filterwarnings('ignore')

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.dates import DateFormatter
import matplotlib.dates as mdates

# Statistical and ML libraries
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Configuration: pandas display options (show every column, cap printed rows
# at 100, two-decimal floats, text columns up to 100 characters).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.float_format', '{:.2f}'.format)
pd.set_option('display.max_colwidth', 100)

# Plotting style.
# The seaborn style sheets are only available under the 'seaborn-v0_8-*' names
# from matplotlib 3.6 onwards; older releases know the same sheet as
# 'seaborn-darkgrid'. Fall back instead of failing the first cell.
try:
    plt.style.use('seaborn-v0_8-darkgrid')
except OSError:
    plt.style.use('seaborn-darkgrid')
sns.set_palette('husl')
plt.rcParams['figure.figsize'] = (14, 6)
plt.rcParams['font.size'] = 10

print("‚úÖ All libraries imported successfully")
print(f"üìÖ Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

## 2. Load and Explore the Merged Dataset

In [None]:
# Load the merged dataset
file = '../output/output_merged.csv'

# Best-effort load: on any failure the error is reported and df is left as
# None so the overview below is skipped rather than raising.
try:
    df = pd.read_csv(file, low_memory=False)
    print(f"‚úÖ Dataset loaded successfully from {file}")
except Exception as e:
    print(f"‚ùå Error loading dataset: {e}")
    df = None

if df is not None:
    print(f"\nüìä Dataset Overview:")
    print(f"   Shape: {df.shape[0]:,} rows √ó {df.shape[1]} columns")
    print(f"   Memory Usage: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

    # The timestamp columns have not been parsed yet at this point, so min/max
    # operate on the raw column values as read from the CSV.
    print(f"\nüìÖ Time Coverage:")
    print(f"   Date Range: {df['timeUsageStarted'].min()} to {df['timeUsageEnded'].max()}")

    print(f"\nüí∞ Financial Summary:")
    print(f"   Total Cost: ${df['computedAmount'].sum():,.2f}")
    print(f"   Average Cost per Row: ${df['computedAmount'].mean():,.2f}")

    print(f"\nüìã Key Dimensions:")
    print(f"   Unique Services: {df['service'].nunique()}")
    print(f"   Unique Regions: {df['region'].nunique()}")
    print(f"   Unique Compartments: {df['compartmentName'].nunique()}")
    print(f"   Unique SKUs: {df['skuName'].nunique()}")

    print(f"\nüìä Column Data Types:")
    print(df.dtypes)

    print(f"\nüìä First Few Rows:")
    # A bare `df.head()` is only rendered when it is the last top-level
    # expression of a cell; nested inside this `if` its value is discarded,
    # so print it explicitly.
    print(df.head())

## 3. Data Cleaning and Preprocessing

In [None]:
# Parse both usage-window boundary columns into pandas datetimes
for boundary_col in ('timeUsageStarted', 'timeUsageEnded'):
    df[boundary_col] = pd.to_datetime(df[boundary_col])

# Calendar features, all derived from the usage start timestamp.
# The mapping order is the order in which the new columns are appended.
started = df['timeUsageStarted']
calendar_features = {
    'date': lambda ts: ts.dt.date,
    'year': lambda ts: ts.dt.year,
    'month': lambda ts: ts.dt.month,
    'year_month': lambda ts: ts.dt.strftime('%Y-%m'),
    'week': lambda ts: ts.dt.isocalendar().week,
    'day_of_week': lambda ts: ts.dt.day_name(),
    'day_of_month': lambda ts: ts.dt.day,
    'quarter': lambda ts: ts.dt.quarter,
}
for feature_name, derive in calendar_features.items():
    df[feature_name] = derive(started)

# Fill missing dimension values
df['service'] = df['service'].fillna('Unknown')

# Region: prefer the primary column, then the secondary lookup column
region_primary = df['region']
region_secondary = df['region_from_call2']
df['region'] = region_primary.fillna(region_secondary).fillna('Unknown')

# Compartment: fall back to the last '/'-separated segment of the path
compartment_primary = df['compartmentName']
compartment_leaf = df['compartmentPath'].str.split('/').str[-1]
df['compartmentName'] = compartment_primary.fillna(compartment_leaf).fillna('Unknown')

df['skuName'] = df['skuName'].fillna('Unknown SKU')

# Coerce the measures to numbers; anything unparseable or missing becomes 0
for measure_col in ('computedAmount', 'computedQuantity'):
    df[measure_col] = pd.to_numeric(df[measure_col], errors='coerce').fillna(0)

# Handle tags - parse JSON if available
def extract_tags(tag_str):
    """Parse a JSON-encoded tag list into a flat ``{key: value}`` dict.

    The payload is expected to be a JSON array of objects that each carry a
    ``key`` and a ``value`` entry. Array elements that are not objects, or
    that lack either entry, are skipped.

    Args:
        tag_str: Raw cell value from the tags column. Anything that is not
            non-empty text (NaN, None, numbers, ...) is treated as "no tags".

    Returns:
        dict mapping tag key to tag value; ``{}`` when the input is missing,
        blank, not valid JSON, or not a JSON array.
    """
    # Missing values arrive as NaN/None; only text can hold a JSON payload.
    if not isinstance(tag_str, (str, bytes, bytearray)) or not tag_str:
        return {}
    try:
        tags_list = json.loads(tag_str)
    except (ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError and undecodable bytes.
        return {}
    if not isinstance(tags_list, list):
        return {}
    try:
        return {
            tag['key']: tag['value']
            for tag in tags_list
            if isinstance(tag, dict) and 'key' in tag and 'value' in tag
        }
    except TypeError:
        # An unhashable tag key (e.g. a nested list) cannot be a dict key.
        return {}

# Decode the raw tag payload once, then lift the tags of interest into
# their own columns ('Untagged' when a row does not carry that tag).
df['tags_dict'] = df['tags'].apply(extract_tags)
tag_columns = (
    ('cost_center', 'CostCenter'),
    ('environment', 'Environment'),
    ('team', 'Team'),
)
for column_name, tag_key in tag_columns:
    df[column_name] = df['tags_dict'].apply(
        lambda tags, key=tag_key: tags.get(key, 'Untagged')
    )

# Remove duplicates if any: rows sharing all of these fields are collapsed
# to their first occurrence.
dedupe_keys = [
    'timeUsageStarted',
    'service',
    'region',
    'compartmentName',
    'skuName',
    'resourceId',
]
initial_rows = len(df)
df = df.drop_duplicates(subset=dedupe_keys)
removed_rows = initial_rows - len(df)
print(f"‚úÖ Removed {removed_rows:,} duplicate rows")

print(f"‚úÖ Data cleaning completed")
final_rows, final_columns = df.shape
print(f"üìä Final dataset: {final_rows:,} rows √ó {final_columns} columns")
first_date = df['date'].min()
last_date = df['date'].max()
print(f"üìä Date range: {first_date} to {last_date}")
cleaned_total = df['computedAmount'].sum()
print(f"üí∞ Total Cost: ${cleaned_total:,.2f}")

## 4. Time Series Analysis of Service Consumption

In [None]:
# Daily cost aggregation. 'resourceId': 'count' counts the non-null
# resourceId values per day and is exposed as transaction_count.
daily_costs = df.groupby('date').agg({
    'computedAmount': 'sum',
    'computedQuantity': 'sum',
    'service': 'nunique',
    'region': 'nunique',
    'resourceId': 'count'
}).rename(columns={'resourceId': 'transaction_count'}).reset_index()

# df['date'] holds plain date objects; convert so the column supports
# datetime arithmetic and plotting.
daily_costs['date'] = pd.to_datetime(daily_costs['date'])
daily_costs = daily_costs.sort_values('date')

# Weekly cost aggregation.
# Keyed by ISO year + ISO week. Grouping by 'year_month' here would just
# produce a second monthly table, and pairing the ISO week with the calendar
# year would mis-bucket the days around New Year.
iso_calendar = df['timeUsageStarted'].dt.isocalendar()
weekly_costs = df.groupby([
    iso_calendar['year'].rename('iso_year'),
    iso_calendar['week'].rename('iso_week'),
]).agg({
    'computedAmount': 'sum',
    'computedQuantity': 'sum',
    'service': 'nunique'
}).reset_index()

# Monthly cost aggregation
monthly_costs = df.groupby('year_month').agg({
    'computedAmount': 'sum',
    'computedQuantity': 'sum',
    'service': 'nunique',
    'region': 'nunique',
    'compartmentName': 'nunique'
}).reset_index()
monthly_costs.columns = ['year_month', 'total_cost', 'total_quantity', 'num_services', 'num_regions', 'num_compartments']

print(f"‚úÖ Time Series Analysis Completed")
print(f"\nüìä Daily Statistics:")
print(f"   Min Daily Cost: ${daily_costs['computedAmount'].min():,.2f}")
print(f"   Max Daily Cost: ${daily_costs['computedAmount'].max():,.2f}")
print(f"   Avg Daily Cost: ${daily_costs['computedAmount'].mean():,.2f}")
print(f"   Std Dev: ${daily_costs['computedAmount'].std():,.2f}")

print(f"\nüìä Monthly Statistics:")
print(f"   Min Monthly Cost: ${monthly_costs['total_cost'].min():,.2f}")
print(f"   Max Monthly Cost: ${monthly_costs['total_cost'].max():,.2f}")
print(f"   Avg Monthly Cost: ${monthly_costs['total_cost'].mean():,.2f}")

print(f"\nüìä Monthly Breakdown:")
print(monthly_costs.tail(10))

# Create comprehensive time series visualization:
# a large daily-cost panel (area + 7-day moving average + linear trend +
# peak/low markers) above two small panels (transaction volume and number of
# distinct services per day).
print("\nCreating time series visualization...")

fig = plt.figure(figsize=(18, 10))
gs = fig.add_gridspec(3, 2, hspace=0.3, wspace=0.25)

# Main chart: Daily costs with 7-day moving average and trend
ax_main = fig.add_subplot(gs[0:2, :])

# Plot daily costs as area chart
ax_main.fill_between(daily_costs['date'], daily_costs['computedAmount'],
                      alpha=0.3, color='#2196F3', label='Daily Cost')
ax_main.plot(daily_costs['date'], daily_costs['computedAmount'],
             color='#1976D2', linewidth=1.5, alpha=0.7)

# Calculate and plot 7-day moving average
if len(daily_costs) >= 7:
    daily_costs['ma_7'] = daily_costs['computedAmount'].rolling(window=7, min_periods=1).mean()
    ax_main.plot(daily_costs['date'], daily_costs['ma_7'],
                 color='#FF5722', linewidth=2.5, label='7-Day Moving Average', linestyle='-')

# Add trend line.
# z stays None when there are fewer than two days of data, so the summary
# box below can tell "no trend available" apart from a real slope instead of
# hitting an undefined (or stale) z.
z = None
if len(daily_costs) > 1:
    x_numeric = (daily_costs['date'] - daily_costs['date'].min()).dt.days
    z = np.polyfit(x_numeric, daily_costs['computedAmount'], 1)
    p = np.poly1d(z)
    ax_main.plot(daily_costs['date'], p(x_numeric),
                 color='#4CAF50', linewidth=2, linestyle='--',
                 label=f'Trend Line ({"‚Üó" if z[0] > 0 else "‚Üò"} ${z[0]:.2f}/day)', alpha=0.8)

# Highlight max and min days (idxmax/idxmin raise on an empty frame)
if not daily_costs.empty:
    max_day = daily_costs.loc[daily_costs['computedAmount'].idxmax()]
    min_day = daily_costs.loc[daily_costs['computedAmount'].idxmin()]

    ax_main.scatter([max_day['date']], [max_day['computedAmount']],
                    color='red', s=150, zorder=5, marker='^',
                    label=f'Peak: ${max_day["computedAmount"]:.2f}')
    ax_main.scatter([min_day['date']], [min_day['computedAmount']],
                    color='green', s=150, zorder=5, marker='v',
                    label=f'Low: ${min_day["computedAmount"]:.2f}')

ax_main.set_xlabel('Date', fontsize=12, fontweight='bold')
ax_main.set_ylabel('Daily Cost ($)', fontsize=12, fontweight='bold')
ax_main.set_title('Daily Cost Trend Analysis with Moving Average',
                  fontsize=14, fontweight='bold', pad=15)
ax_main.legend(loc='best', fontsize=10, framealpha=0.9)
ax_main.grid(True, alpha=0.3, linestyle='--')
ax_main.tick_params(axis='x', rotation=45)

# Format y-axis as currency
ax_main.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x:,.0f}'))

# Bottom left: Transaction count over time
ax_tx = fig.add_subplot(gs[2, 0])
ax_tx.bar(daily_costs['date'], daily_costs['transaction_count'],
          color='#9C27B0', alpha=0.7, width=0.8)
ax_tx.set_xlabel('Date', fontsize=10, fontweight='bold')
ax_tx.set_ylabel('Transactions', fontsize=10, fontweight='bold')
ax_tx.set_title('Daily Transaction Volume', fontsize=11, fontweight='bold')
ax_tx.grid(True, alpha=0.3, axis='y')
ax_tx.tick_params(axis='x', rotation=45, labelsize=8)

# Bottom right: Service diversity over time
# (daily_costs['service'] is the per-day count of distinct services)
ax_svc = fig.add_subplot(gs[2, 1])
ax_svc.plot(daily_costs['date'], daily_costs['service'],
            color='#FF9800', linewidth=2, marker='o', markersize=4, alpha=0.8)
ax_svc.fill_between(daily_costs['date'], daily_costs['service'],
                     alpha=0.2, color='#FF9800')
ax_svc.set_xlabel('Date', fontsize=10, fontweight='bold')
ax_svc.set_ylabel('Number of Services', fontsize=10, fontweight='bold')
ax_svc.set_title('Service Diversity Over Time', fontsize=11, fontweight='bold')
ax_svc.grid(True, alpha=0.3)
ax_svc.tick_params(axis='x', rotation=45, labelsize=8)

# Add summary statistics as text box
if z is not None:
    trend_summary = f'{"‚Üó Growing" if z[0] > 0 else "‚Üò Declining"} (${z[0]:.2f}/day)'
else:
    trend_summary = 'n/a (needs at least 2 days of data)'

stats_text = f"""Key Metrics:
‚Ä¢ Total Days: {len(daily_costs)}
‚Ä¢ Avg Daily: ${daily_costs['computedAmount'].mean():,.2f}
‚Ä¢ Std Dev: ${daily_costs['computedAmount'].std():,.2f}
‚Ä¢ Trend: {trend_summary}
‚Ä¢ Total: ${daily_costs['computedAmount'].sum():,.2f}"""

ax_main.text(0.02, 0.98, stats_text, transform=ax_main.transAxes,
            fontsize=9, verticalalignment='top', fontfamily='monospace',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

plt.suptitle('üìä TIME SERIES COST ANALYSIS', fontsize=16, fontweight='bold', y=0.995)

plt.show()

print("‚úÖ Time series visualization created successfully!")

## 5. Growth Rate Calculations and Trends

In [None]:
# Calculate Month-over-Month (MoM) growth, in percent.
# The first month has no predecessor, so its value is NaN.
monthly_costs['mom_growth'] = monthly_costs['total_cost'].pct_change() * 100

# Calculate Year-over-Year (YoY) growth (if available):
# pivot to month x year and compare the two most recent year columns.
yoy_data = df.groupby(['year', 'month']).agg({'computedAmount': 'sum'}).reset_index()
yoy_pivot = yoy_data.pivot_table(index='month', columns='year', values='computedAmount')
if yoy_pivot.shape[1] >= 2:
    latest_year = yoy_pivot.columns[-1]
    prev_year = yoy_pivot.columns[-2]
    # Months missing in either year come out as NaN and are reported as 0.
    yoy_growth = ((yoy_pivot[latest_year] - yoy_pivot[prev_year]) / yoy_pivot[prev_year] * 100).fillna(0)
    print(f"‚úÖ Year-over-Year Growth Available: {latest_year} vs {prev_year}")
else:
    print("‚ö†Ô∏è  Insufficient data for Year-over-Year comparison")
    yoy_growth = None

# Linear regression trend analysis.
# Regress on elapsed calendar days rather than row position so the slope is
# dollars per day even when some dates have no usage rows; this matches the
# day-offset trend line drawn in the time series chart.
elapsed_days = (daily_costs['date'] - daily_costs['date'].min()).dt.days
X = elapsed_days.to_numpy().reshape(-1, 1)
y = daily_costs['computedAmount'].values

model = LinearRegression()
model.fit(X, y)
daily_costs['trend'] = model.predict(X)

slope = model.coef_[0]
# Express the slope relative to the average daily cost; a zero average would
# otherwise divide by zero.
mean_daily_cost = daily_costs['computedAmount'].mean()
daily_growth_rate = (slope / mean_daily_cost) * 100 if mean_daily_cost else 0.0

print(f"‚úÖ Growth Rate Calculations Completed")
print(f"\nüìà Daily Trend Analysis:")
print(f"   Slope (daily change): ${slope:,.4f}")
print(f"   Daily Growth Rate: {daily_growth_rate:.3f}% per day")
# Simple (non-compounded) scaling of the daily rate to a year.
print(f"   Annualized Growth Rate: {daily_growth_rate * 365:.2f}%")

print(f"\nüìä Month-over-Month Growth:")
print(monthly_costs[['year_month', 'total_cost', 'mom_growth']].tail(12))

# Identify acceleration/deceleration:
# compare the last three MoM values with the three immediately before them,
# which is what the printed labels describe. Seven months are needed for six
# valid MoM values; with less history fall back to the first three entries.
mom_series = monthly_costs['mom_growth']
recent_mom = mom_series.tail(3).mean()
if len(mom_series) >= 7:
    earlier_mom = mom_series.iloc[-6:-3].mean()
else:
    earlier_mom = mom_series.head(3).mean()
acceleration = recent_mom - earlier_mom

print(f"\n‚ö° Growth Momentum:")
print(f"   Recent MoM (last 3 months): {recent_mom:.2f}%")
print(f"   Previous MoM (3 months prior): {earlier_mom:.2f}%")
print(f"   Acceleration: {acceleration:+.2f} percentage points")
if acceleration > 0:
    print(f"   Status: üöÄ ACCELERATING")
elif acceleration < 0:
    print(f"   Status: ‚¨áÔ∏è  DECELERATING")
else:
    print(f"   Status: ‚û°Ô∏è  STABLE")

## 6. Service-Level Consumption Patterns

In [None]:
# Service-level cost breakdown: one row per service with cost totals,
# record counts, quantity and number of distinct resources.
service_summary = df.groupby('service').agg({
    'computedAmount': ['sum', 'mean', 'count'],
    'computedQuantity': 'sum',
    'resourceId': 'nunique'
}).reset_index()
service_summary.columns = ['service', 'total_cost', 'avg_cost_per_row', 'num_records', 'total_quantity', 'num_resources']
service_summary = service_summary.sort_values('total_cost', ascending=False)
service_summary['market_share'] = (service_summary['total_cost'] / service_summary['total_cost'].sum() * 100).round(2)
service_summary['rank'] = range(1, len(service_summary) + 1)

# Top services
top_services = service_summary.head(10)
print(f"‚úÖ Service-Level Analysis Completed")
print(f"\nüìä Top 10 Services by Cost:")
print(top_services[['rank', 'service', 'total_cost', 'market_share', 'num_resources']])

# Service growth trends: monthly cost per service and its MoM change in percent
service_trends = df.groupby(['year_month', 'service']).agg({
    'computedAmount': 'sum'
}).reset_index()
service_trends = service_trends.sort_values(['service', 'year_month'])
service_trends['cost_change'] = service_trends.groupby('service')['computedAmount'].pct_change() * 100

# Calculate CAGR for each service (if enough data).
# The compounding period is one month of recorded activity, so the value is a
# compound monthly growth rate between a service's first and last month.
print(f"\nüìà Service Growth Analysis:")
service_cagr = []
# Iterate the ten services with the highest total cost.
# df['service'].unique()[:10] would only give the first ten services in row
# order, which is unrelated to cost.
for service in top_services['service']:
    service_data = service_trends[service_trends['service'] == service].sort_values('year_month')
    if len(service_data) > 1:
        first_cost = service_data.iloc[0]['computedAmount']
        last_cost = service_data.iloc[-1]['computedAmount']
        periods = len(service_data) - 1
        if first_cost > 0 and periods > 0:
            cagr = ((last_cost / first_cost) ** (1 / periods) - 1) * 100
            service_cagr.append({'service': service, 'cagr': cagr, 'current_cost': last_cost})

if service_cagr:
    service_cagr_df = pd.DataFrame(service_cagr).sort_values('cagr', ascending=False)
    print(service_cagr_df.head(10))

# Create simple service consumption visualization:
# left = top-10 services by cost (horizontal bars), right = cost share pie.
print("\nCreating service consumption visualization...")

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 7))

# Left chart: Top 10 Services by Cost (Horizontal bar chart)
top_10 = service_summary.head(10).copy()
colors_gradient = plt.cm.Blues(np.linspace(0.4, 0.9, len(top_10)))

ax1.barh(range(len(top_10)), top_10['total_cost'], color=colors_gradient)
ax1.set_yticks(range(len(top_10)))
ax1.set_yticklabels([s[:40] + '...' if len(s) > 40 else s for s in top_10['service']], fontsize=10)
ax1.set_xlabel('Total Cost ($)', fontsize=12, fontweight='bold')
ax1.set_title('Top 10 Services by Cost', fontsize=13, fontweight='bold', pad=15)
ax1.grid(True, alpha=0.3, axis='x')
ax1.invert_yaxis()  # highest-cost service on top

# Add cost labels and market share
for i, (cost, share) in enumerate(zip(top_10['total_cost'], top_10['market_share'])):
    ax1.text(cost, i, f' ${cost:,.0f} ({share:.1f}%)',
             va='center', fontsize=9, fontweight='bold')

# Right chart: Market Share Pie Chart (Top 7 + Others)
top_7 = service_summary.head(7)
other_cost = service_summary.iloc[7:]['total_cost'].sum()

pie_data = list(top_7['total_cost'])
pie_labels = [s[:25] + '...' if len(s) > 25 else s for s in top_7['service']]
# Only add the 'Others' wedge when there is something in it; with seven or
# fewer services it would be an empty 0.0% slice.
if other_cost > 0:
    pie_data.append(other_cost)
    pie_labels.append('Others')
colors_pie = plt.cm.Set3(np.linspace(0, 1, len(pie_data)))

wedges, texts, autotexts = ax2.pie(pie_data, labels=pie_labels, autopct='%1.1f%%',
                                     colors=colors_pie, startangle=90,
                                     textprops={'fontsize': 9})

for autotext in autotexts:
    autotext.set_color('white')
    autotext.set_fontweight('bold')
    autotext.set_fontsize(10)

ax2.set_title(f'Service Market Share Distribution\nTotal: ${service_summary["total_cost"].sum():,.0f}',
              fontsize=13, fontweight='bold', pad=15)

# Add summary stats box.
# "Most Resources" must come from the service with the largest resource
# count; top_10 is ordered by cost, so its first row is the most expensive
# service, not necessarily the one with the most resources.
most_resources = service_summary.loc[service_summary['num_resources'].idxmax()]

stats_text = f"""Summary:
‚Ä¢ Total Services: {len(service_summary)}
‚Ä¢ Top 10 Share: {top_10['market_share'].sum():.1f}%
‚Ä¢ Avg Cost/Service: ${service_summary['total_cost'].mean():,.0f}
‚Ä¢ Most Resources: {most_resources['service'][:25]}
  ({int(most_resources['num_resources'])} resources)"""

ax2.text(1.45, 0.5, stats_text, transform=ax2.transAxes,
         fontsize=10, verticalalignment='center', fontfamily='monospace',
         bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.3))

plt.suptitle('üì¶ SERVICE-LEVEL CONSUMPTION PATTERNS', fontsize=15, fontweight='bold')
plt.tight_layout()
plt.show()

print("‚úÖ Service consumption visualization created successfully!")

## 7. Cost Analysis and Revenue Projections

In [None]:
# Cost analysis by service category (indexed by service).
# 'transactions' is the number of rows with a non-null resourceId.
service_cost_analysis = df.groupby('service').agg({
    'computedAmount': ['sum', 'min', 'max', 'mean', 'std'],
    'computedQuantity': 'sum',
    'resourceId': 'count'
}).round(2)
service_cost_analysis.columns = ['total_cost', 'min_cost', 'max_cost', 'avg_cost', 'std_dev', 'total_quantity', 'transactions']
service_cost_analysis = service_cost_analysis.sort_values('total_cost', ascending=False)

# Calculate cost per resource.
# Divide by the number of DISTINCT resources; dividing by 'transactions'
# (a row count) yields the cost per billing row, not per resource. Both
# operands are indexed by service, so the division aligns on that index.
# A service without any resourceId gets NaN rather than inf.
resources_per_service = df.groupby('service')['resourceId'].nunique()
service_cost_analysis['cost_per_resource'] = (
    service_cost_analysis['total_cost'] / resources_per_service.replace(0, np.nan)
)

print(f"‚úÖ Cost Analysis Completed")
print(f"\nüí∞ Overall Financial Summary:")
print(f"   Total Spend: ${df['computedAmount'].sum():,.2f}")
print(f"   Average Transaction Size: ${df['computedAmount'].mean():,.2f}")
print(f"   Median Transaction Size: ${df['computedAmount'].median():,.2f}")
print(f"   Max Single Transaction: ${df['computedAmount'].max():,.2f}")
print(f"   Transactions: {len(df):,}")

print(f"\nüí∞ Service Cost Structure:")
print(service_cost_analysis.head(10))

# Revenue projections based on growth rates, starting from the most recent
# month present in monthly_costs.
current_monthly_cost = monthly_costs.iloc[-1]['total_cost']
print(f"\nüìä Revenue Projections (next 12 months):")
print(f"   Current Monthly Cost: ${current_monthly_cost:,.2f}")

# Conservative, moderate, and aggressive projections
growth_scenarios = [
    ('Conservative (5% MoM)', 0.05),
    ('Moderate (10% MoM)', 0.10),
    ('Aggressive (15% MoM)', 0.15)
]

for scenario_name, growth_rate in growth_scenarios:
    projection = current_monthly_cost
    total_12m = 0
    # Compound month by month, accumulating the projected spend.
    for _ in range(12):
        projection = projection * (1 + growth_rate)
        total_12m += projection
    print(f"\n   {scenario_name}:")
    print(f"      Month 12 Cost: ${projection:,.2f}")
    print(f"      Total 12-Month: ${total_12m:,.2f}")
    # NOTE(review): this is the flat run-rate (current month x 12) and is the
    # same for every scenario, despite the "YoY" label — confirm the wording.
    print(f"      YoY Cost: ${current_monthly_cost * 12:,.2f}")

# Cost optimization opportunities: services whose per-row cost standard
# deviation is above the 75th percentile across services.
print(f"\nüéØ Cost Optimization Opportunities:")
print(f"   Services with high variance (potential optimization): ")
high_variance = service_cost_analysis[service_cost_analysis['std_dev'] > service_cost_analysis['std_dev'].quantile(0.75)].head()
for idx, (service, row) in enumerate(high_variance.iterrows(), 1):
    print(f"      {idx}. {service}: Std Dev ${row['std_dev']:,.2f}")

## 8. Identify High-Growth Services

In [None]:
# Identify high-growth services.
# For every service with more than one row, compare its first and last
# active month and derive a compound per-month growth rate.
growth_columns = ['service', 'total_cost', 'current_monthly', 'first_monthly',
                  'growth_rate', 'num_resources', 'days_active', 'market_share']
overall_cost = df['computedAmount'].sum()  # loop-invariant, computed once

growth_analysis = []
# sort=False keeps services in order of first appearance.
for service, service_data in df.groupby('service', sort=False):
    if len(service_data) <= 1:
        continue

    # One monthly roll-up per service provides the first/last month cost
    # and the number of active months.
    monthly_totals = service_data.groupby('year_month')['computedAmount'].sum()
    if monthly_totals.empty:
        continue

    first_month_cost = monthly_totals.iloc[0]
    last_month_cost = monthly_totals.iloc[-1]
    months_active = len(monthly_totals)

    first_date = service_data['date'].min()
    last_date = service_data['date'].max()
    days_active = (last_date - first_date).days + 1

    total_cost = service_data['computedAmount'].sum()
    num_resources = service_data['resourceId'].nunique()

    # Compound growth per month between first and last active month;
    # 0 when the first month is not positive or there is only one month.
    if first_month_cost > 0 and months_active > 1:
        growth_rate = ((last_month_cost / first_month_cost) ** (1 / (months_active - 1)) - 1) * 100
    else:
        growth_rate = 0

    growth_analysis.append({
        'service': service,
        'total_cost': total_cost,
        'current_monthly': last_month_cost,
        'first_monthly': first_month_cost,
        'growth_rate': growth_rate,
        'num_resources': num_resources,
        'days_active': days_active,
        'market_share': (total_cost / overall_cost) * 100
    })

# Explicit columns keep the frame usable (and sortable) even when no service
# qualified.
growth_df = pd.DataFrame(growth_analysis, columns=growth_columns).sort_values('growth_rate', ascending=False)

print(f"‚úÖ High-Growth Services Analysis Completed")
print(f"\nüöÄ Top 10 Highest-Growth Services:")
print(growth_df[['service', 'growth_rate', 'current_monthly', 'num_resources', 'market_share']].head(10))

# Categorize services by growth-rate quartile
print(f"\nüìä Service Growth Categories:")
growth_q1 = growth_df['growth_rate'].quantile(0.25)
growth_q3 = growth_df['growth_rate'].quantile(0.75)
high_growth = growth_df[growth_df['growth_rate'] > growth_q3]
moderate_growth = growth_df[(growth_df['growth_rate'] > growth_q1) &
                             (growth_df['growth_rate'] <= growth_q3)]
low_growth = growth_df[growth_df['growth_rate'] <= growth_q1]

print(f"\nüî• High-Growth Services ({len(high_growth)}):")
if len(high_growth) > 0:
    print(high_growth[['service', 'growth_rate', 'current_monthly']].head(5))

print(f"\n‚ö° Emerging Services (First 2 months of activity):")
emerging = growth_df[growth_df['days_active'] < 60].sort_values('current_monthly', ascending=False)
if len(emerging) > 0:
    print(emerging[['service', 'current_monthly', 'num_resources']].head(5))

print(f"\nüìâ Services Approaching Saturation (Low growth, High cost):")
mature = growth_df[(growth_df['growth_rate'] < 5) & (growth_df['total_cost'] > growth_df['total_cost'].quantile(0.5))]
if len(mature) > 0:
    print(mature[['service', 'growth_rate', 'current_monthly', 'market_share']].head(5))

# Create high-growth services visualization:
# left = 15 services with the highest growth rate, right = growth vs market
# share bubble chart.
print("\nCreating high-growth services visualization...")

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 7))

# Left chart: Top 15 High-Growth Services (sorted by growth rate)
top_growth = growth_df.head(15).copy()

# Green bars for positive growth, red otherwise
colors_growth = ['#2E7D32' if x > 0 else '#D32F2F' for x in top_growth['growth_rate']]

y_pos = np.arange(len(top_growth))
ax1.barh(y_pos, top_growth['growth_rate'], color=colors_growth, alpha=0.8)
ax1.set_yticks(y_pos)
ax1.set_yticklabels([s[:35] + '...' if len(s) > 35 else s for s in top_growth['service']], fontsize=9)
ax1.set_xlabel('Growth Rate (%)', fontsize=12, fontweight='bold')
ax1.set_title('Top 15 Services by Growth Rate', fontsize=13, fontweight='bold', pad=15)
ax1.axvline(x=0, color='black', linewidth=0.8, linestyle='-', alpha=0.5)
ax1.grid(True, alpha=0.3, axis='x')
ax1.invert_yaxis()

# Add growth rate labels.
# The alignment has to be passed to text(): labels of non-positive bars are
# anchored on their right edge so they extend away from the bar instead of
# being drawn across it.
for i, (rate, cost) in enumerate(zip(top_growth['growth_rate'], top_growth['current_monthly'])):
    label = f'{rate:+.1f}% (${cost:,.0f}/mo)'
    x_pos = rate + (5 if rate > 0 else -5)
    ha = 'left' if rate > 0 else 'right'
    ax1.text(x_pos, i, label, va='center', ha=ha, fontsize=8, fontweight='bold')

# Right chart: Growth vs Market Share Bubble Chart
# (bubble area scales with resource count, colour with latest monthly cost)
ax2.scatter(growth_df['growth_rate'], growth_df['market_share'],
           s=growth_df['num_resources']*2, alpha=0.6,
           c=growth_df['current_monthly'], cmap='viridis', edgecolors='black', linewidth=0.5)

# Add labels for top services
top_to_label = growth_df.nlargest(8, 'market_share')
for _, row in top_to_label.iterrows():
    ax2.annotate(row['service'][:20],
                xy=(row['growth_rate'], row['market_share']),
                xytext=(5, 5), textcoords='offset points',
                fontsize=8, alpha=0.8,
                bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.3))

ax2.set_xlabel('Growth Rate (%)', fontsize=12, fontweight='bold')
ax2.set_ylabel('Market Share (%)', fontsize=12, fontweight='bold')
ax2.set_title('Growth vs Market Share Analysis\n(Bubble size = # Resources)',
              fontsize=13, fontweight='bold', pad=15)
ax2.axvline(x=0, color='red', linewidth=1, linestyle='--', alpha=0.5, label='Zero Growth')
ax2.axhline(y=5, color='green', linewidth=1, linestyle='--', alpha=0.5, label='5% Market Share')
ax2.grid(True, alpha=0.3)
ax2.legend(loc='upper right', fontsize=9)

# Add colorbar (collections[0] is the scatter drawn above)
cbar = plt.colorbar(ax2.collections[0], ax=ax2)
cbar.set_label('Monthly Cost ($)', rotation=270, labelpad=20, fontsize=10, fontweight='bold')

# Add summary stats
stats_text = f"""Growth Categories:
‚Ä¢ High Growth (>Q3): {len(high_growth)} services
‚Ä¢ Moderate Growth: {len(moderate_growth)} services
‚Ä¢ Low Growth (<Q1): {len(low_growth)} services
‚Ä¢ Emerging (<60d): {len(emerging)} services
‚Ä¢ Mature (low Œî, high $): {len(mature)} services

Avg Growth Rate: {growth_df['growth_rate'].mean():.1f}%
Median Growth: {growth_df['growth_rate'].median():.1f}%"""

ax2.text(1.35, 0.5, stats_text, transform=ax2.transAxes,
         fontsize=9, verticalalignment='center', fontfamily='monospace',
         bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.3))

plt.suptitle('üöÄ HIGH-GROWTH SERVICES ANALYSIS', fontsize=15, fontweight='bold')
plt.tight_layout()
plt.show()

print("‚úÖ High-growth services visualization created successfully!")

## 9. Regional and Compartment Analysis

In [None]:
# Regional analysis: cost totals plus resource/compartment/service breadth
# per region, ordered from most to least expensive.
regional_analysis = (
    df.groupby('region')
    .agg(
        total_cost=('computedAmount', 'sum'),
        avg_cost=('computedAmount', 'mean'),
        num_resources=('resourceId', 'nunique'),
        num_compartments=('compartmentName', 'nunique'),
        num_services=('service', 'nunique'),
    )
    .reset_index()
    .sort_values('total_cost', ascending=False)
)
regional_share = regional_analysis['total_cost'] / regional_analysis['total_cost'].sum() * 100
regional_analysis['market_share'] = regional_share.round(2)

# Regional growth trends: monthly cost per region, ordered by region then month
regional_trends = (
    df.groupby(['year_month', 'region'])['computedAmount']
    .sum()
    .reset_index()
    .sort_values(['region', 'year_month'])
)

# Compartment analysis: same idea as the regional table, keyed by compartment
compartment_analysis = (
    df.groupby('compartmentName')
    .agg(
        total_cost=('computedAmount', 'sum'),
        avg_cost=('computedAmount', 'mean'),
        num_records=('computedAmount', 'count'),
        num_services=('service', 'nunique'),
        num_regions=('region', 'nunique'),
        num_resources=('resourceId', 'nunique'),
    )
    .reset_index()
    .rename(columns={'compartmentName': 'compartment'})
    .sort_values('total_cost', ascending=False)
)
compartment_share = compartment_analysis['total_cost'] / compartment_analysis['total_cost'].sum() * 100
compartment_analysis['market_share'] = compartment_share.round(2)

print(f"‚úÖ Regional and Compartment Analysis Completed")
print(f"\nüåç Regional Cost Distribution:")
regional_columns = ['region', 'total_cost', 'market_share', 'num_services', 'num_resources']
print(regional_analysis[regional_columns].head(10))

print(f"\nüìä Regional Growth Hotspots:")
# Growth of a region = change from its first to its last recorded month.
# Regions with a single month, or a non-positive first month, are left out.
regional_growth = []
for region in df['region'].unique():
    region_data = regional_trends.loc[regional_trends['region'] == region].sort_values('year_month')
    if len(region_data) <= 1:
        continue
    monthly_amounts = region_data['computedAmount']
    first = monthly_amounts.iloc[0]
    last = monthly_amounts.iloc[-1]
    if not first > 0:
        continue
    growth = ((last / first) - 1) * 100
    regional_growth.append({
        'region': region,
        'growth_rate': growth,
        'current_cost': last,
        'num_months': len(region_data),
    })

if not regional_growth:
    print("   No multi-month data available for regional growth analysis")
else:
    regional_growth_df = pd.DataFrame(regional_growth).sort_values('growth_rate', ascending=False)
    print(regional_growth_df.head(10))

print(f"\nüè¢ Top Compartments by Cost:")
compartment_columns = ['compartment', 'total_cost', 'market_share', 'num_services']
print(compartment_analysis[compartment_columns].head(10))

print(f"\nüéØ Untapped Markets (Regions with low service adoption):")
# Regions using fewer distinct services than the median region, cheapest first
median_services = regional_analysis['num_services'].median()
below_median = regional_analysis['num_services'] < median_services
low_adoption = regional_analysis[below_median].sort_values('total_cost')
if len(low_adoption) > 0:
    print(low_adoption[['region', 'total_cost', 'num_services', 'num_resources']].head(10))

## 10. Forecast Future Consumption

In [None]:
# Time series forecasting with comprehensive visualizations and decision insights
print(f"‚úÖ Starting Time Series Forecast Analysis")
print("="*80)

# Prepare data for forecasting
monthly_costs_ts = monthly_costs.set_index('year_month')['total_cost']
current_monthly = monthly_costs_ts.iloc[-1]

# Build multiple forecast models
forecast_results = {}
forecast_6m = {}
forecast_12m = {}

# 1. Linear Regression Forecast (Trend-based)
try:
    X_train = np.arange(len(monthly_costs_ts)).reshape(-1, 1)
    lr_model = LinearRegression()
    lr_model.fit(X_train, monthly_costs_ts.values)
    
    # 6-month forecast
    X_forecast_6m = np.arange(len(monthly_costs_ts), len(monthly_costs_ts) + 6).reshape(-1, 1)
    forecast_lr_6m = lr_model.predict(X_forecast_6m)
    forecast_6m['Linear Trend'] = forecast_lr_6m
    
    # 12-month forecast
    X_forecast_12m = np.arange(len(monthly_costs_ts), len(monthly_costs_ts) + 12).reshape(-1, 1)
    forecast_lr_12m = lr_model.predict(X_forecast_12m)
    forecast_12m['Linear Trend'] = forecast_lr_12m
    
    print("‚úÖ Linear Regression model trained successfully")
except Exception as e:
    print(f"‚ö†Ô∏è  Linear Regression failed: {e}")

# 2. Exponential Smoothing (handles trends)
if len(monthly_costs_ts) > 3:
    try:
        model_exp = ExponentialSmoothing(monthly_costs_ts, trend='add', seasonal=None)
        fitted_exp = model_exp.fit()
        forecast_6m['Exponential Smoothing'] = fitted_exp.forecast(steps=6)
        forecast_12m['Exponential Smoothing'] = fitted_exp.forecast(steps=12)
        print("‚úÖ Exponential Smoothing model trained successfully")
    except Exception as e:
        print(f"‚ö†Ô∏è  Exponential Smoothing failed: {e}")

# 3. Moving Average Forecast (Conservative)
# Flat projection: the mean of the last (up to) three months is repeated for
# every forecast month.
try:
    ma_window = min(3, len(monthly_costs_ts))
    ma_value = monthly_costs_ts.tail(ma_window).mean()
    for horizon, target in ((6, forecast_6m), (12, forecast_12m)):
        target['Moving Average'] = np.full(horizon, ma_value)
    print("‚úÖ Moving Average forecast calculated")
except Exception as e:
    print(f"‚ö†Ô∏è  Moving Average failed: {e}")

# 4. Growth Rate Scenarios (Business Planning)
# Compound the latest monthly cost at a fraction (conservative) or a multiple
# (aggressive) of the average month-over-month growth of the last 3 months.
try:
    # Calculate recent growth trend
    recent_growth = monthly_costs_ts.pct_change().tail(3).mean()

    # pct_change() is all-NaN with fewer than two months and produces inf after
    # a zero-cost month. A non-finite rate would turn both scenarios - and the
    # ensemble that averages every model - into NaN/inf, so skip the scenarios.
    if not np.isfinite(recent_growth):
        raise ValueError(
            "recent month-over-month growth is undefined "
            "(needs at least two months with non-zero cost)"
        )

    # Conservative: 50% of recent growth
    conservative_rate = recent_growth * 0.5
    conservative_6m = [current_monthly * (1 + conservative_rate) ** i for i in range(1, 7)]
    conservative_12m = [current_monthly * (1 + conservative_rate) ** i for i in range(1, 13)]
    forecast_6m['Conservative Growth'] = np.array(conservative_6m)
    forecast_12m['Conservative Growth'] = np.array(conservative_12m)

    # Aggressive: 150% of recent growth
    aggressive_rate = recent_growth * 1.5
    aggressive_6m = [current_monthly * (1 + aggressive_rate) ** i for i in range(1, 7)]
    aggressive_12m = [current_monthly * (1 + aggressive_rate) ** i for i in range(1, 13)]
    forecast_6m['Aggressive Growth'] = np.array(aggressive_6m)
    forecast_12m['Aggressive Growth'] = np.array(aggressive_12m)

    print(f"‚úÖ Growth scenarios calculated (Recent MoM: {recent_growth*100:+.2f}%)")
except Exception as e:
    print(f"‚ö†Ô∏è  Growth scenarios failed: {e}")

# Calculate forecast summary statistics
# One row per model: average and total over the 6-month horizon, first and
# last forecast month, and the average's change versus the latest actual month.
print(f"\nüìä FORECAST SUMMARY (6-Month Horizon):")
print("-"*80)
forecast_summary = []
for model_name, values in forecast_6m.items():
    # A model may have stored a pandas Series; coerce to an array so that
    # [0] / [-1] below are positional rather than index-label lookups.
    values = np.asarray(values, dtype=float)

    avg_monthly = values.mean()
    total_6m = values.sum()
    month1 = values[0]
    month6 = values[-1]
    growth_vs_current = ((avg_monthly / current_monthly) - 1) * 100

    forecast_summary.append({
        'Model': model_name,
        'Avg Monthly': avg_monthly,
        'Total 6M': total_6m,
        'Month 1': month1,
        'Month 6': month6,
        'Growth %': growth_vs_current
    })

    print(f"\n{model_name}:")
    print(f"   Avg Monthly: ${avg_monthly:,.2f} ({growth_vs_current:+.1f}% vs current)")
    print(f"   Total 6M: ${total_6m:,.2f}")
    print(f"   Month 1: ${month1:,.2f} ‚Üí Month 6: ${month6:,.2f}")

forecast_summary_df = pd.DataFrame(forecast_summary)

# Ensemble forecast (average of all models)
# Element-wise mean across every model that produced a forecast. ensemble_6m /
# ensemble_12m are only defined when at least one model succeeded.
if forecast_6m:
    ensemble_6m = np.mean([np.asarray(v, dtype=float) for v in forecast_6m.values()], axis=0)
    ensemble_12m = np.mean([np.asarray(v, dtype=float) for v in forecast_12m.values()], axis=0)

    # Same cut-offs as the decision dashboard drawn later in this cell
    # (>= 4 models high, >= 2 medium), so the console summary and the chart
    # never report different confidence levels for the same run.
    model_count = len(forecast_6m)
    if model_count >= 4:
        confidence_label = 'High'
    elif model_count >= 2:
        confidence_label = 'Medium'
    else:
        confidence_label = 'Low'

    print(f"\nüéØ ENSEMBLE FORECAST (Average of All Models):")
    print(f"   6-Month Avg: ${ensemble_6m.mean():,.2f} (Total: ${ensemble_6m.sum():,.2f})")
    print(f"   12-Month Avg: ${ensemble_12m.mean():,.2f} (Total: ${ensemble_12m.sum():,.2f})")
    print(f"   Confidence: {confidence_label}")

# Service-level forecasts for top services
# For each of the first five services in service_summary, fit a straight line
# to its monthly cost and project six months ahead.
print(f"\n\nüìà TOP 5 SERVICES - 6-Month Forecast:")
print("-"*80)
top_services_for_forecast = service_summary.head(5)['service'].tolist()
service_forecasts = []

for service in top_services_for_forecast:
    service_monthly = df[df['service'] == service].groupby('year_month')['computedAmount'].sum()
    if len(service_monthly) <= 2:
        # Fewer than three months is too little history for a trend line
        continue

    try:
        # Simple linear forecast for each service
        X_svc = np.arange(len(service_monthly)).reshape(-1, 1)
        y_svc = service_monthly.values
        svc_model = LinearRegression()
        svc_model.fit(X_svc, y_svc)

        X_svc_forecast = np.arange(len(service_monthly), len(service_monthly) + 6).reshape(-1, 1)
        svc_forecast = svc_model.predict(X_svc_forecast)

        current_svc = service_monthly.iloc[-1]
        forecast_avg = svc_forecast.mean()
        growth_pct = ((forecast_avg / current_svc) - 1) * 100

        service_forecasts.append({
            'service': service,
            'current': current_svc,
            'forecast_avg': forecast_avg,
            'forecast_6m': svc_forecast,
            'growth': growth_pct
        })

        print(f"\n{service[:50]}:")
        print(f"   Current: ${current_svc:,.2f}/mo ‚Üí Forecast: ${forecast_avg:,.2f}/mo ({growth_pct:+.1f}%)")
        print(f"   6-Month Total: ${svc_forecast.sum():,.2f}")
    except Exception as e:
        # Keep going with the remaining services, but say which one was
        # dropped instead of silently omitting it from the charts below.
        print(f"‚ö†Ô∏è  Forecast failed for {service}: {e}")

# CREATE COMPREHENSIVE FORECAST VISUALIZATION
# A 3x3 grid; this section draws the full-width top panel with the history,
# each model's 12-month projection and the ensemble.
print("\n\nüìä Creating forecast visualization dashboard...")

fig = plt.figure(figsize=(20, 12))
gs = plt.GridSpec(3, 3, figure=fig, hspace=0.35, wspace=0.3)

# 1. Historical + Forecast Trend (Main Chart - Top Span)
ax1 = fig.add_subplot(gs[0, :])
history_months = len(monthly_costs_ts)
months_historical = range(history_months)
months_forecast = range(history_months, history_months + 12)

# Actuals
ax1.plot(
    months_historical, monthly_costs_ts.values, 'o-',
    color='navy', linewidth=2.5, markersize=8,
    label='Historical Actual', zorder=5,
)

# One dashed line per model; colours repeat if there are more models than colours
colors_forecast = ['green', 'orange', 'red', 'purple', 'brown']
model_line_style = dict(linewidth=2, alpha=0.7, marker='s', markersize=5)
for idx, (model_name, values) in enumerate(forecast_12m.items()):
    line_color = colors_forecast[idx % len(colors_forecast)]
    ax1.plot(months_forecast, values, '--', color=line_color,
             label=f'{model_name} Forecast', **model_line_style)

# Ensemble on top of the individual models
if forecast_12m:
    ax1.plot(months_forecast, ensemble_12m, color='black', linewidth=3,
             alpha=0.8, label='Ensemble Forecast', marker='D', markersize=6, zorder=6)

# Divider halfway between the last actual month and the first forecast month
split_x = history_months - 0.5
ax1.axvline(x=split_x, color='red', linestyle=':', linewidth=2, alpha=0.6)
ax1.text(split_x, ax1.get_ylim()[1]*0.95, '‚Üê Historical | Forecast ‚Üí',
         ha='center', fontsize=11, fontweight='bold',
         bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.5))

axis_label_style = dict(fontsize=12, fontweight='bold')
ax1.set_xlabel('Month Index', **axis_label_style)
ax1.set_ylabel('Total Cost ($)', **axis_label_style)
ax1.set_title('12-Month Consumption Forecast with Multiple Models', fontsize=14, fontweight='bold', pad=15)
ax1.legend(loc='upper left', fontsize=9, framealpha=0.9)
ax1.grid(True, alpha=0.3)

# 2. Forecast Comparison (Box Plot)
# Spread of each model's six monthly forecasts, against the latest actual month.
ax2 = fig.add_subplot(gs[1, 0])
if forecast_6m:
    box_data = [np.asarray(v, dtype=float) for v in forecast_6m.values()]
    box_labels = [k[:15] for k in forecast_6m.keys()]

    # Tick labels are set on the axis instead of through boxplot(labels=...):
    # that keyword was renamed to tick_labels in Matplotlib 3.9, so neither
    # spelling works cleanly on every version. Boxes are vertical by default.
    bp = ax2.boxplot(box_data, patch_artist=True)
    ax2.set_xticklabels(box_labels)

    for patch, color in zip(bp['boxes'], colors_forecast):
        patch.set_facecolor(color)
        patch.set_alpha(0.6)

    ax2.axhline(y=current_monthly, color='red', linestyle='--', linewidth=2, label='Current Monthly')
    ax2.set_ylabel('Monthly Cost ($)', fontsize=11, fontweight='bold')
    ax2.set_title('6M Forecast Distribution by Model', fontsize=12, fontweight='bold')
    ax2.tick_params(axis='x', rotation=45, labelsize=9)
    ax2.legend(fontsize=9)
    ax2.grid(True, alpha=0.3, axis='y')

# 3. Scenario Planning (Conservative vs Aggressive)
# Six-month totals for four cases. Any scenario that was not computed falls
# back to six months at the latest monthly cost.
ax3 = fig.add_subplot(gs[1, 1])

flat_total_6m = current_monthly * 6

if 'Conservative Growth' in forecast_6m:
    conservative_total_6m = forecast_6m['Conservative Growth'].sum()
else:
    conservative_total_6m = flat_total_6m

if forecast_6m:
    ensemble_total_6m = ensemble_6m.sum()
else:
    ensemble_total_6m = flat_total_6m

if 'Aggressive Growth' in forecast_6m:
    aggressive_total_6m = forecast_6m['Aggressive Growth'].sum()
else:
    aggressive_total_6m = flat_total_6m

scenarios = ['Current', 'Conservative', 'Ensemble', 'Aggressive']
scenario_values = [flat_total_6m, conservative_total_6m, ensemble_total_6m, aggressive_total_6m]
scenario_colors = ['gray', 'green', 'blue', 'red']

bars_scenario = ax3.bar(scenarios, scenario_values, color=scenario_colors,
                        alpha=0.7, edgecolor='black', linewidth=1.5)
ax3.set_ylabel('Total 6-Month Cost ($)', fontsize=11, fontweight='bold')
ax3.set_title('Scenario Planning: 6-Month Total Cost', fontsize=12, fontweight='bold')
ax3.tick_params(axis='x', rotation=20)
ax3.grid(True, alpha=0.3, axis='y')

# Dollar amount centred above each bar
for bar, val in zip(bars_scenario, scenario_values):
    height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width()/2.
    ax3.text(bar_centre, height, f'${val:,.0f}',
             ha='center', va='bottom', fontsize=10, fontweight='bold')

# 4. Top Services Forecast
# Paired horizontal bars per service: latest monthly cost next to the average
# of its six forecast months.
ax4 = fig.add_subplot(gs[1, 2])
if service_forecasts:
    svc_names, svc_current, svc_forecast = [], [], []
    for entry in service_forecasts:
        full_name = entry['service']
        # Names longer than 20 characters are cut and marked with an ellipsis
        svc_names.append((full_name[:20] + '...') if len(full_name) > 20 else full_name)
        svc_current.append(entry['current'])
        svc_forecast.append(entry['forecast_avg'])

    x_svc = np.arange(len(svc_names))
    width = 0.35

    bars1_svc = ax4.barh(x_svc - width/2, svc_current, width,
                         label='Current', color='lightblue', edgecolor='black')
    bars2_svc = ax4.barh(x_svc + width/2, svc_forecast, width,
                         label='Forecast Avg', color='orange', edgecolor='black')

    ax4.set_yticks(x_svc)
    ax4.set_yticklabels(svc_names, fontsize=9)
    ax4.set_xlabel('Monthly Cost ($)', fontsize=11, fontweight='bold')
    ax4.set_title('Service-Level Forecast', fontsize=12, fontweight='bold')
    ax4.legend(fontsize=9)
    ax4.grid(True, alpha=0.3, axis='x')

# 5. Monthly Progression (Stacked Forecast)
# Stacked area of the six-month forecasts of the first three forecast services;
# the panel is left empty when fewer than three services were forecast.
ax5 = fig.add_subplot(gs[2, :2])
if len(service_forecasts) >= 3:
    months_labels = [f'M{month}' for month in range(1, 7)]

    # Create stacked area for top 3 services
    top_three = service_forecasts[:3]
    svc_stack_labels = [entry['service'][:25] for entry in top_three]
    svc_stack_data = np.array([entry['forecast_6m'] for entry in top_three])

    ax5.stackplot(range(6), *svc_stack_data, labels=svc_stack_labels, alpha=0.7,
                  colors=['#FF6B6B', '#4ECDC4', '#45B7D1'])

    ax5.set_xticks(range(6))
    ax5.set_xticklabels(months_labels)
    ax5.set_ylabel('Monthly Cost ($)', fontsize=11, fontweight='bold')
    ax5.set_xlabel('Forecast Month', fontsize=11, fontweight='bold')
    ax5.set_title('Top 3 Services - Monthly Progression', fontsize=12, fontweight='bold')
    ax5.legend(loc='upper left', fontsize=9)
    ax5.grid(True, alpha=0.3, axis='y')

# 6. Decision Dashboard (Text Summary)
# Bottom-right panel: a monospace text box with a budget recommendation, a
# model-disagreement rating and headline numbers derived from the ensemble.
# The axes frame is hidden so only the text box is visible.
ax6 = fig.add_subplot(gs[2, 2])
ax6.axis('off')

# Calculate key metrics for decision-making
if forecast_6m:
    # Percentage change of the ensemble's 6-month average vs the latest month
    ensemble_growth = ((ensemble_6m.mean() / current_monthly) - 1) * 100
    ensemble_12m_total = ensemble_12m.sum()
    # Confidence is based purely on how many models produced a forecast
    confidence = 'HIGH' if len(forecast_6m) >= 4 else 'MEDIUM' if len(forecast_6m) >= 2 else 'LOW'

    # Determine budget recommendation
    # Bands: above +10% increase, below -10% decrease, otherwise maintain
    if ensemble_growth > 10:
        budget_action = "‚¨ÜÔ∏è  INCREASE BUDGET"
        budget_detail = f"Expect +{ensemble_growth:.1f}% growth"
    elif ensemble_growth < -10:
        budget_action = "‚¨áÔ∏è  DECREASE BUDGET"
        budget_detail = f"Expect {ensemble_growth:.1f}% decline"
    else:
        budget_action = "‚û°Ô∏è  MAINTAIN BUDGET"
        budget_detail = f"Stable growth ({ensemble_growth:+.1f}%)"

    # Risk assessment
    # NOTE: despite the name, this is the standard deviation (np.std) of the
    # per-model 6-month averages, i.e. how much the models disagree. It is
    # rated against 20% and 10% of the latest monthly cost.
    forecast_variance = np.std([v.mean() for v in forecast_6m.values()])
    if forecast_variance > current_monthly * 0.2:
        risk_level = "üî¥ HIGH VARIANCE"
        risk_action = "Multiple scenarios advised"
    elif forecast_variance > current_monthly * 0.1:
        risk_level = "üü° MODERATE VARIANCE"
        risk_action = "Monitor closely"
    else:
        risk_level = "üü¢ LOW VARIANCE"
        risk_action = "Predictable trajectory"

    # The literal below is rendered as-is in a monospace font, so its
    # line breaks and spacing are part of the layout.
    decision_text = f"""
‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë  DECISION SUPPORT DASHBOARD       ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù

üìä FORECAST CONFIDENCE: {confidence}

üí∞ BUDGET PLANNING:
   {budget_action}
   {budget_detail}
   
   6M Budget: ${ensemble_6m.sum():,.0f}
   12M Budget: ${ensemble_12m_total:,.0f}

üìà GROWTH TRAJECTORY:
   Current: ${current_monthly:,.0f}/mo
   6M Avg: ${ensemble_6m.mean():,.0f}/mo
   12M Avg: ${ensemble_12m.mean():,.0f}/mo
   
‚ö†Ô∏è  RISK ASSESSMENT:
   {risk_level}
   Variance: ${forecast_variance:,.0f}
   Action: {risk_action}

üéØ KEY RECOMMENDATIONS:
   1. Budget for {ensemble_growth:+.1f}% change
   2. Review quarterly milestones
   3. Monitor top 5 services
   4. Plan capacity adjustments
   5. {"Prepare for scale-up" if ensemble_growth > 0 else "Optimize for efficiency"}
"""
else:
    # No model produced a forecast: show a fixed notice instead of metrics
    decision_text = """
‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë  DECISION SUPPORT DASHBOARD       ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù

‚ö†Ô∏è  INSUFFICIENT DATA

Please ensure sufficient historical
data (minimum 3 months) for 
accurate forecasting.
"""

# Anchor the text at the panel's top-left corner (axes-fraction coordinates)
ax6.text(0.05, 0.95, decision_text, transform=ax6.transAxes,
         fontsize=9, verticalalignment='top', fontfamily='monospace',
         bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.4, edgecolor='darkgreen', linewidth=2))

# Finish the forecast figure, then write the per-model summary table to CSV.
import os  # local import: only needed to make sure the export folder exists

plt.suptitle('üîÆ FUTURE CONSUMPTION FORECAST & DECISION ANALYSIS', 
             fontsize=16, fontweight='bold', y=0.98)

plt.show()
print("‚úÖ Forecast visualization dashboard created successfully!")

# Export forecast data
print(f"\nüìÅ Exporting forecast data...")
if forecast_summary_df is not None and len(forecast_summary_df) > 0:
    forecast_export_path = '../output/forecast_analysis.csv'
    # to_csv() does not create missing parent directories; without this the
    # whole cell fails at the very end on a fresh checkout.
    os.makedirs(os.path.dirname(forecast_export_path), exist_ok=True)
    forecast_summary_df.to_csv(forecast_export_path, index=False)
    print(f"   Saved: {forecast_export_path}")

print("\n" + "="*80)
print("‚úÖ FORECAST ANALYSIS COMPLETE - Ready for Strategic Planning")
print("="*80)

## 11. Generate Sales Recommendations and Insights

In [None]:
# Sales recommendations cell: prints upsell, cross-sell, expansion, bundle and
# retention findings derived from the frames built in earlier sections.
section_rule = "=" * 80
print(section_rule)
print("STRATEGIC SALES RECOMMENDATIONS & ACTION ITEMS")
print(section_rule)

# 1. Upsell Opportunities
# Services growing by more than 20%, largest current monthly cost first.
print("\nüîº UPSELL OPPORTUNITIES")
print("-" * 80)

print("\n1. Services Ready for Expansion:")
fast_growing_mask = growth_df['growth_rate'] > 20
high_growth_services = growth_df[fast_growing_mask].sort_values('current_monthly', ascending=False)
if len(high_growth_services) > 0:
    for idx, (_, service) in enumerate(high_growth_services.head(5).iterrows(), 1):
        print("\n".join([
            f"\n   {idx}. {service['service']}",
            f"      Current Monthly: ${service['current_monthly']:,.2f}",
            f"      Growth Rate: {service['growth_rate']:.1f}%",
            f"      Resources: {int(service['num_resources'])}",
            "      Action: Offer advanced features, consulting, or managed services",
        ]))

# 2. Cross-sell Opportunities
# For the first five regions in regional_analysis, report those using fewer
# than 70% of the services seen anywhere in the data, with a few examples of
# services they do not use yet.
print("\n\n‚ùå CROSS-SELL OPPORTUNITIES")
print("-" * 80)
print("\n1. Services Adoption Gaps by Region:")

# The full service catalogue does not depend on the region: build it once
# instead of once (twice, counting the set) per loop iteration.
all_services = set(df['service'].dropna().unique())
max_services = len(all_services)

for region in regional_analysis['region'].head(5).values:
    # One filter per region yields both the count and the set
    region_services_set = set(df.loc[df['region'] == region, 'service'].dropna().unique())
    region_services = len(region_services_set)
    adoption_rate = (region_services / max_services) * 100 if max_services else 0.0

    if adoption_rate < 70:
        print(f"\n   {region}:")
        print(f"      Service Adoption: {adoption_rate:.1f}% ({region_services}/{max_services})")

        # Find services in other regions not in this region
        missing_services = all_services - region_services_set

        if missing_services:
            # Sorted so the three examples shown are the same on every run
            # (plain set iteration order is arbitrary for strings).
            missing_examples = sorted(str(name) for name in missing_services)[:3]
            print(f"      Missing Services: {', '.join(missing_examples)}")
            print(f"      Action: Target with service bundle offers")

# 3. Account Expansion
# Among the first 20 compartments in compartment_analysis, list those that are
# missing more than three of the services seen anywhere in the data.
print("\n\nüìà ACCOUNT EXPANSION (Compartment-level Analysis)")
print("-" * 80)

# Total number of distinct services: loop-invariant, so computed once
total_service_count = df['service'].nunique()

# Find compartments with growth potential
compartment_growth = []
for comp in compartment_analysis['compartment'].head(20).values:
    comp_data = df[df['compartmentName'] == comp]
    comp_services = comp_data['service'].nunique()
    comp_cost = comp_data['computedAmount'].sum()
    services_potential = total_service_count - comp_services

    if services_potential > 3:
        compartment_growth.append({
            'compartment': comp,
            'current_cost': comp_cost,
            'service_count': comp_services,
            'expansion_potential': services_potential
        })

if compartment_growth:
    # Biggest spenders first: they are the most valuable accounts to expand
    comp_growth_df = pd.DataFrame(compartment_growth).sort_values('current_cost', ascending=False)
    print("\nCompartments with High Expansion Potential:")
    for idx, (_, comp) in enumerate(comp_growth_df.head(5).iterrows(), 1):
        print(f"\n   {idx}. {comp['compartment']}")
        print(f"      Current Cost: ${comp['current_cost']:,.2f}")
        print(f"      Services Used: {int(comp['service_count'])}")
        print(f"      Services to Upsell: {int(comp['expansion_potential'])}")

# 4. New Market Opportunities
# Lists the first five rows of low_adoption (built in an earlier section)
# as candidate regions for a targeted campaign.
print("\n\nüåç NEW MARKET OPPORTUNITIES (Geographic Expansion)")
print("-" * 80)
print("\nRegions with Growth Potential:")

for idx, (_, region) in enumerate(low_adoption.head(5).iterrows(), 1):
    print("\n".join([
        f"\n   {idx}. {region['region']}",
        f"      Current Spend: ${region['total_cost']:,.2f}",
        f"      Services Available: {int(region['num_services'])}",
        "      Growth Potential: High (currently underutilized)",
        "      Action: Targeted sales campaign for region-specific requirements",
    ]))

# 5. Product Bundle Recommendations
# Counts, for every unordered pair of services, how many compartments use both,
# and prints the five most common pairs.
from collections import Counter
from itertools import combinations

print("\n\nüì¶ RECOMMENDED SERVICE BUNDLES")
print("-" * 80)

# Find services often used together
# A single groupby pass replaces filtering the whole frame once per
# compartment. sort=False keeps compartments in first-appearance order, so
# pairs are first seen (and ties are ranked) in a stable, data-driven order.
service_pairs = Counter()
for comp, comp_service_col in df.groupby('compartmentName', sort=False)['service']:
    # Missing service names are dropped: they cannot be sorted against strings
    comp_services = comp_service_col.dropna().unique()
    service_pairs.update(
        tuple(sorted(pair)) for pair in combinations(comp_services, 2)
    )

if service_pairs:
    common_pairs = service_pairs.most_common(5)
    print("\nMost Common Service Combinations:")
    for idx, (pair, count) in enumerate(common_pairs, 1):
        print(f"   {idx}. {pair[0]} + {pair[1]} (used together in {count} compartments)")

# 6. Retention Focus
# Services whose growth rate is below -10%, largest current monthly cost first.
print("\n\n‚ö†Ô∏è  RETENTION FOCUS - Services at Risk")
print("-" * 80)

shrinking_mask = growth_df['growth_rate'] < -10
declining_services = growth_df[shrinking_mask].sort_values('current_monthly', ascending=False)
if len(declining_services) == 0:
    print("\n‚úÖ All tracked services showing stable or positive growth - low churn risk")
else:
    print("\nServices with Declining Usage (Potential Churn Risk):")
    for idx, (_, service) in enumerate(declining_services.head(5).iterrows(), 1):
        print("\n".join([
            f"\n   {idx}. {service['service']}",
            f"      Current Monthly Cost: ${service['current_monthly']:,.2f}",
            f"      Decline Rate: {service['growth_rate']:.1f}%",
            "      Action: Proactive support, optimization, feature showcases",
        ]))

## 12. Sales Intelligence Dashboard - What to Sell & Where

In [None]:
# SALES INTELLIGENCE DASHBOARD - Actionable Insights for Revenue Growth
print("="*80)
print("üéØ SALES INTELLIGENCE DASHBOARD - WHAT TO SELL & WHERE")
print("="*80)

# ========================================
# 1. REVENUE CONCENTRATION ANALYSIS
# ========================================
print("\nüí∞ REVENUE CONCENTRATION ANALYSIS:")
print("-"*80)

# Calculate 80/20 rule - which services drive 80% of revenue
# Services are ranked by total cost and given a running share of the total.
service_revenue_sorted = service_summary.sort_values('total_cost', ascending=False).copy()
service_revenue_sorted['cumulative_pct'] = (service_revenue_sorted['total_cost'].cumsum() / 
                                             service_revenue_sorted['total_cost'].sum() * 100)

# Number of top services needed to REACH 80%: every service still below the
# mark plus the one that crosses it. Counting only rows with a cumulative
# share <= 80 leaves that last service out, under-reports the revenue covered,
# and yields 0 when the single largest service already exceeds 80%.
services_below_80pct = int((service_revenue_sorted['cumulative_pct'] < 80).sum())
services_for_80pct = min(services_below_80pct + 1, len(service_revenue_sorted))

print(f"\nüéØ PARETO PRINCIPLE (80/20 Rule):")
print(f"   ‚Ä¢ {services_for_80pct} services generate 80% of revenue (${service_revenue_sorted.head(services_for_80pct)['total_cost'].sum():,.0f})")
print(f"   ‚Ä¢ {len(service_summary) - services_for_80pct} services generate 20% of revenue")
print(f"   ‚Üí DECISION: Focus retention efforts on top {services_for_80pct} services")

# Identify high-value services with expansion potential
# For the ten highest-revenue services: the share of compartments that use the
# service and how many compartments do not use it yet.
high_value_services = service_revenue_sorted.head(10).copy()
high_value_with_penetration = []

# The compartment total is the same for every service, so count it once
total_comps = len(df['compartmentName'].unique())

for _, svc in high_value_services.iterrows():
    svc_name = svc['service']
    # Calculate penetration (% of compartments using this service)
    comps_using = len(df[df['service'] == svc_name]['compartmentName'].unique())
    penetration = (comps_using / total_comps) * 100

    high_value_with_penetration.append({
        'service': svc_name,
        'revenue': svc['total_cost'],
        'market_share': svc['market_share'],
        'penetration': penetration,
        'comps_using': comps_using,
        'untapped_comps': total_comps - comps_using
    })

hvwp_df = pd.DataFrame(high_value_with_penetration)
print(f"\nüíé HIGH-VALUE SERVICES - EXPANSION POTENTIAL:")
print(hvwp_df[['service', 'revenue', 'penetration', 'untapped_comps']].head(5))

# ========================================
# 2. REGIONAL SALES OPPORTUNITIES
# ========================================
print("\n\nüåç REGIONAL SALES OPPORTUNITIES:")
print("-"*80)

# Calculate regional opportunity scores
# Score = share of the service catalogue a region does not use, weighted by the
# region's current spend (in units of 100,000).
regional_opportunities = []

# The catalogue of all services is region-independent: build it once
all_services = set(df['service'].unique())

for _, region_data in regional_analysis.head(10).iterrows():
    region_name = region_data['region']
    region_cost = region_data['total_cost']
    region_services = region_data['num_services']

    # Services available but not used in this region
    services_in_region = set(df[df['region'] == region_name]['service'].unique())
    missing_services = all_services - services_in_region

    # Calculate potential based on other regions' usage
    potential = len(missing_services) / len(all_services) * 100

    regional_opportunities.append({
        'region': region_name,
        'current_revenue': region_cost,
        'active_services': region_services,
        'missing_services': len(missing_services),
        'expansion_potential_pct': potential,
        'opportunity_score': potential * (region_cost / 100000)  # Weighted by current spend
    })

# Explicit columns keep sort_values() working when no region was analysed
reg_opp_df = pd.DataFrame(
    regional_opportunities,
    columns=['region', 'current_revenue', 'active_services',
             'missing_services', 'expansion_potential_pct', 'opportunity_score'],
).sort_values('opportunity_score', ascending=False)

print(f"\nüéØ TOP 5 REGIONS FOR SERVICE EXPANSION:")
# Number the rows by their position in the sorted ranking. The frame's index
# still carries the pre-sort row labels, so using it prints scrambled ranks.
for rank, (_, row) in enumerate(reg_opp_df.head(5).iterrows(), 1):
    print(f"\n{rank}. {row['region']}")
    print(f"   Current Revenue: ${row['current_revenue']:,.0f}")
    print(f"   Active Services: {row['active_services']}")
    print(f"   Missing Services: {row['missing_services']} ({row['expansion_potential_pct']:.1f}% untapped)")
    print(f"   ‚Üí ACTION: Introduce {min(5, row['missing_services'])} new services to this region")

# ========================================
# 3. CUSTOMER WALLET SHARE ANALYSIS
# ========================================
print("\n\nüíº CUSTOMER WALLET SHARE ANALYSIS:")
print("-"*80)

# Identify high-spending compartments with low service diversity (high potential for cross-sell)
comp_wallet_analysis = compartment_analysis.copy()
# +1 in the denominator avoids dividing by zero for zero-cost compartments
comp_wallet_analysis['services_per_dollar'] = comp_wallet_analysis['num_services'] / (comp_wallet_analysis['total_cost'] + 1)

# High spend, low diversity = prime cross-sell targets
# (top quartile by cost AND fewer services than the median compartment)
cross_sell_targets = comp_wallet_analysis[
    (comp_wallet_analysis['total_cost'] > comp_wallet_analysis['total_cost'].quantile(0.75)) &
    (comp_wallet_analysis['num_services'] < comp_wallet_analysis['num_services'].median())
].sort_values('total_cost', ascending=False)

print(f"\nüéØ TOP 10 CROSS-SELL PRIORITY ACCOUNTS:")
print(f"   (High Revenue + Low Service Diversity = High Expansion Potential)\n")

# Size of the service catalogue: loop-invariant, so computed once
total_service_count = len(df['service'].unique())

# Number accounts by their position in the ranking; the frame's index holds
# the original compartment_analysis labels, which are not ranks.
for rank, (_, row) in enumerate(cross_sell_targets.head(10).iterrows(), 1):
    potential_services = total_service_count - row['num_services']
    print(f"{rank}. {row['compartment'][:50]}")
    print(f"   Revenue: ${row['total_cost']:,.0f} | Services: {row['num_services']} | Potential Add: {potential_services}")

# NOTE(review): avg_services_per_comp is defined outside this section, and the
# x30 factor on the mean line-item amount is a heuristic - confirm its intent.
total_cross_sell_potential = len(cross_sell_targets) * avg_services_per_comp * (df['computedAmount'].mean() * 30)
print(f"\nüí∞ TOTAL CROSS-SELL POTENTIAL: ${total_cross_sell_potential:,.0f}")
print(f"   ‚Üí {len(cross_sell_targets)} high-value accounts √ó avg service revenue")

# ========================================
# 4. TRENDING SERVICES - WHAT'S HOT
# ========================================
print("\n\nüî• TRENDING SERVICES - WHAT TO PRIORITIZE:")
print("-"*80)

# Identify services with positive growth AND substantial revenue
# Left join keeps every service in growth_df; on a column-name clash the
# service_summary column gets the '_svc' suffix.
growth_revenue_matrix = growth_df.merge(
    service_summary[['service', 'total_cost', 'market_share']], 
    on='service', 
    how='left',
    suffixes=('', '_svc')
)

# Hot services: High growth + decent market share
hot_services = growth_revenue_matrix[
    (growth_revenue_matrix['growth_rate'] > 0) & 
    (growth_revenue_matrix['market_share'] > 1)
].sort_values('growth_rate', ascending=False)

if len(hot_services) > 0:
    print(f"\nüî• HOT SERVICES (Positive Growth + >1% Market Share):")
    # Rank by position in the sorted frame; the index labels come from the
    # merge and no longer reflect the order after sort_values().
    for rank, (_, svc) in enumerate(hot_services.head(5).iterrows(), 1):
        print(f"\n{rank}. {svc['service']}")
        print(f"   Growth Rate: +{svc['growth_rate']:.1f}% | Market Share: {svc['market_share']:.1f}%")
        print(f"   Current Revenue: ${svc['current_monthly']:,.0f}/mo")
        print(f"   ‚Üí ACTION: Push this service to all regions & high-value accounts")
else:
    print("\n‚ö†Ô∏è  No services showing positive growth with >1% market share")
    print("   ‚Üí FOCUS: Retention and optimization strategies")

# Cold services: Declining but still generating revenue
declining_services = growth_revenue_matrix[
    (growth_revenue_matrix['growth_rate'] < -10) & 
    (growth_revenue_matrix['market_share'] > 2)
].sort_values('market_share', ascending=False)

if len(declining_services) > 0:
    print(f"\n‚ùÑÔ∏è  AT-RISK SERVICES (Declining >10% + >2% Market Share):")
    for rank, (_, svc) in enumerate(declining_services.head(3).iterrows(), 1):
        print(f"\n{rank}. {svc['service']}")
        print(f"   Decline Rate: {svc['growth_rate']:.1f}% | Market Share: {svc['market_share']:.1f}%")
        print(f"   Current Revenue: ${svc['current_monthly']:,.0f}/mo")
        print(f"   ‚Üí ACTION: Retention campaign + investigate churn reasons")

# ========================================
# CREATE SALES INTELLIGENCE VISUALIZATION
# ========================================
# New 4x3 figure; this section draws the top-left Pareto panel.
print("\n\nüìä Creating Sales Intelligence Dashboard...")

fig = plt.figure(figsize=(20, 14))
gs = plt.GridSpec(4, 3, figure=fig, hspace=0.4, wspace=0.35)

# 1. REVENUE CONCENTRATION (Top Left - Pareto Chart)
# Bars: revenue of the 20 largest services. Line on a second y-axis: their
# cumulative share, with a reference line at 80%.
ax1 = fig.add_subplot(gs[0, 0])
services_subset = service_revenue_sorted.head(20)
rank_positions = range(len(services_subset))

ax1.bar(rank_positions, services_subset['total_cost'],
        color='steelblue', alpha=0.7, label='Revenue')

ax1_twin = ax1.twinx()
ax1_twin.plot(rank_positions, services_subset['cumulative_pct'],
              color='red', marker='o', linewidth=2.5, markersize=6, label='Cumulative %')
ax1_twin.axhline(y=80, color='orange', linestyle='--', linewidth=2, label='80% Mark')

pareto_label_style = dict(fontsize=10, fontweight='bold')
ax1.set_xlabel('Services (Ranked)', **pareto_label_style)
ax1.set_ylabel('Revenue ($)', color='steelblue', **pareto_label_style)
ax1_twin.set_ylabel('Cumulative %', color='red', **pareto_label_style)
ax1.set_title(f'üéØ Revenue Concentration\n({services_for_80pct} services = 80% revenue)',
              fontsize=11, fontweight='bold')

# Each axis carries its own legend, placed in opposite corners
ax1.legend(loc='upper left', fontsize=8)
ax1_twin.legend(loc='upper right', fontsize=8)
ax1.grid(True, alpha=0.3, axis='y')

# 2. SERVICE PENETRATION - Expansion Opportunities
# One 100%-wide stacked bar per service: share of compartments using it
# (green) and the remaining gap (coral).
ax2 = fig.add_subplot(gs[0, 1])
penetration_data = hvwp_df.head(8).copy()
x_pen = np.arange(len(penetration_data))

pen_pct = penetration_data['penetration']
gap_pct = 100 - pen_pct

bars_pen = ax2.barh(x_pen, pen_pct, color='green', alpha=0.6, label='Current Penetration')
bars_gap = ax2.barh(x_pen, gap_pct, left=pen_pct,
                    color='lightcoral', alpha=0.6, label='Expansion Gap')

# Long service names are cut to 25 characters plus an ellipsis
short_names = [(name[:25] + '...') if len(name) > 25 else name
               for name in penetration_data['service']]
ax2.set_yticks(x_pen)
ax2.set_yticklabels(short_names, fontsize=9)
ax2.set_xlabel('Penetration Rate (%)', fontsize=10, fontweight='bold')
ax2.set_title('üíé Service Penetration\n(% of Compartments Using)', fontsize=11, fontweight='bold')
ax2.legend(fontsize=8)
ax2.grid(True, alpha=0.3, axis='x')

# Add penetration labels
# The gap label is only drawn when the gap segment is wider than 10 points
segment_label_style = dict(ha='center', va='center', fontsize=8, fontweight='bold')
for i, (pen, gap) in enumerate(zip(pen_pct, gap_pct)):
    ax2.text(pen/2, i, f'{pen:.0f}%', color='white', **segment_label_style)
    if gap > 10:
        ax2.text(pen + gap/2, i, f'{gap:.0f}% gap', **segment_label_style)

# 3. REGIONAL OPPORTUNITY HEATMAP
# Horizontal bars of the opportunity score for the eight best regions, sorted
# ascending so the highest score is drawn at the top; colour follows the score
# relative to the largest one shown.
ax3 = fig.add_subplot(gs[0, 2])
reg_viz = reg_opp_df.head(8).sort_values('opportunity_score')
region_scores = reg_viz['opportunity_score']
colors_reg_opp = plt.cm.RdYlGn(region_scores / region_scores.max())

bar_slots = range(len(reg_viz))
bars_reg = ax3.barh(bar_slots, region_scores, color=colors_reg_opp, edgecolor='black')
ax3.set_yticks(bar_slots)
ax3.set_yticklabels(reg_viz['region'], fontsize=9)
ax3.set_xlabel('Opportunity Score', fontsize=10, fontweight='bold')
ax3.set_title('üåç Regional Expansion Priority\n(Weighted by Current Revenue)', fontsize=11, fontweight='bold')
ax3.grid(True, alpha=0.3, axis='x')

# Add missing services count
# Written at the middle of each bar
for i, (score, missing) in enumerate(zip(region_scores, reg_viz['missing_services'])):
    ax3.text(score * 0.5, i, f'{missing} svcs',
             ha='center', va='center', fontsize=8, fontweight='bold', color='white')

# 4. CROSS-SELL TARGET MATRIX (Spend vs Services)
# Scatter of the first 100 compartments in compartment_analysis: x = number of
# services used, y = total cost. Median lines split the plot into four labelled
# quadrants, and the priority cross-sell accounts are overlaid as red stars.
ax4 = fig.add_subplot(gs[1, :2])
scatter_data = compartment_analysis.head(100).copy()

# Calculate bubble sizes based on potential
# potential = (services not yet used) x (total cost / 10,000)
# NOTE(review): a compartment with negative total cost would give a negative
# marker size here - confirm whether such rows can occur.
scatter_data['potential'] = (len(df['service'].unique()) - scatter_data['num_services']) * (scatter_data['total_cost'] / 10000)

# Bubble colour encodes the same service count as the x-axis
scatter = ax4.scatter(scatter_data['num_services'], scatter_data['total_cost'],
                      s=scatter_data['potential']*10, alpha=0.6, 
                      c=scatter_data['num_services'], cmap='viridis', edgecolors='black', linewidth=0.5)

# Highlight cross-sell targets
# Only the ten largest targets are starred; zorder keeps them above the bubbles
if len(cross_sell_targets) > 0:
    cs_plot_data = cross_sell_targets.head(10)
    ax4.scatter(cs_plot_data['num_services'], cs_plot_data['total_cost'],
                s=500, marker='*', color='red', edgecolors='black', linewidth=2,
                label='üéØ Priority Targets', zorder=10)

# Add quadrant lines
# Medians are taken over the plotted subset, not over all compartments
median_cost = scatter_data['total_cost'].median()
median_svcs = scatter_data['num_services'].median()
ax4.axhline(y=median_cost, color='red', linestyle='--', linewidth=1.5, alpha=0.5)
ax4.axvline(x=median_svcs, color='red', linestyle='--', linewidth=1.5, alpha=0.5)

# Label quadrants
# Each label is anchored at a quantile of the plotted data (25%/75% of the
# service count, 15%/85% of the cost) so it lands inside its quadrant.
ax4.text(scatter_data['num_services'].quantile(0.75), scatter_data['total_cost'].quantile(0.85),
         'Enterprise\n(Retain)', ha='center', fontsize=9, fontweight='bold',
         bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.5))
ax4.text(scatter_data['num_services'].quantile(0.25), scatter_data['total_cost'].quantile(0.85),
         'HIGH CROSS-SELL\n(Expand Services)', ha='center', fontsize=9, fontweight='bold',
         bbox=dict(boxstyle='round', facecolor='orange', alpha=0.6))
ax4.text(scatter_data['num_services'].quantile(0.75), scatter_data['total_cost'].quantile(0.15),
         'Diverse Small\n(Optimize)', ha='center', fontsize=9, fontweight='bold',
         bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.5))
ax4.text(scatter_data['num_services'].quantile(0.25), scatter_data['total_cost'].quantile(0.15),
         'Starter\n(Upsell)', ha='center', fontsize=9, fontweight='bold',
         bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.5))

ax4.set_xlabel('Number of Services Used', fontsize=11, fontweight='bold')
ax4.set_ylabel('Total Revenue ($)', fontsize=11, fontweight='bold')
ax4.set_title('üíº Customer Wallet Share Matrix\n(Bubble size = Cross-sell Potential)', 
              fontsize=12, fontweight='bold')
ax4.legend(loc='upper left', fontsize=9)
ax4.grid(True, alpha=0.3)
# Colour bar for the service-count colouring of the bubbles
cbar = plt.colorbar(scatter, ax=ax4)
cbar.set_label('Service Diversity', fontsize=9)

# 5. TRENDING SERVICES - Hot vs Cold
ax5 = fig.add_subplot(gs[1, 2])

# Combine hot and declining for visualization
trending_viz = []
if len(hot_services) > 0:
    for _, svc in hot_services.head(5).iterrows():
        trending_viz.append({'service': svc['service'][:20], 'growth': svc['growth_rate'], 'type': 'Hot'})
if len(declining_services) > 0:
    for _, svc in declining_services.head(5).iterrows():
        trending_viz.append({'service': svc['service'][:20], 'growth': svc['growth_rate'], 'type': 'Cold'})

if trending_viz:
    trend_df = pd.DataFrame(trending_viz).sort_values('growth')
    colors_trend = ['green' if g > 0 else 'red' for g in trend_df['growth']]
    
    bars_trend = ax5.barh(range(len(trend_df)), trend_df['growth'], color=colors_trend, alpha=0.7, edgecolor='black')
    ax5.set_yticks(range(len(trend_df)))
    ax5.set_yticklabels(trend_df['service'], fontsize=8)
    ax5.set_xlabel('Growth Rate (%)', fontsize=10, fontweight='bold')
    ax5.set_title('üî• Trending Services\n(Green=Push, Red=Retain)', fontsize=11, fontweight='bold')
    ax5.axvline(x=0, color='black', linestyle='-', linewidth=2)
    ax5.grid(True, alpha=0.3, axis='x')
    
    # Add labels
    for i, (bar, val) in enumerate(zip(bars_trend, trend_df['growth'])):
        label_x = val + (5 if val > 0 else -5)
        ax5.text(label_x, i, f'{val:+.1f}%', va='center', fontsize=8, fontweight='bold')
else:
    ax5.text(0.5, 0.5, 'Insufficient growth data', ha='center', va='center', 
             fontsize=12, transform=ax5.transAxes)
    ax5.axis('off')

# 6-9. SERVICE-SPECIFIC REVENUE OPPORTUNITIES
top_services_for_detail = service_summary.head(4)['service'].values

for plot_idx, service_name in enumerate(top_services_for_detail):
    ax_detail = fig.add_subplot(gs[2 + (plot_idx // 2), plot_idx % 2])
    
    # Get service data
    svc_data = df[df['service'] == service_name].copy()
    svc_regions = svc_data.groupby('region')['computedAmount'].sum().sort_values(ascending=False).head(8)
    
    # Calculate penetration per region
    svc_comps_per_region = svc_data.groupby('region')['compartmentName'].nunique()
    total_comps_per_region = df.groupby('region')['compartmentName'].nunique()
    penetration_per_region = (svc_comps_per_region / total_comps_per_region * 100).reindex(svc_regions.index, fill_value=0)
    
    # Plot
    x_svc = np.arange(len(svc_regions))
    bars_svc = ax_detail.bar(x_svc, svc_regions.values, color='teal', alpha=0.7, edgecolor='black')
    
    # Color code by penetration
    for bar, pen in zip(bars_svc, penetration_per_region):
        if pen > 50:
            bar.set_color('darkgreen')
        elif pen > 25:
            bar.set_color('orange')
        else:
            bar.set_color('red')
    
    ax_detail.set_xticks(x_svc)
    ax_detail.set_xticklabels([r[:12] for r in svc_regions.index], rotation=45, ha='right', fontsize=8)
    ax_detail.set_ylabel('Revenue ($)', fontsize=9, fontweight='bold')
    ax_detail.set_title(f'{service_name[:30]}\nRegional Distribution', fontsize=10, fontweight='bold')
    ax_detail.grid(True, alpha=0.3, axis='y')
    
    # Add penetration labels
    for i, (bar, rev, pen) in enumerate(zip(bars_svc, svc_regions.values, penetration_per_region)):
        ax_detail.text(i, bar.get_height(), f'{pen:.0f}%', ha='center', va='bottom', 
                      fontsize=7, fontweight='bold')

# 10. ACTIONABLE INSIGHTS SUMMARY
ax_summary = fig.add_subplot(gs[3, 2])
ax_summary.axis('off')

summary_text = f"""
‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë   üéØ ACTIONABLE SALES PRIORITIES      ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù

1Ô∏è‚É£  RETENTION FOCUS
   ‚Ä¢ Protect top {services_for_80pct} services
   ‚Ä¢ ${service_revenue_sorted.head(services_for_80pct)['total_cost'].sum():,.0f} at risk

2Ô∏è‚É£  CROSS-SELL TARGETS
   ‚Ä¢ {len(cross_sell_targets)} high-value accounts
   ‚Ä¢ Low service diversity = high potential
   ‚Ä¢ Est. Potential: ${total_cross_sell_potential:,.0f}

3Ô∏è‚É£  REGIONAL EXPANSION
   ‚Ä¢ Top region: {reg_opp_df.iloc[0]['region']}
   ‚Ä¢ {reg_opp_df.iloc[0]['missing_services']} services to introduce
   ‚Ä¢ Focus on proven services from other regions

4Ô∏è‚É£  TRENDING SERVICES
   ‚Ä¢ Hot: {len(hot_services)} services growing
   ‚Ä¢ Cold: {len(declining_services)} services declining
   ‚Ä¢ Push hot services to all regions

5Ô∏è‚É£  QUICK WINS
   ‚Ä¢ Target compartments: <10 services + high spend
   ‚Ä¢ Introduce top 3 bundles
   ‚Ä¢ Focus on top 3 regions

üí∞ TOTAL OPPORTUNITY:
   Cross-sell + Upsell + Regional Expansion
   = ${total_cross_sell_potential + (len(reg_opp_df) * 50000):,.0f}
"""

ax_summary.text(0.05, 0.95, summary_text, transform=ax_summary.transAxes,
                fontsize=8, verticalalignment='top', fontfamily='monospace',
                bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.7, 
                         edgecolor='darkorange', linewidth=3))

plt.suptitle('üìä SALES INTELLIGENCE DASHBOARD - ACTIONABLE INSIGHTS FOR REVENUE GROWTH', 
             fontsize=17, fontweight='bold', y=0.995)

plt.show()
print("\n‚úÖ Sales Intelligence Dashboard created successfully!")
print("="*80)

In [None]:
# Additional detailed visualizations for specific insights

# Visualization 2: Service-level trends for top services.
# Each panel shows one service: monthly cost as bars on the left axis and the
# number of distinct resources as a line on the right axis.
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
fig.suptitle('Top 4 Services - Detailed Trend Analysis', fontsize=14, fontweight='bold')

top_4_services = service_summary.head(4)['service'].values
trend_panels = list(axes.flat)

for ax, service in zip(trend_panels, top_4_services):
    # Monthly totals for this service only
    service_monthly = df[df['service'] == service].groupby('year_month').agg({
        'computedAmount': 'sum',
        'resourceId': 'nunique',
        'computedQuantity': 'sum'
    }).reset_index()
    month_positions = range(len(service_monthly))

    # Resource counts are on a different scale from cost, so they get a twin axis
    ax2 = ax.twinx()

    # Bar chart for cost
    ax.bar(month_positions, service_monthly['computedAmount'].values,
           color='skyblue', alpha=0.7, label='Cost')

    # Line chart for resource count
    ax2.plot(month_positions, service_monthly['resourceId'].values,
             marker='o', color='red', linewidth=2, markersize=6, label='Resources')

    ax.set_xlabel('Month')
    ax.set_ylabel('Cost ($)', color='skyblue')
    ax2.set_ylabel('Number of Resources', color='red')
    ax.set_title(f'{service}', fontweight='bold')
    ax.set_xticks(month_positions)
    ax.set_xticklabels(service_monthly['year_month'].values, rotation=45, ha='right', fontsize=9)
    ax.grid(True, alpha=0.3, axis='y')

    # Add legends (one per axis because the two series live on different axes)
    ax.legend(loc='upper left')
    ax2.legend(loc='upper right')

# With fewer than four services the remaining panels would render as empty
# framed axes; hide them instead.
for unused_panel in trend_panels[len(top_4_services):]:
    unused_panel.axis('off')

plt.tight_layout()
print("‚úÖ Top Services Trends visualization complete")
plt.show()

In [None]:
# Regional growth analysis visualization
# Left panel: total cost and number of services for the first 10 rows of
# regional_analysis. Right panel: period-over-period growth per region, when
# regional_growth_df is available.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle('Regional Analysis & Growth Opportunities', fontsize=14, fontweight='bold')

# Regional cost and service adoption
ax1 = axes[0]
top_regions_detail = regional_analysis.head(10)
x_pos = np.arange(len(top_regions_detail))
width = 0.35

ax1_bar = ax1.bar(x_pos - width/2, top_regions_detail['total_cost'].values, width,
                  label='Total Cost', color='steelblue', alpha=0.8)
# Service counts use a second y-axis because their scale differs from cost
ax1_twin = ax1.twinx()
ax1_twin_bar = ax1_twin.bar(x_pos + width/2, top_regions_detail['num_services'].values, width,
                            label='Number of Services', color='coral', alpha=0.8)

ax1.set_xlabel('Region')
ax1.set_ylabel('Total Cost ($)', color='steelblue')
ax1_twin.set_ylabel('Services Count', color='coral')
ax1.set_title('Regional Cost Distribution & Service Diversity')
ax1.set_xticks(x_pos)
ax1.set_xticklabels(top_regions_detail['region'].values, rotation=45, ha='right')
ax1.grid(True, alpha=0.3, axis='y')

# Both bar series carry labels but were never shown. Build one combined legend
# and attach it to the twin axis, which is drawn on top of ax1.
legend_handles = [ax1_bar, ax1_twin_bar]
ax1_twin.legend(legend_handles, [handle.get_label() for handle in legend_handles],
                loc='upper right', fontsize=9)

# Regional growth rates
ax2 = axes[1]
# Check if regional_growth_df exists and has data
# (at notebook top level locals() is the global namespace)
if 'regional_growth_df' in locals() and len(regional_growth_df) > 0:
    regional_growth_display = regional_growth_df.head(10).sort_values('growth_rate')
    colors_reg = ['green' if x > 0 else 'red' for x in regional_growth_display['growth_rate'].values]
    ax2.barh(range(len(regional_growth_display)), regional_growth_display['growth_rate'].values,
             color=colors_reg, alpha=0.7)
    ax2.set_yticks(range(len(regional_growth_display)))
    ax2.set_yticklabels(regional_growth_display['region'].values)
    ax2.set_xlabel('Growth Rate (%)')
    ax2.set_title('Regional Growth Rates (Period-over-Period)')
    ax2.axvline(x=0, color='black', linestyle='-', linewidth=1)
    ax2.grid(True, alpha=0.3, axis='x')
else:
    # No growth table to plot: show an explanatory placeholder instead
    ax2.text(0.5, 0.5, 'Insufficient multi-month data\nfor regional growth analysis',
             ha='center', va='center', fontsize=12, transform=ax2.transAxes)
    ax2.set_title('Regional Growth Rates (Period-over-Period)')
    ax2.axis('off')

plt.tight_layout()
print("‚úÖ Regional Analysis visualization complete")
plt.show()

## 13. Cross-Selling Opportunity Analysis

Identify services that are frequently used together, service adoption gaps, and cross-selling opportunities across compartments and regions.

In [None]:
# Analyze service co-occurrence patterns for cross-selling opportunities
#
# For every unordered pair of services this cell records how many compartments
# use both, how many use only one of the two, and the Jaccard similarity of
# the two compartment sets. Results land in `cooccurrence_df`, sorted by the
# number of shared compartments.

print("="*80)
print("CROSS-SELLING OPPORTUNITY ANALYSIS")
print("="*80)

# 1. Service Co-occurrence Analysis
print("\nüìä Analyzing service usage patterns across compartments...")

# Build service co-occurrence matrix
from itertools import combinations

# Track which compartments use which services.
# A single grouped pass replaces one full-frame filter per service;
# sort=False keeps services in order of first appearance, so the
# (service1, service2) orientation of each pair is unchanged.
compartments_with_service = {
    service: set(compartment_names.unique())
    for service, compartment_names in df.groupby('service', sort=False)['compartmentName']
}

# Calculate co-occurrence scores (pairs sharing no compartment are skipped)
service_cooccurrence = {}
for service1, service2 in combinations(compartments_with_service, 2):
    comp1 = compartments_with_service[service1]
    comp2 = compartments_with_service[service2]

    intersection = len(comp1 & comp2)
    if intersection == 0:
        continue

    service_cooccurrence[(service1, service2)] = {
        # Jaccard similarity (intersection / union)
        'jaccard': intersection / len(comp1 | comp2),
        'support': intersection,  # How many compartments use both
        'comp1_only': len(comp1 - comp2),
        'comp2_only': len(comp2 - comp1),
        'both': intersection
    }

# Convert to DataFrame for analysis
cooccurrence_columns = [
    'service1', 'service2', 'jaccard_similarity', 'compartments_both',
    'compartments_s1_only', 'compartments_s2_only', 'cross_sell_potential'
]
cooccurrence_list = [
    {
        'service1': s1,
        'service2': s2,
        'jaccard_similarity': metrics['jaccard'],
        'compartments_both': metrics['both'],
        'compartments_s1_only': metrics['comp1_only'],
        'compartments_s2_only': metrics['comp2_only'],
        # Compartments that use exactly one service of the pair
        'cross_sell_potential': metrics['comp1_only'] + metrics['comp2_only']
    }
    for (s1, s2), metrics in service_cooccurrence.items()
]

# Passing the column list keeps the frame well-formed (and sortable) even when
# no pair of services shares a compartment.
cooccurrence_df = pd.DataFrame(cooccurrence_list, columns=cooccurrence_columns)
cooccurrence_df = cooccurrence_df.sort_values('compartments_both', ascending=False)

# The message says "5+", so the threshold is inclusive (previously `> 5`).
strong_pair_count = int((cooccurrence_df['compartments_both'] >= 5).sum())

print(f"\n‚úÖ Analyzed {len(cooccurrence_df)} service pairs")
print(f"üìä Found {strong_pair_count} strong service associations (5+ compartments)")

# Top service pairs (frequently used together)
print("\nüîó TOP 10 SERVICE PAIRS - Frequently Used Together:")
print("-"*80)
top_pairs = cooccurrence_df.head(10)
for _, row in top_pairs.iterrows():
    print(f"\n{row['service1'][:40]} + {row['service2'][:40]}")
    print(f"   Used together in: {row['compartments_both']} compartments")
    print(f"   Jaccard Similarity: {row['jaccard_similarity']:.3f}")
    print(f"   Cross-sell potential: {row['cross_sell_potential']} compartments")

In [None]:
# Visualize service co-occurrence network

# Create network visualization of top service relationships
fig, axes = plt.subplots(2, 2, figsize=(18, 14))
fig.suptitle('Cross-Selling Opportunity Analysis', fontsize=16, fontweight='bold')

# 1. Top Service Pairs Heatmap
ax1 = axes[0, 0]
top_15_services = service_summary.head(15)['service'].values

# Index shared-compartment counts by unordered pair once, instead of scanning
# cooccurrence_df with a boolean mask for every cell of the matrix.
# setdefault keeps the first row seen for a pair, matching the previous
# `.iloc[0]` lookup.
shared_by_pair = {}
for pair_s1, pair_s2, shared_count in zip(cooccurrence_df['service1'],
                                          cooccurrence_df['service2'],
                                          cooccurrence_df['compartments_both']):
    shared_by_pair.setdefault(frozenset((pair_s1, pair_s2)), shared_count)

# Build adjacency matrix for top services (symmetric, zero diagonal;
# pairs absent from cooccurrence_df stay at 0)
adjacency = np.zeros((len(top_15_services), len(top_15_services)))
for i, s1 in enumerate(top_15_services):
    for j, s2 in enumerate(top_15_services):
        if i != j:
            adjacency[i, j] = shared_by_pair.get(frozenset((s1, s2)), 0)

# Tick labels are truncated to 20 characters so they fit around the heatmap
heatmap_labels = [s[:20] + '...' if len(s) > 20 else s for s in top_15_services]

im1 = ax1.imshow(adjacency, cmap='YlOrRd', aspect='auto')
ax1.set_xticks(range(len(top_15_services)))
ax1.set_yticks(range(len(top_15_services)))
ax1.set_xticklabels(heatmap_labels, rotation=45, ha='right', fontsize=8)
ax1.set_yticklabels(heatmap_labels, fontsize=8)
ax1.set_title('Service Co-occurrence Matrix\n(Number of shared compartments)', fontweight='bold')
plt.colorbar(im1, ax=ax1, label='Compartments')

# 2. Cross-sell potential by service
ax2 = axes[0, 1]

# Score each top service by summing, over every pair it belongs to, the
# compartments that use the partner service but not the service itself.
first_of_pair = cooccurrence_df['service1']
second_of_pair = cooccurrence_df['service2']

cross_sell_scores = {}
for service in top_15_services:
    # Where the service is listed first, the partner-only count is s2_only
    partner_only_as_first = cooccurrence_df.loc[first_of_pair == service, 'compartments_s2_only'].sum()
    # Where it is listed second, the partner-only count is s1_only
    partner_only_as_second = cooccurrence_df.loc[second_of_pair == service, 'compartments_s1_only'].sum()
    cross_sell_scores[service] = partner_only_as_first + partner_only_as_second

# Sort ascending so the largest opportunity ends up at the top of the chart
cross_sell_df = pd.DataFrame(
    list(cross_sell_scores.items()),
    columns=['service', 'cross_sell_potential'],
).sort_values('cross_sell_potential', ascending=True)

bar_positions = range(len(cross_sell_df))
bar_labels = [
    name[:30] + '...' if len(name) > 30 else name
    for name in cross_sell_df['service'].values
]

ax2.barh(bar_positions, cross_sell_df['cross_sell_potential'].values,
         color=plt.cm.viridis(np.linspace(0, 1, len(cross_sell_df))))
ax2.set_yticks(bar_positions)
ax2.set_yticklabels(bar_labels, fontsize=9)
ax2.set_xlabel('Cross-Sell Opportunities (Compartments)')
ax2.set_title('Service Cross-Sell Potential', fontweight='bold')
ax2.grid(True, alpha=0.3, axis='x')

# 3. Service Adoption Gap Analysis by Region
ax3 = axes[1, 0]
top_8_services = service_summary.head(8)['service'].values
top_8_regions = regional_analysis.head(8)['region'].values

# Every (service, region) combination that has at least one row in df
observed_pairs = set(
    df[['service', 'region']].drop_duplicates().itertuples(index=False, name=None)
)

# Build adoption matrix (1 if service used in region, 0 otherwise)
adoption_matrix = np.zeros((len(top_8_services), len(top_8_regions)))
for row_idx, service_name in enumerate(top_8_services):
    for col_idx, region_name in enumerate(top_8_regions):
        if (service_name, region_name) in observed_pairs:
            adoption_matrix[row_idx, col_idx] = 1

im3 = ax3.imshow(adoption_matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
ax3.set_xticks(range(len(top_8_regions)))
ax3.set_yticks(range(len(top_8_services)))
ax3.set_xticklabels(top_8_regions, rotation=45, ha='right', fontsize=9)
ax3.set_yticklabels(
    [name[:25] + '...' if len(name) > 25 else name for name in top_8_services],
    fontsize=9,
)
ax3.set_title('Service Adoption by Region\n(Red = Gap Opportunity, Green = Adopted)', fontweight='bold')

# Add text annotations: a mark in every cell, white on adopted cells and
# black on gap cells
for (row_idx, col_idx), flag in np.ndenumerate(adoption_matrix):
    adopted = flag == 1
    ax3.text(col_idx, row_idx, '‚úì' if adopted else '‚úó',
             ha="center", va="center", color="white" if adopted else "black",
             fontsize=10, fontweight='bold')

# 4. Top Cross-Sell Opportunities (specific recommendations)
ax4 = axes[1, 1]
ax4.axis('off')

# Find top specific cross-sell opportunities: among the 20 most co-used pairs,
# keep those with more than 10 compartments using exactly one of the two
recommendations = [
    {
        'primary': pair['service1'][:35],
        'cross_sell': pair['service2'][:35],
        'potential': pair['cross_sell_potential'],
        'together': pair['compartments_both'],
    }
    for _, pair in cooccurrence_df.head(20).iterrows()
    if pair['cross_sell_potential'] > 10  # Significant opportunity
]

# Display as table: header, rule, then at most ten recommendations
table_data = [
    ['Primary Service', 'Cross-Sell To', 'Potential', 'Current'],
    ['-'*35, '-'*35, '-'*10, '-'*10],
]
table_data.extend(
    [
        rec['primary'][:35],
        rec['cross_sell'][:35],
        f"{rec['potential']} comps",
        f"{rec['together']} comps",
    ]
    for rec in recommendations[:10]
)

ax4.text(0.5, 0.95, 'TOP CROSS-SELL RECOMMENDATIONS',
         ha='center', va='top', fontsize=14, fontweight='bold', transform=ax4.transAxes)

# Left edge of each of the four columns, in axes coordinates
column_x = (0.05, 0.42, 0.78, 0.90)

y_position = 0.88
for row in table_data:
    is_rule_row = row[0].startswith('-')
    for col_idx, (x_left, cell) in enumerate(zip(column_x, row)):
        cell_style = {}
        # Only the "Potential" column of non-rule rows gets an explicit colour:
        # green for data cells, black for the header
        if col_idx == 2 and not is_rule_row:
            cell_style['color'] = 'darkgreen' if 'comps' in cell else 'black'
        ax4.text(x_left, y_position, cell, fontsize=8, family='monospace',
                 transform=ax4.transAxes, **cell_style)
    y_position -= 0.08

plt.tight_layout()
print("‚úÖ Cross-Selling Analysis visualization complete")
plt.show()

In [None]:
# Service Bundle Analysis - Identify common service combinations
#
# Counts, across compartments, how often each set of three services is used
# together, and prints the 15 most common sets.

print("\n" + "="*80)
print("SERVICE BUNDLE RECOMMENDATIONS")
print("="*80)

# Find most common 3-service bundles
from collections import Counter
# Imported here as well so this cell does not depend on an earlier cell
from itertools import combinations

service_bundles = []
# One grouped pass instead of filtering the whole frame per compartment;
# sort=False keeps compartments in order of first appearance.
for _, comp_service_names in df.groupby('compartmentName', sort=False)['service']:
    # dropna(): a missing service name cannot be sorted alongside strings
    comp_services = sorted(comp_service_names.dropna().unique())
    if len(comp_services) >= 3:
        # combinations() of a sorted list already yields sorted tuples, so the
        # same three services always produce the same key
        service_bundles.extend(combinations(comp_services, 3))

bundle_counts = Counter(service_bundles)
top_bundles = bundle_counts.most_common(15)

print(f"\nüì¶ TOP 15 THREE-SERVICE BUNDLES:")
print("-"*80)
for idx, (bundle, count) in enumerate(top_bundles, 1):
    print(f"\n{idx}. Bundle used by {count} compartments:")
    for service in bundle:
        print(f"   ‚Ä¢ {service[:70]}")

# Visualize bundle popularity
# Left panel: how many compartments use each of the top bundles.
# Right panel: distribution of the number of distinct services per compartment.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle('Service Bundle Analysis', fontsize=14, fontweight='bold')

# Bundle frequency chart (top 10 of the bundles ranked above)
ax1 = axes[0]
bundle_freqs = [count for _, count in top_bundles[:10]]
bundle_names = [f"Bundle {rank}" for rank in range(1, len(bundle_freqs) + 1)]

bars = ax1.barh(range(len(bundle_names)), bundle_freqs, color=plt.cm.Paired(np.linspace(0, 1, len(bundle_names))))
ax1.set_yticks(range(len(bundle_names)))
ax1.set_yticklabels(bundle_names)
ax1.set_xlabel('Number of Compartments Using Bundle')
ax1.set_title('Most Popular 3-Service Bundles', fontweight='bold')
ax1.grid(True, alpha=0.3, axis='x')

# Add value labels just past the end of each bar
for i, freq in enumerate(bundle_freqs):
    ax1.text(freq + 0.5, i, str(freq), va='center', fontsize=10, fontweight='bold')

# Service diversity per compartment
ax2 = axes[1]
comp_service_counts = df.groupby('compartmentName')['service'].nunique().reset_index()
comp_service_counts.columns = ['compartment', 'num_services']

# Create histogram
bins = [1, 2, 3, 5, 10, 20, 50, 100]
# np.histogram silently drops values beyond the last edge; stretch the final
# (closed) bin so every compartment is counted and percentages sum to 100.
max_services_seen = comp_service_counts['num_services'].max()
if pd.notna(max_services_seen) and max_services_seen > bins[-1]:
    bins[-1] = int(max_services_seen)
hist, bin_edges = np.histogram(comp_service_counts['num_services'], bins=bins)

# Every bin except the last is half-open [lo, hi), so for integer counts it
# holds lo..hi-1; label it that way rather than "lo-hi". The last bin is
# closed and open-ended in practice, hence "50+".
bin_labels = [
    str(lo) if hi - lo == 1 else f"{lo}-{hi - 1}"
    for lo, hi in zip(bins[:-2], bins[1:-1])
]
bin_labels.append(f"{bins[-2]}+")

ax2.bar(range(len(hist)), hist, color='teal', alpha=0.7, edgecolor='black')
ax2.set_xticks(range(len(hist)))
ax2.set_xticklabels(bin_labels, rotation=45)
ax2.set_xlabel('Number of Services per Compartment')
ax2.set_ylabel('Number of Compartments')
ax2.set_title('Service Diversity Distribution', fontweight='bold')
ax2.grid(True, alpha=0.3, axis='y')

# Add percentage labels
total_comps = len(comp_service_counts)
for i, count in enumerate(hist):
    # Guard against an empty dataset (no compartments at all)
    percentage = (count / total_comps) * 100 if total_comps else 0.0
    ax2.text(i, count + max(hist)*0.02, f'{count}\n({percentage:.1f}%)',
             ha='center', va='bottom', fontsize=9, fontweight='bold')

plt.tight_layout()
print("\n‚úÖ Service Bundle visualization complete")
plt.show()

# Calculate bundle uplift potential
# Count the compartments at both ends of the service-diversity range once,
# then report each as a count and as a share of all compartments.
services_per_comp = comp_service_counts['num_services']
narrow_comp_count = len(comp_service_counts[services_per_comp <= 2])
broad_comp_count = len(comp_service_counts[services_per_comp >= 10])
narrow_comp_share = narrow_comp_count/total_comps*100
broad_comp_share = broad_comp_count/total_comps*100

print("\nüí° BUNDLE UPLIFT ANALYSIS:")
print("-"*80)
print(f"Total Compartments: {len(df['compartmentName'].unique())}")
print(f"Avg Services per Compartment: {services_per_comp.mean():.1f}")
print(f"Median Services per Compartment: {services_per_comp.median():.0f}")
print(f"\nCompartments with 1-2 services: {narrow_comp_count} "
      f"({narrow_comp_share:.1f}%)")
print("   ‚Üí High potential for bundle upsell")
print(f"\nCompartments with 10+ services: {broad_comp_count} "
      f"({broad_comp_share:.1f}%)")
print("   ‚Üí Premium customers, focus on optimization")

In [None]:
# Compartment Segmentation for Targeted Cross-Selling

print("\n" + "="*80)
print("COMPARTMENT SEGMENTATION FOR CROSS-SELLING")
print("="*80)

# Segment compartments by service adoption and spending.
# One row per compartment: total spend plus the number of distinct services,
# resources and regions it touches.
compartment_profiles = (
    df.groupby('compartmentName')
    .agg(
        total_spend=('computedAmount', 'sum'),
        num_services=('service', 'nunique'),
        num_resources=('resourceId', 'nunique'),
        num_regions=('region', 'nunique'),
    )
    .reset_index()
    .rename(columns={'compartmentName': 'compartment'})
)

# Calculate percentiles for segmentation (33rd and 66th on each dimension)
spend_column = compartment_profiles['total_spend']
service_column = compartment_profiles['num_services']
spend_q33, spend_q66 = (spend_column.quantile(q) for q in (0.33, 0.66))
service_q33, service_q66 = (service_column.quantile(q) for q in (0.33, 0.66))

# Create segments
def segment_compartment(row, spend_low=None, spend_high=None,
                        services_low=None, services_high=None):
    """Return the segment label for one compartment profile.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing
            ``'total_spend'`` and ``'num_services'``.
        spend_low: Spend below this value counts as "low". Defaults to the
            module-level ``spend_q33``.
        spend_high: Spend at or above this value counts as "high". Defaults
            to the module-level ``spend_q66``.
        services_low: Service counts below this value count as "low
            diversity". Defaults to the module-level ``service_q33``.
        services_high: Service counts at or above this value count as "high
            diversity". Defaults to the module-level ``service_q66``.

    Returns:
        One of the seven segment label strings. Combinations without a
        dedicated label (for example low spend with mid diversity) fall into
        'Standard (Mid Spend, Mid Diversity)'.
    """
    # Fall back to the thresholds computed from compartment_profiles so that
    # `DataFrame.apply(segment_compartment, axis=1)` keeps working unchanged.
    spend_low = spend_q33 if spend_low is None else spend_low
    spend_high = spend_q66 if spend_high is None else spend_high
    services_low = service_q33 if services_low is None else services_low
    services_high = service_q66 if services_high is None else services_high

    high_spend = row['total_spend'] >= spend_high
    low_spend = row['total_spend'] < spend_low
    high_diversity = row['num_services'] >= services_high
    low_diversity = row['num_services'] < services_low

    if high_spend and high_diversity:
        return 'Enterprise (High Spend, High Diversity)'
    # "Low diversity" means below the lower threshold. The earlier test
    # (< upper threshold) also swallowed mid-diversity compartments, which
    # made the 'Focused High Value' branch below unreachable.
    if high_spend and low_diversity:
        return 'High Value (High Spend, Low Diversity)'
    if low_spend and low_diversity:
        return 'Starter (Low Spend, Low Diversity)'
    if low_spend and high_diversity:
        return 'Diverse Small (Low Spend, High Diversity)'
    if high_diversity:
        # Spend is neither high nor low at this point
        return 'Growing (Mid Spend, High Diversity)'
    if high_spend:
        # Diversity is neither high nor low at this point
        return 'Focused High Value (High Spend, Mid Diversity)'
    return 'Standard (Mid Spend, Mid Diversity)'

# Label every compartment with its segment
compartment_profiles['segment'] = compartment_profiles.apply(segment_compartment, axis=1)

# Visualize segmentation
fig = plt.figure(figsize=(18, 10))

# 1. Scatter plot with segments
ax1 = plt.subplot(2, 2, 1)
segments = compartment_profiles['segment'].unique()
colors_seg = plt.cm.Set3(np.linspace(0, 1, len(segments)))
# One fixed colour per segment, reused by the other panels of this figure
color_map = dict(zip(segments, colors_seg))

# sort=False yields the groups in order of first appearance, i.e. the same
# order as `segments`
for segment_name, members in compartment_profiles.groupby('segment', sort=False):
    ax1.scatter(
        members['num_services'],
        members['total_spend'],
        label=segment_name,
        alpha=0.6,
        s=100,
        color=color_map[segment_name],
        edgecolors='black',
        linewidth=0.5,
    )

ax1.set_xlabel('Number of Services', fontsize=11, fontweight='bold')
ax1.set_ylabel('Total Spend ($)', fontsize=11, fontweight='bold')
ax1.set_title('Compartment Segmentation', fontsize=12, fontweight='bold')
ax1.legend(loc='best', fontsize=8)
ax1.grid(True, alpha=0.3)

# Dashed guides at the segmentation thresholds: red for the upper one,
# orange for the lower one, on each axis
guide_style = dict(linestyle='--', alpha=0.5, linewidth=1)
for spend_threshold, guide_color in ((spend_q66, 'red'), (spend_q33, 'orange')):
    ax1.axhline(y=spend_threshold, color=guide_color, **guide_style)
for service_threshold, guide_color in ((service_q66, 'red'), (service_q33, 'orange')):
    ax1.axvline(x=service_threshold, color=guide_color, **guide_style)

# 2. Segment distribution
ax2 = plt.subplot(2, 2, 2)
segment_counts = compartment_profiles['segment'].value_counts()
# Reuse the scatter plot's colours so a segment looks the same in both panels
colors_pie2 = [color_map[segment_label] for segment_label in segment_counts.index]
wedges, texts, autotexts = ax2.pie(
    segment_counts.values,
    labels=segment_counts.index,
    autopct='%1.1f%%',
    colors=colors_pie2,
    startangle=90,
)
# Style all percentage labels in one call
plt.setp(autotexts, color='black', fontweight='bold', fontsize=9)
ax2.set_title('Compartment Distribution by Segment', fontsize=12, fontweight='bold')

# 3. Cross-sell opportunity by segment
ax3 = plt.subplot(2, 2, 3)

# The gap is measured against the number of distinct services in the dataset
max_services_available = df['service'].nunique()

segment_opportunities = []
for segment in segments:
    members = compartment_profiles[compartment_profiles['segment'] == segment]
    avg_services = members['num_services'].mean()
    segment_opportunities.append({
        'segment': segment,
        'avg_services': avg_services,
        # Services the average compartment of this segment does not use yet
        'opportunity': max_services_available - avg_services,
        'count': len(members)
    })

seg_opp_df = pd.DataFrame(segment_opportunities).sort_values('opportunity', ascending=True)

bar_rows = range(len(seg_opp_df))
bar_colors = [color_map[name] for name in seg_opp_df['segment'].values]
bars3 = ax3.barh(bar_rows, seg_opp_df['opportunity'].values,
                 color=bar_colors, alpha=0.7, edgecolor='black')
ax3.set_yticks(bar_rows)
ax3.set_yticklabels([name[:30] for name in seg_opp_df['segment'].values], fontsize=9)
ax3.set_xlabel('Avg Services Gap (Cross-sell Potential)', fontsize=10, fontweight='bold')
ax3.set_title('Cross-Sell Opportunity by Segment', fontsize=12, fontweight='bold')
ax3.grid(True, alpha=0.3, axis='x')

# Add value labels: the gap and the number of compartments in the segment
gap_values = seg_opp_df['opportunity'].tolist()
member_counts = seg_opp_df['count'].tolist()
for i, (opp, count) in enumerate(zip(gap_values, member_counts)):
    ax3.text(opp + 1, i, f'{opp:.1f} ({count} comps)', va='center', fontsize=8)

# 4. Segment recommendations
# A text-only panel: static guidance for each segment label produced above.
ax4 = plt.subplot(2, 2, 4)
ax4.axis('off')

recommendations_text = """
SEGMENT-SPECIFIC RECOMMENDATIONS:

üè¢ Enterprise (High Spend, High Diversity)
   ‚Ä¢ Focus: Optimization & Advanced Features
   ‚Ä¢ Action: Premium support, custom solutions
   ‚Ä¢ Cross-sell: Emerging services, add-ons

üíé High Value (High Spend, Low Diversity)
   ‚Ä¢ Focus: Service Expansion & Diversification
   ‚Ä¢ Action: Introduce complementary services
   ‚Ä¢ Cross-sell: HIGH PRIORITY - Bundles

üå± Starter (Low Spend, Low Diversity)
   ‚Ä¢ Focus: Education & Onboarding
   ‚Ä¢ Action: Starter bundles, free trials
   ‚Ä¢ Cross-sell: Foundational services

üìä Diverse Small (Low Spend, High Diversity)
   ‚Ä¢ Focus: Usage Optimization
   ‚Ä¢ Action: Identify unused services
   ‚Ä¢ Cross-sell: Consolidation opportunities

üöÄ Growing (Mid Spend, High Diversity)
   ‚Ä¢ Focus: Scale & Performance
   ‚Ä¢ Action: Growth packages, volume discounts
   ‚Ä¢ Cross-sell: Premium tiers

‚≠ê Focused High Value (High Spend, Mid Diversity)
   ‚Ä¢ Focus: Adjacent Service Adoption
   ‚Ä¢ Action: Targeted campaigns
   ‚Ä¢ Cross-sell: MEDIUM-HIGH PRIORITY

üìà Standard (Mid Spend, Mid Diversity)
   ‚Ä¢ Focus: Gradual Expansion
   ‚Ä¢ Action: Success stories, use cases
   ‚Ä¢ Cross-sell: Popular bundles
"""

# Anchor the text block at the panel's top-left corner
panel_box = dict(boxstyle='round', facecolor='wheat', alpha=0.3)
ax4.text(
    0.05, 0.95, recommendations_text,
    transform=ax4.transAxes,
    fontsize=9,
    verticalalignment='top',
    family='monospace',
    bbox=panel_box,
)

plt.tight_layout()
print("\n‚úÖ Compartment Segmentation complete")
print("\nüìä Segment Summary:")
# One line per segment, in the same order as the opportunity chart
summary_rows = zip(
    seg_opp_df['segment'],
    seg_opp_df['count'],
    seg_opp_df['avg_services'],
    seg_opp_df['opportunity'],
)
for segment_label, comp_count, mean_services, service_gap in summary_rows:
    print(f"   {segment_label}: {comp_count} compartments, avg {mean_services:.1f} services, "
          f"cross-sell gap: {service_gap:.1f} services")
plt.show()

In [None]:
# Cross-Selling Action Plan Dashboard

fig = plt.figure(figsize=(18, 10))
fig.suptitle('Cross-Selling Action Plan Dashboard', fontsize=16, fontweight='bold')

# 1. Top Priority Cross-Sell Targets (Compartments)
ax1 = plt.subplot(2, 3, 1)
# Find compartments with high spend but low service diversity
high_value_low_div = compartment_profiles[
    (compartment_profiles['total_spend'] > compartment_profiles['total_spend'].quantile(0.75)) &
    (compartment_profiles['num_services'] < compartment_profiles['num_services'].quantile(0.5))
].sort_values('total_spend', ascending=False).head(10)

ax1.barh(range(len(high_value_low_div)), high_value_low_div['total_spend'].values,
        color='coral', alpha=0.7, edgecolor='black')
ax1.set_yticks(range(len(high_value_low_div)))
ax1.set_yticklabels([c[:25] + '...' if len(c) > 25 else c for c in high_value_low_div['compartment'].values], fontsize=8)
ax1.set_xlabel('Total Spend ($)')
ax1.set_title('üéØ Priority Accounts\n(High Spend, Low Diversity)', fontweight='bold', fontsize=11)
ax1.grid(True, alpha=0.3, axis='x')

# Add service count annotations
for i, (spend, num_svc) in enumerate(zip(high_value_low_div['total_spend'].values, high_value_low_div['num_services'].values)):
    ax1.text(spend * 0.5, i, f'{int(num_svc)} svcs', va='center', ha='center', 
            fontsize=8, fontweight='bold', color='white')

# 2. Service Penetration Rate
ax2 = plt.subplot(2, 3, 2)
# Calculate penetration rate for each service
service_penetration = []
total_compartments = len(df['compartmentName'].unique())
for service in service_summary.head(15)['service'].values:
    comps_with_service = len(df[df['service'] == service]['compartmentName'].unique())
    penetration = (comps_with_service / total_compartments) * 100
    service_penetration.append({
        'service': service,
        'penetration': penetration,
        'comps_count': comps_with_service
    })

pen_df = pd.DataFrame(service_penetration).sort_values('penetration')
colors_pen = ['green' if p > 50 else 'orange' if p > 25 else 'red' for p in pen_df['penetration'].values]

ax2.barh(range(len(pen_df)), pen_df['penetration'].values, color=colors_pen, alpha=0.7, edgecolor='black')
ax2.set_yticks(range(len(pen_df)))
ax2.set_yticklabels([s[:25] + '...' if len(s) > 25 else s for s in pen_df['service'].values], fontsize=8)
ax2.set_xlabel('Penetration Rate (%)')
ax2.set_title('üìä Service Penetration Rates\n(Red=Low, Orange=Medium, Green=High)', fontweight='bold', fontsize=11)
ax2.axvline(x=50, color='darkgreen', linestyle='--', alpha=0.5, linewidth=2)
ax2.axvline(x=25, color='orange', linestyle='--', alpha=0.5, linewidth=2)
ax2.grid(True, alpha=0.3, axis='x')

# 3. Regional Service Gap Matrix
# Heatmap of, per (service, region) pair, the percentage of the region's
# compartments that have no rows for the service. Rows are the first 5
# services of `service_summary`; columns the first 6 regions of
# `regional_analysis`.
ax3 = plt.subplot(2, 3, 3)
top_5_services = service_summary.head(5)['service'].values
top_6_regions = regional_analysis.head(6)['region'].values

gap_matrix = np.zeros((len(top_5_services), len(top_6_regions)))
for j, region in enumerate(top_6_regions):
    # The region slice and its compartment count do not depend on the
    # service, so compute them once per region rather than once per cell.
    region_rows = df[df['region'] == region]
    comps_in_region = len(region_rows['compartmentName'].unique())
    for i, service in enumerate(top_5_services):
        comps_with_service = len(
            region_rows[region_rows['service'] == service]['compartmentName'].unique()
        )
        # A region without compartments contributes a 0% gap instead of dividing by zero.
        gap_matrix[i, j] = (
            (comps_in_region - comps_with_service) / comps_in_region * 100
            if comps_in_region > 0 else 0
        )

im3 = ax3.imshow(gap_matrix, cmap='Reds', aspect='auto')
ax3.set_xticks(range(len(top_6_regions)))
ax3.set_yticks(range(len(top_5_services)))
ax3.set_xticklabels(top_6_regions, rotation=45, ha='right', fontsize=8)
ax3.set_yticklabels([s[:20] + '...' if len(s) > 20 else s for s in top_5_services], fontsize=8)
ax3.set_title('üó∫Ô∏è Regional Service Gaps (%)\n(Darker = More Opportunity)', fontweight='bold', fontsize=11)
plt.colorbar(im3, ax=ax3, label='Gap %')

# 4. Cross-Sell Revenue Potential
# For each of the first 10 services in `service_summary`: the mean
# per-compartment spend among compartments that use the service, multiplied
# by the number of compartments that do not use it.
ax4 = plt.subplot(2, 3, 4)

revenue_potential = []
for service in service_summary.head(10)['service'].values:
    service_rows = df[df['service'] == service]
    spend_by_compartment = service_rows.groupby('compartmentName')['computedAmount'].sum()
    avg_cost_per_comp = spend_by_compartment.mean()
    comps_with_service = len(service_rows['compartmentName'].unique())
    comps_without = total_compartments - comps_with_service
    potential_revenue = avg_cost_per_comp * comps_without

    revenue_potential.append(
        dict(service=service, potential=potential_revenue, targets=comps_without)
    )

rev_df = pd.DataFrame(revenue_potential).sort_values('potential', ascending=False)

bar_slots = range(len(rev_df))
bar_colors = plt.cm.plasma(np.linspace(0, 1, len(rev_df)))
ax4.bar(bar_slots, rev_df['potential'].values,
        color=bar_colors, alpha=0.7, edgecolor='black')
ax4.set_xticks(bar_slots)
ax4.set_xticklabels(
    # Names longer than 15 characters are cut and marked with an ellipsis.
    [name if len(name) <= 15 else name[:15] + '...' for name in rev_df['service'].values],
    rotation=45, ha='right', fontsize=8,
)
ax4.set_ylabel('Potential Revenue ($)')
ax4.set_title('üí∞ Cross-Sell Revenue Potential\n(If adopted by all compartments)', fontweight='bold', fontsize=11)
ax4.grid(True, alpha=0.3, axis='y')

# Banner with the summed potential, positioned in axes-relative coordinates.
total_potential = rev_df['potential'].sum()
banner_box = dict(boxstyle='round', facecolor='yellow', alpha=0.7)
ax4.text(0.5, 0.95, f'Total Potential: ${total_potential:,.0f}',
         transform=ax4.transAxes, ha='center', fontsize=10, fontweight='bold',
         bbox=banner_box)

# 5. Recommended Action Priority Matrix
# Bubble chart with one bubble per row of `seg_opp_df`: x = opportunity,
# y = count, bubble area proportional to opportunity * count.
ax5 = plt.subplot(2, 3, 5)

# One action item per segment; priority score = opportunity * count.
action_priorities = [
    {
        'segment': row['segment'],
        'priority_score': row['opportunity'] * row['count'],
        'compartments': row['count'],
        'avg_gap': row['opportunity'],
    }
    for _, row in seg_opp_df.iterrows()
]

# Naming the columns keeps sort_values working when there are no segments
# (an empty DataFrame would otherwise have no 'priority_score' column).
action_df = pd.DataFrame(
    action_priorities,
    columns=['segment', 'priority_score', 'compartments', 'avg_gap'],
).sort_values('priority_score', ascending=False)

# Create bubble chart. Each bubble is labelled with its rank in the sorted
# frame (1 = highest priority score). The rank comes from enumerate rather
# than from the index: after sorting, index labels no longer equal positions,
# so mixing the two labels a bubble with another row's number.
colors_action = plt.cm.Set2(np.linspace(0, 1, len(action_df)))
for rank, (_, row) in enumerate(action_df.iterrows(), start=1):
    ax5.scatter(row['avg_gap'], row['compartments'], s=row['priority_score']*20,
                alpha=0.6, color=colors_action[rank - 1], edgecolors='black', linewidth=1.5)
    ax5.text(row['avg_gap'], row['compartments'], str(rank),
             ha='center', va='center', fontweight='bold', fontsize=9)

ax5.set_xlabel('Avg Service Gap (Opportunity)', fontweight='bold')
ax5.set_ylabel('Number of Compartments', fontweight='bold')
ax5.set_title('üéØ Action Priority Matrix\n(Bubble size = Total Opportunity)', fontweight='bold', fontsize=11)
ax5.grid(True, alpha=0.3)

# 6. Key Metrics Summary
# Text-only panel: the axes frame is hidden and a monospace summary is drawn
# inside it, then the figure is shown and the headline numbers are printed.
ax6 = plt.subplot(2, 3, 6)
ax6.axis('off')

# Calculate key metrics
# NOTE(review): cooccurrence_df, compartment_profiles and high_value_low_div
# are defined outside this section - confirm they are up to date when this
# cell runs.
total_cross_sell_opps = cooccurrence_df['cross_sell_potential'].sum()
avg_services_per_comp = compartment_profiles['num_services'].mean()
high_priority_comps = len(high_value_low_div)
total_rev_potential = rev_df['potential'].sum()

# The template below indexes rev_df.iloc[0], [1] and [2], so rev_df must hold
# at least three rows; service names are cut to 35 characters to fit the panel.
metrics_text = f"""
‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë   CROSS-SELLING KEY METRICS      ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù

üìä Overall Metrics:
   ‚Ä¢ Total Compartments: {total_compartments:,}
   ‚Ä¢ Avg Services/Comp: {avg_services_per_comp:.1f}
   ‚Ä¢ Total Services: {df['service'].nunique()}

üéØ Opportunity Metrics:
   ‚Ä¢ Cross-sell Opportunities: {total_cross_sell_opps:,.0f}
   ‚Ä¢ High-Priority Accounts: {high_priority_comps}
   ‚Ä¢ Revenue Potential: ${total_rev_potential:,.0f}

üìà Top Actions:
   1. Target High Value/Low Div segment
   2. Push top service bundles
   3. Fill regional service gaps
   4. Upsell to Starter segment

üèÜ Focus Areas:
   ‚Ä¢ {rev_df.iloc[0]['service'][:35]}
     Potential: ${rev_df.iloc[0]['potential']:,.0f}
   
   ‚Ä¢ {rev_df.iloc[1]['service'][:35]}
     Potential: ${rev_df.iloc[1]['potential']:,.0f}
   
   ‚Ä¢ {rev_df.iloc[2]['service'][:35]}
     Potential: ${rev_df.iloc[2]['potential']:,.0f}

üí° Next Steps:
   ‚Üí Create targeted campaigns
   ‚Üí Develop bundle offers
   ‚Üí Train sales on co-sell patterns
"""

# Anchor the text at the top-left of the panel, in axes-relative coordinates.
ax6.text(0.05, 0.95, metrics_text, transform=ax6.transAxes, 
        fontsize=9, verticalalignment='top', family='monospace',
        bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.3))

plt.tight_layout()
print("‚úÖ Cross-Selling Action Plan Dashboard complete")
plt.show()

# Console recap of the same headline figures shown in the panel.
print("\n" + "="*80)
print("CROSS-SELLING ANALYSIS COMPLETE")
print("="*80)
print(f"\n‚úÖ Total potential cross-sell opportunities identified: {total_cross_sell_opps:,.0f}")
print(f"üí∞ Estimated revenue potential: ${total_rev_potential:,.0f}")
print(f"üéØ High-priority accounts for immediate action: {high_priority_comps}")
print(f"\nüìä All visualizations embedded in notebook")

In [None]:
# Create a summary report as dataframe export
# The report is a two-column table (Metric, Value). Rows are written as
# (label, formatted value) pairs so that each label sits next to its value;
# empty pairs are visual separators between groups.
print("\n" + "="*80)
print("EXECUTIVE SUMMARY - KEY METRICS")
print("="*80)

# Momentum wording depends on the sign of `acceleration`.
if acceleration > 0:
    momentum_label = 'üöÄ Accelerating'
elif acceleration < 0:
    momentum_label = '‚¨áÔ∏è Decelerating'
else:
    momentum_label = '‚û°Ô∏è Stable'

# `emerging` is only counted when an earlier cell has defined it.
emerging_count = len(emerging) if 'emerging' in locals() else 0

spacer_row = ('', '')
summary_rows = [
    # Cost totals and averages
    ('Total Historical Cost', f"${df['computedAmount'].sum():,.2f}"),
    ('Total Transactions', f"{len(df):,}"),
    ('Average Transaction Size', f"${df['computedAmount'].mean():,.2f}"),
    ('Daily Average Cost', f"${daily_costs['computedAmount'].mean():,.2f}"),
    ('Monthly Average Cost', f"${monthly_costs['total_cost'].mean():,.2f}"),
    ('Current Daily Cost', f"${daily_costs['computedAmount'].iloc[-1]:,.2f}"),
    ('Current Month Cost (partial)', f"${monthly_costs['total_cost'].iloc[-1]:,.2f}"),
    spacer_row,
    # Distinct-value counts
    ('Unique Services', f"{df['service'].nunique()}"),
    ('Unique Regions', f"{df['region'].nunique()}"),
    ('Unique Compartments', f"{df['compartmentName'].nunique()}"),
    ('Unique Resources', f"{df['resourceId'].nunique()}"),
    spacer_row,
    # Growth (the annualized figure is the daily rate multiplied by 365)
    ('Overall Growth Rate (daily)', f"{daily_growth_rate:.3f}%"),
    ('Overall Growth Rate (annualized)', f"{daily_growth_rate * 365:.2f}%"),
    ('Recent MoM Growth', f"{recent_mom:.2f}%"),
    ('Growth Momentum', momentum_label),
    spacer_row,
    # Concentration of spend in the leading services
    ('Top Service Market Share', f"{service_summary.iloc[0]['market_share']:.1f}%"),
    ('Top 3 Services Market Share', f"{service_summary.head(3)['market_share'].sum():.1f}%"),
    ('Top 5 Services Market Share', f"{service_summary.head(5)['market_share'].sum():.1f}%"),
    spacer_row,
    # Service growth buckets
    ('Services with Positive Growth', f"{len(growth_df[growth_df['growth_rate'] > 0])}"),
    ('Services with Negative Growth', f"{len(growth_df[growth_df['growth_rate'] < 0])}"),
    ('High-Growth Services (>20%)', f"{len(high_growth)}"),
    ('Emerging Services (<60 days)', f"{emerging_count}"),
]

metric_labels, metric_values = zip(*summary_rows)
summary_metrics = {
    'Metric': list(metric_labels),
    'Value': list(metric_values),
}

summary_df = pd.DataFrame(summary_metrics)
print("\n" + summary_df.to_string(index=False))

# Export key datasets to CSV for further analysis
# Writes five summary tables to ../output/ (relative to the working
# directory), printing one confirmation line per file.
from pathlib import Path

print("\n" + "="*80)
print("EXPORTING ANALYSIS RESULTS TO CSV")
print("="*80)

# DataFrame.to_csv does not create missing parent directories, so make sure
# the target folder exists before the first write.
output_dir = Path('../output')
output_dir.mkdir(parents=True, exist_ok=True)


def _export_csv(frame, filename, label):
    """Write *frame* to ``output_dir / filename`` without the index and confirm on stdout."""
    frame.to_csv(output_dir / filename, index=False)
    print(f"‚úÖ {label} exported: {filename}")


# Export monthly trends
monthly_export = monthly_costs[['year_month', 'total_cost', 'num_services', 'num_regions', 'mom_growth']]
_export_csv(monthly_export, 'growth_analysis_monthly_trends.csv', 'Monthly trends')

# Export service analysis
service_export = service_summary[['rank', 'service', 'total_cost', 'market_share', 'num_resources', 'avg_cost_per_row']]
_export_csv(service_export, 'growth_analysis_service_breakdown.csv', 'Service breakdown')

# Export growth analysis
growth_export = growth_df[['service', 'total_cost', 'current_monthly', 'growth_rate', 'num_resources', 'market_share']]
_export_csv(growth_export, 'growth_analysis_service_growth_rates.csv', 'Service growth rates')

# Export regional analysis
regional_export = regional_analysis[['region', 'total_cost', 'market_share', 'num_services', 'num_resources']]
_export_csv(regional_export, 'growth_analysis_regional_breakdown.csv', 'Regional analysis')

# Export compartment analysis
compartment_export = compartment_analysis[['compartment', 'total_cost', 'market_share', 'num_services', 'num_resources']]
_export_csv(compartment_export, 'growth_analysis_compartment_breakdown.csv', 'Compartment analysis')

print("\n" + "="*80)
print("ANALYSIS COMPLETE")
print("="*80)
print("\n‚úÖ All visualizations and exports have been saved to ../output/")
print("üìä Open the generated CSV files for detailed reporting and further analysis")
# NOTE(review): this section only writes CSV files; confirm that the PNG
# files mentioned below are produced elsewhere in the notebook.
print("üìà Share the PNG visualizations with stakeholders for decision-making")

## 14. Upselling Analysis - Premium Tier Upgrades

Identify opportunities to upgrade customers to premium/higher-tier services within existing service categories. Focus on customers already using base-tier services who could benefit from enterprise features.

In [None]:
# Upselling Analysis: Identify service tier upgrade opportunities
print("=== UPSELLING ANALYSIS: PREMIUM TIER UPGRADES ===\n")

# Upselling pathways, one row per base service:
#   (base service, premium service, description, value proposition)
# The rows are turned into the nested mapping
#   upselling_pathways[base] -> {'premium', 'description', 'value_proposition'}
# in the order listed here.
_pathway_fields = ('premium', 'description', 'value_proposition')
_pathway_rows = [
    # Compute upgrades
    ('Compute', 'Container Engine Service',
     'Upgrade to Container Engine (OKE) for cloud-native workloads',
     'Enable microservices architecture, improve deployment speed'),
    ('Block Storage', 'File Storage',
     'Add File Storage for shared file systems',
     'Enable multi-instance access, NFS protocol support'),

    # Database upgrades
    ('Database', 'Autonomous Data Warehouse',
     'Upgrade to Autonomous Database for self-driving capabilities',
     'Eliminate manual tuning, 99.995% availability SLA'),
    ('MySQL', 'Database Management',
     'Add Database Management for comprehensive database monitoring',
     'Automated performance insights, diagnostics and fleet management'),

    # Networking upgrades
    ('Virtual Cloud Network', 'Load Balancer',
     'Add Load Balancer for high availability',
     'Ensure application uptime, distribute traffic efficiently'),
    ('Load Balancer', 'Web Application Firewall',
     'Upgrade to Web Application Firewall for security',
     'Protect against OWASP Top 10, DDoS protection'),

    # Storage upgrades
    ('Object Storage', 'Archive Storage',
     'Add Archive Storage for long-term data retention',
     'Reduce storage costs by 90% for infrequently accessed data'),

    # Observability upgrades
    ('Telemetry', 'Logging Analytics',
     'Upgrade to Logging Analytics for advanced insights',
     'ML-powered log analysis, faster troubleshooting'),
    ('Logging', 'Application Performance Monitoring',
     'Add APM for application-level monitoring',
     'End-to-end transaction tracing, performance optimization'),

    # Security upgrades
    ('Oracle Cloud Guard Service', 'Vulnerability Scanning Service',
     'Add Vulnerability Scanning for comprehensive security assessment',
     'Automated vulnerability detection, compliance monitoring'),

    # Data & AI upgrades
    ('Data Flow', 'Data Integration',
     'Upgrade to Data Integration for enterprise ETL/ELT',
     'No-code data pipelines, advanced transformations'),
    ('Data Integration', 'Data Science',
     'Add Data Science platform for ML capabilities',
     'Build and deploy ML models, predictive analytics'),
    ('Analytics', 'Oracle AI Data Platform',
     'Upgrade to AI Data Platform for unified data and AI',
     'Integrated data lakehouse, AI-powered analytics'),
]

upselling_pathways = {
    base_name: dict(zip(_pathway_fields, details))
    for base_name, *details in _pathway_rows
}

# Calculate upselling opportunities
# For every pathway in `upselling_pathways`, find the compartments that use
# the base service but not the premium one, and estimate the revenue an
# upgrade could bring (assumed to be 30% of the average base-service spend
# per compartment).
upsell_columns = [
    'base_service', 'premium_service', 'compartments_with_base',
    'compartments_with_premium', 'upsell_targets', 'conversion_rate',
    'base_service_revenue', 'potential_upsell_revenue',
    'description', 'value_proposition',
]
upselling_opportunities = []

for base_service, upgrade_info in upselling_pathways.items():
    premium_service = upgrade_info['premium']

    # Filter the base-service rows once; they feed both the compartment set
    # and the spend total.
    base_rows = df[df['service'] == base_service]
    comps_with_base = set(base_rows['compartmentName'].unique())
    comps_with_premium = set(df[df['service'] == premium_service]['compartmentName'].unique())

    upsell_targets = comps_with_base - comps_with_premium
    if not upsell_targets:
        continue

    # Only base users that also run the premium service count as converted.
    # Premium users that never had the base service are excluded, which keeps
    # the rate within 0-100%.
    already_upgraded = comps_with_base & comps_with_premium

    # upsell_targets is a non-empty subset of comps_with_base, so the
    # divisions below cannot hit zero.
    base_spend = base_rows['computedAmount'].sum()
    avg_spend_per_comp = base_spend / len(comps_with_base)
    potential_revenue = len(upsell_targets) * avg_spend_per_comp * 0.30

    upselling_opportunities.append({
        'base_service': base_service,
        'premium_service': premium_service,
        'compartments_with_base': len(comps_with_base),
        'compartments_with_premium': len(comps_with_premium),
        'upsell_targets': len(upsell_targets),
        'conversion_rate': len(already_upgraded) / len(comps_with_base),
        'base_service_revenue': base_spend,
        'potential_upsell_revenue': potential_revenue,
        'description': upgrade_info['description'],
        'value_proposition': upgrade_info['value_proposition']
    })

# Naming the columns keeps the sort and the totals below working when no
# pathway produced any target (an empty DataFrame has no columns otherwise).
upsell_df = pd.DataFrame(upselling_opportunities, columns=upsell_columns)
upsell_df = upsell_df.sort_values('potential_upsell_revenue', ascending=False)

print(f"Total Upselling Opportunities Found: {len(upsell_df)}")
print(f"Total Potential Upsell Revenue: ${upsell_df['potential_upsell_revenue'].sum():,.2f}")
print(f"Total Compartments with Upsell Potential: {upsell_df['upsell_targets'].sum()}")
print("\n" + "="*80 + "\n")

# Display top opportunities
print("TOP 10 UPSELLING OPPORTUNITIES:\n")
for idx, row in upsell_df.head(10).iterrows():
    # Base users that already have the premium service = base users minus targets.
    upgraded_count = row['compartments_with_base'] - row['upsell_targets']
    print(f"{row['base_service']} ‚Üí {row['premium_service']}")
    print(f"  Current Users: {row['compartments_with_base']} | Already Upgraded: {upgraded_count}")
    print(f"  Upsell Targets: {row['upsell_targets']} compartments")
    print(f"  Conversion Rate: {row['conversion_rate']:.1%}")
    print(f"  Potential Revenue: ${row['potential_upsell_revenue']:,.2f}")
    print(f"  üìä {row['description']}")
    print(f"  üí° Value: {row['value_proposition']}")
    print()


## 15. Regional Focus Analysis - Top 3 Regions

Most customers operate in 2-3 regions. Focus sales efforts on the most active regions where growth potential is highest.

In [None]:
# Regional Focus: Analyze top 3 regions for concentrated sales efforts
print("=== REGIONAL FOCUS: TOP 3 REGIONS FOR SALES EXPANSION ===\n")

# Identify top 3 regions by cost: per region, total spend plus the number of
# distinct compartments and services, keeping the three highest-spend regions.
top_3_regions = df.groupby('region').agg({
    'computedAmount': 'sum',
    'compartmentName': 'nunique',
    'service': 'nunique'
}).rename(columns={
    'computedAmount': 'total_cost',
    'compartmentName': 'compartments',
    'service': 'services_used'
}).sort_values('total_cost', ascending=False).head(3)

# The overall spend is the same for every row, so compute it once.
grand_total_cost = df['computedAmount'].sum()

print("TOP 3 REGIONS BY REVENUE:\n")
for region, data in top_3_regions.iterrows():
    pct_of_total = (data['total_cost'] / grand_total_cost) * 100
    print(f"üìç {region}")
    print(f"   Revenue: ${data['total_cost']:,.2f} ({pct_of_total:.1f}% of total)")
    # iterrows() returns each row as a single-dtype Series, so the integer
    # counts arrive as floats; cast back so they print as "12", not "12.0".
    print(f"   Compartments: {int(data['compartments'])}")
    print(f"   Services Used: {int(data['services_used'])}")
    print()

print("\n" + "="*80 + "\n")

# Detailed analysis for each top region
# Builds `regional_insights`: one dict per top region with revenue,
# compartment and service counts, service coverage, cross-sell and upsell
# opportunity counts, its five highest-spend services and its three upsell
# pathways with the most target compartments.
top_3_region_names = top_3_regions.index.tolist()
regional_insights = []

# The catalogue size is the same for every region and is read again after the
# loop, so define it once, before (and independently of) the loop.
total_services = df['service'].nunique()

# Collect each top region's service set once, instead of rescanning the other
# regions' rows on every iteration.
services_by_region = {
    name: set(df[df['region'] == name]['service'].unique())
    for name in top_3_region_names
}

for region in top_3_region_names:
    region_df = df[df['region'] == region]

    # Service diversity
    services_in_region = region_df['service'].nunique()
    service_coverage = services_in_region / total_services

    # Top services
    top_services_region = (
        region_df.groupby('service')['computedAmount']
        .sum()
        .sort_values(ascending=False)
        .head(5)
    )

    # Compartment analysis
    comps_in_region = region_df['compartmentName'].nunique()
    avg_cost_per_comp = region_df['computedAmount'].sum() / comps_in_region

    # Cross-sell opportunities (services used in other top regions but not here)
    services_in_region_set = services_by_region[region]
    other_regions = [r for r in top_3_region_names if r != region]
    services_in_other_top_regions = set().union(
        *(services_by_region[other_region] for other_region in other_regions)
    )

    cross_sell_opps = services_in_other_top_regions - services_in_region_set
    cross_sell_count = len(cross_sell_opps)

    # Upselling opportunities in this region: pathways for which at least one
    # compartment here uses the base service without the premium one.
    region_upsell_opps = []
    for base_service, upgrade_info in upselling_pathways.items():
        premium_service = upgrade_info['premium']
        comps_with_base = set(region_df[region_df['service'] == base_service]['compartmentName'].unique())
        comps_with_premium = set(region_df[region_df['service'] == premium_service]['compartmentName'].unique())
        upsell_targets = comps_with_base - comps_with_premium

        if len(upsell_targets) > 0:
            region_upsell_opps.append({
                'pathway': f"{base_service} ‚Üí {premium_service}",
                'targets': len(upsell_targets)
            })

    regional_insights.append({
        'region': region,
        'revenue': region_df['computedAmount'].sum(),
        'compartments': comps_in_region,
        'services_used': services_in_region,
        'service_coverage': service_coverage,
        'avg_cost_per_comp': avg_cost_per_comp,
        'cross_sell_opportunities': cross_sell_count,
        'upsell_opportunities': len(region_upsell_opps),
        'top_services': top_services_region.to_dict(),
        'top_upsell_pathways': sorted(region_upsell_opps, key=lambda x: x['targets'], reverse=True)[:3]
    })

# Display detailed insights
# Each region's report is assembled as a list of lines and printed in one
# call; empty strings stand for blank lines.
section_rule = '=' * 80
for insight in regional_insights:
    report_lines = [
        section_rule,
        f"REGION: {insight['region']}",
        section_rule,
        "",
        "üí∞ REVENUE METRICS:",
        f"   Total Revenue: ${insight['revenue']:,.2f}",
        f"   Average per Compartment: ${insight['avg_cost_per_comp']:,.2f}",
        f"   Compartments: {insight['compartments']}",
        "",
        "üìä SERVICE ADOPTION:",
        f"   Services in Use: {insight['services_used']}/{total_services} ({insight['service_coverage']:.1%} coverage)",
        f"   Cross-Sell Opportunities: {insight['cross_sell_opportunities']} services not yet adopted",
        f"   Upsell Opportunities: {insight['upsell_opportunities']} upgrade pathways available",
        "",
        "üîù TOP 5 SERVICES BY REVENUE:",
    ]
    report_lines.extend(
        f"   {rank}. {service_name}: ${service_cost:,.2f}"
        for rank, (service_name, service_cost) in enumerate(insight['top_services'].items(), 1)
    )
    report_lines.append("")

    # The upsell section is omitted entirely when the region has no pathway.
    if insight['top_upsell_pathways']:
        report_lines.append("üéØ TOP UPSELLING OPPORTUNITIES:")
        for rank, upsell_pathway in enumerate(insight['top_upsell_pathways'], 1):
            report_lines.append(f"   {rank}. {upsell_pathway['pathway']}")
            report_lines.append(f"      ‚Üí {upsell_pathway['targets']} compartments ready to upgrade")

    # Two empty entries give the two blank lines that close every region.
    report_lines.extend(["", ""])
    print("\n".join(report_lines))

# Closing line: share of total spend covered by the three regions.
top_3_share = top_3_regions['total_cost'].sum() / df['computedAmount'].sum() * 100
print(section_rule)
print("\n‚úÖ FOCUS STRATEGY: Concentrate sales efforts on these 3 regions which represent")
print(f"   {top_3_share:.1f}% of total revenue.")


In [None]:
# Visualizations: Upselling and Regional Focus Dashboard
print("Creating comprehensive upselling and regional focus visualizations...\n")

# One figure on a 4x3 grid; the sections below place their axes on `gs`.
fig = plt.figure(figsize=(20, 16))
gs = fig.add_gridspec(4, 3, hspace=0.35, wspace=0.3)

# 1. Top Upselling Opportunities
# Paired bars per pathway across the full top row: potential revenue on the
# left y-axis, number of target compartments on a twin right y-axis.
ax1 = fig.add_subplot(gs[0, :])
top_10_upsell = upsell_df.head(10)
pathways = [
    f"{base_name}\n‚Üí {premium_name}"
    for base_name, premium_name in zip(top_10_upsell['base_service'], top_10_upsell['premium_service'])
]
revenues = top_10_upsell['potential_upsell_revenue'].values
targets = top_10_upsell['upsell_targets'].values

revenue_color = '#2E7D32'
target_color = '#1976D2'

x = np.arange(len(pathways))
width = 0.35
half_width = width / 2

bars1 = ax1.bar(x - half_width, revenues, width,
                label='Potential Revenue ($)', color=revenue_color, alpha=0.8)
ax1_twin = ax1.twinx()
bars2 = ax1_twin.bar(x + half_width, targets, width,
                     label='Target Compartments', color=target_color, alpha=0.8)

ax1.set_title('Top 10 Upselling Opportunities - Premium Tier Upgrades', fontsize=13, fontweight='bold', pad=15)
ax1.set_xlabel('Upselling Pathway', fontsize=11, fontweight='bold')
ax1.set_xticks(x)
ax1.set_xticklabels(pathways, rotation=45, ha='right', fontsize=9)

# Each y-axis is coloured like the bars it measures.
ax1.set_ylabel('Potential Revenue ($)', fontsize=10, fontweight='bold', color=revenue_color)
ax1.tick_params(axis='y', labelcolor=revenue_color)
ax1_twin.set_ylabel('Target Compartments', fontsize=10, fontweight='bold', color=target_color)
ax1_twin.tick_params(axis='y', labelcolor=target_color)
ax1.grid(True, alpha=0.3, axis='y')

# Dollar value above each revenue bar, centred on the bar.
for revenue_bar, revenue_value in zip(bars1, revenues):
    bar_center = revenue_bar.get_x() + revenue_bar.get_width() / 2.
    ax1.text(bar_center, revenue_bar.get_height(), f'${revenue_value:,.0f}',
             ha='center', va='bottom', fontsize=8, fontweight='bold', color=revenue_color)

# 2. Upselling Conversion Funnel
# For the five highest-revenue pathways: a grey bar for all base-service
# compartments, overlaid with a green bar for those that already run the
# premium service, plus a label with the remaining targets.
ax2 = fig.add_subplot(gs[1, 0])
top_5_funnel = upsell_df.head(5)
funnel_data = []
for _, row in top_5_funnel.iterrows():
    base_name = row['base_service']
    funnel_data.append({
        # Add the ellipsis only when the name was actually truncated.
        'stage': base_name[:15] + '...' if len(base_name) > 15 else base_name,
        'base_users': row['compartments_with_base'],
        # Converted = base users that are not targets. Using base minus
        # targets keeps the green bar inside the grey one and in agreement
        # with the percentage label below; the raw premium count may include
        # compartments that never used the base service.
        'converted': row['compartments_with_base'] - row['upsell_targets'],
        'targets': row['upsell_targets']
    })

stages = [d['stage'] for d in funnel_data]
base = [d['base_users'] for d in funnel_data]
converted = [d['converted'] for d in funnel_data]
targets_data = [d['targets'] for d in funnel_data]

y_pos = np.arange(len(stages))
ax2.barh(y_pos, base, color='#E0E0E0', label='Base Users', alpha=0.7)
ax2.barh(y_pos, converted, color='#4CAF50', label='Already Upgraded', alpha=0.9)

ax2.set_yticks(y_pos)
ax2.set_yticklabels(stages, fontsize=9)
ax2.set_xlabel('Number of Compartments', fontsize=10, fontweight='bold')
ax2.set_title('Upselling Conversion Funnel\nTop 5 Pathways', fontsize=11, fontweight='bold')
ax2.legend(loc='lower right', fontsize=8)
ax2.grid(True, alpha=0.3, axis='x')

# Add target labels just right of each grey bar.
for i, (target, total) in enumerate(zip(targets_data, base)):
    conversion = (total - target) / total * 100 if total > 0 else 0
    ax2.text(total + 1, i, f'{target} targets\n({conversion:.0f}% conv.)',
             va='center', fontsize=8, fontweight='bold', color='#D32F2F')

# 3. Regional Revenue Distribution (Top 3)
# Pie chart of total cost per top region; wedge labels are the region names
# cut to 20 characters.
ax3 = fig.add_subplot(gs[1, 1])
colors_region = ['#1976D2', '#388E3C', '#F57C00']
region_names = [full_name[:20] for full_name in top_3_regions.index]
region_revenues = top_3_regions['total_cost'].values

wedges, texts, autotexts = ax3.pie(
    region_revenues,
    labels=region_names,
    colors=colors_region,
    autopct='%1.1f%%',
    startangle=90,
    textprops={'fontsize': 9},
)
# Percentages sit on the coloured wedges, so draw them white and bold.
for pct_label in autotexts:
    pct_label.set_fontsize(10)
    pct_label.set_fontweight('bold')
    pct_label.set_color('white')

top_3_total_cost = top_3_regions["total_cost"].sum()
ax3.set_title(f'Top 3 Regions Revenue Distribution\nTotal: ${top_3_total_cost:,.0f}',
              fontsize=11, fontweight='bold')

# 4. Service Coverage by Top 3 Regions
# Paired bars per region: the total number of distinct services in the data
# (grey) next to the number the region uses (blue), with the coverage
# percentage printed above.
ax4 = fig.add_subplot(gs[1, 2])

region_names_short = []
for insight in regional_insights:
    full_name = insight['region']
    # Names longer than 15 characters are cut and marked with an ellipsis.
    region_names_short.append(full_name[:15] + '...' if len(full_name) > 15 else full_name)

services_used = [insight['services_used'] for insight in regional_insights]
services_available = [total_services for _insight in regional_insights]

x_pos = np.arange(len(region_names_short))
width = 0.35
half_width = width / 2

bars1 = ax4.bar(x_pos - half_width, services_available, width,
                label='Total Available', color='#E0E0E0', alpha=0.7)
bars2 = ax4.bar(x_pos + half_width, services_used, width,
                label='Currently Used', color='#1976D2', alpha=0.9)

ax4.set_title('Service Adoption Coverage\nTop 3 Regions', fontsize=11, fontweight='bold')
ax4.set_ylabel('Number of Services', fontsize=10, fontweight='bold')
ax4.set_xticks(x_pos)
ax4.set_xticklabels(region_names_short, rotation=30, ha='right', fontsize=9)
ax4.legend(fontsize=8)
ax4.grid(True, alpha=0.3, axis='y')

# Coverage percentage, drawn two units above the height of the "used" bar.
for i, used in enumerate(services_used):
    available = services_available[i]
    coverage = (used / available) * 100
    ax4.text(i, used + 2, f'{coverage:.0f}%',
             ha='center', fontsize=9, fontweight='bold', color='#1976D2')

# 5. Cross-Sell vs Upsell Opportunities by Region
# Paired bars per region: count of cross-sell opportunities next to count of
# upsell pathways, each labelled with its integer value.
ax5 = fig.add_subplot(gs[2, 0])

regions_short, cross_sell, upsell = [], [], []
for insight in regional_insights:
    regions_short.append(insight['region'][:15])
    cross_sell.append(insight['cross_sell_opportunities'])
    upsell.append(insight['upsell_opportunities'])

x = np.arange(len(regions_short))
width = 0.35
half_width = width / 2

bars1 = ax5.bar(x - half_width, cross_sell, width, label='Cross-Sell Opps', color='#FF9800', alpha=0.8)
bars2 = ax5.bar(x + half_width, upsell, width, label='Upsell Opps', color='#9C27B0', alpha=0.8)

ax5.set_title('Growth Opportunities by Region\n(Cross-Sell vs Upsell)', fontsize=11, fontweight='bold')
ax5.set_ylabel('Number of Opportunities', fontsize=10, fontweight='bold')
ax5.set_xticks(x)
ax5.set_xticklabels(regions_short, rotation=30, ha='right', fontsize=9)
ax5.legend(fontsize=8)
ax5.grid(True, alpha=0.3, axis='y')

# Value label centred above every bar: cross-sell bars first, then upsell.
for bar in [*bars1, *bars2]:
    height = bar.get_height()
    bar_center = bar.get_x() + bar.get_width() / 2.
    ax5.text(bar_center, height, f'{int(height)}',
             ha='center', va='bottom', fontsize=8, fontweight='bold')

# 6. Revenue per Compartment (Top 3 Regions)
# One bar per region showing its average cost per compartment, labelled with
# the dollar value and the region's compartment count.
ax6 = fig.add_subplot(gs[2, 1])
avg_costs = [insight['avg_cost_per_comp'] for insight in regional_insights]
compartments = [insight['compartments'] for insight in regional_insights]

# Labels are derived here from regional_insights so this section does not
# rely on a list built by another chart.
region_tick_labels = [insight['region'][:15] for insight in regional_insights]

# Bars are placed at numeric positions rather than keyed on the truncated
# names: two regions sharing their first 15 characters would otherwise land
# on the same category.
bar_positions = np.arange(len(region_tick_labels))

colors_bars = ['#1976D2', '#388E3C', '#F57C00']
bars = ax6.bar(bar_positions, avg_costs, color=colors_bars, alpha=0.8)

ax6.set_ylabel('Average Cost per Compartment ($)', fontsize=10, fontweight='bold')
ax6.set_title('Revenue Efficiency by Region\nAvg. Cost per Compartment', fontsize=11, fontweight='bold')
# Fix the tick positions before assigning labels, as set_xticklabels expects.
ax6.set_xticks(bar_positions)
ax6.set_xticklabels(region_tick_labels, rotation=30, ha='right', fontsize=9)
ax6.grid(True, alpha=0.3, axis='y')

# Add value labels with compartment count
for bar, cost, comp_count in zip(bars, avg_costs, compartments):
    height = bar.get_height()
    ax6.text(bar.get_x() + bar.get_width()/2., height,
             f'${cost:,.2f}\n({comp_count} comps)', ha='center', va='bottom', fontsize=8, fontweight='bold')

# 7. Upselling Revenue Potential by Category
# Horizontal bars of summed potential upsell revenue per service category,
# largest first. Categories without revenue are left out.
ax7 = fig.add_subplot(gs[2, 2])

# Categorize upselling pathways by their base service. The names must match
# the values of upsell_df['base_service'], i.e. the keys of
# `upselling_pathways` ('Compute', 'Block Storage', ...). Matching is
# case-sensitive, so identifiers such as 'COMPUTE' or 'BLOCK_STORAGE' never
# match and leave the chart empty.
categories = {
    'Compute': ['Compute'],
    'Database': ['Database', 'MySQL'],
    'Networking': ['Virtual Cloud Network', 'Load Balancer'],
    'Storage': ['Block Storage', 'Object Storage'],
    'Observability': ['Telemetry', 'Logging'],
    'Security': ['Oracle Cloud Guard Service'],
    'Data & AI': ['Data Flow', 'Data Integration', 'Analytics']
}

category_revenue = {}
for category, services in categories.items():
    revenue = upsell_df[upsell_df['base_service'].isin(services)]['potential_upsell_revenue'].sum()
    if revenue > 0:
        category_revenue[category] = revenue

# Base services missing from the mapping are reported under 'Other' so their
# revenue is not silently dropped from the chart.
categorised_services = [name for services in categories.values() for name in services]
uncategorised_revenue = upsell_df[
    ~upsell_df['base_service'].isin(categorised_services)
]['potential_upsell_revenue'].sum()
if uncategorised_revenue > 0:
    category_revenue['Other'] = uncategorised_revenue

sorted_categories = sorted(category_revenue.items(), key=lambda x: x[1], reverse=True)
cat_names = [c[0] for c in sorted_categories]
cat_revenues = [c[1] for c in sorted_categories]

colors_cat = plt.cm.Set3(np.linspace(0, 1, len(cat_names)))
bars = ax7.barh(cat_names, cat_revenues, color=colors_cat, alpha=0.8)

ax7.set_xlabel('Potential Revenue ($)', fontsize=10, fontweight='bold')
ax7.set_title('Upselling Revenue Potential\nby Service Category', fontsize=11, fontweight='bold')
ax7.grid(True, alpha=0.3, axis='x')

# Add value labels at the end of each bar.
for bar, revenue in zip(bars, cat_revenues):
    bar_length = bar.get_width()
    ax7.text(bar_length, bar.get_y() + bar.get_height()/2.,
             f' ${revenue:,.0f}', ha='left', va='center', fontsize=9, fontweight='bold')

# 8. Regional Service Heatmap (Top 3 Regions, Top Services)
# Spend per (region, service) cell for the three top regions and the twelve
# services with the highest overall spend.
ax8 = fig.add_subplot(gs[3, :2])

top_services_global = (
    df.groupby('service')['computedAmount']
    .sum()
    .sort_values(ascending=False)
    .head(12)
    .index
)

# One row of per-service spend totals for each region.
spend_rows = [
    [
        df[(df['region'] == region_name) & (df['service'] == service_name)]['computedAmount'].sum()
        for service_name in top_services_global
    ]
    for region_name in top_3_region_names
]
region_service_spend = pd.DataFrame(
    spend_rows, index=top_3_region_names, columns=top_services_global
)
region_service_spend = region_service_spend.fillna(0).astype(float)

im = ax8.imshow(region_service_spend.values, cmap='YlOrRd', aspect='auto')
ax8.set_title('Service Spending Heatmap: Top 3 Regions √ó Top 12 Services', fontsize=12, fontweight='bold', pad=15)
ax8.set_xticks(np.arange(len(top_services_global)))
ax8.set_xticklabels(top_services_global, rotation=45, ha='right', fontsize=9)
ax8.set_yticks(np.arange(len(top_3_region_names)))
ax8.set_yticklabels([region_name[:25] for region_name in top_3_region_names], fontsize=9)

# Add colorbar
cbar = plt.colorbar(im, ax=ax8)
cbar.set_label('Spending ($)', rotation=270, labelpad=20, fontsize=10, fontweight='bold')

# Annotate every non-zero cell; values in the upper half of the range get
# white text, the rest black.
for (i, j), value in np.ndenumerate(region_service_spend.values):
    if value <= 0:
        continue
    text_color = "black" if value < region_service_spend.values.max()/2 else "white"
    ax8.text(j, i, f'${value:.0f}',
             ha="center", va="center", color=text_color,
             fontsize=7, fontweight='bold')

# 9. Sales Priority Matrix
# Bubble chart with one bubble per top region: x = cross-sell plus upsell
# opportunity count, y = estimated revenue potential, bubble size scaled by
# the region's compartment count.
ax9 = fig.add_subplot(gs[3, 2])

# Create priority matrix data
priority_data = []
for insight in regional_insights:
    total_opps = insight['cross_sell_opportunities'] + insight['upsell_opportunities']
    # Estimated 25% conversion of the average per-compartment cost per opportunity.
    revenue_potential = insight['avg_cost_per_comp'] * total_opps * 0.25
    priority_data.append(dict(
        region=insight['region'][:15],
        opportunities=total_opps,
        revenue_potential=revenue_potential,
        compartments=insight['compartments'],
    ))

regions_plot, opps, revenues_plot, sizes = [], [], [], []
for entry in priority_data:
    regions_plot.append(entry['region'])
    opps.append(entry['opportunities'])
    revenues_plot.append(entry['revenue_potential'])
    sizes.append(entry['compartments'] * 3)

scatter = ax9.scatter(opps, revenues_plot, s=sizes, c=colors_region,
                      alpha=0.6, edgecolors='black', linewidth=2)

# Region name next to each bubble, nudged 5 points up and right.
for region_label, opp_count, revenue_estimate in zip(regions_plot, opps, revenues_plot):
    ax9.annotate(region_label, (opp_count, revenue_estimate), fontsize=9, fontweight='bold',
                 xytext=(5, 5), textcoords='offset points')

ax9.set_title('Sales Priority Matrix\nTop 3 Regions\n(Bubble size = # Compartments)',
              fontsize=11, fontweight='bold')
ax9.set_xlabel('Total Opportunities (Cross-Sell + Upsell)', fontsize=10, fontweight='bold')
ax9.set_ylabel('Estimated Revenue Potential ($)', fontsize=10, fontweight='bold')
ax9.grid(True, alpha=0.3)

# Quadrant lines at the medians of both axes.
guide_style = dict(color='gray', linestyle='--', alpha=0.5, linewidth=1)
ax9.axhline(y=np.median(revenues_plot), **guide_style)
ax9.axvline(x=np.median(opps), **guide_style)

# Label for the top-right quadrant, in axes-relative coordinates.
ax9.text(0.95, 0.95, 'HIGH PRIORITY', transform=ax9.transAxes, fontsize=9,
         fontweight='bold', color='#D32F2F', ha='right', va='top',
         bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.3))

# Finish and render the whole dashboard figure.
plt.suptitle('üéØ UPSELLING & REGIONAL FOCUS DASHBOARD', fontsize=16, fontweight='bold', y=0.995)

plt.show()

print("‚úÖ Upselling and Regional Focus Dashboard created successfully!")


In [None]:
# Export Upselling and Regional Analysis Data
from pathlib import Path

print("Exporting upselling and regional focus analysis data...\n")

# DataFrame.to_csv does not create missing parent directories, so make sure
# the export folder exists before the first write.
Path('../output').mkdir(parents=True, exist_ok=True)

# Export upselling opportunities
upsell_export = upsell_df.copy()
upsell_export.to_csv('../output/upselling_opportunities.csv', index=False)
print(f"‚úÖ Exported: upselling_opportunities.csv ({len(upsell_export)} opportunities)")

# Export regional insights: one row per region, flattening the first three
# entries of each insight's `top_services` mapping into paired name/revenue
# columns. Presumably the mapping is already ordered by revenue; verify
# against the cell that builds `regional_insights`.
TOP_SERVICE_SLOTS = 3
regional_rows = []
for insight in regional_insights:
    row = {
        'region': insight['region'],
        'revenue': insight['revenue'],
        'compartments': insight['compartments'],
        'services_used': insight['services_used'],
        'service_coverage_pct': insight['service_coverage'] * 100,
        'avg_cost_per_compartment': insight['avg_cost_per_comp'],
        'cross_sell_opportunities': insight['cross_sell_opportunities'],
        'upsell_opportunities': insight['upsell_opportunities'],
    }
    ranked_services = list((insight['top_services'] or {}).items())[:TOP_SERVICE_SLOTS]
    for slot in range(TOP_SERVICE_SLOTS):
        # Regions with fewer than three services get '' / 0 placeholders so
        # every exported row has the same columns.
        service_name, service_revenue = ranked_services[slot] if slot < len(ranked_services) else ('', 0)
        row[f'top_service_{slot + 1}'] = service_name
        row[f'top_service_{slot + 1}_revenue'] = service_revenue
    regional_rows.append(row)

regional_export = pd.DataFrame(regional_rows)

regional_export.to_csv('../output/regional_focus_top3.csv', index=False)
print(f"‚úÖ Exported: regional_focus_top3.csv ({len(regional_export)} regions)")

# Create executive summary
top_3_revenue = top_3_regions['total_cost'].sum()
exec_summary = {
    'total_upsell_opportunities': len(upsell_df),
    'total_upsell_revenue_potential': upsell_df['potential_upsell_revenue'].sum(),
    'total_compartments_with_upsell_potential': upsell_df['upsell_targets'].sum(),
    'top_3_regions': top_3_region_names,
    'top_3_regions_revenue': top_3_revenue,
    'top_3_regions_pct_of_total': (top_3_revenue / df['computedAmount'].sum()) * 100,
    'total_cross_sell_opps_in_top_3': sum(i['cross_sell_opportunities'] for i in regional_insights),
    'total_upsell_opps_in_top_3': sum(i['upsell_opportunities'] for i in regional_insights),
    'avg_service_coverage_top_3': np.mean([i['service_coverage'] for i in regional_insights]) * 100
}

print("\n" + "="*80)
print("EXECUTIVE SUMMARY - UPSELLING & REGIONAL FOCUS")
print("="*80)
print(f"\nüí∞ UPSELLING POTENTIAL:")
print(f"   Total Opportunities: {exec_summary['total_upsell_opportunities']} upgrade pathways")
print(f"   Revenue Potential: ${exec_summary['total_upsell_revenue_potential']:,.2f}")
print(f"   Target Compartments: {exec_summary['total_compartments_with_upsell_potential']}")

print(f"\nüìç TOP 3 REGIONS:")
print(f"   Regions: {', '.join(exec_summary['top_3_regions'])}")
print(f"   Combined Revenue: ${exec_summary['top_3_regions_revenue']:,.2f}")
print(f"   % of Total: {exec_summary['top_3_regions_pct_of_total']:.1f}%")
print(f"   Avg Service Coverage: {exec_summary['avg_service_coverage_top_3']:.1f}%")

print(f"\nüéØ GROWTH OPPORTUNITIES IN TOP 3 REGIONS:")
print(f"   Cross-Sell Opportunities: {exec_summary['total_cross_sell_opps_in_top_3']} services")
print(f"   Upsell Opportunities: {exec_summary['total_upsell_opps_in_top_3']} pathways")

print("\n" + "="*80)
print("\n‚úÖ All upselling and regional focus analysis completed!")
print("üìä Review the dashboard above for visual insights.")
print("üìÅ Check output folder for exported CSV files.")


## 16. Cost Tracking & Tagging Analysis for Upsell/Cross-Sell

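Analyze resource tagging patterns to find gaps in cost governance and to target sales campaigns. A row counts as tagged when at least one of the `cost_center`, `environment`, or `team` columns holds a value other than `Untagged`. Untagged or poorly tagged resources are opportunities to sell tagging and governance solutions.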

In [None]:
# Tagging Analysis: Identify cost governance and compliance opportunities
print("=== COST TRACKING & TAGGING ANALYSIS ===\n")

# Flag, per row, whether each standard tag key carries a real value.
# 'Untagged' is the placeholder for a missing tag. NaN is also treated as
# untagged: a bare `!= 'Untagged'` comparison is True for NaN and would
# silently count rows with missing tag values as tagged.
for flag_col, tag_col in (('has_cost_center', 'cost_center'),
                          ('has_environment', 'environment'),
                          ('has_team', 'team')):
    df[flag_col] = df[tag_col].notna() & (df[tag_col] != 'Untagged')
df['has_any_tag'] = df['has_cost_center'] | df['has_environment'] | df['has_team']

# Overall tagging statistics (cost-weighted: sums of computedAmount).
total_cost = df['computedAmount'].sum()
tagged_cost = df[df['has_any_tag']]['computedAmount'].sum()
untagged_cost = df[~df['has_any_tag']]['computedAmount'].sum()
tagging_coverage_pct = (tagged_cost / total_cost) * 100

print(f"üìä OVERALL TAGGING COMPLIANCE:")
print(f"   Total Cost: ${total_cost:,.2f}")
print(f"   Tagged Resources: ${tagged_cost:,.2f} ({tagging_coverage_pct:.1f}%)")
print(f"   Untagged Resources: ${untagged_cost:,.2f} ({100-tagging_coverage_pct:.1f}%)")
print()

# Tagging by standard keys. A row can carry several tags, so these three
# amounts overlap and are not expected to add up to tagged_cost.
cost_center_tagged = df[df['has_cost_center']]['computedAmount'].sum()
environment_tagged = df[df['has_environment']]['computedAmount'].sum()
team_tagged = df[df['has_team']]['computedAmount'].sum()

print(f"üìã TAG KEY COVERAGE:")
print(f"   CostCenter Tag: ${cost_center_tagged:,.2f} ({cost_center_tagged/total_cost*100:.1f}%)")
print(f"   Environment Tag: ${environment_tagged:,.2f} ({environment_tagged/total_cost*100:.1f}%)")
print(f"   Team Tag: ${team_tagged:,.2f} ({team_tagged/total_cost*100:.1f}%)")
print("\n" + "="*80 + "\n")

# Tagging compliance by compartment (identify governance opportunities).
# The compliance columns are the fraction of a compartment's rows that carry
# the tag, i.e. row-weighted rather than cost-weighted.
compartment_tagging = (
    df.groupby('compartmentName')
    .agg(
        total_cost=('computedAmount', 'sum'),
        overall_compliance=('has_any_tag', 'mean'),
        cost_center_compliance=('has_cost_center', 'mean'),
        environment_compliance=('has_environment', 'mean'),
        team_compliance=('has_team', 'mean'),
        num_services=('service', 'nunique'),
    )
    .reset_index()
    .sort_values('total_cost', ascending=False)
)

# Governance upsell targets: above-median spend with under 30% of rows tagged.
median_compartment_cost = compartment_tagging['total_cost'].quantile(0.50)
is_high_spend = compartment_tagging['total_cost'] > median_compartment_cost
is_low_compliance = compartment_tagging['overall_compliance'] < 0.30
governance_targets = compartment_tagging[is_high_spend & is_low_compliance].copy()

print(f"üéØ GOVERNANCE UPSELL OPPORTUNITIES:")
print(f"   High-spend, Low-compliance Compartments: {len(governance_targets)}")
print(f"   Combined Cost: ${governance_targets['total_cost'].sum():,.2f}")
print(f"   Average Compliance: {governance_targets['overall_compliance'].mean()*100:.1f}%")
print()

if not governance_targets.empty:
    print(f"TOP 10 GOVERNANCE TARGETS:\n")
    # Already sorted by cost, so head(10) yields the ten most expensive targets.
    for _, target in governance_targets.head(10).iterrows():
        print(f"üìÅ {target['compartmentName'][:50]}")
        print(f"   Cost: ${target['total_cost']:,.2f} | Services: {target['num_services']}")
        print(f"   Compliance: {target['overall_compliance']*100:.0f}% | CostCenter: {target['cost_center_compliance']*100:.0f}% | Environment: {target['environment_compliance']*100:.0f}%")
        print(f"   üí° Opportunity: Implement tagging policies, cost allocation, showback/chargeback")
        print()

print("="*80 + "\n")

# Tagging patterns by region (identify regional compliance gaps).
# Keeps the ten regions with the highest total cost.
regional_tagging = (
    df.groupby('region')
    .agg(
        total_cost=('computedAmount', 'sum'),
        compliance_rate=('has_any_tag', 'mean'),
        num_compartments=('compartmentName', 'nunique'),
    )
    .sort_values('total_cost', ascending=False)
    .head(10)
)

print(f"üåç REGIONAL TAGGING COMPLIANCE (Top 10 Regions):\n")
for region_name, stats_row in regional_tagging.iterrows():
    # Above 70% is good, below 30% is poor, anything in between is medium.
    rate = stats_row['compliance_rate']
    if rate > 0.70:
        status_label = "‚úÖ Good"
    elif rate < 0.30:
        status_label = "‚ö†Ô∏è Poor"
    else:
        status_label = "üìä Medium"
    print(f"{region_name[:35]:35} | Cost: ${stats_row['total_cost']:>10,.2f} | Compliance: {rate*100:>5.1f}% {status_label}")

print("\n" + "="*80 + "\n")

# Service-level tagging analysis (which services are poorly tagged?).
# Sorted by total cost, most expensive service first.
service_tagging = (
    df.groupby('service')
    .agg(
        total_cost=('computedAmount', 'sum'),
        compliance_rate=('has_any_tag', 'mean'),
        num_compartments=('compartmentName', 'nunique'),
    )
    .sort_values('total_cost', ascending=False)
)

# Poorly tagged = under 30% of rows tagged; services costing 10 or less are
# ignored as insignificant.
low_compliance_mask = service_tagging['compliance_rate'] < 0.30
significant_cost_mask = service_tagging['total_cost'] > 10
poorly_tagged_services = service_tagging[low_compliance_mask & significant_cost_mask].copy()

print(f"‚ö†Ô∏è POORLY TAGGED SERVICES (High Cost, Low Compliance):\n")
if poorly_tagged_services.empty:
    print("   ‚úÖ No major services with poor tagging compliance")
else:
    for service_name, stats_row in poorly_tagged_services.head(10).iterrows():
        print(f"{service_name[:40]:40} | ${stats_row['total_cost']:>10,.2f} | {stats_row['compliance_rate']*100:>5.1f}% tagged")

print("\n" + "="*80 + "\n")

# Tag value analysis - identify most common tag values
print(f"üìë TAG VALUE DISTRIBUTION:\n")

# Total cost per tag value, largest first, restricted to rows that carry the tag.
cost_centers = (
    df[df['has_cost_center']]
    .groupby('cost_center')['computedAmount']
    .sum()
    .sort_values(ascending=False)
)
environments = (
    df[df['has_environment']]
    .groupby('environment')['computedAmount']
    .sum()
    .sort_values(ascending=False)
)
teams = (
    df[df['has_team']]
    .groupby('team')['computedAmount']
    .sum()
    .sort_values(ascending=False)
)

# Print the ten largest values of each tag key, with a blank line between
# sections (but not after the last one).
tag_breakdowns = (
    ("Top 10 Cost Centers:", cost_centers),
    ("Environment Breakdown:", environments),
    ("Top 10 Teams:", teams),
)
for position, (heading, spend_by_value) in enumerate(tag_breakdowns):
    if position > 0:
        print()
    print(heading)
    for tag_value, spend in spend_by_value.head(10).items():
        print(f"   {tag_value[:30]:30} | ${spend:>10,.2f}")

print("\n" + "="*80)

# Calculate tagging solution revenue opportunity.
# Flat-rate estimates: 2% of untagged cost for a governance solution and
# 1.5% of the governance targets' cost for showback/chargeback implementation.
has_governance_targets = len(governance_targets) > 0
governance_opportunity = {
    'untagged_cost': untagged_cost,
    'poorly_tagged_compartments': len(governance_targets),
    'poorly_tagged_compartment_cost': governance_targets['total_cost'].sum() if has_governance_targets else 0,
    'estimated_governance_solution_revenue': untagged_cost * 0.02,
    'showback_chargeback_opportunity': governance_targets['total_cost'].sum() * 0.015 if has_governance_targets else 0,
}

solution_revenue_estimate = governance_opportunity['estimated_governance_solution_revenue']
showback_revenue_estimate = governance_opportunity['showback_chargeback_opportunity']

print(f"\nüí∞ GOVERNANCE SOLUTION OPPORTUNITY:")
print(f"   Untagged Resources: ${governance_opportunity['untagged_cost']:,.2f}")
print(f"   Governance Solution Revenue Potential: ${solution_revenue_estimate:,.2f}")
print(f"   Showback/Chargeback Implementation Revenue: ${showback_revenue_estimate:,.2f}")
print(f"   Total Governance Revenue Opportunity: ${solution_revenue_estimate + showback_revenue_estimate:,.2f}")


In [None]:
# Tagging Analysis Visualizations
print("Creating comprehensive tagging analysis dashboard...\n")

# 3 x 3 grid that hosts the nine dashboard panels.
fig = plt.figure(figsize=(20, 14))
gs = fig.add_gridspec(3, 3, hspace=0.35, wspace=0.3)

# 1. Overall Tagging Compliance (Pie Chart)
ax1 = fig.add_subplot(gs[0, 0])
tagging_data = [tagged_cost, untagged_cost]
tagging_labels = ['Tagged\nResources', 'Untagged\nResources']
colors_tag = ['#4CAF50', '#F44336']
explode = (0.05, 0.1)

wedges, texts, autotexts = ax1.pie(
    tagging_data,
    labels=tagging_labels,
    autopct='%1.1f%%',
    colors=colors_tag,
    explode=explode,
    startangle=90,
    textprops={'fontsize': 10, 'fontweight': 'bold'},
)
# White, slightly larger percentage labels so they read on the coloured wedges.
plt.setp(autotexts, color='white', fontsize=11)

ax1.set_title(
    f'Overall Tagging Compliance\nTotal: ${total_cost:,.0f}',
    fontsize=12, fontweight='bold', pad=15,
)

# 2. Tag Key Coverage Breakdown
ax2 = fig.add_subplot(gs[0, 1])

# (label, cost, bar colour) for each bar, in plotting order.
coverage_rows = [
    ('CostCenter', cost_center_tagged, '#2196F3'),
    ('Environment', environment_tagged, '#FF9800'),
    ('Team', team_tagged, '#9C27B0'),
    ('Untagged', untagged_cost, '#E0E0E0'),
]
tag_keys, tag_costs, colors_keys = (list(column) for column in zip(*coverage_rows))

bars = ax2.barh(tag_keys, tag_costs, color=colors_keys, alpha=0.8)
ax2.set_xlabel('Cost ($)', fontsize=10, fontweight='bold')
ax2.set_title('Tag Key Coverage by Cost', fontsize=12, fontweight='bold', pad=15)
ax2.grid(True, alpha=0.3, axis='x')

# Label each bar at its right-hand end with the amount and its share of total cost.
for rect, amount in zip(bars, tag_costs):
    share_of_total = (amount / total_cost) * 100
    ax2.text(
        rect.get_width(),
        rect.get_y() + rect.get_height()/2.,
        f' ${amount:,.0f} ({share_of_total:.1f}%)',
        ha='left', va='center', fontsize=9, fontweight='bold',
    )

# 3. Top 10 Governance Targets
ax3 = fig.add_subplot(gs[0, 2])
if governance_targets.empty:
    # Nothing to plot: show a centred placeholder message and hide the axes.
    ax3.text(0.5, 0.5, 'No governance targets\nidentified',
             ha='center', va='center', fontsize=12, transform=ax3.transAxes)
    ax3.set_xlim(0, 1)
    ax3.set_ylim(0, 1)
    ax3.axis('off')
else:
    top_gov_targets = governance_targets.head(10).copy()
    # Truncate long compartment names so the tick labels stay readable.
    comp_names_short = []
    for full_name in top_gov_targets['compartmentName']:
        comp_names_short.append(full_name[:20] + '...' if len(full_name) > 20 else full_name)
    costs = top_gov_targets['total_cost'].values
    compliance = top_gov_targets['overall_compliance'].values

    y_pos = np.arange(len(comp_names_short))

    def _target_bar_colour(rate):
        """Red below 20% compliance, orange below 30%, amber otherwise."""
        if rate < 0.2:
            return '#F44336'
        if rate < 0.3:
            return '#FF9800'
        return '#FFC107'

    colors_comp = [_target_bar_colour(rate) for rate in compliance]
    bars = ax3.barh(y_pos, costs, color=colors_comp, alpha=0.8)

    ax3.set_yticks(y_pos)
    ax3.set_yticklabels(comp_names_short, fontsize=8)
    ax3.set_xlabel('Cost ($)', fontsize=10, fontweight='bold')
    ax3.set_title('Top 10 Governance Targets\n(High Cost, Low Compliance)',
                  fontsize=11, fontweight='bold', pad=15)
    ax3.grid(True, alpha=0.3, axis='x')

    # Compliance percentage just past the end of each bar; the gap is 2% of
    # the longest bar.
    label_gap = costs.max() * 0.02
    for row_idx, (bar_cost, rate) in enumerate(zip(costs, compliance)):
        ax3.text(bar_cost + label_gap, row_idx, f'{rate*100:.0f}%',
                 va='center', fontsize=8, fontweight='bold', color='#D32F2F')

# 4. Regional Tagging Compliance
ax4 = fig.add_subplot(gs[1, 0])
top_regions_tag = regional_tagging.head(8)
region_names_tag = [region_label[:20] for region_label in top_regions_tag.index]
compliance_rates = top_regions_tag['compliance_rate'].values * 100
region_costs_tag = top_regions_tag['total_cost'].values

x_pos = np.arange(len(region_names_tag))
width = 0.35


def _region_bar_colour(rate_pct):
    """Green above 70%, amber above 30%, red otherwise."""
    if rate_pct > 70:
        return '#4CAF50'
    if rate_pct > 30:
        return '#FFC107'
    return '#F44336'


colors_compliance = [_region_bar_colour(rate_pct) for rate_pct in compliance_rates]
bars = ax4.bar(x_pos, compliance_rates, color=colors_compliance, alpha=0.8)

ax4.set_ylabel('Compliance Rate (%)', fontsize=10, fontweight='bold')
ax4.set_title('Regional Tagging Compliance\nTop 8 Regions', fontsize=11, fontweight='bold')
ax4.set_xticks(x_pos)
ax4.set_xticklabels(region_names_tag, rotation=45, ha='right', fontsize=8)

# Reference lines for the target (70%) and critical (30%) thresholds.
threshold_style = dict(linestyle='--', alpha=0.5, linewidth=1)
ax4.axhline(y=70, color='green', label='Target: 70%', **threshold_style)
ax4.axhline(y=30, color='red', label='Critical: 30%', **threshold_style)
ax4.legend(loc='upper right', fontsize=8)
ax4.grid(True, alpha=0.3, axis='y')

# Percentage label two units above the top of each bar.
for rect, rate_pct in zip(bars, compliance_rates):
    ax4.text(
        rect.get_x() + rect.get_width()/2.,
        rect.get_height() + 2,
        f'{rate_pct:.0f}%',
        ha='center', va='bottom', fontsize=8, fontweight='bold',
    )

# 5. Tagging Compliance by Service (Top 10 by cost)
ax5 = fig.add_subplot(gs[1, 1])
top_services_tag = service_tagging.head(10)
service_names_short = []
for service_label in top_services_tag.index:
    service_names_short.append(service_label[:25] + '...' if len(service_label) > 25 else service_label)
service_compliance = top_services_tag['compliance_rate'].values * 100
service_costs_tag = top_services_tag['total_cost'].values

y_pos = np.arange(len(service_names_short))

# Pale grey full-width (100%) background bar behind every service.
ax5.barh(y_pos, np.full(len(y_pos), 100), color='#E0E0E0', alpha=0.3, label='Untagged')


def _service_bar_colour(rate_pct):
    """Green above 70%, amber above 30%, red otherwise."""
    if rate_pct > 70:
        return '#4CAF50'
    if rate_pct > 30:
        return '#FFC107'
    return '#F44336'


# Coloured bar drawn on top, as long as the service's compliance percentage.
colors_svc = [_service_bar_colour(rate_pct) for rate_pct in service_compliance]
bars = ax5.barh(y_pos, service_compliance, color=colors_svc, alpha=0.8, label='Tagged')

ax5.set_yticks(y_pos)
ax5.set_yticklabels(service_names_short, fontsize=8)
ax5.set_xlabel('Compliance Rate (%)', fontsize=10, fontweight='bold')
ax5.set_title('Service Tagging Compliance\nTop 10 Services by Cost',
              fontsize=11, fontweight='bold', pad=15)
ax5.set_xlim(0, 100)
ax5.grid(True, alpha=0.3, axis='x')

# Percentage label just to the right of each coloured bar.
for row_idx, rate_pct in enumerate(service_compliance):
    ax5.text(rate_pct + 2, row_idx, f'{rate_pct:.0f}%', va='center', fontsize=8, fontweight='bold')

# 6. Cost Center Distribution (Top 10)
ax6 = fig.add_subplot(gs[1, 2])
if cost_centers.empty:
    # No cost-center tags at all: show a placeholder instead of an empty chart.
    ax6.text(0.5, 0.5, 'No cost center\ntags found',
             ha='center', va='center', fontsize=12, transform=ax6.transAxes)
    ax6.axis('off')
else:
    top_cc = cost_centers.head(10)
    cc_names = []
    for cc_label in top_cc.index:
        cc_names.append(cc_label[:20] + '...' if len(cc_label) > 20 else cc_label)
    cc_costs = top_cc.values

    # One colour per bar, spread evenly across the Set3 colormap.
    colors_cc = plt.cm.Set3(np.linspace(0, 1, len(cc_names)))
    bars = ax6.barh(cc_names, cc_costs, color=colors_cc, alpha=0.8)

    ax6.set_xlabel('Cost ($)', fontsize=10, fontweight='bold')
    ax6.set_title('Top 10 Cost Centers', fontsize=11, fontweight='bold', pad=15)
    ax6.grid(True, alpha=0.3, axis='x')

    # Dollar amount at the right-hand end of each bar.
    for rect, amount in zip(bars, cc_costs):
        ax6.text(
            rect.get_width(),
            rect.get_y() + rect.get_height()/2.,
            f' ${amount:,.0f}',
            ha='left', va='center', fontsize=8, fontweight='bold',
        )

# 7. Environment Distribution
ax7 = fig.add_subplot(gs[2, 0])
if environments.empty:
    # No environment tags at all: show a placeholder instead of an empty pie.
    ax7.text(0.5, 0.5, 'No environment\ntags found',
             ha='center', va='center', fontsize=12, transform=ax7.transAxes)
    ax7.axis('off')
else:
    env_data = environments.values
    env_labels = environments.index.tolist()
    # Fixed five-colour palette, trimmed when there are fewer environments.
    env_palette = ['#1976D2', '#388E3C', '#F57C00', '#C2185B', '#7B1FA2']
    colors_env = env_palette[:len(env_labels)]

    wedges, texts, autotexts = ax7.pie(
        env_data,
        labels=env_labels,
        autopct='%1.1f%%',
        colors=colors_env,
        startangle=90,
        textprops={'fontsize': 9},
    )
    # Bold white percentage labels so they read on the coloured wedges.
    plt.setp(autotexts, color='white', fontweight='bold', fontsize=10)

    ax7.set_title(
        f'Environment Distribution\nTotal: ${env_data.sum():,.0f}',
        fontsize=11, fontweight='bold',
    )

# 8. Governance Opportunity Summary
ax8 = fig.add_subplot(gs[2, 1])
ax8.axis('off')

# Figures shown in the text box. The target aggregates fall back to 0 when
# there are no governance targets.
panel_has_targets = len(governance_targets) > 0
panel_target_cost = governance_targets['total_cost'].sum() if panel_has_targets else 0
panel_target_compliance = governance_targets['overall_compliance'].mean()*100 if panel_has_targets else 0
panel_solution_revenue = governance_opportunity['estimated_governance_solution_revenue']
panel_showback_revenue = governance_opportunity['showback_chargeback_opportunity']

summary_text = f"""
GOVERNANCE SOLUTION OPPORTUNITIES

üìä Tagging Compliance
   ‚Ä¢ Overall Coverage: {tagging_coverage_pct:.1f}%
   ‚Ä¢ Tagged Cost: ${tagged_cost:,.0f}
   ‚Ä¢ Untagged Cost: ${untagged_cost:,.0f}

üéØ Target Opportunities
   ‚Ä¢ High-Risk Compartments: {len(governance_targets)}
   ‚Ä¢ Combined Cost: ${panel_target_cost:,.0f}
   ‚Ä¢ Avg. Compliance: {panel_target_compliance:.1f}%

üí∞ Revenue Potential
   ‚Ä¢ Tagging Solution: ${panel_solution_revenue:,.0f}
   ‚Ä¢ Showback/Chargeback: ${panel_showback_revenue:,.0f}
   ‚Ä¢ Total Opportunity: ${panel_solution_revenue + panel_showback_revenue:,.0f}

‚úÖ Recommended Actions
   1. Implement tag policies for untagged resources
   2. Deploy cost allocation/showback for targets
   3. Enable automated tagging workflows
   4. Establish governance training programs
"""

# Monospaced text anchored at the panel's top-left, inside a rounded box.
ax8.text(
    0.05, 0.95, summary_text,
    transform=ax8.transAxes,
    fontsize=10,
    verticalalignment='top',
    fontfamily='monospace',
    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3),
)

# 9. Compliance Heatmap: Top Regions vs Tag Keys
ax9 = fig.add_subplot(gs[2, 2])

# Compliance matrix: percentage of rows carrying each tag key, per region,
# for the six highest-cost regions. One grouped mean replaces the per-region
# boolean scan of the full frame and yields a float frame directly instead of
# an object-dtype frame filled cell by cell.
top_regions_for_heatmap = regional_tagging.head(6).index
tag_flag_labels = {
    'has_cost_center': 'CostCenter',
    'has_environment': 'Environment',
    'has_team': 'Team',
}
tag_key_compliance = (
    df.groupby('region')[list(tag_flag_labels)]
    .mean()
    # Keep only the heatmap regions, in their cost-ranked order.
    .reindex(top_regions_for_heatmap)
    .rename(columns=tag_flag_labels)
    .mul(100)
    .astype(float)
)
compliance_matrix = tag_key_compliance.to_numpy()

im = ax9.imshow(compliance_matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=100)
ax9.set_xticks(np.arange(len(tag_key_compliance.columns)))
ax9.set_yticks(np.arange(len(tag_key_compliance.index)))
ax9.set_xticklabels(tag_key_compliance.columns, fontsize=9, fontweight='bold')
ax9.set_yticklabels([r[:20] for r in tag_key_compliance.index], fontsize=8)
ax9.set_title('Tag Compliance Heatmap\nTop 6 Regions √ó Tag Keys', fontsize=11, fontweight='bold', pad=15)

# Add colorbar
cbar = plt.colorbar(im, ax=ax9)
cbar.set_label('Compliance %', rotation=270, labelpad=20, fontsize=9, fontweight='bold')

# Annotate every cell; white text below 50%, black text otherwise.
for (i, j), value in np.ndenumerate(compliance_matrix):
    ax9.text(j, i, f'{value:.0f}%',
             ha="center", va="center",
             color='white' if value < 50 else 'black',
             fontsize=9, fontweight='bold')

plt.suptitle('üìã COST TRACKING & TAGGING ANALYSIS DASHBOARD', fontsize=16, fontweight='bold', y=0.995)

plt.show()

print("‚úÖ Tagging analysis dashboard created successfully!")


In [None]:
# Export Tagging Analysis Results
from pathlib import Path

print("Exporting tagging and governance analysis data...\n")

# DataFrame.to_csv does not create missing parent directories, so make sure
# the export folder exists before the first write.
Path('../output').mkdir(parents=True, exist_ok=True)

# Export governance targets
if len(governance_targets) > 0:
    governance_export = governance_targets.copy()

    # Priority tiers by cost within the target set: above the 75th percentile
    # is HIGH, above the median is MEDIUM, the rest LOW. The two quantiles are
    # computed once here instead of being recomputed inside a row-wise lambda
    # for every row.
    target_costs = governance_export['total_cost']
    cost_q75 = target_costs.quantile(0.75)
    cost_q50 = target_costs.quantile(0.50)
    governance_export['governance_solution_priority'] = np.select(
        [target_costs > cost_q75, target_costs > cost_q50],
        ['HIGH', 'MEDIUM'],
        default='LOW',
    )

    governance_export.to_csv('../output/tagging_governance_targets.csv', index=False)
    print(f"‚úÖ Exported: tagging_governance_targets.csv ({len(governance_export)} targets)")
else:
    print("‚ö†Ô∏è No governance targets to export")

# Export regional tagging analysis (index = region, so it is written out too).
regional_tag_export = regional_tagging.copy()
regional_tag_export.to_csv('../output/regional_tagging_compliance.csv')
print(f"‚úÖ Exported: regional_tagging_compliance.csv ({len(regional_tag_export)} regions)")


def _service_compliance_status(rate):
    """Label a compliance fraction: Good above 0.70, Medium above 0.30, else Poor."""
    if rate > 0.70:
        return 'Good'
    if rate > 0.30:
        return 'Medium'
    return 'Poor'


# Export service tagging analysis (index = service, so it is written out too).
service_tag_export = service_tagging.copy()
service_tag_export['compliance_status'] = service_tag_export['compliance_rate'].apply(_service_compliance_status)
service_tag_export.to_csv('../output/service_tagging_compliance.csv')
print(f"‚úÖ Exported: service_tagging_compliance.csv ({len(service_tag_export)} services)")


def _compartment_governance_priority(compartment):
    """Rank a compartment row from its total cost and overall compliance.

    The rules are checked in order, so the first match wins.
    """
    spend = compartment['total_cost']
    tagged_share = compartment['overall_compliance']
    if spend > 50 and tagged_share < 0.20:
        return 'CRITICAL'
    if spend > 20 and tagged_share < 0.40:
        return 'HIGH'
    if tagged_share < 0.60:
        return 'MEDIUM'
    return 'LOW'


# Export compartment-level tagging for governance teams
compartment_tag_export = compartment_tagging.copy()
compartment_tag_export['governance_priority'] = compartment_tag_export.apply(
    _compartment_governance_priority, axis=1
)
compartment_tag_export.to_csv('../output/compartment_tagging_analysis.csv', index=False)
print(f"‚úÖ Exported: compartment_tagging_analysis.csv ({len(compartment_tag_export)} compartments)")

# Create comprehensive tagging summary: a nested dict gathering the headline
# figures computed earlier in this section.
summary_has_targets = len(governance_targets) > 0
summary_target_cost = governance_targets['total_cost'].sum() if summary_has_targets else 0
summary_avg_compliance = governance_targets['overall_compliance'].mean() if summary_has_targets else 0

tagging_summary = {
    'overall_compliance': dict(
        total_cost=total_cost,
        tagged_cost=tagged_cost,
        untagged_cost=untagged_cost,
        tagging_coverage_pct=tagging_coverage_pct,
    ),
    'tag_key_coverage': dict(
        cost_center_tagged=cost_center_tagged,
        cost_center_pct=(cost_center_tagged / total_cost) * 100,
        environment_tagged=environment_tagged,
        environment_pct=(environment_tagged / total_cost) * 100,
        team_tagged=team_tagged,
        team_pct=(team_tagged / total_cost) * 100,
    ),
    'governance_targets': dict(
        num_targets=len(governance_targets),
        target_cost=summary_target_cost,
        avg_compliance=summary_avg_compliance,
    ),
    'revenue_opportunity': governance_opportunity,
}

# Console report
report_rule = "="*80
summary_solution_revenue = governance_opportunity['estimated_governance_solution_revenue']
summary_showback_revenue = governance_opportunity['showback_chargeback_opportunity']

print("\n" + report_rule)
print("TAGGING ANALYSIS SUMMARY")
print(report_rule)
print(f"\nüìä OVERALL COMPLIANCE:")
print(f"   Tagging Coverage: {tagging_coverage_pct:.1f}%")
print(f"   Tagged Resources: ${tagged_cost:,.2f}")
print(f"   Untagged Resources: ${untagged_cost:,.2f}")

print(f"\nüéØ GOVERNANCE OPPORTUNITIES:")
print(f"   High-Risk Compartments: {len(governance_targets)}")
print(f"   Total Cost at Risk: ${summary_target_cost:,.2f}")

print(f"\nüí∞ REVENUE POTENTIAL:")
print(f"   Tagging Solution Revenue: ${summary_solution_revenue:,.2f}")
print(f"   Showback/Chargeback Revenue: ${summary_showback_revenue:,.2f}")
print(f"   Total Governance Opportunity: ${summary_solution_revenue + summary_showback_revenue:,.2f}")

print("\n" + report_rule)
print("\n‚úÖ All tagging and governance analysis completed!")
print("üìä Review the dashboard above for visual insights.")
print("üìÅ Check output folder for exported CSV files.")
print("\nüí° NEXT STEPS:")
next_steps = (
    "Share governance targets CSV with account teams",
    "Develop tagging policy enforcement plan",
    "Propose cost allocation/showback implementation",
    "Schedule governance training for low-compliance teams",
)
for step_number, step_text in enumerate(next_steps, start=1):
    print(f"   {step_number}. {step_text}")
