# Table of Contents

1. [Business Presentation: Upselling Opportunity Analysis](#business-presentation-upselling-opportunity-analysis)
2. [Current Upselling Performance](#current-upselling-performance)
3. [Customer Portfolio Segmentation](#customer-portfolio-segmentation)
4. [Age Demographics](#age-demographics)
5. [Contract Lifecycle Analysis](#contract-lifecycle-analysis)
6. [Data Plan Analysis](#data-plan-analysis)
7. [Customer Value Segmentation: Premium vs Standard Customers](#customer-value-segmentation-premium-vs-standard-customers)
8. [Business Impact Projection: The ML Model Opportunity](#business-impact-projection-the-ml-model-opportunity)
9. [Strategic Recommendations](#strategic-recommendations)

---

# Business Presentation: Upselling Opportunity Analysis

This notebook presents key visualizations that demonstrate the business opportunity for implementing an ML-driven upselling model for Magenta's customer portfolio.

## Executive Summary

- **100,000 contracts** across **58,495 unique customers** in our analysis
- Currently only **7.05% of contracts** have been upsold
- Significant untapped opportunity exists across customer segments
- Multi-contract customers show higher upselling potential
- Strategic targeting based on contract lifecycle and customer behavior patterns

In [1]:
# Import required libraries
import pickle
from pathlib import Path
import polars as pl
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

FONT_FAMILY = 'Teleneo'

# Set Polars configuration for better display
_ = pl.Config.set_tbl_cols(None)
_ = pl.Config.set_fmt_str_lengths(500)
_ = pl.Config.set_fmt_float("full")

# Define Magenta brand color
MAGENTA_COLOR = 'rgb(226,0,116)'
MAGENTA_LIGHT = 'rgba(226,0,116,0.7)'
MAGENTA_DARK = 'rgb(180,0,93)'
GRAY_COLOR = 'rgb(128,128,128)'

# Update default color scheme for all plots
color_sequence = [MAGENTA_COLOR, GRAY_COLOR, MAGENTA_LIGHT, MAGENTA_DARK]
px.defaults.color_discrete_sequence = color_sequence

# Set default font for all plotly figures
px.defaults.template = "plotly_white"

In [None]:
# Define paths and load data function
base_dir = Path('/workspaces/data-scientist-at-magenta')
code_dir = base_dir / 'notebooks'
data_dir = code_dir / "data"
raw_dir = data_dir / "raw"
features_dir = data_dir / 'features'
train_dir = data_dir / 'train'
images_dir = code_dir / 'images'

def load_artifact(targ_file: str):
    targ_path = raw_dir / targ_file
    
    if not targ_path.exists():
        raise FileNotFoundError(f'Artifact {targ_file} not found in {raw_dir}')

    with open(targ_path, 'rb') as fp:
        test_artifact = pickle.load(fp)

    return pl.from_pandas(test_artifact)

In [3]:
# Load and prepare core data
core_data = load_artifact('core_data')

# Convert boolean columns
core_data = core_data.with_columns(
    pl.col('has_done_upselling').cast(pl.Boolean),
    pl.col('has_special_offer').cast(pl.Boolean),
    pl.col('is_magenta1_customer').cast(pl.Boolean)
)

# Calculate derived metrics
core_data = core_data.with_columns(
    (pl.col('contract_lifetime_days') + pl.col('remaining_binding_days')).alias('contract_binding_days'),
    (pl.col('contract_lifetime_days') / (pl.col('contract_lifetime_days') + pl.col('remaining_binding_days'))).alias('completion_rate'),
    (pl.col('gross_mrc') / pl.col('available_gb')).alias('cost_per_gb')
)

print(f"Data loaded: {core_data.shape[0]:,} contracts from {core_data.select(pl.col('customer_id')).n_unique():,} unique customers")

Data loaded: 100,000 contracts from 58,495 unique customers


## 1. Current Upselling Performance

In [15]:
# Calculate upselling statistics
total_contracts = core_data.shape[0]
upsold_contracts = core_data.filter(pl.col('has_done_upselling')).shape[0]
not_upsold_contracts = total_contracts - upsold_contracts

# Create presentation-ready pie chart
fig = go.Figure(data=[go.Pie(
    labels=['Not Upsold', 'Upsold'],
    values=[not_upsold_contracts, upsold_contracts],
    hole=0,  # Changed from donut to full pie chart
    marker_colors=[GRAY_COLOR, MAGENTA_COLOR],
    textinfo='label+percent+value',
    textfont_size=18,
    textfont_color='white',
    textposition='auto',
    pull=[0, 0.1],  # Pull out the "Upsold" segment slightly for emphasis
    marker=dict(
        line=dict(color='white', width=3)  # Add white border for better separation
    )
)])

fig.update_layout(
    title={
        'text': 'Current Upselling Performance',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 24, 'family': FONT_FAMILY, 'color': 'black'}
    },
    font=dict(size=16, family=FONT_FAMILY, color='black'),
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.1,
        xanchor="center",
        x=0.5,
        font=dict(size=18)
    ),
    width=1200,  # High resolution for presentation
    height=800,
    plot_bgcolor='rgba(0,0,0,0)',  # Transparent plot background
    paper_bgcolor='rgba(0,0,0,0)',  # Transparent paper background
    margin=dict(t=100, b=80, l=50, r=50)  # Adjust margins for better spacing
)

# Display the chart
fig.show()

# Save the chart in multiple high-quality formats for presentation use
print("Saving chart in presentation-ready formats...")

# Save as PDF (high quality for printing)
fig.write_image(images_dir / "upselling_performance_chart.pdf", 
                width=1200, height=800, scale=2)

Saving chart in presentation-ready formats...


## 2. Customer Portfolio Segmentation

In [5]:
# Calculate customer portfolio analysis
customer_portfolio = (
    core_data
    .group_by('customer_id')
    .agg([
        pl.col('rating_account_id').count().alias('portfolio_size'),
        pl.col('gross_mrc').sum().alias('total_value'),
        pl.col('has_done_upselling').max().alias('any_upselling'),
        pl.col('has_done_upselling').sum().alias('count_upselling'),
        (pl.col('has_done_upselling').sum() / pl.col('rating_account_id').count() * 100).round(2).alias('upselling_rate')
    ])
)

# Create portfolio summary
portfolio_summary = (
    customer_portfolio
    .group_by('portfolio_size')
    .agg([
        pl.col('customer_id').n_unique().alias('total_customers'),
        (pl.col('any_upselling').mean() * 100).round(2).alias('customers_with_upselling_percent'),
        pl.col('total_value').mean().round(2).alias('avg_total_spend'),
        pl.col('upselling_rate').mean().round(2).alias('avg_upselling_rate')
    ])
    .sort('portfolio_size', descending=False)
    .filter(pl.col('portfolio_size') <= 8)  # Focus on main segments
)

# Create visualization
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        'Customer Distribution by Portfolio Size',
        'Average Revenue per Customer Segment',
        'Upselling Success Rate by Segment',
        'Customer Value vs Upselling Opportunity'
    ),
    specs=[[{"secondary_y": False}, {"secondary_y": False}],
           [{"secondary_y": False}, {"secondary_y": True}]]
)

# Customer distribution
fig.add_trace(
    go.Bar(
        x=portfolio_summary['portfolio_size'],
        y=portfolio_summary['total_customers'],
        name='Customers',
        marker_color=MAGENTA_COLOR,
        text=portfolio_summary['total_customers'],
        textposition='auto'
    ),
    row=1, col=1
)

# Average revenue per customer
fig.add_trace(
    go.Bar(
        x=portfolio_summary['portfolio_size'],
        y=portfolio_summary['avg_total_spend'],
        name='Avg Revenue (€)',
        marker_color=MAGENTA_LIGHT,
        text=[f'€{x:.0f}' for x in portfolio_summary['avg_total_spend']],
        textposition='auto'
    ),
    row=1, col=2
)

# Upselling success rate
fig.add_trace(
    go.Bar(
        x=portfolio_summary['portfolio_size'],
        y=portfolio_summary['customers_with_upselling_percent'],
        name='Upselling Success Rate (%)',
        marker_color=MAGENTA_DARK,
        text=[f'{x:.1f}%' for x in portfolio_summary['customers_with_upselling_percent']],
        textposition='auto'
    ),
    row=2, col=1
)

# Combined view: Value vs Opportunity
fig.add_trace(
    go.Scatter(
        x=portfolio_summary['avg_total_spend'],
        y=portfolio_summary['customers_with_upselling_percent'],
        mode='markers+text',
        marker=dict(
            size=portfolio_summary['total_customers'].to_numpy() / 1000,
            color=MAGENTA_COLOR,
            opacity=0.7,
            sizemode='area',
            sizeref=2. * max(portfolio_summary['total_customers']) / (40. ** 2),
            sizemin=10
        ),
        text=[f'{x} contracts' for x in portfolio_summary['portfolio_size']],
        textposition='middle center',
        name='Portfolio Segments',
        hovertemplate='<b>%{text}</b><br>' +
                     'Avg Revenue: €%{x:.0f}<br>' +
                     'Upselling Rate: %{y:.1f}%<br>' +
                     '<extra></extra>'
    ),
    row=2, col=2
)

# Update layout
fig.update_xaxes(title_text="Portfolio Size (Number of Contracts)", row=1, col=1)
fig.update_yaxes(title_text="Number of Customers", row=1, col=1)
fig.update_xaxes(title_text="Portfolio Size (Number of Contracts)", row=1, col=2)
fig.update_yaxes(title_text="Average Revenue (€)", row=1, col=2)
fig.update_xaxes(title_text="Portfolio Size (Number of Contracts)", row=2, col=1)
fig.update_yaxes(title_text="Upselling Success Rate (%)", row=2, col=1)
fig.update_xaxes(title_text="Average Revenue per Customer (€)", row=2, col=2)
fig.update_yaxes(title_text="Upselling Success Rate (%)", row=2, col=2)

fig.update_layout(
    title={
        'text': 'Customer Portfolio Analysis',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 18, 'family': FONT_FAMILY}
    },
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgb(250,250,250)',
    height=800,
    showlegend=False,
    font=dict(size=10, family=FONT_FAMILY)
)

fig.show()

In [31]:
# Calculate customer portfolio analysis
customer_portfolio = (
    core_data
    .group_by('customer_id')
    .agg([
        pl.col('rating_account_id').count().alias('portfolio_size'),
        pl.col('gross_mrc').sum().alias('total_value'),
        pl.col('has_done_upselling').max().alias('any_upselling'),
        pl.col('has_done_upselling').sum().alias('count_upselling'),
        (pl.col('has_done_upselling').sum() / pl.col('rating_account_id').count() * 100).round(2).alias('upselling_rate')
    ])
)

# Create portfolio summary
portfolio_summary = (
    customer_portfolio
    .group_by('portfolio_size')
    .agg([
        pl.col('customer_id').n_unique().alias('total_customers'),
        (pl.col('any_upselling').mean() * 100).round(2).alias('customers_with_upselling_percent'),
        pl.col('total_value').mean().round(2).alias('avg_total_spend'),
        pl.col('upselling_rate').mean().round(2).alias('avg_upselling_rate')
    ])
    .sort('portfolio_size', descending=False)
    .filter(pl.col('portfolio_size') <= 8)  # Focus on main segments
)

# Plot 1: Customer Distribution by Portfolio Size
fig1 = go.Figure()
fig1.add_trace(
    go.Bar(
        x=portfolio_summary['portfolio_size'],
        y=portfolio_summary['total_customers'],
        name='Customers',
        marker_color=MAGENTA_COLOR,
        text=portfolio_summary['total_customers'],
        textposition='auto',
        textfont=dict(size=16)
    )
)

fig1.update_layout(
    title={
        'text': 'Customer Distribution by Portfolio Size',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 22, 'family': FONT_FAMILY}
    },
    xaxis_title="Portfolio Size (Number of Contracts)",
    yaxis_title="Number of Customers",
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgb(250,250,250)',
    height=600,
    width=1400,
    showlegend=False,
    font=dict(size=16, family=FONT_FAMILY),
    xaxis=dict(title_font=dict(size=18)),
    yaxis=dict(title_font=dict(size=18))
)
fig1.show()

# Save as PDF (high quality for printing)
fig1.write_image(images_dir / "portfolio_size.pdf", 
                width=1400, height=600, scale=2)

# Plot 2: Average Revenue per Customer Segment
fig2 = go.Figure()
fig2.add_trace(
    go.Bar(
        x=portfolio_summary['portfolio_size'],
        y=portfolio_summary['avg_total_spend'],
        name='Avg Revenue (€)',
        marker_color=MAGENTA_LIGHT,
        text=[f'€{x:.0f}' for x in portfolio_summary['avg_total_spend']],
        textposition='auto',
        textfont=dict(size=16)
    )
)

fig2.update_layout(
    title={
        'text': 'Average Revenue per Customer Segment',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 22, 'family': FONT_FAMILY}
    },
    xaxis_title="Portfolio Size (Number of Contracts)",
    yaxis_title="Average Revenue (€)",
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgb(250,250,250)',
    height=600,
    width=1400,
    showlegend=False,
    font=dict(size=16, family=FONT_FAMILY),
    xaxis=dict(title_font=dict(size=18)),
    yaxis=dict(title_font=dict(size=18))
)
fig2.show()

# Save as PDF (high quality for printing)
fig2.write_image(images_dir / "portfolio_size_revenue_segments.pdf", 
                width=1400, height=600, scale=2)

# Plot 3: Upselling Success Rate by Segment
fig3 = go.Figure()
fig3.add_trace(
    go.Bar(
        x=portfolio_summary['portfolio_size'],
        y=portfolio_summary['customers_with_upselling_percent'],
        name='Upselling Success Rate (%)',
        marker_color=MAGENTA_DARK,
        text=[f'{x:.1f}%' for x in portfolio_summary['customers_with_upselling_percent']],
        textposition='auto',
        textfont=dict(size=16)
    )
)

fig3.update_layout(
    title={
        'text': 'Upselling Success Rate by Segment',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 22, 'family': FONT_FAMILY}
    },
    xaxis_title="Portfolio Size (Number of Contracts)",
    yaxis_title="Upselling Success Rate (%)",
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgb(250,250,250)',
    height=600,
    width=1400,
    showlegend=False,
    font=dict(size=16, family=FONT_FAMILY),
    xaxis=dict(title_font=dict(size=18)),
    yaxis=dict(title_font=dict(size=18))
)
fig3.show()

# Save as PDF (high quality for printing)
fig3.write_image(images_dir / "portfolio_size_upselling_rate.pdf", 
                width=1400, height=600, scale=2)

# Plot 4: Customer Value vs Upselling Opportunity (Bubble Chart)
fig4 = go.Figure()
fig4.add_trace(
    go.Scatter(
        x=portfolio_summary['avg_total_spend'],
        y=portfolio_summary['customers_with_upselling_percent'],
        mode='markers+text',
        marker=dict(
            size=portfolio_summary['total_customers'].to_numpy() / 30,  # Larger bubbles
            color=MAGENTA_COLOR,
            opacity=0.7,
            sizemode='area',
            sizeref=2. * max(portfolio_summary['total_customers']) / (50. ** 2),
            sizemin=20,
            line=dict(width=3, color='white')
        ),
        text=[f'{x} contracts' for x in portfolio_summary['portfolio_size']],
        textposition='top center',
        textfont=dict(color='black', size=18, family=FONT_FAMILY),
        name='Portfolio Segments',
        hovertemplate='<b>%{text}</b><br>' +
                     'Avg Revenue: €%{x:.0f}<br>' +
                     'Upselling Rate: %{y:.1f}%<br>' +
                     'Customers: %{customdata}<br>' +
                     '<extra></extra>',
        customdata=portfolio_summary['total_customers']
    )
)

fig4.update_layout(
    title={
        'text': 'Customer Value vs Upselling Opportunity<br><sub>Bubble size represents number of customers in segment</sub>',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 22, 'family': FONT_FAMILY}
    },
    xaxis_title="Average Revenue per Customer (€)",
    yaxis_title="Upselling Success Rate (%)",
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgb(250,250,250)',
    height=700,
    width=1400,
    showlegend=False,
    font=dict(size=16, family=FONT_FAMILY),
    xaxis=dict(title_font=dict(size=18)),
    yaxis=dict(title_font=dict(size=18)),
    annotations=[
        dict(
            x=0.02,
            y=0.98,
            xref='paper',
            yref='paper',
            text='<b>Quadrant Analysis:</b><br>' +
                 'Top Right: High Value + High Upselling<br>' +
                 'Top Left: Low Value + High Upselling<br>' +
                 'Bottom Right: High Value + Low Upselling<br>' +
                 'Bottom Left: Low Value + Low Upselling',
            showarrow=False,
            font=dict(size=14, color=GRAY_COLOR, family=FONT_FAMILY),
            align='left',
            bgcolor='rgba(255,255,255,0.9)',
            bordercolor=GRAY_COLOR,
            borderwidth=1,
            borderpad=10
        )
    ]
)
fig4.show()

# Save as PDF (high quality for printing)
fig4.write_image(images_dir / "portfolio_size_opportunity.pdf", 
                width=1400, height=700, scale=2)

print("\nPortfolio Analysis Summary:")
print("=" * 50)
total_customers = portfolio_summary['total_customers'].sum()
print(f"Total customers analyzed: {total_customers:,}")
print(f"Average portfolio size: {(portfolio_summary['portfolio_size'] * portfolio_summary['total_customers']).sum() / total_customers:.1f} contracts")
print(f"Overall upselling rate: {(portfolio_summary['customers_with_upselling_percent'] * portfolio_summary['total_customers']).sum() / total_customers:.1f}%")
print(f"Average customer value: €{(portfolio_summary['avg_total_spend'] * portfolio_summary['total_customers']).sum() / total_customers:.0f}")


Portfolio Analysis Summary:
Total customers analyzed: 58,495
Average portfolio size: 1.7 contracts
Overall upselling rate: 11.5%
Average customer value: €64


## 3. Age Demographics

In [32]:
# Create age bins for analysis
bins = [18, 25, 35, 45, 55, 65, 75, 85, 95, 100]
labels = ["18-24", "25-34", "35-44", "45-54", "55-64", "65-74", "75-84", "85-94", "95-100"]

# Create age bins
binned_age_data = core_data.with_columns(
    pl.when(pl.col("age") < 18).then(pl.lit('18-'))
    .otherwise(pl.lit('+100'))
    .alias("age_bin")
)

for i in range(len(bins)-1):
    binned_age_data = binned_age_data.with_columns(
        pl.when(pl.col("age").is_between(bins[i], bins[i+1], closed="left")).then(pl.lit(labels[i]))
        .otherwise(pl.col('age_bin'))
        .alias("age_bin")
    )

# Group by age bin and calculate metrics
age_analysis = binned_age_data.group_by("age_bin").agg([
    pl.len().alias("total_contracts"),
    (pl.col("has_done_upselling") == True).sum().alias("upsold_contracts"),
    (pl.col("has_done_upselling") == False).sum().alias("not_upsold_contracts"),
    (pl.col("has_done_upselling").mean() * 100).round(2).alias("upselling_rate"),
    pl.col("gross_mrc").mean().round(2).alias("avg_revenue")
]).sort('age_bin')

# Remove outlier bins
age_analysis = age_analysis.filter(~pl.col('age_bin').is_in(['18-', '+100']))

# Create stacked bar chart
fig = go.Figure()

fig.add_trace(go.Bar(
    name='Not Upsold',
    x=age_analysis['age_bin'],
    y=age_analysis['not_upsold_contracts'],
    marker_color=GRAY_COLOR,
    text=[f'{x:,}' for x in age_analysis['not_upsold_contracts']],
    textposition='inside',
    textfont=dict(size=16)
))

fig.add_trace(go.Bar(
    name='Upsold',
    x=age_analysis['age_bin'],
    y=age_analysis['upsold_contracts'],
    marker_color=MAGENTA_COLOR,
    text=[f'{x:,}' for x in age_analysis['upsold_contracts']],
    textposition='inside',
    textfont=dict(size=16)
))

# Add upselling rate as line on secondary y-axis
fig.add_trace(go.Scatter(
    x=age_analysis['age_bin'],
    y=age_analysis['upselling_rate'],
    mode='lines+markers',
    name='Upselling Rate (%)',
    line=dict(color=MAGENTA_DARK, width=3),
    marker=dict(size=8, color=MAGENTA_DARK),
    text=[f'{x:.1f}%' for x in age_analysis['upselling_rate']],
    textposition='top center',
    textfont=dict(size=16),
    yaxis='y2'
))

fig.update_layout(
    title={
        'text': 'Age Demographics: Upselling Patterns Across Customer Segments',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 22, 'family': FONT_FAMILY}
    },
    xaxis_title='Age Groups',
    yaxis_title='Number of Contracts',
    yaxis2=dict(
        title='Upselling Rate (%)',
        overlaying='y',
        side='right',
        range=[0, max(age_analysis['upselling_rate']) * 1.2],
        title_font=dict(size=18)
    ),
    barmode='stack',
    height=600,
    width=1400,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgb(250,250,250)',
    font=dict(family=FONT_FAMILY, size=16),
    xaxis=dict(title_font=dict(size=18)),
    yaxis=dict(title_font=dict(size=18)),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        font=dict(size=16)
    )
)

fig.show()
# Save as PDF (high quality for printing)
fig.write_image(images_dir / "age.pdf", 
                width=1400, height=600, scale=2)

## 4. Contract Lifecycle Analysis

In [35]:
# Create contract lifetime bins (in days, then convert to months for display)
lifetime_bins = list(range(0, 1860, 200))
lifetime_labels = [f"{lifetime_bins[i]//30}-{(lifetime_bins[i+1]-1)//30}m" for i in range(len(lifetime_bins)-1)]

# Create lifetime analysis
binned_lifetime_data = core_data.with_columns(
    pl.when(pl.col("contract_lifetime_days") < 0).then(pl.lit('0-'))
    .otherwise(pl.lit('1860+'))
    .alias("lifetime_bin")
)

for i in range(len(lifetime_bins)-1):
    binned_lifetime_data = binned_lifetime_data.with_columns(
        pl.when(pl.col("contract_lifetime_days").is_between(lifetime_bins[i], lifetime_bins[i+1], closed="left")).then(pl.lit(lifetime_labels[i]))
        .otherwise(pl.col('lifetime_bin'))
        .alias("lifetime_bin")
    )

# Calculate lifecycle metrics and sort properly
lifecycle_analysis = binned_lifetime_data.group_by("lifetime_bin").agg([
    pl.len().alias("total_contracts"),
    (pl.col("has_done_upselling").mean() * 100).round(2).alias("upselling_rate"),
    pl.col("gross_mrc").mean().round(2).alias("avg_revenue"),
    pl.col("completion_rate").mean().round(2).alias("avg_completion_rate")
])

# Remove outlier bins
lifecycle_analysis = lifecycle_analysis.filter(~pl.col('lifetime_bin').is_in(['0-', '1860+']))

# Create proper sorting order
sort_order = [f"{lifetime_bins[i]//30}-{(lifetime_bins[i+1]-1)//30}m" for i in range(len(lifetime_bins)-1)]

# Sort by converting to a numeric representation for proper ordering
def get_sort_key(bin_label):
    if bin_label in ['0-', '1860+']:
        return -1 if bin_label == '0-' else 999
    # Extract the first number from the label (e.g., "0-6m" -> 0)
    return int(bin_label.split('-')[0])

lifecycle_analysis = lifecycle_analysis.with_columns(
    pl.col("lifetime_bin").map_elements(lambda x: get_sort_key(x), return_dtype=pl.Int32).alias("sort_key")
).sort("sort_key").drop("sort_key")

# Create combined visualization
fig = make_subplots(
    rows=2, cols=1,
    subplot_titles=(
        'Contract Distribution by Lifetime (Months)',
        'Upselling Success Rate vs Contract Maturity'
    ),
    vertical_spacing=0.2,
    specs=[[{"secondary_y": False}], [{"secondary_y": True}]]
)

# Contract distribution
fig.add_trace(
    go.Bar(
        x=lifecycle_analysis['lifetime_bin'],
        y=lifecycle_analysis['total_contracts'],
        name='Contract Count',
        marker_color=MAGENTA_COLOR,
        text=[f'{x:,}' for x in lifecycle_analysis['total_contracts']],
        textposition='auto',
        textfont=dict(size=16)
    ),
    row=1, col=1
)

# Upselling rate
fig.add_trace(
    go.Scatter(
        x=lifecycle_analysis['lifetime_bin'],
        y=lifecycle_analysis['upselling_rate'],
        mode='lines+markers',
        name='Upselling Rate (%)',
        line=dict(color=MAGENTA_COLOR, width=3),
        marker=dict(size=8, color=MAGENTA_COLOR),
        text=[f'{x:.1f}%' for x in lifecycle_analysis['upselling_rate']],
        textposition='top center',
        textfont=dict(size=16)
    ),
    row=2, col=1
)

# Completion rate on secondary y-axis
fig.add_trace(
    go.Scatter(
        x=lifecycle_analysis['lifetime_bin'],
        y=lifecycle_analysis['avg_completion_rate'] * 100,
        mode='lines+markers',
        name='Avg Completion Rate (%)',
        line=dict(color=MAGENTA_LIGHT, width=2, dash='dash'),
        marker=dict(size=6, color=MAGENTA_LIGHT),
    ),
    row=2, col=1,
    secondary_y=True
)

fig.update_xaxes(title_text="Contract Lifetime (Months)", row=1, col=1, title_font=dict(size=18))
fig.update_yaxes(title_text="Number of Contracts", row=1, col=1, title_font=dict(size=18))
fig.update_xaxes(title_text="Contract Lifetime (Months)", row=2, col=1, title_font=dict(size=18))
fig.update_yaxes(title_text="Upselling Rate (%)", row=2, col=1, title_font=dict(size=18))

fig.update_yaxes(title_text="Average Completion Rate (%)", secondary_y=True, row=2, col=1, title_font=dict(size=18))

fig.update_layout(
    title={
        'text': 'Contract Lifecycle Analysis',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 22, 'family': FONT_FAMILY}
    },
    height=800,
    width=1400,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgb(250,250,250)',
    showlegend=True,
    font=dict(family=FONT_FAMILY, size=16),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        font=dict(size=16)
    )
)

fig.show()
# Save as PDF (high quality for printing)
fig.write_image(images_dir / "contract_lifetime.pdf", 
                width=1400, height=800, scale=2)

## 5. Data Plan Analysis

In [40]:
## Define revenue bins and labels for segmentation
revenue_bins = [0, 20, 35, 50, 100]
revenue_labels = [
    'Budget (€0-20)',
    'Standard (€20-35)',
    'Premium (€35-50)',
    'Enterprise (€50+)'
]

## Create revenue segmentation with clear logic
binned_revenue_data = core_data.with_columns(
    pl.when(pl.col("gross_mrc") < 0).then(pl.lit('Budget (€0-20)'))
    .otherwise(pl.lit('Enterprise (€50+)'))
    .alias("revenue_segment")
)

for i in range(len(revenue_bins) - 1):
    binned_revenue_data = binned_revenue_data.with_columns(
        pl.when(
            pl.col("gross_mrc").is_between(
                revenue_bins[i], revenue_bins[i + 1], closed="left"
            )
        )
        .then(pl.lit(revenue_labels[i]))
        .otherwise(pl.col('revenue_segment'))
        .alias("revenue_segment")
    )

## Aggregate data for segment analysis
segment_analysis = binned_revenue_data.group_by("revenue_segment").agg([
    pl.len().alias('total_contracts'),
    (pl.col('has_done_upselling').mean() * 100).round(2).alias('upselling_rate'),
    pl.col('gross_mrc').mean().round(2).alias('avg_revenue'),
    pl.col('available_gb').mean().round(2).alias('avg_data_gb'),
    (pl.col('is_magenta1_customer').mean() * 100).round(2).alias('magenta1_rate'),
    (pl.col('has_special_offer').mean() * 100).round(2).alias('special_offer_rate')
]).sort('avg_revenue')

## Visualization: Bubble chart setup with increased font sizes
fig = go.Figure()

segment_colors = ['#95A5A6', '#E74C3C', '#E91E63', '#8E24AA']

marker_sizes = [
    max(30, min(100, contracts / 200))
    for contracts in segment_analysis['total_contracts']
]

fig.add_trace(go.Scatter(
    x=segment_analysis['avg_revenue'],
    y=segment_analysis['upselling_rate'],
    mode='markers+text',
    marker=dict(
        size=marker_sizes,
        color=segment_colors,
        opacity=0.8,
        sizemode='diameter',
        line=dict(width=3, color='white')
    ),
    text=[
        f"{segment.split('(')[0].strip()}<br>{contracts:,} contracts"
        for segment, contracts in zip(
            segment_analysis['revenue_segment'], segment_analysis['total_contracts']
        )
    ],
    textposition='middle center',
    textfont=dict(color='black', size=16, family=FONT_FAMILY, weight='bold'),  # Increased font size
    hovertemplate=(
        '<b>%{customdata[0]}</b><br>'
        'Avg Revenue: €%{x:.0f}<br>'
        'Upselling Rate: %{y:.1f}%<br>'
        'Total Contracts: %{customdata[1]:,}<br>'
        'Avg Data: %{customdata[2]:.0f} GB<br>'
        'Magenta1 Rate: %{customdata[3]:.1f}%<br>'
        'Special Offers: %{customdata[4]:.1f}%<br>'
        '<extra></extra>'
    ),
    customdata=list(zip(
        segment_analysis['revenue_segment'],
        segment_analysis['total_contracts'],
        segment_analysis['avg_data_gb'],
        segment_analysis['magenta1_rate'],
        segment_analysis['special_offer_rate']
    )),
    name='Customer Segments'
))

## Add quadrant reference lines
fig.add_hline(
    y=segment_analysis['upselling_rate'].mean(),
    line_dash="dash",
    line_color="gray",
    opacity=0.5,
    annotation_text="Avg Upselling Rate",
    annotation_position="bottom right"
)
fig.add_vline(
    x=segment_analysis['avg_revenue'].mean(),
    line_dash="dash",
    line_color="gray",
    opacity=0.5,
    annotation_text="Avg Revenue",
    annotation_position="top left"
)

## Layout improvements with font size increased to 16
fig.update_layout(
    title={
        'text': 'Customer Value Segmentation Analysis<br><sub>Bubble size represents number of contracts</sub>',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 24, 'family': FONT_FAMILY, 'color': '#2C3E50'}  # Larger title font size
    },
    xaxis_title='Average Monthly Revenue (€)',
    yaxis_title='Upselling Success Rate (%)',
    height=700,
    width=1200,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='white',
    showlegend=False,
    font=dict(family=FONT_FAMILY, size=16, color='#2C3E50'),  # Increase axis and tick font size
    xaxis=dict(
        gridcolor='lightgray',
        gridwidth=1,
        zeroline=False,
        showline=True,
        linewidth=2,
        linecolor='#2C3E50',
        tickfont=dict(size=16)
    ),
    yaxis=dict(
        gridcolor='lightgray',
        gridwidth=1,
        zeroline=False,
        showline=True,
        linewidth=2,
        linecolor='#2C3E50',
        tickfont=dict(size=16)
    ),
    annotations=[
        dict(
            x=0.9,
            y=0.9,
            xref='paper',
            yref='paper',
            text=(
                '<b>Quadrant Analysis:</b><br>'
                '• Top Right: High Value, High Upselling<br>'
                '• Top Left: Low Value, High Upselling<br>'
                '• Bottom Right: High Value, Low Upselling<br>'
                '• Bottom Left: Low Value, Low Upselling'
            ),
            showarrow=False,
            font=dict(size=16, color='#2C3E50', family=FONT_FAMILY),
            align='right',
            bgcolor='rgba(255,255,255,0.9)',
            bordercolor='#BDC3C7',
            borderwidth=1,
            borderpad=10
        )
    ]
)

fig.show()

fig.write_image(images_dir / "customer_value_segmentation.pdf", width=1400, height=900, scale=2)

## Summary Table Output with larger font in print statements if needed (depends on console)
print("Analysis\n")

total_contracts = core_data.shape[0]
segment_data = segment_analysis.rows()

for row in segment_data:
    segment, contracts, upselling_rate, avg_revenue, avg_data, magenta1_rate, special_offer_rate = row
    print(f"{segment}:")
    print(f"  • {contracts:,} contracts ({contracts / total_contracts * 100:.1f}% of portfolio)")
    print(f"  • Upselling Rate: {upselling_rate:.1f}%")
    print(f"  • Average Revenue: €{avg_revenue:.0f}")
    print(f"  • Average Data Usage: {avg_data:.0f} GB\n")

print("Statistics:")
print("=" * 60)

best_segment = max(segment_data, key=lambda x: x[3] * (x[2] / 100))
print(f"• Best Segment: {best_segment[0]} (€{best_segment[3]:.0f} avg revenue, {best_segment[2]:.1f}% upselling)")

biggest_opportunity = max(segment_data, key=lambda x: x[1])
print(f"• Largest Segment: {biggest_opportunity[0]} ({biggest_opportunity[1]:,} contracts)")

print("=" * 60)

Analysis

Budget (€0-20):
  • 23,813 contracts (23.8% of portfolio)
  • Upselling Rate: 7.9%
  • Average Revenue: €12
  • Average Data Usage: 25 GB

Standard (€20-35):
  • 22,196 contracts (22.2% of portfolio)
  • Upselling Rate: 7.9%
  • Average Revenue: €28
  • Average Data Usage: 25 GB

Premium (€35-50):
  • 21,894 contracts (21.9% of portfolio)
  • Upselling Rate: 6.2%
  • Average Revenue: €42
  • Average Data Usage: 25 GB

Enterprise (€50+):
  • 32,097 contracts (32.1% of portfolio)
  • Upselling Rate: 6.4%
  • Average Revenue: €60
  • Average Data Usage: 25 GB

Statistics:
• Best Segment: Enterprise (€50+) (€60 avg revenue, 6.4% upselling)
• Largest Segment: Enterprise (€50+) (32,097 contracts)


## 6. Customer Value Segmentation: Premium vs Standard Customers

Understanding customer value distribution helps prioritize upselling efforts and resource allocation.

In [38]:
## Define revenue bins and labels for segmentation
revenue_bins = [0, 20, 35, 50, 100]
revenue_labels = [
    'Budget (€0-20)',
    'Standard (€20-35)',
    'Premium (€35-50)',
    'Enterprise (€50+)'
]

## Create revenue segmentation with clear logic
binned_revenue_data = core_data.with_columns(
    # Start by labeling negative values as 'Budget' and others as 'Enterprise'
    pl.when(pl.col("gross_mrc") < 0).then(pl.lit('Budget (€0-20)'))
    .otherwise(pl.lit('Enterprise (€50+)'))
    .alias("revenue_segment")
)

# Update revenue_segment for bins between defined ranges
for i in range(len(revenue_bins) - 1):
    binned_revenue_data = binned_revenue_data.with_columns(
        pl.when(
            pl.col("gross_mrc").is_between(
                revenue_bins[i], revenue_bins[i + 1], closed="left"
            )
        )
        .then(pl.lit(revenue_labels[i]))
        .otherwise(pl.col('revenue_segment'))
        .alias("revenue_segment")
    )

## Aggregate data for segment analysis
segment_analysis = binned_revenue_data.group_by("revenue_segment").agg([
    pl.len().alias('total_contracts'),
    (pl.col('has_done_upselling').mean() * 100).round(2).alias('upselling_rate'),
    pl.col('gross_mrc').mean().round(2).alias('avg_revenue'),
    pl.col('available_gb').mean().round(2).alias('avg_data_gb'),
    (pl.col('is_magenta1_customer').mean() * 100).round(2).alias('magenta1_rate'),
    (pl.col('has_special_offer').mean() * 100).round(2).alias('special_offer_rate')
]).sort('avg_revenue')

## Visualization: Bubble chart setup with better readability and size scaling
fig = go.Figure()

segment_colors = ['#95A5A6', '#E74C3C', '#E91E63', '#8E24AA']  # Gray, Red, Pink, Purple

# Compute marker sizes with better scaling (min 30, max 100)
marker_sizes = [
    max(30, min(100, contracts / 200))
    for contracts in segment_analysis['total_contracts']
]

fig.add_trace(go.Scatter(
    x=segment_analysis['avg_revenue'],
    y=segment_analysis['upselling_rate'],
    mode='markers+text',
    marker=dict(
        size=marker_sizes,
        color=segment_colors,
        opacity=0.8,
        sizemode='diameter',
        line=dict(width=3, color='white')
    ),
    text=[
        f"{segment.split('(')[0].strip()}<br>{contracts:,} contracts"
        for segment, contracts in zip(
            segment_analysis['revenue_segment'], segment_analysis['total_contracts']
        )
    ],
    textposition='middle center',
    textfont=dict(color='black', size=14, family=FONT_FAMILY, weight='bold'),
    hovertemplate=(
        '<b>%{customdata[0]}</b><br>'
        'Avg Revenue: €%{x:.0f}<br>'
        'Upselling Rate: %{y:.1f}%<br>'
        'Total Contracts: %{customdata[1]:,}<br>'
        'Avg Data: %{customdata[2]:.0f} GB<br>'
        'Magenta1 Rate: %{customdata[3]:.1f}%<br>'
        'Special Offers: %{customdata[4]:.1f}%<br>'
        '<extra></extra>'
    ),
    customdata=list(zip(
        segment_analysis['revenue_segment'],
        segment_analysis['total_contracts'],
        segment_analysis['avg_data_gb'],
        segment_analysis['magenta1_rate'],
        segment_analysis['special_offer_rate']
    )),
    name='Customer Segments'
))

## Add quadrant reference lines
fig.add_hline(
    y=segment_analysis['upselling_rate'].mean(),
    line_dash="dash",
    line_color="gray",
    opacity=0.5,
    annotation_text="Avg Upselling Rate",
    annotation_position="bottom right"
)
fig.add_vline(
    x=segment_analysis['avg_revenue'].mean(),
    line_dash="dash",
    line_color="gray",
    opacity=0.5,
    annotation_text="Avg Revenue",
    annotation_position="top left"
)

## Layout improvements for better readability and wider chart
fig.update_layout(
    title={
        'text': 'Customer Value Segmentation Analysis<br><sub>Bubble size represents number of contracts</sub>',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 20, 'family': FONT_FAMILY, 'color': '#2C3E50'}
    },
    xaxis_title='Average Monthly Revenue (€)',
    yaxis_title='Upselling Success Rate (%)',
    height=700,
    width=1200,  # Widened width for better spacing
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='white',
    showlegend=False,
    font=dict(family=FONT_FAMILY, size=12, color='#2C3E50'),
    xaxis=dict(
        gridcolor='lightgray',
        gridwidth=1,
        zeroline=False,
        showline=True,
        linewidth=2,
        linecolor='#2C3E50'
    ),
    yaxis=dict(
        gridcolor='lightgray',
        gridwidth=1,
        zeroline=False,
        showline=True,
        linewidth=2,
        linecolor='#2C3E50'
    ),
    annotations=[
        dict(
            x=0.9,
            y=0.9,
            xref='paper',
            yref='paper',
            text=(
                '<b>Quadrant Analysis:</b><br>'
                '• Top Right: High Value, High Upselling<br>'
                '• Top Left: Low Value, High Upselling<br>'
                '• Bottom Right: High Value, Low Upselling<br>'
                '• Bottom Left: Low Value, Low Upselling'
            ),
            showarrow=False,
            font=dict(size=16, color='#2C3E50', family=FONT_FAMILY),
            align='right',
            bgcolor='rgba(255,255,255,0.9)',
            bordercolor='#BDC3C7',
            borderwidth=1,
            borderpad=10
        )
    ]
)

fig.show()

# Save the chart with larger width and height for clarity
fig.write_image(images_dir / "customer_value_segmentation.pdf", width=1400, height=900, scale=2)


## Summary Table Output (clean and readable)
print("Analysis\n")

total_contracts = core_data.shape[0]
segment_data = segment_analysis.rows()

for row in segment_data:
    segment, contracts, upselling_rate, avg_revenue, avg_data, magenta1_rate, special_offer_rate = row
    print(f"{segment}:")
    print(f"  • {contracts:,} contracts ({contracts / total_contracts * 100:.1f}% of portfolio)")
    print(f"  • Upselling Rate: {upselling_rate:.1f}%")
    print(f"  • Average Revenue: €{avg_revenue:.0f}")
    print(f"  • Average Data Usage: {avg_data:.0f} GB\n")

print("Statistics:")
print("=" * 60)

best_segment = max(segment_data, key=lambda x: x[3] * (x[2] / 100))
print(f"• Best Segment: {best_segment[0]} (€{best_segment[3]:.0f} avg revenue, {best_segment[2]:.1f}% upselling)")

biggest_opportunity = max(segment_data, key=lambda x: x[1])
print(f"• Largest Segment: {biggest_opportunity[0]} ({biggest_opportunity[1]:,} contracts)")

print("=" * 60)

Analysis

Budget (€0-20):
  • 23,813 contracts (23.8% of portfolio)
  • Upselling Rate: 7.9%
  • Average Revenue: €12
  • Average Data Usage: 25 GB

Standard (€20-35):
  • 22,196 contracts (22.2% of portfolio)
  • Upselling Rate: 7.9%
  • Average Revenue: €28
  • Average Data Usage: 25 GB

Premium (€35-50):
  • 21,894 contracts (21.9% of portfolio)
  • Upselling Rate: 6.2%
  • Average Revenue: €42
  • Average Data Usage: 25 GB

Enterprise (€50+):
  • 32,097 contracts (32.1% of portfolio)
  • Upselling Rate: 6.4%
  • Average Revenue: €60
  • Average Data Usage: 25 GB

Statistics:
• Best Segment: Enterprise (€50+) (€60 avg revenue, 6.4% upselling)
• Largest Segment: Enterprise (€50+) (32,097 contracts)


## 7. Business Impact Projection: The ML Model Opportunity

Quantifying the potential business impact of implementing an ML-driven upselling model.

In [47]:
# Calculate current state metrics
current_upselling_rate = (upsold_contracts / total_contracts) * 100
current_monthly_revenue = core_data.select(pl.col('gross_mrc').sum()).item()
avg_revenue_per_contract = core_data.select(pl.col('gross_mrc').mean()).item()
avg_upselling_revenue = core_data.filter(pl.col('has_done_upselling')).select(pl.col('gross_mrc').mean()).item()
avg_non_upselling_revenue = core_data.filter(~pl.col('has_done_upselling')).select(pl.col('gross_mrc').mean()).item()
revenue_uplift_per_upsell = abs(avg_upselling_revenue - avg_non_upselling_revenue)  # absolute difference

# Define improvement scenarios
scenarios = [
    {'name': 'Conservative', 'improvement': 1.5, 'description': '50% improvement in upselling rate'},
    {'name': 'Moderate', 'improvement': 2.0, 'description': '100% improvement in upselling rate'},
    {'name': 'Aggressive', 'improvement': 3.0, 'description': '200% improvement in upselling rate'}
]

# Calculate projections for each scenario
projections = []
for scenario in scenarios:
    new_upselling_rate = min(current_upselling_rate * scenario['improvement'], 25)  # cap at 25%
    additional_upsells = (new_upselling_rate - current_upselling_rate) / 100 * total_contracts
    monthly_revenue_increase = abs(additional_upsells * revenue_uplift_per_upsell)
    annual_revenue_increase = abs(monthly_revenue_increase * 12)
    
    projections.append({
        'scenario': scenario['name'],
        'description': scenario['description'],
        'new_upselling_rate': new_upselling_rate,
        'additional_upsells': abs(additional_upsells),
        'monthly_increase': monthly_revenue_increase,
        'annual_increase': annual_revenue_increase
    })

# Visualization setup: 2x2 subplots
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        'Projected Upselling Rate Improvement',
        'Monthly Revenue Impact',
        'Annual Revenue Projection',
        'ROI Potential'
    )
)

scenario_names = [p['scenario'] for p in projections]
colors = [MAGENTA_LIGHT, MAGENTA_COLOR, MAGENTA_DARK]

# Current upselling rate bars
fig.add_trace(
    go.Bar(
        x=scenario_names,
        y=[current_upselling_rate] * 3,
        name='Current Rate',
        marker_color=GRAY_COLOR,
        text=[f'{current_upselling_rate:.1f}%'] * 3,
        textposition='auto',
        textfont=dict(size=16, family=FONT_FAMILY)  # Increase font size
    ),
    row=1, col=1
)

# Projected upselling rate bars
fig.add_trace(
    go.Bar(
        x=scenario_names,
        y=[p['new_upselling_rate'] for p in projections],
        name='Projected Rate',
        marker_color=colors,
        text=[f'{p["new_upselling_rate"]:.1f}%' for p in projections],
        textposition='auto',
        textfont=dict(size=16, family=FONT_FAMILY)
    ),
    row=1, col=1
)

# Monthly revenue increase bars
fig.add_trace(
    go.Bar(
        x=scenario_names,
        y=[p['monthly_increase'] for p in projections],
        name='Monthly Revenue Increase',
        marker_color=colors,
        text=[f'€{p["monthly_increase"]:,.0f}' for p in projections],
        textposition='auto',
        textfont=dict(size=16, family=FONT_FAMILY)
    ),
    row=1, col=2
)

# Annual revenue increase bars
fig.add_trace(
    go.Bar(
        x=scenario_names,
        y=[p['annual_increase'] for p in projections],
        name='Annual Revenue Increase',
        marker_color=colors,
        text=[f'€{p["annual_increase"]/1_000_000:.1f}M' for p in projections],
        textposition='auto',
        textfont=dict(size=16, family=FONT_FAMILY)
    ),
    row=2, col=1
)

# ROI calculation and bars (assuming 15% implementation cost)
implementation_cost = 500_000
roi_values = [
    abs((p['annual_increase'] - implementation_cost) / implementation_cost * 100)
    for p in projections
]

fig.add_trace(
    go.Bar(
        x=scenario_names,
        y=roi_values,
        name='ROI (%)',
        marker_color=colors,
        text=[f'{roi:.0f}%' for roi in roi_values],
        textposition='auto',
        textfont=dict(size=16, family=FONT_FAMILY)
    ),
    row=2, col=2
)

# Update axes titles with larger font size
fig.update_yaxes(title_text="Upselling Rate (%)", row=1, col=1, title_font=dict(size=16, family=FONT_FAMILY))
fig.update_yaxes(title_text="Monthly Revenue (€)", row=1, col=2, title_font=dict(size=16, family=FONT_FAMILY))
fig.update_yaxes(title_text="Annual Revenue (€)", row=2, col=1, title_font=dict(size=16, family=FONT_FAMILY))
fig.update_yaxes(title_text="ROI (%)", row=2, col=2, title_font=dict(size=16, family=FONT_FAMILY))

# Update overall layout with increased font sizes
fig.update_layout(
    title={
        'text': 'Business Impact Projection',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 24, 'family': FONT_FAMILY}
    },
    height=800,
    width=1600,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgb(250,250,250)',
    showlegend=False,
    font=dict(family=FONT_FAMILY, size=16),
)

fig.show()

fig.write_image(images_dir / "business_impact.pdf", width=2000, height=800, scale=2)

# Business case summary printout
print("\nBusiness Case Summary:\n")
print(f"Current State:")
print(f"  • {current_upselling_rate:.1f}% upselling rate")
print(f"  • €{abs(current_monthly_revenue):,.0f} monthly revenue")
print(f"  • €{abs(avg_revenue_per_contract):,.0f} average revenue per contract")
print(f"  • €{abs(revenue_uplift_per_upsell):,.0f} revenue uplift per upsell\n")

print("ML Model Opportunity:")
for p in projections:
    roi_percentage = abs((p['annual_increase'] - implementation_cost) / implementation_cost * 100)
    print(f"  {p['scenario']} Scenario ({p['description']}):")
    print(f"    → {p['additional_upsells']:,.0f} additional upsells")
    print(f"    → €{p['monthly_increase']:,.0f} monthly revenue increase")
    print(f"    → €{p['annual_increase']/1_000_000:.1f}M annual revenue increase")
    print(f"    → {roi_percentage:.0f}% ROI\n")
    


Business Case Summary:

Current State:
  • 7.0% upselling rate
  • €3,753,817 monthly revenue
  • €38 average revenue per contract
  • €2 revenue uplift per upsell

ML Model Opportunity:
  Conservative Scenario (50% improvement in upselling rate):
    → 3,524 additional upsells
    → €6,911 monthly revenue increase
    → €0.1M annual revenue increase
    → 83% ROI

  Moderate Scenario (100% improvement in upselling rate):
    → 7,049 additional upsells
    → €13,822 monthly revenue increase
    → €0.2M annual revenue increase
    → 67% ROI

  Aggressive Scenario (200% improvement in upselling rate):
    → 14,098 additional upsells
    → €27,644 monthly revenue increase
    → €0.3M annual revenue increase
    → 34% ROI



In [52]:
projections = []
for scenario in scenarios:
    new_upselling_rate = min(current_upselling_rate * scenario['improvement'], 25)  # cap at 25%
    additional_upsells = (new_upselling_rate - current_upselling_rate) / 100 * total_contracts
    monthly_revenue_increase = abs(additional_upsells * revenue_uplift_per_upsell)
    annual_revenue_increase = abs(monthly_revenue_increase * 12)

    projections.append({
        'scenario': scenario['name'],
        'description': scenario['description'],
        'new_upselling_rate': new_upselling_rate,
        'additional_upsells': abs(additional_upsells),
        'monthly_increase': monthly_revenue_increase,
        'annual_increase': annual_revenue_increase
    })

# Visualization setup: 1x2 subplots (wide format)
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=(
        'Annual Revenue Projection',
        'ROI Potential'
    )
)

scenario_names = [p['scenario'] for p in projections]
colors = [MAGENTA_LIGHT, MAGENTA_COLOR, MAGENTA_DARK]

# Annual revenue increase bars
fig.add_trace(
    go.Bar(
        x=scenario_names,
        y=[p['annual_increase'] for p in projections],
        name='Annual Revenue Increase',
        marker_color=colors,
        text=[f'€{p["annual_increase"]/1_000_000:.1f}M' for p in projections],
        textposition='auto',
        textfont=dict(size=16, family=FONT_FAMILY)
    ),
    row=1, col=1
)

# ROI calculation and bars
implementation_cost = 100000
roi_values = [
    abs((p['annual_increase'] - implementation_cost) / implementation_cost * 100)
    for p in projections
]

fig.add_trace(
    go.Bar(
        x=scenario_names,
        y=roi_values,
        name='ROI (%)',
        marker_color=colors,
        text=[f'{roi:.0f}%' for roi in roi_values],
        textposition='auto',
        textfont=dict(size=16, family=FONT_FAMILY)
    ),
    row=1, col=2
)

# Update axes titles
fig.update_yaxes(title_text="Annual Revenue (€)", row=1, col=1, title_font=dict(size=16, family=FONT_FAMILY))
fig.update_yaxes(title_text="ROI (%)", row=1, col=2, title_font=dict(size=16, family=FONT_FAMILY))

# Update overall layout
fig.update_layout(
    title={
        'text': 'Annual Impact & ROI Projection',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 24, 'family': FONT_FAMILY}
    },
    height=500,
    width=1400,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgb(250,250,250)',
    showlegend=False,
    font=dict(family=FONT_FAMILY, size=16),
)

fig.show()

fig.write_image(images_dir / "annual_impact_roi_projection.pdf", width=2000, height=600, scale=2)

In [50]:
projections = []
for scenario in scenarios:
    new_upselling_rate = min(current_upselling_rate * scenario['improvement'], 25)  # cap at 25%
    additional_upsells = (new_upselling_rate - current_upselling_rate) / 100 * total_contracts
    monthly_revenue_increase = abs(additional_upsells * revenue_uplift_per_upsell)
    annual_revenue_increase = abs(monthly_revenue_increase * 12)

    projections.append({
        'scenario': scenario['name'],
        'description': scenario['description'],
        'new_upselling_rate': new_upselling_rate,
        'additional_upsells': abs(additional_upsells),
        'monthly_increase': monthly_revenue_increase,
        'annual_increase': annual_revenue_increase
    })

# Visualization: Annual Revenue Projection
fig = go.Figure()

scenario_names = [p['scenario'] for p in projections]
colors = [MAGENTA_LIGHT, MAGENTA_COLOR, MAGENTA_DARK]

fig.add_trace(
    go.Bar(
        x=scenario_names,
        y=[p['annual_increase'] for p in projections],
        name='Annual Revenue Increase',
        marker_color=colors,
        text=[f'€{p["annual_increase"]/1_000_000:.1f}M' for p in projections],
        textposition='auto',
        textfont=dict(size=16, family=FONT_FAMILY)
    )
)

fig.update_yaxes(
    title_text="Annual Revenue (€)",
    title_font=dict(size=16, family=FONT_FAMILY)
)

fig.update_layout(
    title={
        'text': 'Annual Revenue Projection',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 24, 'family': FONT_FAMILY}
    },
    height=500,
    width=700,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgb(250,250,250)',
    showlegend=False,
    font=dict(family=FONT_FAMILY, size=16),
)

fig.show()

fig.write_image(images_dir / "annual_revenue_projection.pdf", width=1000, height=600, scale=2)
