# Import / Config

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from pathlib import Path
from dotenv import load_dotenv

from IPython.display import Markdown, display

import edurel.utils.dbcon as dbcu
import edurel.utils.db as dbu
import edurel.utils.duckdb as ddbu
import edurel.utils.llm as llmu
import edurel.utils.llmchat as llmc
import edurel.widgets.mermaid_viz as mmw
import edurel.widgets.chatman as cmw
import edurel.utils.misc as mu

load_dotenv() 
BASE_DIR = os.getenv("BASE_DIR")
DB_DIR = f"{BASE_DIR}/databases"


  from pydantic.v1.fields import FieldInfo as FieldInfoV1


# Database

In [3]:
con = dbcu.adw_olap()
additional_fks = {
    "DimAccount": ["DimAccount|ParentAccountKey|AccountKey"],
    "DimDepartmentGroup": ["DimDepartmentGroup|ParentDepartmentGroupKey|DepartmentGroupKey"],
    "DimEmployee": ["DimEmployee|ParentEmployeeKey|EmployeeKey"],
    "DimOrganization": ["DimOrganization|ParentOrganizationKey|OrganizationKey"],
    }

db = dbu.DbHandler(con,additional_fks=additional_fks)
schema = db.schema_yaml_str(["nullable", "fkname"])

# Callcenter

## 01

In [4]:
sql = """
WITH ShiftMetrics AS (
    SELECT
        fcc.Shift,
        fcc.WageType,
        dd.CalendarYear,
        dd.CalendarQuarter,
        COUNT(*) AS OperatingDays,
        SUM(fcc.TotalOperators) AS TotalOperatorShifts,
        SUM(fcc.LevelOneOperators) AS TotalLevelOneShifts,
        SUM(fcc.LevelTwoOperators) AS TotalLevelTwoShifts,
        SUM(fcc.Calls) AS TotalCalls,
        SUM(fcc.AutomaticResponses) AS TotalAutomaticResponses,
        SUM(fcc.Orders) AS TotalOrders,
        SUM(fcc.IssuesRaised) AS TotalIssuesRaised,
        AVG(fcc.AverageTimePerIssue) AS AvgTimePerIssue,
        AVG(fcc.ServiceGrade) AS AvgServiceGrade,
        AVG(fcc.TotalOperators) AS AvgOperatorsPerDay,
        AVG(fcc.Calls) AS AvgCallsPerDay
    FROM FactCallCenter fcc
    INNER JOIN DimDate dd ON fcc.DateKey = dd.DateKey
    GROUP BY fcc.Shift, fcc.WageType, dd.CalendarYear, dd.CalendarQuarter
),
ShiftEfficiency AS (
    SELECT
        Shift,
        WageType,
        CalendarYear,
        CalendarQuarter,
        OperatingDays,
        TotalCalls,
        TotalAutomaticResponses,
        TotalOrders,
        TotalIssuesRaised,
        TotalOperatorShifts,
        TotalLevelOneShifts,
        TotalLevelTwoShifts,
        ROUND(AvgTimePerIssue, 2) AS AvgTimePerIssue,
        ROUND(AvgServiceGrade, 4) AS AvgServiceGrade,
        ROUND(AvgOperatorsPerDay, 2) AS AvgOperatorsPerDay,
        ROUND(AvgCallsPerDay, 2) AS AvgCallsPerDay,
        -- Efficiency Metrics
        ROUND(CAST(TotalCalls AS FLOAT) / NULLIF(TotalOperatorShifts, 0), 2) AS CallsPerOperatorShift,
        ROUND(CAST(TotalOrders AS FLOAT) / NULLIF(TotalCalls, 0) * 100, 2) AS ConversionRatePct,
        ROUND(CAST(TotalAutomaticResponses AS FLOAT) / NULLIF(TotalCalls, 0) * 100, 2) AS AutoResponseRatePct,
        ROUND(CAST(TotalIssuesRaised AS FLOAT) / NULLIF(TotalCalls, 0) * 100, 2) AS IssueRatePct,
        ROUND(CAST(TotalOrders AS FLOAT) / NULLIF(TotalOperatorShifts, 0), 2) AS OrdersPerOperatorShift,
        ROUND(CAST(TotalLevelTwoShifts AS FLOAT) / NULLIF(TotalOperatorShifts, 0) * 100, 2) AS Level2OperatorPct
    FROM ShiftMetrics
),
ShiftRankings AS (
    SELECT
        *,
        RANK() OVER (PARTITION BY CalendarYear, CalendarQuarter ORDER BY ConversionRatePct DESC) AS ConversionRank,
        RANK() OVER (PARTITION BY CalendarYear, CalendarQuarter ORDER BY AvgServiceGrade DESC) AS ServiceGradeRank,
        RANK() OVER (PARTITION BY CalendarYear, CalendarQuarter ORDER BY CallsPerOperatorShift DESC) AS EfficiencyRank,
        RANK() OVER (PARTITION BY CalendarYear, CalendarQuarter ORDER BY AvgTimePerIssue ASC) AS IssueResolutionRank
    FROM ShiftEfficiency
)
SELECT
    Shift,
    WageType,
    CalendarYear,
    CalendarQuarter,
    OperatingDays,
    TotalCalls,
    TotalOrders,
    TotalIssuesRaised,
    AvgOperatorsPerDay,
    AvgCallsPerDay,
    CallsPerOperatorShift,
    ConversionRatePct,
    AutoResponseRatePct,
    IssueRatePct,
    OrdersPerOperatorShift,
    AvgTimePerIssue,
    AvgServiceGrade,
    Level2OperatorPct,
    ConversionRank,
    ServiceGradeRank,
    EfficiencyRank,
    IssueResolutionRank,
    CASE
        WHEN ConversionRank = 1 AND ServiceGradeRank <= 2 THEN 'Top Performer'
        WHEN ConversionRank <= 2 OR ServiceGradeRank <= 2 THEN 'Strong Performer'
        WHEN ConversionRank >= 4 AND ServiceGradeRank >= 4 THEN 'Needs Improvement'
        ELSE 'Average Performer'
    END AS PerformanceCategory
FROM ShiftRankings
ORDER BY CalendarYear DESC, CalendarQuarter DESC, ConversionRatePct DESC;
"""
db.sql_print(sql)

┌──────────┬──────────┬──────────────┬─────────────────┬───────────────┬────────────┬─────────────┬───────────────────┬────────────────────┬────────────────┬───────────────────────┬───────────────────┬─────────────────────┬──────────────┬────────────────────────┬─────────────────┬─────────────────┬───────────────────┬────────────────┬──────────────────┬────────────────┬─────────────────────┬─────────────────────┐
│  Shift   │ WageType │ CalendarYear │ CalendarQuarter │ OperatingDays │ TotalCalls │ TotalOrders │ TotalIssuesRaised │ AvgOperatorsPerDay │ AvgCallsPerDay │ CallsPerOperatorShift │ ConversionRatePct │ AutoResponseRatePct │ IssueRatePct │ OrdersPerOperatorShift │ AvgTimePerIssue │ AvgServiceGrade │ Level2OperatorPct │ ConversionRank │ ServiceGradeRank │ EfficiencyRank │ IssueResolutionRank │ PerformanceCategory │
│ varchar  │ varchar  │    int32     │      int32      │     int64     │   int128   │   int128    │      int128       │       double       │     double     │         

## 02

In [6]:
sql = """
WITH DailyOperatorMetrics AS (
    SELECT
        dd.FullDateAlternateKey AS Date,
        dd.CalendarYear,
        dd.CalendarQuarter,
        dd.MonthNumberOfYear AS CalendarMonth,
        dd.EnglishDayNameOfWeek AS DayOfWeek,
        fcc.Shift,
        fcc.WageType,
        fcc.TotalOperators,
        fcc.LevelOneOperators,
        fcc.LevelTwoOperators,
        fcc.Calls,
        fcc.AutomaticResponses,
        fcc.Orders,
        fcc.IssuesRaised,
        fcc.AverageTimePerIssue,
        fcc.ServiceGrade,
        -- Calculate ratios
        ROUND(CAST(fcc.Calls AS FLOAT) / NULLIF(fcc.TotalOperators, 0), 2) AS CallsPerOperator,
        ROUND(CAST(fcc.Orders AS FLOAT) / NULLIF(fcc.Calls, 0) * 100, 2) AS ConversionRate,
        ROUND(CAST(fcc.LevelTwoOperators AS FLOAT) / NULLIF(fcc.TotalOperators, 0) * 100, 2) AS SeniorOperatorPct,
        ROUND(CAST(fcc.IssuesRaised AS FLOAT) / NULLIF(fcc.Calls, 0) * 100, 2) AS IssueRate
    FROM FactCallCenter fcc
    INNER JOIN DimDate dd ON fcc.DateKey = dd.DateKey
),
StaffingBuckets AS (
    SELECT
        *,
        CASE
            WHEN TotalOperators <= 5 THEN '1-5 Operators'
            WHEN TotalOperators <= 10 THEN '6-10 Operators'
            WHEN TotalOperators <= 15 THEN '11-15 Operators'
            WHEN TotalOperators <= 20 THEN '16-20 Operators'
            ELSE '21+ Operators'
        END AS StaffingLevel,
        CASE
            WHEN SeniorOperatorPct < 20 THEN 'Low Senior (<20%)'
            WHEN SeniorOperatorPct < 40 THEN 'Medium Senior (20-40%)'
            ELSE 'High Senior (40%+)'
        END AS SeniorMixLevel,
        CASE
            WHEN CallsPerOperator < 50 THEN 'Low Load (<50)'
            WHEN CallsPerOperator < 100 THEN 'Medium Load (50-100)'
            WHEN CallsPerOperator < 150 THEN 'High Load (100-150)'
            ELSE 'Very High Load (150+)'
        END AS WorkloadLevel
    FROM DailyOperatorMetrics
),
StaffingImpactAnalysis AS (
    SELECT
        StaffingLevel,
        SeniorMixLevel,
        WorkloadLevel,
        COUNT(*) AS ObservationCount,
        ROUND(AVG(TotalOperators), 2) AS AvgTotalOperators,
        ROUND(AVG(SeniorOperatorPct), 2) AS AvgSeniorOperatorPct,
        ROUND(AVG(Calls), 2) AS AvgCalls,
        ROUND(AVG(CallsPerOperator), 2) AS AvgCallsPerOperator,
        ROUND(AVG(ConversionRate), 2) AS AvgConversionRate,
        ROUND(AVG(ServiceGrade), 4) AS AvgServiceGrade,
        ROUND(AVG(IssueRate), 2) AS AvgIssueRate,
        ROUND(AVG(AverageTimePerIssue), 2) AS AvgTimePerIssue,
        ROUND(AVG(Orders), 2) AS AvgOrders,
        -- Performance variability
        ROUND(STDDEV_SAMP(ConversionRate), 2) AS StdDevConversionRate,
        ROUND(STDDEV_SAMP(ServiceGrade), 4) AS StdDevServiceGrade,
        -- Min/Max for ranges
        ROUND(MIN(ConversionRate), 2) AS MinConversionRate,
        ROUND(MAX(ConversionRate), 2) AS MaxConversionRate,
        ROUND(MIN(ServiceGrade), 4) AS MinServiceGrade,
        ROUND(MAX(ServiceGrade), 4) AS MaxServiceGrade
    FROM StaffingBuckets
    GROUP BY StaffingLevel, SeniorMixLevel, WorkloadLevel
),
PerformanceScoring AS (
    SELECT
        *,
        -- Composite performance score (higher is better)
        ROUND((AvgConversionRate / 10) + (AvgServiceGrade * 100) - (AvgIssueRate / 2), 2) AS PerformanceScore,
        RANK() OVER (ORDER BY AvgConversionRate DESC, AvgServiceGrade DESC) AS OverallRank
    FROM StaffingImpactAnalysis
    WHERE ObservationCount >= 5  -- Filter out statistically insignificant groups
)
SELECT
    StaffingLevel,
    SeniorMixLevel,
    WorkloadLevel,
    ObservationCount,
    AvgTotalOperators,
    AvgSeniorOperatorPct,
    AvgCalls,
    AvgCallsPerOperator,
    AvgConversionRate,
    AvgServiceGrade,
    AvgIssueRate,
    AvgTimePerIssue,
    AvgOrders,
    StdDevConversionRate,
    StdDevServiceGrade,
    MinConversionRate,
    MaxConversionRate,
    MinServiceGrade,
    MaxServiceGrade,
    PerformanceScore,
    OverallRank,
    CASE
        WHEN OverallRank <= 3 THEN 'Optimal Configuration'
        WHEN OverallRank <= 10 THEN 'Good Configuration'
        WHEN OverallRank <= 20 THEN 'Acceptable Configuration'
        ELSE 'Suboptimal Configuration'
    END AS ConfigurationAssessment
FROM PerformanceScoring
ORDER BY PerformanceScore DESC;
"""
db.sql_print(sql)

┌─────────────────┬────────────────────┬────────────────┬──────────────────┬───────────────────┬──────────────────────┬──────────┬─────────────────────┬───────────────────┬─────────────────┬──────────────┬─────────────────┬───────────┬──────────────────────┬────────────────────┬───────────────────┬───────────────────┬─────────────────┬─────────────────┬──────────────────┬─────────────┬─────────────────────────┐
│  StaffingLevel  │   SeniorMixLevel   │ WorkloadLevel  │ ObservationCount │ AvgTotalOperators │ AvgSeniorOperatorPct │ AvgCalls │ AvgCallsPerOperator │ AvgConversionRate │ AvgServiceGrade │ AvgIssueRate │ AvgTimePerIssue │ AvgOrders │ StdDevConversionRate │ StdDevServiceGrade │ MinConversionRate │ MaxConversionRate │ MinServiceGrade │ MaxServiceGrade │ PerformanceScore │ OverallRank │ ConfigurationAssessment │
│     varchar     │      varchar       │    varchar     │      int64       │      double       │        double        │  double  │       double        │      double      

## 03

In [7]:
sql = """
WITH DailyMetrics AS (
    SELECT
        dd.FullDateAlternateKey AS Date,
        dd.DateKey,
        dd.CalendarYear,
        dd.CalendarQuarter,
        dd.MonthNumberOfYear,
        dd.EnglishMonthName AS MonthName,
        dd.DayNumberOfWeek,
        dd.EnglishDayNameOfWeek AS DayOfWeek,
        dd.WeekNumberOfYear,
        SUM(fcc.TotalOperators) AS TotalOperators,
        SUM(fcc.Calls) AS TotalCalls,
        SUM(fcc.Orders) AS TotalOrders,
        SUM(fcc.IssuesRaised) AS TotalIssues,
        AVG(fcc.ServiceGrade) AS AvgServiceGrade,
        AVG(fcc.AverageTimePerIssue) AS AvgTimePerIssue,
        ROUND(CAST(SUM(fcc.Orders) AS FLOAT) / NULLIF(SUM(fcc.Calls), 0) * 100, 2) AS ConversionRate
    FROM FactCallCenter fcc
    INNER JOIN DimDate dd ON fcc.DateKey = dd.DateKey
    GROUP BY
        dd.FullDateAlternateKey, dd.DateKey, dd.CalendarYear, dd.CalendarQuarter,
        dd.MonthNumberOfYear, dd.EnglishMonthName, dd.DayNumberOfWeek,
        dd.EnglishDayNameOfWeek, dd.WeekNumberOfYear
),
MonthlyAggregates AS (
    SELECT
        CalendarYear,
        MonthNumberOfYear,
        MonthName,
        COUNT(DISTINCT Date) AS OperatingDays,
        SUM(TotalOperators) AS MonthlyOperators,
        SUM(TotalCalls) AS MonthlyCalls,
        SUM(TotalOrders) AS MonthlyOrders,
        SUM(TotalIssues) AS MonthlyIssues,
        AVG(AvgServiceGrade) AS MonthlyServiceGrade,
        AVG(AvgTimePerIssue) AS MonthlyAvgTimePerIssue,
        AVG(ConversionRate) AS MonthlyConversionRate,
        AVG(TotalCalls) AS AvgDailyCalls,
        AVG(TotalOrders) AS AvgDailyOrders
    FROM DailyMetrics
    GROUP BY CalendarYear, MonthNumberOfYear, MonthName
),
MonthlyTrends AS (
    SELECT
        CalendarYear,
        MonthNumberOfYear,
        MonthName,
        OperatingDays,
        MonthlyCalls,
        MonthlyOrders,
        MonthlyIssues,
        ROUND(MonthlyServiceGrade, 4) AS MonthlyServiceGrade,
        ROUND(MonthlyAvgTimePerIssue, 2) AS MonthlyAvgTimePerIssue,
        ROUND(MonthlyConversionRate, 2) AS MonthlyConversionRate,
        ROUND(AvgDailyCalls, 2) AS AvgDailyCalls,
        ROUND(AvgDailyOrders, 2) AS AvgDailyOrders,
        -- Previous month comparison
        LAG(MonthlyCalls, 1) OVER (ORDER BY CalendarYear, MonthNumberOfYear) AS PrevMonthCalls,
        LAG(MonthlyConversionRate, 1) OVER (ORDER BY CalendarYear, MonthNumberOfYear) AS PrevMonthConversionRate,
        -- Same month previous year
        LAG(MonthlyCalls, 12) OVER (ORDER BY CalendarYear, MonthNumberOfYear) AS SameMonthLastYearCalls,
        LAG(MonthlyConversionRate, 12) OVER (ORDER BY CalendarYear, MonthNumberOfYear) AS SameMonthLastYearConversionRate,
        -- Moving averages
        AVG(MonthlyCalls) OVER (
            ORDER BY CalendarYear, MonthNumberOfYear
            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
        ) AS ThreeMonthAvgCalls,
        AVG(MonthlyConversionRate) OVER (
            ORDER BY CalendarYear, MonthNumberOfYear
            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
        ) AS ThreeMonthAvgConversion
    FROM MonthlyAggregates
),
SeasonalityAnalysis AS (
    SELECT
        CalendarYear,
        MonthNumberOfYear,
        MonthName,
        OperatingDays,
        MonthlyCalls,
        MonthlyOrders,
        MonthlyIssues,
        MonthlyServiceGrade,
        MonthlyConversionRate,
        AvgDailyCalls,
        AvgDailyOrders,
        ROUND(ThreeMonthAvgCalls, 2) AS ThreeMonthAvgCalls,
        ROUND(ThreeMonthAvgConversion, 2) AS ThreeMonthAvgConversion,
        -- Month-over-month growth
        CASE
            WHEN PrevMonthCalls > 0
            THEN ROUND(((MonthlyCalls - PrevMonthCalls) / CAST(PrevMonthCalls AS FLOAT)) * 100, 2)
            ELSE NULL
        END AS MoMCallGrowthPct,
        CASE
            WHEN PrevMonthConversionRate > 0
            THEN ROUND(MonthlyConversionRate - PrevMonthConversionRate, 2)
            ELSE NULL
        END AS MoMConversionChange,
        -- Year-over-year growth
        CASE
            WHEN SameMonthLastYearCalls > 0
            THEN ROUND(((MonthlyCalls - SameMonthLastYearCalls) / CAST(SameMonthLastYearCalls AS FLOAT)) * 100, 2)
            ELSE NULL
        END AS YoYCallGrowthPct,
        CASE
            WHEN SameMonthLastYearConversionRate > 0
            THEN ROUND(MonthlyConversionRate - SameMonthLastYearConversionRate, 2)
            ELSE NULL
        END AS YoYConversionChange,
        -- Seasonality index (compared to overall average)
        ROUND((MonthlyCalls / AVG(MonthlyCalls) OVER ()) * 100, 2) AS SeasonalityIndex
    FROM MonthlyTrends
),
DayOfWeekPatterns AS (
    SELECT
        DayOfWeek,
        DayNumberOfWeek,
        COUNT(*) AS ObservationCount,
        ROUND(AVG(TotalCalls), 2) AS AvgCalls,
        ROUND(AVG(TotalOrders), 2) AS AvgOrders,
        ROUND(AVG(ConversionRate), 2) AS AvgConversionRate,
        ROUND(AVG(AvgServiceGrade), 4) AS AvgServiceGrade,
        RANK() OVER (ORDER BY AVG(TotalCalls) DESC) AS CallVolumeRank,
        RANK() OVER (ORDER BY AVG(ConversionRate) DESC) AS ConversionRank
    FROM DailyMetrics
    GROUP BY DayOfWeek, DayNumberOfWeek
)
SELECT
    'Monthly Trends' AS AnalysisType,
    CAST(CalendarYear AS VARCHAR) || '-' || CAST(MonthNumberOfYear AS VARCHAR) AS Period,
    MonthName AS PeriodName,
    MonthlyCalls AS Calls,
    MonthlyOrders AS Orders,
    MonthlyConversionRate AS ConversionRate,
    MonthlyServiceGrade AS ServiceGrade,
    ThreeMonthAvgCalls AS MovingAvgCalls,
    ThreeMonthAvgConversion AS MovingAvgConversion,
    MoMCallGrowthPct AS MoMGrowthPct,
    YoYCallGrowthPct AS YoYGrowthPct,
    SeasonalityIndex,
    NULL AS DayOfWeek,
    NULL AS CallVolumeRank
FROM SeasonalityAnalysis
WHERE CalendarYear >= (SELECT MAX(CalendarYear) - 1 FROM SeasonalityAnalysis)

UNION ALL

SELECT
    'Day of Week Patterns' AS AnalysisType,
    CAST(DayNumberOfWeek AS VARCHAR) AS Period,
    DayOfWeek AS PeriodName,
    AvgCalls AS Calls,
    AvgOrders AS Orders,
    AvgConversionRate AS ConversionRate,
    AvgServiceGrade AS ServiceGrade,
    NULL AS MovingAvgCalls,
    NULL AS MovingAvgConversion,
    NULL AS MoMGrowthPct,
    NULL AS YoYGrowthPct,
    NULL AS SeasonalityIndex,
    DayOfWeek,
    CallVolumeRank
FROM DayOfWeekPatterns

ORDER BY AnalysisType, Period;
"""
db.sql_print(sql)

┌──────────────────────┬─────────┬────────────┬─────────┬─────────┬────────────────┬──────────────┬────────────────┬─────────────────────┬──────────────┬──────────────┬──────────────────┬───────────┬────────────────┐
│     AnalysisType     │ Period  │ PeriodName │  Calls  │ Orders  │ ConversionRate │ ServiceGrade │ MovingAvgCalls │ MovingAvgConversion │ MoMGrowthPct │ YoYGrowthPct │ SeasonalityIndex │ DayOfWeek │ CallVolumeRank │
│       varchar        │ varchar │  varchar   │ double  │ double  │     double     │    double    │     double     │       double        │    float     │    float     │      double      │  varchar  │     int64      │
├──────────────────────┼─────────┼────────────┼─────────┼─────────┼────────────────┼──────────────┼────────────────┼─────────────────────┼──────────────┼──────────────┼──────────────────┼───────────┼────────────────┤
│ Day of Week Patterns │ 1       │ Sunday     │ 1615.75 │  1118.0 │          69.26 │       0.0913 │           NULL │                

## 04

In [9]:
sql = """
WITH DailyIssueMetrics AS (
    SELECT
        dd.FullDateAlternateKey AS Date,
        dd.CalendarYear,
        dd.CalendarQuarter,
        dd.MonthNumberOfYear,
        dd.EnglishMonthName AS MonthName,
        dd.EnglishDayNameOfWeek AS DayOfWeek,
        fcc.Shift,
        fcc.WageType,
        fcc.TotalOperators,
        fcc.LevelOneOperators,
        fcc.LevelTwoOperators,
        fcc.Calls,
        fcc.AutomaticResponses,
        fcc.Orders,
        fcc.IssuesRaised,
        fcc.AverageTimePerIssue,
        fcc.ServiceGrade,
        -- Calculated metrics
        ROUND(CAST(fcc.IssuesRaised AS FLOAT) / NULLIF(fcc.Calls, 0) * 100, 2) AS IssueRate,
        ROUND(CAST(fcc.Calls AS FLOAT) / NULLIF(fcc.TotalOperators, 0), 2) AS CallsPerOperator,
        ROUND(CAST(fcc.AutomaticResponses AS FLOAT) / NULLIF(fcc.Calls, 0) * 100, 2) AS AutoResponseRate,
        ROUND(CAST(fcc.LevelTwoOperators AS FLOAT) / NULLIF(fcc.TotalOperators, 0) * 100, 2) AS SeniorOperatorPct
    FROM FactCallCenter fcc
    INNER JOIN DimDate dd ON fcc.DateKey = dd.DateKey
),
IssueSegmentation AS (
    SELECT
        *,
        CASE
            WHEN IssueRate < 5 THEN 'Low Issues (<5%)'
            WHEN IssueRate < 10 THEN 'Medium Issues (5-10%)'
            WHEN IssueRate < 15 THEN 'High Issues (10-15%)'
            ELSE 'Critical Issues (15%+)'
        END AS IssueSeverityLevel,
        CASE
            WHEN AverageTimePerIssue < 300 THEN 'Fast Resolution (<5 min)'
            WHEN AverageTimePerIssue < 600 THEN 'Normal Resolution (5-10 min)'
            WHEN AverageTimePerIssue < 900 THEN 'Slow Resolution (10-15 min)'
            ELSE 'Very Slow Resolution (15+ min)'
        END AS ResolutionSpeedCategory,
        CASE
            WHEN ServiceGrade >= 0.80 THEN 'Excellent Service (80%+)'
            WHEN ServiceGrade >= 0.70 THEN 'Good Service (70-80%)'
            WHEN ServiceGrade >= 0.60 THEN 'Fair Service (60-70%)'
            ELSE 'Poor Service (<60%)'
        END AS ServiceQualityLevel
    FROM DailyIssueMetrics
),
IssueAnalysisByFactors AS (
    SELECT
        Shift,
        WageType,
        IssueSeverityLevel,
        ResolutionSpeedCategory,
        ServiceQualityLevel,
        COUNT(*) AS ObservationCount,
        ROUND(AVG(IssuesRaised), 2) AS AvgIssuesRaised,
        ROUND(AVG(IssueRate), 2) AS AvgIssueRate,
        ROUND(AVG(AverageTimePerIssue), 2) AS AvgResolutionTime,
        ROUND(AVG(ServiceGrade), 4) AS AvgServiceGrade,
        ROUND(AVG(TotalOperators), 2) AS AvgOperators,
        ROUND(AVG(SeniorOperatorPct), 2) AS AvgSeniorPct,
        ROUND(AVG(CallsPerOperator), 2) AS AvgCallsPerOperator,
        ROUND(AVG(AutoResponseRate), 2) AS AvgAutoResponseRate,
        ROUND(AVG(Calls), 2) AS AvgCalls,
        -- Statistical measures
        ROUND(STDDEV(IssueRate), 2) AS StdDevIssueRate,
        ROUND(MIN(IssueRate), 2) AS MinIssueRate,
        ROUND(MAX(IssueRate), 2) AS MaxIssueRate,
        ROUND(MIN(AverageTimePerIssue), 2) AS MinResolutionTime,
        ROUND(MAX(AverageTimePerIssue), 2) AS MaxResolutionTime
    FROM IssueSegmentation
    GROUP BY Shift, WageType, IssueSeverityLevel, ResolutionSpeedCategory, ServiceQualityLevel
),
CorrelationAnalysis AS (
    SELECT
        'Staffing Level Impact' AS AnalysisDimension,
        CASE
            WHEN TotalOperators <= 10 THEN 'Low Staffing (<=10)'
            WHEN TotalOperators <= 15 THEN 'Medium Staffing (11-15)'
            ELSE 'High Staffing (16+)'
        END AS Segment,
        COUNT(*) AS ObservationCount,
        ROUND(AVG(IssueRate), 2) AS AvgIssueRate,
        ROUND(AVG(AverageTimePerIssue), 2) AS AvgResolutionTime,
        ROUND(AVG(ServiceGrade), 4) AS AvgServiceGrade
    FROM DailyIssueMetrics
    GROUP BY
        CASE
            WHEN TotalOperators <= 10 THEN 'Low Staffing (<=10)'
            WHEN TotalOperators <= 15 THEN 'Medium Staffing (11-15)'
            ELSE 'High Staffing (16+)'
        END

    UNION ALL

    SELECT
        'Senior Operator Mix Impact' AS AnalysisDimension,
        CASE
            WHEN SeniorOperatorPct < 25 THEN 'Low Senior Mix (<25%)'
            WHEN SeniorOperatorPct < 40 THEN 'Medium Senior Mix (25-40%)'
            ELSE 'High Senior Mix (40%+)'
        END AS Segment,
        COUNT(*) AS ObservationCount,
        ROUND(AVG(IssueRate), 2) AS AvgIssueRate,
        ROUND(AVG(AverageTimePerIssue), 2) AS AvgResolutionTime,
        ROUND(AVG(ServiceGrade), 4) AS AvgServiceGrade
    FROM DailyIssueMetrics
    GROUP BY
        CASE
            WHEN SeniorOperatorPct < 25 THEN 'Low Senior Mix (<25%)'
            WHEN SeniorOperatorPct < 40 THEN 'Medium Senior Mix (25-40%)'
            ELSE 'High Senior Mix (40%+)'
        END

    UNION ALL

    SELECT
        'Workload Impact' AS AnalysisDimension,
        CASE
            WHEN CallsPerOperator < 75 THEN 'Light Load (<75)'
            WHEN CallsPerOperator < 125 THEN 'Normal Load (75-125)'
            ELSE 'Heavy Load (125+)'
        END AS Segment,
        COUNT(*) AS ObservationCount,
        ROUND(AVG(IssueRate), 2) AS AvgIssueRate,
        ROUND(AVG(AverageTimePerIssue), 2) AS AvgResolutionTime,
        ROUND(AVG(ServiceGrade), 4) AS AvgServiceGrade
    FROM DailyIssueMetrics
    GROUP BY
        CASE
            WHEN CallsPerOperator < 75 THEN 'Light Load (<75)'
            WHEN CallsPerOperator < 125 THEN 'Normal Load (75-125)'
            ELSE 'Heavy Load (125+)'
        END
)
SELECT
    'Detailed Issue Analysis' AS ReportSection,
    Shift,
    WageType,
    IssueSeverityLevel,
    ResolutionSpeedCategory,
    ServiceQualityLevel,
    ObservationCount,
    AvgIssuesRaised,
    AvgIssueRate,
    AvgResolutionTime,
    AvgServiceGrade,
    AvgOperators,
    AvgSeniorPct,
    AvgCallsPerOperator,
    StdDevIssueRate,
    NULL AS AnalysisDimension,
    NULL AS Segment
FROM IssueAnalysisByFactors
WHERE ObservationCount >= 5

UNION ALL

SELECT
    'Correlation Analysis' AS ReportSection,
    NULL AS Shift,
    NULL AS WageType,
    NULL AS IssueSeverityLevel,
    NULL AS ResolutionSpeedCategory,
    NULL AS ServiceQualityLevel,
    ObservationCount,
    NULL AS AvgIssuesRaised,
    AvgIssueRate,
    AvgResolutionTime,
    AvgServiceGrade,
    NULL AS AvgOperators,
    NULL AS AvgSeniorPct,
    NULL AS AvgCallsPerOperator,
    NULL AS StdDevIssueRate,
    AnalysisDimension,
    Segment
FROM CorrelationAnalysis

ORDER BY ReportSection, AvgIssueRate DESC;
"""
db.sql_print(sql)

┌─────────────────────────┬──────────┬──────────┬────────────────────┬──────────────────────────┬─────────────────────┬──────────────────┬─────────────────┬──────────────┬───────────────────┬─────────────────┬──────────────┬──────────────┬─────────────────────┬─────────────────┬────────────────────────────┬────────────────────────────┐
│      ReportSection      │  Shift   │ WageType │ IssueSeverityLevel │ ResolutionSpeedCategory  │ ServiceQualityLevel │ ObservationCount │ AvgIssuesRaised │ AvgIssueRate │ AvgResolutionTime │ AvgServiceGrade │ AvgOperators │ AvgSeniorPct │ AvgCallsPerOperator │ StdDevIssueRate │     AnalysisDimension      │          Segment           │
│         varchar         │ varchar  │ varchar  │      varchar       │         varchar          │       varchar       │      int64       │     double      │    double    │      double       │     double      │    double    │    double    │       double        │     double      │          varchar           │          varcha

## 05

In [11]:
sql = """
WITH DailyConversionMetrics AS (
    SELECT
        dd.FullDateAlternateKey AS Date,
        dd.CalendarYear,
        dd.CalendarQuarter,
        dd.MonthNumberOfYear,
        dd.EnglishMonthName AS MonthName,
        dd.EnglishDayNameOfWeek AS DayOfWeek,
        fcc.Shift,
        fcc.WageType,
        fcc.TotalOperators,
        fcc.LevelOneOperators,
        fcc.LevelTwoOperators,
        fcc.Calls,
        fcc.AutomaticResponses,
        fcc.Orders,
        fcc.IssuesRaised,
        fcc.ServiceGrade,
        -- Conversion funnel metrics
        fcc.Calls AS FunnelTop,
        fcc.Calls - fcc.AutomaticResponses AS HumanHandledCalls,
        fcc.Orders AS FunnelBottom,
        ROUND(CAST(fcc.Orders AS FLOAT) / NULLIF(fcc.Calls, 0) * 100, 2) AS OverallConversionRate,
        ROUND(CAST(fcc.Orders AS FLOAT) / NULLIF(fcc.Calls - fcc.AutomaticResponses, 0) * 100, 2) AS HumanHandledConversionRate,
        ROUND(CAST(fcc.AutomaticResponses AS FLOAT) / NULLIF(fcc.Calls, 0) * 100, 2) AS AutomationRate,
        ROUND(CAST(fcc.IssuesRaised AS FLOAT) / NULLIF(fcc.Calls, 0) * 100, 2) AS IssueRate,
        ROUND(CAST(fcc.Calls AS FLOAT) / NULLIF(fcc.TotalOperators, 0), 2) AS CallsPerOperator,
        ROUND(CAST(fcc.Orders AS FLOAT) / NULLIF(fcc.TotalOperators, 0), 2) AS OrdersPerOperator,
        ROUND(CAST(fcc.LevelTwoOperators AS FLOAT) / NULLIF(fcc.TotalOperators, 0) * 100, 2) AS SeniorOperatorPct
    FROM FactCallCenter fcc
    INNER JOIN DimDate dd ON fcc.DateKey = dd.DateKey
),
ConversionSegmentation AS (
    SELECT
        *,
        CASE
            WHEN OverallConversionRate < 5 THEN 'Very Low (<5%)'
            WHEN OverallConversionRate < 10 THEN 'Low (5-10%)'
            WHEN OverallConversionRate < 15 THEN 'Medium (10-15%)'
            WHEN OverallConversionRate < 20 THEN 'Good (15-20%)'
            ELSE 'Excellent (20%+)'
        END AS ConversionTier,
        CASE
            WHEN ServiceGrade >= 0.75 THEN 'High Quality (75%+)'
            WHEN ServiceGrade >= 0.65 THEN 'Medium Quality (65-75%)'
            ELSE 'Low Quality (<65%)'
        END AS QualityTier,
        CASE
            WHEN AutomationRate < 30 THEN 'Low Auto (<30%)'
            WHEN AutomationRate < 50 THEN 'Medium Auto (30-50%)'
            ELSE 'High Auto (50%+)'
        END AS AutomationTier
    FROM DailyConversionMetrics
),
ConversionAnalysis AS (
    SELECT
        Shift,
        WageType,
        ConversionTier,
        QualityTier,
        AutomationTier,
        COUNT(*) AS ObservationCount,
        SUM(Calls) AS TotalCalls,
        SUM(HumanHandledCalls) AS TotalHumanHandledCalls,
        SUM(Orders) AS TotalOrders,
        ROUND(AVG(OverallConversionRate), 2) AS AvgOverallConversionRate,
        ROUND(AVG(HumanHandledConversionRate), 2) AS AvgHumanConversionRate,
        ROUND(AVG(AutomationRate), 2) AS AvgAutomationRate,
        ROUND(AVG(ServiceGrade), 4) AS AvgServiceGrade,
        ROUND(AVG(IssueRate), 2) AS AvgIssueRate,
        ROUND(AVG(OrdersPerOperator), 2) AS AvgOrdersPerOperator,
        ROUND(AVG(SeniorOperatorPct), 2) AS AvgSeniorOperatorPct,
        -- Variability metrics
        ROUND(STDDEV(OverallConversionRate), 2) AS StdDevConversionRate,
        ROUND(MIN(OverallConversionRate), 2) AS MinConversionRate,
        ROUND(MAX(OverallConversionRate), 2) AS MaxConversionRate
    FROM ConversionSegmentation
    GROUP BY Shift, WageType, ConversionTier, QualityTier, AutomationTier
),
MonthlyConversionTrends AS (
    SELECT
        CalendarYear,
        MonthNumberOfYear,
        MonthName,
        SUM(Calls) AS MonthlyCalls,
        SUM(Orders) AS MonthlyOrders,
        ROUND(CAST(SUM(Orders) AS FLOAT) / NULLIF(SUM(Calls), 0) * 100, 2) AS MonthlyConversionRate,
        LAG(ROUND(CAST(SUM(Orders) AS FLOAT) / NULLIF(SUM(Calls), 0) * 100, 2), 1)
            OVER (ORDER BY CalendarYear, MonthNumberOfYear) AS PrevMonthConversionRate,
        AVG(ServiceGrade) AS AvgServiceGrade
    FROM DailyConversionMetrics
    GROUP BY CalendarYear, MonthNumberOfYear, MonthName
),
ImpactProjections AS (
    SELECT
        Shift,
        WageType,
        ConversionTier,
        TotalCalls,
        TotalOrders,
        AvgOverallConversionRate,
        AvgServiceGrade,
        -- Project impact of 1% conversion improvement
        ROUND(TotalCalls * 0.01, 0) AS AdditionalOrdersFrom1PctImprovement,
        -- Benchmark gap to top performers
        (SELECT MAX(AvgOverallConversionRate) FROM ConversionAnalysis) AS BestConversionRate,
        ROUND((SELECT MAX(AvgOverallConversionRate) FROM ConversionAnalysis) - AvgOverallConversionRate, 2) AS GapToBest,
        ROUND(TotalCalls * ((SELECT MAX(AvgOverallConversionRate) FROM ConversionAnalysis) - AvgOverallConversionRate) / 100, 0) AS PotentialAdditionalOrders
    FROM ConversionAnalysis
    WHERE ObservationCount >= 10
)
SELECT
    'Conversion by Operational Factors' AS ReportSection,
    Shift,
    WageType,
    ConversionTier,
    QualityTier,
    AutomationTier,
    ObservationCount,
    TotalCalls,
    TotalOrders,
    AvgOverallConversionRate,
    AvgHumanConversionRate,
    AvgAutomationRate,
    AvgServiceGrade,
    AvgIssueRate,
    AvgOrdersPerOperator,
    StdDevConversionRate,
    MinConversionRate,
    MaxConversionRate,
    NULL AS MonthlyCalls,
    NULL AS MonthlyConversionRate,
    NULL AS PrevMonthConversionRate,
    NULL AS AdditionalOrdersFrom1PctImprovement,
    NULL AS GapToBest,
    NULL AS PotentialAdditionalOrders
FROM ConversionAnalysis
WHERE ObservationCount >= 5

UNION ALL

SELECT
    'Monthly Conversion Trends' AS ReportSection,
    NULL AS Shift,
    NULL AS WageType,
    NULL AS ConversionTier,
    NULL AS QualityTier,
    NULL AS AutomationTier,
    NULL AS ObservationCount,
    NULL AS TotalCalls,
    NULL AS TotalOrders,
    NULL AS AvgOverallConversionRate,
    NULL AS AvgHumanConversionRate,
    NULL AS AvgAutomationRate,
    ROUND(AvgServiceGrade, 4) AS AvgServiceGrade,
    NULL AS AvgIssueRate,
    NULL AS AvgOrdersPerOperator,
    NULL AS StdDevConversionRate,
    NULL AS MinConversionRate,
    NULL AS MaxConversionRate,
    MonthlyCalls,
    MonthlyConversionRate,
    PrevMonthConversionRate,
    NULL AS AdditionalOrdersFrom1PctImprovement,
    NULL AS GapToBest,
    NULL AS PotentialAdditionalOrders
FROM MonthlyConversionTrends

UNION ALL

SELECT
    'Improvement Impact Projections' AS ReportSection,
    Shift,
    WageType,
    ConversionTier,
    NULL AS QualityTier,
    NULL AS AutomationTier,
    NULL AS ObservationCount,
    TotalCalls,
    TotalOrders,
    AvgOverallConversionRate,
    NULL AS AvgHumanConversionRate,
    NULL AS AvgAutomationRate,
    AvgServiceGrade,
    NULL AS AvgIssueRate,
    NULL AS AvgOrdersPerOperator,
    NULL AS StdDevConversionRate,
    NULL AS MinConversionRate,
    NULL AS MaxConversionRate,
    NULL AS MonthlyCalls,
    NULL AS MonthlyConversionRate,
    NULL AS PrevMonthConversionRate,
    AdditionalOrdersFrom1PctImprovement,
    GapToBest,
    PotentialAdditionalOrders
FROM ImpactProjections

ORDER BY ReportSection, AvgOverallConversionRate DESC NULLS LAST;
"""
db.sql_print(sql)

┌───────────────────────────────────┬──────────┬──────────┬──────────────────┬────────────────────┬──────────────────┬──────────────────┬────────────┬─────────────┬──────────────────────────┬────────────────────────┬───────────────────┬─────────────────┬──────────────┬──────────────────────┬──────────────────────┬───────────────────┬───────────────────┬──────────────┬───────────────────────┬─────────────────────────┬─────────────────────────────────────┬───────────┬───────────────────────────┐
│           ReportSection           │  Shift   │ WageType │  ConversionTier  │    QualityTier     │  AutomationTier  │ ObservationCount │ TotalCalls │ TotalOrders │ AvgOverallConversionRate │ AvgHumanConversionRate │ AvgAutomationRate │ AvgServiceGrade │ AvgIssueRate │ AvgOrdersPerOperator │ StdDevConversionRate │ MinConversionRate │ MaxConversionRate │ MonthlyCalls │ MonthlyConversionRate │ PrevMonthConversionRate │ AdditionalOrdersFrom1PctImprovement │ GapToBest │ PotentialAdditionalOrders │
│ 

# CLV

## 01

In [12]:
sql = """
WITH CustomerFirstLastPurchase AS (
    SELECT
        fis.CustomerKey,
        MIN(fis.OrderDate) AS FirstPurchaseDate,
        MAX(fis.OrderDate) AS LastPurchaseDate,
        COUNT(DISTINCT fis.SalesOrderNumber) AS TotalOrders,
        COUNT(*) AS TotalLineItems,
        (MAX(fis.OrderDate)::DATE - MIN(fis.OrderDate)::DATE) AS CustomerTenureDays
    FROM FactInternetSales fis
    GROUP BY fis.CustomerKey
),

CustomerLifetimeMetrics AS (
    SELECT
        fis.CustomerKey,
        c.FirstName || ' ' || c.LastName AS CustomerName,
        c.YearlyIncome,
        c.EnglishEducation AS Education,
        c.EnglishOccupation AS Occupation,
        c.Gender,
        c.MaritalStatus,
        c.TotalChildren,
        c.NumberCarsOwned,
        c.HouseOwnerFlag,
        g.EnglishCountryRegionName AS Country,
        g.StateProvinceName AS State,
        g.City,
        st.SalesTerritoryRegion,
        cflp.FirstPurchaseDate,
        cflp.LastPurchaseDate,
        cflp.CustomerTenureDays,
        ROUND(cflp.CustomerTenureDays / 365.25, 2) AS CustomerTenureYears,
        (CURRENT_DATE - cflp.LastPurchaseDate::DATE) AS DaysSinceLastPurchase,
        cflp.TotalOrders,
        cflp.TotalLineItems,
        COUNT(DISTINCT DATE(fis.OrderDate)) AS UniquePurchaseDays,
        SUM(fis.OrderQuantity) AS TotalUnits,
        ROUND(SUM(fis.SalesAmount), 2) AS LifetimeRevenue,
        ROUND(SUM(fis.SalesAmount - fis.TotalProductCost), 2) AS LifetimeGrossProfit,
        ROUND(100.0 * SUM(fis.SalesAmount - fis.TotalProductCost) / NULLIF(SUM(fis.SalesAmount), 0), 2) AS LifetimeMarginPct,
        ROUND(AVG(fis.SalesAmount), 2) AS AvgLineItemValue,
        ROUND(SUM(fis.SalesAmount) / NULLIF(cflp.TotalOrders, 0), 2) AS AvgOrderValue,
        ROUND(SUM(fis.DiscountAmount), 2) AS TotalDiscounts,
        ROUND(100.0 * SUM(fis.DiscountAmount) / NULLIF(SUM(fis.SalesAmount + fis.DiscountAmount), 0), 2) AS AvgDiscountPct,
        COUNT(DISTINCT pc.EnglishProductCategoryName) AS UniqueCategories,
        COUNT(DISTINCT fis.ProductKey) AS UniqueProducts
    FROM FactInternetSales fis
    INNER JOIN CustomerFirstLastPurchase cflp ON fis.CustomerKey = cflp.CustomerKey
    INNER JOIN DimCustomer c ON fis.CustomerKey = c.CustomerKey
    INNER JOIN DimGeography g ON c.GeographyKey = g.GeographyKey
    INNER JOIN DimSalesTerritory st ON g.SalesTerritoryKey = st.SalesTerritoryKey
    INNER JOIN DimProduct p ON fis.ProductKey = p.ProductKey
    LEFT JOIN DimProductSubcategory psc ON p.ProductSubcategoryKey = psc.ProductSubcategoryKey
    LEFT JOIN DimProductCategory pc ON psc.ProductCategoryKey = pc.ProductCategoryKey
    GROUP BY fis.CustomerKey, c.FirstName, c.LastName, c.YearlyIncome, c.EnglishEducation,
             c.EnglishOccupation, c.Gender, c.MaritalStatus, c.TotalChildren, c.NumberCarsOwned,
             c.HouseOwnerFlag, g.EnglishCountryRegionName, g.StateProvinceName, g.City,
             st.SalesTerritoryRegion, cflp.FirstPurchaseDate, cflp.LastPurchaseDate,
             cflp.CustomerTenureDays, cflp.TotalOrders, cflp.TotalLineItems
),

CustomerValueMetrics AS (
    SELECT
        clm.*,
        -- Annualized metrics
        ROUND(clm.LifetimeRevenue / NULLIF(clm.CustomerTenureYears, 0), 2) AS RevenuePerYear,
        ROUND(clm.LifetimeGrossProfit / NULLIF(clm.CustomerTenureYears, 0), 2) AS ProfitPerYear,
        ROUND(clm.TotalOrders / NULLIF(clm.CustomerTenureYears, 0), 2) AS OrdersPerYear,
        -- Purchase frequency (days between orders)
        ROUND(clm.CustomerTenureDays / NULLIF(clm.TotalOrders - 1, 0), 2) AS AvgDaysBetweenOrders,
        -- Customer activity ratio (purchase days / tenure days)
        ROUND(100.0 * clm.UniquePurchaseDays / NULLIF(clm.CustomerTenureDays, 0), 2) AS ActivityRatioPct,
        -- Recency score (0-100, higher is better/more recent)
        CASE
            WHEN clm.DaysSinceLastPurchase <= 30 THEN 100
            WHEN clm.DaysSinceLastPurchase <= 90 THEN 80
            WHEN clm.DaysSinceLastPurchase <= 180 THEN 60
            WHEN clm.DaysSinceLastPurchase <= 365 THEN 40
            WHEN clm.DaysSinceLastPurchase <= 730 THEN 20
            ELSE 0
        END AS RecencyScore,
        -- Frequency quintile (relative to other customers)
        NTILE(5) OVER (ORDER BY clm.TotalOrders DESC) AS FrequencyQuintile,
        -- Monetary quintile (relative to other customers)
        NTILE(5) OVER (ORDER BY clm.LifetimeRevenue DESC) AS MonetaryQuintile
    FROM CustomerLifetimeMetrics clm
),

CustomerValueScoring AS (
    SELECT
        cvm.*,
        -- Comprehensive value score (0-100)
        ROUND(
            (cvm.MonetaryQuintile * 20) +  -- Revenue contribution (40%)
            (cvm.FrequencyQuintile * 16) +  -- Purchase frequency (32%)
            (cvm.RecencyScore * 0.20) +     -- Recency (20%)
            (CASE WHEN cvm.LifetimeMarginPct > 30 THEN 8 ELSE cvm.LifetimeMarginPct * 0.267 END) -- Profitability (8%)
        , 2) AS CustomerValueScore,
        RANK() OVER (ORDER BY cvm.LifetimeRevenue DESC) AS RevenueRank,
        RANK() OVER (ORDER BY cvm.LifetimeGrossProfit DESC) AS ProfitRank
    FROM CustomerValueMetrics cvm
),

CustomerTiering AS (
    SELECT
        cvs.*,
        CASE
            WHEN cvs.CustomerValueScore >= 80 THEN 'Platinum'
            WHEN cvs.CustomerValueScore >= 60 THEN 'Gold'
            WHEN cvs.CustomerValueScore >= 40 THEN 'Silver'
            ELSE 'Bronze'
        END AS CustomerTier,
        CASE
            WHEN cvs.RevenueRank <= (SELECT COUNT(*) * 0.01 FROM CustomerValueScoring) THEN 'Top 1%'
            WHEN cvs.RevenueRank <= (SELECT COUNT(*) * 0.05 FROM CustomerValueScoring) THEN 'Top 5%'
            WHEN cvs.RevenueRank <= (SELECT COUNT(*) * 0.10 FROM CustomerValueScoring) THEN 'Top 10%'
            WHEN cvs.RevenueRank <= (SELECT COUNT(*) * 0.25 FROM CustomerValueScoring) THEN 'Top 25%'
            ELSE 'Below Top 25%'
        END AS RevenuePercentile
    FROM CustomerValueScoring cvs
),

DemographicByTier AS (
    SELECT
        ct.CustomerTier,
        COUNT(*) AS CustomerCount,
        ROUND(AVG(ct.YearlyIncome), 2) AS AvgYearlyIncome,
        ROUND(AVG(ct.LifetimeRevenue), 2) AS AvgLifetimeRevenue,
        ROUND(AVG(ct.LifetimeGrossProfit), 2) AS AvgLifetimeGrossProfit,
        ROUND(AVG(ct.CustomerTenureYears), 2) AS AvgTenureYears,
        ROUND(AVG(ct.TotalOrders), 2) AS AvgTotalOrders,
        ROUND(AVG(ct.AvgOrderValue), 2) AS AvgOrderValue,
        ROUND(AVG(ct.DaysSinceLastPurchase), 2) AS AvgDaysSinceLastPurchase,
        ROUND(SUM(ct.LifetimeRevenue), 2) AS TotalTierRevenue,
        ROUND(100.0 * SUM(ct.LifetimeRevenue) / (SELECT SUM(LifetimeRevenue) FROM CustomerTiering), 2) AS TierRevenueSharePct,
        MODE(ct.Education) AS MostCommonEducation,
        MODE(ct.Occupation) AS MostCommonOccupation
    FROM CustomerTiering ct
    GROUP BY ct.CustomerTier
)

SELECT
    ct.CustomerKey,
    ct.CustomerName,
    ct.CustomerTier,
    ct.RevenuePercentile,
    ct.CustomerValueScore,
    ct.Country,
    ct.State,
    ct.SalesTerritoryRegion,
    ct.YearlyIncome,
    ct.Education,
    ct.Occupation,
    ct.Gender,
    ct.MaritalStatus,
    ct.FirstPurchaseDate,
    ct.LastPurchaseDate,
    ct.CustomerTenureYears,
    ct.DaysSinceLastPurchase,
    ct.TotalOrders,
    ct.LifetimeRevenue,
    ct.LifetimeGrossProfit,
    ct.LifetimeMarginPct,
    ct.AvgOrderValue,
    ct.AvgLineItemValue,
    ct.RevenuePerYear,
    ct.ProfitPerYear,
    ct.OrdersPerYear,
    ct.AvgDaysBetweenOrders,
    ct.TotalDiscounts,
    ct.AvgDiscountPct,
    ct.UniqueCategories,
    ct.UniqueProducts,
    ct.RecencyScore,
    ct.FrequencyQuintile,
    ct.MonetaryQuintile,
    ct.RevenueRank,
    ct.ProfitRank
FROM CustomerTiering ct
ORDER BY ct.CustomerValueScore DESC, ct.LifetimeRevenue DESC;

"""
db.sql_print(sql)

┌─────────────┬───────────────────┬──────────────┬───────────────────┬────────────────────┬───────────────┬──────────────────┬──────────────────────┬───────────────┬─────────────────────┬────────────────┬─────────┬───────────────┬─────────────────────┬─────────────────────┬─────────────────────┬───────────────────────┬─────────────┬─────────────────┬─────────────────────┬───────────────────┬───────────────┬──────────────────┬────────────────┬───────────────┬───────────────┬──────────────────────┬────────────────┬────────────────┬──────────────────┬────────────────┬──────────────┬───────────────────┬──────────────────┬─────────────┬────────────┐
│ CustomerKey │   CustomerName    │ CustomerTier │ RevenuePercentile │ CustomerValueScore │    Country    │      State       │ SalesTerritoryRegion │ YearlyIncome  │      Education      │   Occupation   │ Gender  │ MaritalStatus │  FirstPurchaseDate  │  LastPurchaseDate   │ CustomerTenureYears │ DaysSinceLastPurchase │ TotalOrders │ LifetimeReve

## 02

In [14]:
sql = """
WITH CustomerPurchaseHistory AS (
    SELECT
        fis.CustomerKey,
        MIN(fis.OrderDate) AS FirstPurchaseDate,
        MAX(fis.OrderDate) AS LastPurchaseDate,
        COUNT(DISTINCT fis.SalesOrderNumber) AS TotalOrders,
        (CURRENT_DATE - MAX(fis.OrderDate)::DATE) AS DaysSinceLastPurchase,
        (MAX(fis.OrderDate)::DATE - MIN(fis.OrderDate)::DATE) AS CustomerTenureDays,
        ROUND(SUM(fis.SalesAmount), 2) AS TotalRevenue,
        ROUND(SUM(fis.SalesAmount - fis.TotalProductCost), 2) AS TotalGrossProfit,
        ROUND(AVG(fis.SalesAmount), 2) AS AvgTransactionValue
    FROM FactInternetSales fis
    GROUP BY fis.CustomerKey
),

RFMScores AS (
    SELECT
        cph.CustomerKey,
        c.FirstName || ' ' || c.LastName AS CustomerName,
        c.YearlyIncome,
        c.EnglishEducation AS Education,
        c.EnglishOccupation AS Occupation,
        g.EnglishCountryRegionName AS Country,
        st.SalesTerritoryRegion,
        cph.FirstPurchaseDate,
        cph.LastPurchaseDate,
        cph.DaysSinceLastPurchase,
        cph.CustomerTenureDays,
        cph.TotalOrders,
        cph.TotalRevenue,
        cph.TotalGrossProfit,
        cph.AvgTransactionValue,
        -- Recency Score (1-5, 5 is best/most recent)
        CASE
            WHEN cph.DaysSinceLastPurchase <= 60 THEN 5
            WHEN cph.DaysSinceLastPurchase <= 120 THEN 4
            WHEN cph.DaysSinceLastPurchase <= 240 THEN 3
            WHEN cph.DaysSinceLastPurchase <= 480 THEN 2
            ELSE 1
        END AS RecencyScore,
        -- Frequency Score (1-5, 5 is best/most frequent)
        NTILE(5) OVER (ORDER BY cph.TotalOrders ASC) AS FrequencyScore,
        -- Monetary Score (1-5, 5 is best/highest value)
        NTILE(5) OVER (ORDER BY cph.TotalRevenue ASC) AS MonetaryScore
    FROM CustomerPurchaseHistory cph
    INNER JOIN DimCustomer c ON cph.CustomerKey = c.CustomerKey
    INNER JOIN DimGeography g ON c.GeographyKey = g.GeographyKey
    INNER JOIN DimSalesTerritory st ON g.SalesTerritoryKey = st.SalesTerritoryKey
),

RFMSegmentation AS (
    SELECT
        rfm.*,
        -- Combined RFM score (concatenated for segment definition)
        CAST(rfm.RecencyScore AS TEXT) || CAST(rfm.FrequencyScore AS TEXT) || CAST(rfm.MonetaryScore AS TEXT) AS RFMString,
        -- Average RFM score
        ROUND((rfm.RecencyScore + rfm.FrequencyScore + rfm.MonetaryScore) / 3.0, 2) AS AvgRFMScore,
        -- Segment classification based on RFM patterns
        CASE
            -- Champions: High R, F, M
            WHEN rfm.RecencyScore >= 4 AND rfm.FrequencyScore >= 4 AND rfm.MonetaryScore >= 4 THEN 'Champions'
            -- Loyal Customers: High F, M but moderate R
            WHEN rfm.FrequencyScore >= 4 AND rfm.MonetaryScore >= 4 AND rfm.RecencyScore >= 3 THEN 'Loyal Customers'
            -- Potential Loyalists: Recent, moderate frequency and monetary
            WHEN rfm.RecencyScore >= 4 AND rfm.FrequencyScore >= 3 AND rfm.MonetaryScore >= 3 THEN 'Potential Loyalists'
            -- New Customers: High recency, low frequency
            WHEN rfm.RecencyScore >= 4 AND rfm.FrequencyScore <= 2 THEN 'New Customers'
            -- Promising: Recent, low-moderate F and M
            WHEN rfm.RecencyScore >= 4 AND rfm.FrequencyScore <= 3 AND rfm.MonetaryScore <= 3 THEN 'Promising'
            -- Need Attention: Moderate across board
            WHEN rfm.RecencyScore = 3 AND rfm.FrequencyScore >= 3 AND rfm.MonetaryScore >= 3 THEN 'Need Attention'
            -- About To Sleep: Declining recency, was active
            WHEN rfm.RecencyScore = 2 AND rfm.FrequencyScore >= 3 AND rfm.MonetaryScore >= 3 THEN 'About To Sleep'
            -- At Risk: Low recency, was valuable
            WHEN rfm.RecencyScore <= 2 AND rfm.FrequencyScore >= 4 AND rfm.MonetaryScore >= 4 THEN 'At Risk'
            -- Cannot Lose Them: Lowest recency, high historical value
            WHEN rfm.RecencyScore = 1 AND rfm.FrequencyScore >= 4 AND rfm.MonetaryScore >= 4 THEN 'Cannot Lose Them'
            -- Hibernating: Low recency, moderate F and M
            WHEN rfm.RecencyScore <= 2 AND rfm.FrequencyScore <= 3 AND rfm.MonetaryScore <= 3 THEN 'Hibernating'
            -- Lost: Lowest across all dimensions
            WHEN rfm.RecencyScore = 1 AND rfm.FrequencyScore <= 2 AND rfm.MonetaryScore <= 2 THEN 'Lost'
            ELSE 'Others'
        END AS RFMSegment
    FROM RFMScores rfm
),

SegmentCharacteristics AS (
    SELECT
        rfms.RFMSegment,
        COUNT(*) AS CustomerCount,
        ROUND(100.0 * COUNT(*) / (SELECT COUNT(*) FROM RFMSegmentation), 2) AS SegmentPct,
        ROUND(AVG(rfms.RecencyScore), 2) AS AvgRecencyScore,
        ROUND(AVG(rfms.FrequencyScore), 2) AS AvgFrequencyScore,
        ROUND(AVG(rfms.MonetaryScore), 2) AS AvgMonetaryScore,
        ROUND(AVG(rfms.DaysSinceLastPurchase), 0) AS AvgDaysSinceLastPurchase,
        ROUND(AVG(rfms.TotalOrders), 2) AS AvgTotalOrders,
        ROUND(AVG(rfms.TotalRevenue), 2) AS AvgLifetimeRevenue,
        ROUND(AVG(rfms.TotalGrossProfit), 2) AS AvgLifetimeProfit,
        ROUND(SUM(rfms.TotalRevenue), 2) AS TotalSegmentRevenue,
        ROUND(100.0 * SUM(rfms.TotalRevenue) / (SELECT SUM(TotalRevenue) FROM RFMSegmentation), 2) AS RevenueSharePct,
        ROUND(AVG(rfms.YearlyIncome), 2) AS AvgYearlyIncome,
        MIN(rfms.TotalRevenue) AS MinLifetimeRevenue,
        MAX(rfms.TotalRevenue) AS MaxLifetimeRevenue
    FROM RFMSegmentation rfms
    GROUP BY rfms.RFMSegment
),

MarketingRecommendations AS (
    SELECT
        rfms.CustomerKey,
        rfms.CustomerName,
        rfms.RFMSegment,
        rfms.RecencyScore,
        rfms.FrequencyScore,
        rfms.MonetaryScore,
        rfms.AvgRFMScore,
        rfms.DaysSinceLastPurchase,
        rfms.TotalOrders,
        rfms.TotalRevenue,
        rfms.TotalGrossProfit,
        rfms.Country,
        rfms.SalesTerritoryRegion,
        rfms.YearlyIncome,
        -- Marketing action recommendations
        CASE
            WHEN rfms.RFMSegment = 'Champions' THEN 'Reward with VIP benefits, early access, exclusive offers'
            WHEN rfms.RFMSegment = 'Loyal Customers' THEN 'Upsell premium products, loyalty program, referral incentives'
            WHEN rfms.RFMSegment = 'Potential Loyalists' THEN 'Nurture with membership offers, personalized recommendations'
            WHEN rfms.RFMSegment = 'New Customers' THEN 'Onboarding campaigns, product education, welcome discounts'
            WHEN rfms.RFMSegment = 'Promising' THEN 'Cross-sell campaigns, bundle offers, engagement emails'
            WHEN rfms.RFMSegment = 'Need Attention' THEN 'Re-engagement campaigns, limited-time offers, feedback surveys'
            WHEN rfms.RFMSegment = 'About To Sleep' THEN 'Win-back campaigns, personalized discounts, reminder emails'
            WHEN rfms.RFMSegment = 'At Risk' THEN 'Urgent win-back offers, satisfaction surveys, retention discounts'
            WHEN rfms.RFMSegment = 'Cannot Lose Them' THEN 'HIGH PRIORITY: Executive outreach, special recovery offers'
            WHEN rfms.RFMSegment = 'Hibernating' THEN 'Low-cost reactivation, seasonal promotions, product updates'
            WHEN rfms.RFMSegment = 'Lost' THEN 'Minimal investment, brand awareness only, or exclude from campaigns'
            ELSE 'Standard marketing communications'
        END AS MarketingAction,
        -- Priority level
        CASE
            WHEN rfms.RFMSegment IN ('Champions', 'Loyal Customers', 'Cannot Lose Them') THEN 'High Priority'
            WHEN rfms.RFMSegment IN ('Potential Loyalists', 'At Risk', 'Need Attention') THEN 'Medium Priority'
            WHEN rfms.RFMSegment IN ('New Customers', 'Promising', 'About To Sleep') THEN 'Moderate Priority'
            ELSE 'Low Priority'
        END AS MarketingPriority,
        -- Expected ROI category
        CASE
            WHEN rfms.RFMSegment IN ('Champions', 'Loyal Customers', 'Potential Loyalists') THEN 'High ROI Expected'
            WHEN rfms.RFMSegment IN ('New Customers', 'Promising', 'At Risk', 'Cannot Lose Them') THEN 'Medium ROI Expected'
            ELSE 'Low ROI Expected'
        END AS ExpectedROI
    FROM RFMSegmentation rfms
)

SELECT
    mr.CustomerKey,
    mr.CustomerName,
    mr.RFMSegment,
    mr.RecencyScore,
    mr.FrequencyScore,
    mr.MonetaryScore,
    mr.AvgRFMScore,
    mr.DaysSinceLastPurchase,
    mr.TotalOrders,
    mr.TotalRevenue,
    mr.TotalGrossProfit,
    mr.Country,
    mr.SalesTerritoryRegion,
    mr.YearlyIncome,
    mr.MarketingAction,
    mr.MarketingPriority,
    mr.ExpectedROI
FROM MarketingRecommendations mr
ORDER BY
    CASE mr.RFMSegment
        WHEN 'Champions' THEN 1
        WHEN 'Loyal Customers' THEN 2
        WHEN 'Cannot Lose Them' THEN 3
        WHEN 'At Risk' THEN 4
        WHEN 'Potential Loyalists' THEN 5
        WHEN 'Need Attention' THEN 6
        WHEN 'About To Sleep' THEN 7
        WHEN 'New Customers' THEN 8
        WHEN 'Promising' THEN 9
        WHEN 'Hibernating' THEN 10
        WHEN 'Lost' THEN 11
        ELSE 12
    END,
    mr.TotalRevenue DESC;
"""
db.sql_print(sql)

┌─────────────┬───────────────────┬─────────────┬──────────────┬────────────────┬───────────────┬─────────────┬───────────────────────┬─────────────┬───────────────┬──────────────────┬───────────────┬──────────────────────┬───────────────┬───────────────────────────────────────────────────────────────────┬───────────────────┬─────────────────────┐
│ CustomerKey │   CustomerName    │ RFMSegment  │ RecencyScore │ FrequencyScore │ MonetaryScore │ AvgRFMScore │ DaysSinceLastPurchase │ TotalOrders │ TotalRevenue  │ TotalGrossProfit │    Country    │ SalesTerritoryRegion │ YearlyIncome  │                          MarketingAction                          │ MarketingPriority │     ExpectedROI     │
│    int32    │      varchar      │   varchar   │    int32     │     int64      │     int64     │   double    │         int64         │    int64    │ decimal(38,2) │  decimal(38,2)   │    varchar    │       varchar        │ decimal(13,2) │                              varchar                        

## 03

In [16]:
sql = """
WITH CustomerPurchaseTimeline AS (
    SELECT
        fis.CustomerKey,
        fis.OrderDate,
        fis.SalesOrderNumber,
        fis.SalesAmount,
        fis.SalesAmount - fis.TotalProductCost AS GrossProfit,
        ROW_NUMBER() OVER (PARTITION BY fis.CustomerKey ORDER BY fis.OrderDate) AS PurchaseSequence,
        COUNT(DISTINCT fis.SalesOrderNumber) OVER (PARTITION BY fis.CustomerKey ORDER BY fis.OrderDate) AS CumulativeOrders,
        SUM(fis.SalesAmount) OVER (PARTITION BY fis.CustomerKey ORDER BY fis.OrderDate) AS CumulativeRevenue,
        LAG(fis.OrderDate) OVER (PARTITION BY fis.CustomerKey ORDER BY fis.OrderDate) AS PreviousOrderDate,
        (fis.OrderDate::DATE - (LAG(fis.OrderDate) OVER (PARTITION BY fis.CustomerKey ORDER BY fis.OrderDate))::DATE) AS DaysSincePreviousOrder
    FROM FactInternetSales fis
),

CustomerCurrentState AS (
    SELECT
        cpt.CustomerKey,
        MIN(cpt.OrderDate) AS FirstPurchaseDate,
        MAX(cpt.OrderDate) AS LastPurchaseDate,
        (CURRENT_DATE - MIN(cpt.OrderDate)::DATE) AS CustomerAgeDays,
        (CURRENT_DATE - MAX(cpt.OrderDate)::DATE) AS DaysSinceLastPurchase,
        (MAX(cpt.OrderDate)::DATE - MIN(cpt.OrderDate)::DATE) AS ActiveLifespanDays,
        COUNT(DISTINCT cpt.SalesOrderNumber) AS TotalOrders,
        MAX(cpt.CumulativeOrders) AS MaxOrders,
        ROUND(SUM(cpt.SalesAmount), 2) AS TotalRevenue,
        ROUND(SUM(cpt.GrossProfit), 2) AS TotalGrossProfit,
        ROUND(AVG(cpt.SalesAmount), 2) AS AvgTransactionValue,
        ROUND(AVG(cpt.DaysSincePreviousOrder), 2) AS AvgDaysBetweenOrders,
        ROUND(CAST((MAX(cpt.OrderDate)::DATE - MIN(cpt.OrderDate)::DATE) AS DOUBLE) / NULLIF(COUNT(DISTINCT cpt.SalesOrderNumber) - 1, 0), 2) AS AvgPurchaseCycleDays
    FROM CustomerPurchaseTimeline cpt
    GROUP BY cpt.CustomerKey
),

LifecycleStageAssignment AS (
    SELECT
        ccs.CustomerKey,
        c.FirstName || ' ' || c.LastName AS CustomerName,
        c.YearlyIncome,
        c.EnglishEducation AS Education,
        c.EnglishOccupation AS Occupation,
        g.EnglishCountryRegionName AS Country,
        st.SalesTerritoryRegion,
        ccs.FirstPurchaseDate,
        ccs.LastPurchaseDate,
        ccs.CustomerAgeDays,
        ROUND(ccs.CustomerAgeDays / 365.25, 2) AS CustomerAgeYears,
        ccs.DaysSinceLastPurchase,
        ccs.ActiveLifespanDays,
        ccs.TotalOrders,
        ccs.TotalRevenue,
        ccs.TotalGrossProfit,
        ccs.AvgTransactionValue,
        ccs.AvgDaysBetweenOrders,
        ccs.AvgPurchaseCycleDays,
        -- Lifecycle Stage Logic
        CASE
            -- Churned: No purchase in over 1 year
            WHEN ccs.DaysSinceLastPurchase > 365 THEN 'Churned'
            -- At-Risk: No purchase in 6-12 months, had been active
            WHEN ccs.DaysSinceLastPurchase > 180 AND ccs.TotalOrders >= 3 THEN 'At-Risk'
            -- New: Less than 90 days old, 1-2 orders
            WHEN ccs.CustomerAgeDays <= 90 AND ccs.TotalOrders <= 2 THEN 'New'
            -- Developing: 90-180 days old OR 2-4 orders
            WHEN (ccs.CustomerAgeDays BETWEEN 91 AND 180) OR (ccs.TotalOrders BETWEEN 2 AND 4) THEN 'Developing'
            -- Growing: 180-365 days old OR 5-10 orders
            WHEN (ccs.CustomerAgeDays BETWEEN 181 AND 365) OR (ccs.TotalOrders BETWEEN 5 AND 10) THEN 'Growing'
            -- Mature: Over 1 year old AND 11+ orders
            WHEN ccs.CustomerAgeDays > 365 AND ccs.TotalOrders >= 11 AND ccs.DaysSinceLastPurchase <= 180 THEN 'Mature'
            -- Inactive: Doesn't fit other categories, low engagement
            ELSE 'Inactive'
        END AS LifecycleStage
    FROM CustomerCurrentState ccs
    INNER JOIN DimCustomer c ON ccs.CustomerKey = c.CustomerKey
    INNER JOIN DimGeography g ON c.GeographyKey = g.GeographyKey
    INNER JOIN DimSalesTerritory st ON g.SalesTerritoryKey = st.SalesTerritoryKey
),

StageCharacteristics AS (
    SELECT
        lsa.LifecycleStage,
        COUNT(*) AS CustomerCount,
        ROUND(100.0 * COUNT(*) / (SELECT COUNT(*) FROM LifecycleStageAssignment), 2) AS StagePct,
        ROUND(AVG(lsa.CustomerAgeYears), 2) AS AvgCustomerAgeYears,
        ROUND(AVG(lsa.DaysSinceLastPurchase), 0) AS AvgDaysSinceLastPurchase,
        ROUND(AVG(lsa.TotalOrders), 2) AS AvgTotalOrders,
        ROUND(AVG(lsa.TotalRevenue), 2) AS AvgLifetimeRevenue,
        ROUND(AVG(lsa.TotalGrossProfit), 2) AS AvgLifetimeProfit,
        ROUND(AVG(lsa.AvgTransactionValue), 2) AS AvgTransactionValue,
        ROUND(AVG(lsa.AvgDaysBetweenOrders), 0) AS AvgDaysBetweenOrders,
        ROUND(SUM(lsa.TotalRevenue), 2) AS TotalStageRevenue,
        ROUND(100.0 * SUM(lsa.TotalRevenue) / (SELECT SUM(TotalRevenue) FROM LifecycleStageAssignment), 2) AS RevenueSharePct,
        ROUND(AVG(lsa.YearlyIncome), 2) AS AvgYearlyIncome
    FROM LifecycleStageAssignment lsa
    GROUP BY lsa.LifecycleStage
),

StageTransitionOpportunities AS (
    SELECT
        lsa.CustomerKey,
        lsa.CustomerName,
        lsa.LifecycleStage,
        lsa.CustomerAgeYears,
        lsa.DaysSinceLastPurchase,
        lsa.TotalOrders,
        lsa.TotalRevenue,
        lsa.TotalGrossProfit,
        lsa.AvgDaysBetweenOrders,
        -- Next stage and requirements
        CASE lsa.LifecycleStage
            WHEN 'New' THEN 'Developing'
            WHEN 'Developing' THEN 'Growing'
            WHEN 'Growing' THEN 'Mature'
            WHEN 'Mature' THEN 'Retain as Mature'
            WHEN 'At-Risk' THEN 'Reactivate to Growing'
            WHEN 'Churned' THEN 'Win Back to New'
            WHEN 'Inactive' THEN 'Activate to Developing'
        END AS NextTargetStage,
        -- Gap to next stage
        CASE lsa.LifecycleStage
            WHEN 'New' THEN CONCAT(GREATEST(0, 3 - lsa.TotalOrders), ' more orders OR ', GREATEST(0, 91 - lsa.CustomerAgeDays), ' more days')
            WHEN 'Developing' THEN CONCAT(GREATEST(0, 5 - lsa.TotalOrders), ' more orders needed for Growing stage')
            WHEN 'Growing' THEN CONCAT(GREATEST(0, 11 - lsa.TotalOrders), ' more orders needed for Mature stage')
            WHEN 'At-Risk' THEN CONCAT('Purchase within ', GREATEST(0, 180 - lsa.DaysSinceLastPurchase), ' days to avoid churn')
            WHEN 'Churned' THEN 'Win-back campaign required'
            ELSE 'N/A'
        END AS StageProgressionGap,
        -- Recommended action
        CASE lsa.LifecycleStage
            WHEN 'New' THEN 'Onboarding campaign: Product education, repeat purchase incentive'
            WHEN 'Developing' THEN 'Engagement campaign: Cross-sell, loyalty program enrollment'
            WHEN 'Growing' THEN 'Expansion campaign: Premium tiers, volume discounts'
            WHEN 'Mature' THEN 'Retention campaign: VIP benefits, exclusive access'
            WHEN 'At-Risk' THEN 'URGENT: Re-engagement campaign, win-back offer'
            WHEN 'Churned' THEN 'Win-back campaign: Reactivation incentive'
            WHEN 'Inactive' THEN 'Activation campaign: Limited-time promotion'
        END AS RecommendedAction,
        RANK() OVER (PARTITION BY lsa.LifecycleStage ORDER BY lsa.TotalRevenue DESC) AS StageRevenueRank
    FROM LifecycleStageAssignment lsa
)

SELECT
    sto.CustomerKey,
    sto.CustomerName,
    sto.LifecycleStage,
    sto.CustomerAgeYears,
    sto.DaysSinceLastPurchase,
    sto.TotalOrders,
    sto.TotalRevenue,
    sto.TotalGrossProfit,
    sto.AvgDaysBetweenOrders,
    sto.NextTargetStage,
    sto.StageProgressionGap,
    sto.RecommendedAction,
    sto.StageRevenueRank,
    CASE
        WHEN sto.LifecycleStage = 'At-Risk' THEN 'High'
        WHEN sto.LifecycleStage IN ('New', 'Developing') THEN 'Medium'
        WHEN sto.LifecycleStage = 'Churned' THEN 'Low'
        ELSE 'Stable'
    END AS InterventionPriority
FROM StageTransitionOpportunities sto
ORDER BY
    CASE sto.LifecycleStage
        WHEN 'At-Risk' THEN 1
        WHEN 'Mature' THEN 2
        WHEN 'Growing' THEN 3
        WHEN 'Developing' THEN 4
        WHEN 'New' THEN 5
        WHEN 'Inactive' THEN 6
        WHEN 'Churned' THEN 7
    END,
    sto.TotalRevenue DESC;
"""
db.sql_print(sql)

┌─────────────┬───────────────────┬────────────────┬──────────────────┬───────────────────────┬─────────────┬───────────────┬──────────────────┬──────────────────────┬─────────────────┬────────────────────────────┬───────────────────────────────────────────┬──────────────────┬──────────────────────┐
│ CustomerKey │   CustomerName    │ LifecycleStage │ CustomerAgeYears │ DaysSinceLastPurchase │ TotalOrders │ TotalRevenue  │ TotalGrossProfit │ AvgDaysBetweenOrders │ NextTargetStage │    StageProgressionGap     │             RecommendedAction             │ StageRevenueRank │ InterventionPriority │
│    int32    │      varchar      │    varchar     │      double      │         int64         │    int64    │ decimal(38,2) │  decimal(38,2)   │        double        │     varchar     │          varchar           │                  varchar                  │      int64       │       varchar        │
├─────────────┼───────────────────┼────────────────┼──────────────────┼───────────────────────┼──

## 04

In [17]:
sql = """
WITH CustomerValueTier AS (
    SELECT
        fis.CustomerKey,
        ROUND(SUM(fis.SalesAmount), 2) AS LifetimeRevenue,
        COUNT(DISTINCT fis.SalesOrderNumber) AS TotalOrders,
        NTILE(4) OVER (ORDER BY SUM(fis.SalesAmount) DESC) AS ValueQuartile,
        CASE
            WHEN NTILE(4) OVER (ORDER BY SUM(fis.SalesAmount) DESC) = 1 THEN 'High Value'
            WHEN NTILE(4) OVER (ORDER BY SUM(fis.SalesAmount) DESC) = 2 THEN 'Medium-High Value'
            WHEN NTILE(4) OVER (ORDER BY SUM(fis.SalesAmount) DESC) = 3 THEN 'Medium-Low Value'
            ELSE 'Low Value'
        END AS ValueTier
    FROM FactInternetSales fis
    GROUP BY fis.CustomerKey
),

CustomerCategoryPurchases AS (
    SELECT
        fis.CustomerKey,
        pc.EnglishProductCategoryName AS CategoryName,
        MIN(fis.OrderDate) AS FirstCategoryPurchaseDate,
        MAX(fis.OrderDate) AS LastCategoryPurchaseDate,
        COUNT(DISTINCT fis.SalesOrderNumber) AS CategoryOrders,
        COUNT(*) AS CategoryLineItems,
        ROUND(SUM(fis.SalesAmount), 2) AS CategoryRevenue,
        ROUND(SUM(fis.SalesAmount - fis.TotalProductCost), 2) AS CategoryGrossProfit,
        COUNT(DISTINCT fis.ProductKey) AS UniqueProductsInCategory
    FROM FactInternetSales fis
    INNER JOIN DimProduct p ON fis.ProductKey = p.ProductKey
    LEFT JOIN DimProductSubcategory psc ON p.ProductSubcategoryKey = psc.ProductSubcategoryKey
    LEFT JOIN DimProductCategory pc ON psc.ProductCategoryKey = pc.ProductCategoryKey
    WHERE pc.EnglishProductCategoryName IS NOT NULL
    GROUP BY fis.CustomerKey, pc.EnglishProductCategoryName
),

AllCategories AS (
    SELECT DISTINCT EnglishProductCategoryName AS CategoryName
    FROM DimProductCategory
    WHERE EnglishProductCategoryName IS NOT NULL
),

CustomerCategoryMatrix AS (
    SELECT
        cvt.CustomerKey,
        cvt.ValueTier,
        cvt.LifetimeRevenue,
        cvt.TotalOrders,
        ac.CategoryName,
        COALESCE(ccp.CategoryOrders, 0) AS CategoryOrders,
        COALESCE(ccp.CategoryRevenue, 0) AS CategoryRevenue,
        COALESCE(ccp.CategoryGrossProfit, 0) AS CategoryGrossProfit,
        CASE WHEN ccp.CustomerKey IS NOT NULL THEN 1 ELSE 0 END AS HasPurchasedCategory,
        ccp.FirstCategoryPurchaseDate,
        ccp.LastCategoryPurchaseDate
    FROM CustomerValueTier cvt
    CROSS JOIN AllCategories ac
    LEFT JOIN CustomerCategoryPurchases ccp
        ON cvt.CustomerKey = ccp.CustomerKey
        AND ac.CategoryName = ccp.CategoryName
),

CustomerCategorySummary AS (
    SELECT
        ccm.CustomerKey,
        c.FirstName || ' ' || c.LastName AS CustomerName,
        ccm.ValueTier,
        ccm.LifetimeRevenue,
        ccm.TotalOrders,
        c.YearlyIncome,
        g.EnglishCountryRegionName AS Country,
        st.SalesTerritoryRegion,
        COUNT(*) AS TotalCategories,
        SUM(ccm.HasPurchasedCategory) AS CategoriesPurchased,
        COUNT(*) - SUM(ccm.HasPurchasedCategory) AS CategoriesNotPurchased,
        ROUND(100.0 * SUM(ccm.HasPurchasedCategory) / COUNT(*), 2) AS CategoryPenetrationPct,
        STRING_AGG(CASE WHEN ccm.HasPurchasedCategory = 1 THEN ccm.CategoryName END, ', ') AS PurchasedCategories,
        STRING_AGG(CASE WHEN ccm.HasPurchasedCategory = 0 THEN ccm.CategoryName END, ', ') AS UnpurchasedCategories
    FROM CustomerCategoryMatrix ccm
    INNER JOIN DimCustomer c ON ccm.CustomerKey = c.CustomerKey
    INNER JOIN DimGeography g ON c.GeographyKey = g.GeographyKey
    INNER JOIN DimSalesTerritory st ON g.SalesTerritoryKey = st.SalesTerritoryKey
    GROUP BY ccm.CustomerKey, c.FirstName, c.LastName, ccm.ValueTier, ccm.LifetimeRevenue,
             ccm.TotalOrders, c.YearlyIncome, g.EnglishCountryRegionName, st.SalesTerritoryRegion
),

CategoryAffinityPatterns AS (
    -- Find which categories are commonly purchased together
    SELECT
        ccp1.CategoryName AS Category1,
        ccp2.CategoryName AS Category2,
        COUNT(DISTINCT ccp1.CustomerKey) AS CustomerCount,
        ROUND(AVG(ccp1.CategoryRevenue + ccp2.CategoryRevenue), 2) AS AvgCombinedRevenue
    FROM CustomerCategoryPurchases ccp1
    INNER JOIN CustomerCategoryPurchases ccp2
        ON ccp1.CustomerKey = ccp2.CustomerKey
        AND ccp1.CategoryName < ccp2.CategoryName
    GROUP BY ccp1.CategoryName, ccp2.CategoryName
    HAVING COUNT(DISTINCT ccp1.CustomerKey) >= 10
),

CrossSellOpportunities AS (
    SELECT
        ccm.CustomerKey,
        ccs.CustomerName,
        ccs.ValueTier,
        ccs.LifetimeRevenue,
        ccs.TotalOrders,
        ccs.YearlyIncome,
        ccs.Country,
        ccs.CategoryPenetrationPct,
        ccm.CategoryName AS OpportunityCategory,
        cap.Category1 AS OwnedCategory,
        cap.CustomerCount AS AffinityStrength,
        cap.AvgCombinedRevenue AS PotentialRevenue,
        RANK() OVER (PARTITION BY ccm.CustomerKey ORDER BY cap.CustomerCount DESC, cap.AvgCombinedRevenue DESC) AS OpportunityRank
    FROM CustomerCategoryMatrix ccm
    INNER JOIN CustomerCategorySummary ccs ON ccm.CustomerKey = ccs.CustomerKey
    INNER JOIN CategoryAffinityPatterns cap
        ON ccm.CategoryName = cap.Category2
        AND ccm.HasPurchasedCategory = 0
    INNER JOIN CustomerCategoryMatrix ccm_owned
        ON ccm.CustomerKey = ccm_owned.CustomerKey
        AND cap.Category1 = ccm_owned.CategoryName
        AND ccm_owned.HasPurchasedCategory = 1
    WHERE ccs.ValueTier IN ('High Value', 'Medium-High Value')
),

TopCrossSellOpportunities AS (
    SELECT
        cso.CustomerKey,
        cso.CustomerName,
        cso.ValueTier,
        cso.LifetimeRevenue,
        cso.TotalOrders,
        cso.YearlyIncome,
        cso.Country,
        cso.CategoryPenetrationPct,
        cso.OpportunityCategory AS RecommendedCategory,
        cso.OwnedCategory AS BasedOnOwnership,
        cso.AffinityStrength,
        cso.PotentialRevenue,
        cso.OpportunityRank,
        CASE
            WHEN cso.OpportunityRank = 1 THEN 'Top Priority'
            WHEN cso.OpportunityRank <= 3 THEN 'High Priority'
            WHEN cso.OpportunityRank <= 5 THEN 'Medium Priority'
            ELSE 'Low Priority'
        END AS CrossSellPriority,
        CASE
            WHEN cso.ValueTier = 'High Value' AND cso.OpportunityRank = 1 THEN 'Personalized outreach with premium offer'
            WHEN cso.ValueTier = 'High Value' THEN 'Targeted email with category-specific benefits'
            WHEN cso.ValueTier = 'Medium-High Value' THEN 'Automated campaign with bundle discount'
            ELSE 'Include in general cross-sell communications'
        END AS RecommendedApproach
    FROM CrossSellOpportunities cso
    WHERE cso.OpportunityRank <= 5
)

SELECT
    tcso.CustomerKey,
    tcso.CustomerName,
    tcso.ValueTier,
    tcso.LifetimeRevenue,
    tcso.TotalOrders,
    tcso.YearlyIncome,
    tcso.Country,
    tcso.CategoryPenetrationPct,
    tcso.RecommendedCategory,
    tcso.BasedOnOwnership,
    tcso.AffinityStrength,
    tcso.PotentialRevenue,
    tcso.OpportunityRank,
    tcso.CrossSellPriority,
    tcso.RecommendedApproach
FROM TopCrossSellOpportunities tcso
ORDER BY tcso.ValueTier, tcso.LifetimeRevenue DESC, tcso.OpportunityRank;
"""
db.sql_print(sql)

┌─────────────┬───────────────────┬───────────────────┬─────────────────┬─────────────┬───────────────┬────────────────┬────────────────────────┬─────────────────────┬──────────────────┬──────────────────┬──────────────────┬─────────────────┬───────────────────┬────────────────────────────────────────────────┐
│ CustomerKey │   CustomerName    │     ValueTier     │ LifetimeRevenue │ TotalOrders │ YearlyIncome  │    Country     │ CategoryPenetrationPct │ RecommendedCategory │ BasedOnOwnership │ AffinityStrength │ PotentialRevenue │ OpportunityRank │ CrossSellPriority │              RecommendedApproach               │
│    int32    │      varchar      │      varchar      │  decimal(38,2)  │    int64    │ decimal(13,2) │    varchar     │         double         │       varchar       │     varchar      │      int64       │      double      │      int64      │      varchar      │                    varchar                     │
├─────────────┼───────────────────┼───────────────────┼─────────

## 05

In [19]:
sql = """
WITH CustomerPurchaseMetrics AS (
    SELECT
        fis.CustomerKey,
        MIN(fis.OrderDate) AS FirstPurchaseDate,
        MAX(fis.OrderDate) AS LastPurchaseDate,
        COUNT(DISTINCT fis.SalesOrderNumber) AS TotalOrders,
        (CURRENT_DATE - MAX(fis.OrderDate)::DATE) AS DaysSinceLastPurchase,
        (MAX(fis.OrderDate)::DATE - MIN(fis.OrderDate)::DATE) AS CustomerTenureDays,
        ROUND(CAST((MAX(fis.OrderDate)::DATE - MIN(fis.OrderDate)::DATE) AS DOUBLE) / NULLIF(COUNT(DISTINCT fis.SalesOrderNumber) - 1, 0), 2) AS AvgDaysBetweenOrders,
        ROUND(SUM(fis.SalesAmount), 2) AS LifetimeRevenue,
        ROUND(SUM(fis.SalesAmount - fis.TotalProductCost), 2) AS LifetimeGrossProfit,
        ROUND(AVG(fis.SalesAmount), 2) AS AvgTransactionValue,
        ROUND(SUM(fis.DiscountAmount), 2) AS TotalDiscounts
    FROM FactInternetSales fis
    GROUP BY fis.CustomerKey
),

RecentActivityTrends AS (
    -- Compare recent 3 months vs. previous 3 months
    SELECT
        fis.CustomerKey,
        COUNT(DISTINCT CASE WHEN (CURRENT_DATE - fis.OrderDate::DATE) <= 90 THEN fis.SalesOrderNumber END) AS OrdersLast90Days,
        COUNT(DISTINCT CASE WHEN (CURRENT_DATE - fis.OrderDate::DATE) BETWEEN 91 AND 180 THEN fis.SalesOrderNumber END) AS OrdersPrevious90Days,
        ROUND(SUM(CASE WHEN (CURRENT_DATE - fis.OrderDate::DATE) <= 90 THEN fis.SalesAmount ELSE 0 END), 2) AS RevenueLast90Days,
        ROUND(SUM(CASE WHEN (CURRENT_DATE - fis.OrderDate::DATE) BETWEEN 91 AND 180 THEN fis.SalesAmount ELSE 0 END), 2) AS RevenuePrevious90Days
    FROM FactInternetSales fis
    GROUP BY fis.CustomerKey
),

ChurnRiskFactors AS (
    SELECT
        cpm.CustomerKey,
        c.FirstName || ' ' || c.LastName AS CustomerName,
        c.YearlyIncome,
        c.EnglishEducation AS Education,
        c.EnglishOccupation AS Occupation,
        g.EnglishCountryRegionName AS Country,
        st.SalesTerritoryRegion,
        cpm.FirstPurchaseDate,
        cpm.LastPurchaseDate,
        cpm.DaysSinceLastPurchase,
        cpm.CustomerTenureDays,
        ROUND(cpm.CustomerTenureDays / 365.25, 2) AS CustomerTenureYears,
        cpm.TotalOrders,
        cpm.AvgDaysBetweenOrders,
        cpm.LifetimeRevenue,
        cpm.LifetimeGrossProfit,
        cpm.AvgTransactionValue,
        rat.OrdersLast90Days,
        rat.OrdersPrevious90Days,
        rat.RevenueLast90Days,
        rat.RevenuePrevious90Days,
        -- Risk Factor 1: Recency Risk (0-35 points)
        CASE
            WHEN cpm.DaysSinceLastPurchase > cpm.AvgDaysBetweenOrders * 3 THEN 35
            WHEN cpm.DaysSinceLastPurchase > cpm.AvgDaysBetweenOrders * 2 THEN 25
            WHEN cpm.DaysSinceLastPurchase > cpm.AvgDaysBetweenOrders * 1.5 THEN 15
            WHEN cpm.DaysSinceLastPurchase > cpm.AvgDaysBetweenOrders THEN 5
            ELSE 0
        END AS RecencyRiskScore,
        -- Risk Factor 2: Activity Decline (0-30 points)
        CASE
            WHEN rat.OrdersLast90Days = 0 AND rat.OrdersPrevious90Days > 0 THEN 30
            WHEN rat.OrdersLast90Days < rat.OrdersPrevious90Days * 0.5 THEN 20
            WHEN rat.OrdersLast90Days < rat.OrdersPrevious90Days THEN 10
            ELSE 0
        END AS ActivityDeclineScore,
        -- Risk Factor 3: Revenue Decline (0-20 points)
        CASE
            WHEN rat.RevenueLast90Days = 0 AND rat.RevenuePrevious90Days > 0 THEN 20
            WHEN rat.RevenueLast90Days < rat.RevenuePrevious90Days * 0.5 THEN 15
            WHEN rat.RevenueLast90Days < rat.RevenuePrevious90Days THEN 8
            ELSE 0
        END AS RevenueDeclineScore,
        -- Risk Factor 4: Low Engagement (0-15 points)
        CASE
            WHEN cpm.TotalOrders <= 2 AND cpm.CustomerTenureDays > 365 THEN 15
            WHEN cpm.TotalOrders <= 3 AND cpm.CustomerTenureDays > 180 THEN 10
            WHEN cpm.TotalOrders <= 5 THEN 5
            ELSE 0
        END AS LowEngagementScore
    FROM CustomerPurchaseMetrics cpm
    LEFT JOIN RecentActivityTrends rat ON cpm.CustomerKey = rat.CustomerKey
    INNER JOIN DimCustomer c ON cpm.CustomerKey = c.CustomerKey
    INNER JOIN DimGeography g ON c.GeographyKey = g.GeographyKey
    INNER JOIN DimSalesTerritory st ON g.SalesTerritoryKey = st.SalesTerritoryKey
),

ChurnRiskScoring AS (
    SELECT
        crf.*,
        -- Total Churn Risk Score (0-100)
        crf.RecencyRiskScore + crf.ActivityDeclineScore + crf.RevenueDeclineScore + crf.LowEngagementScore AS TotalChurnRiskScore,
        -- Churn Risk Category
        CASE
            WHEN (crf.RecencyRiskScore + crf.ActivityDeclineScore + crf.RevenueDeclineScore + crf.LowEngagementScore) >= 70 THEN 'Critical Risk'
            WHEN (crf.RecencyRiskScore + crf.ActivityDeclineScore + crf.RevenueDeclineScore + crf.LowEngagementScore) >= 50 THEN 'High Risk'
            WHEN (crf.RecencyRiskScore + crf.ActivityDeclineScore + crf.RevenueDeclineScore + crf.LowEngagementScore) >= 30 THEN 'Moderate Risk'
            WHEN (crf.RecencyRiskScore + crf.ActivityDeclineScore + crf.RevenueDeclineScore + crf.LowEngagementScore) >= 15 THEN 'Low Risk'
            ELSE 'Healthy'
        END AS ChurnRiskCategory,
        -- Value Tier
        NTILE(4) OVER (ORDER BY crf.LifetimeRevenue DESC) AS ValueQuartile,
        CASE
            WHEN NTILE(4) OVER (ORDER BY crf.LifetimeRevenue DESC) = 1 THEN 'High Value'
            WHEN NTILE(4) OVER (ORDER BY crf.LifetimeRevenue DESC) = 2 THEN 'Medium-High Value'
            WHEN NTILE(4) OVER (ORDER BY crf.LifetimeRevenue DESC) = 3 THEN 'Medium-Low Value'
            ELSE 'Low Value'
        END AS ValueTier
    FROM ChurnRiskFactors crf
),

RetentionPrioritization AS (
    SELECT
        crs.CustomerKey,
        crs.CustomerName,
        crs.ValueTier,
        crs.ChurnRiskCategory,
        crs.TotalChurnRiskScore,
        crs.Country,
        crs.SalesTerritoryRegion,
        crs.YearlyIncome,
        crs.FirstPurchaseDate,
        crs.LastPurchaseDate,
        crs.DaysSinceLastPurchase,
        crs.CustomerTenureYears,
        crs.TotalOrders,
        crs.LifetimeRevenue,
        crs.LifetimeGrossProfit,
        crs.AvgTransactionValue,
        crs.OrdersLast90Days,
        crs.OrdersPrevious90Days,
        crs.RevenueLast90Days,
        crs.RevenuePrevious90Days,
        crs.RecencyRiskScore,
        crs.ActivityDeclineScore,
        crs.RevenueDeclineScore,
        crs.LowEngagementScore,
        -- Expected Annual Value (based on historical patterns)
        ROUND(crs.LifetimeRevenue / NULLIF(crs.CustomerTenureYears, 0), 2) AS ExpectedAnnualRevenue,
        -- Value at Risk (what we stand to lose)
        ROUND((crs.LifetimeRevenue / NULLIF(crs.CustomerTenureYears, 0)) * 2, 2) AS TwoYearValueAtRisk,
        -- Retention Priority Score (combines risk and value)
        ROUND(
            (crs.TotalChurnRiskScore * 0.6) +
            (crs.ValueQuartile * 10 * 0.4)
        , 2) AS RetentionPriorityScore,
        -- Recommended retention investment
        CASE
            WHEN crs.ValueTier = 'High Value' AND crs.ChurnRiskCategory IN ('Critical Risk', 'High Risk') THEN 'HIGH: Up to 20% of annual value'
            WHEN crs.ValueTier = 'High Value' AND crs.ChurnRiskCategory = 'Moderate Risk' THEN 'MEDIUM-HIGH: Up to 10% of annual value'
            WHEN crs.ValueTier IN ('High Value', 'Medium-High Value') AND crs.ChurnRiskCategory IN ('Critical Risk', 'High Risk') THEN 'MEDIUM: Up to 15% of annual value'
            WHEN crs.ValueTier = 'Medium-High Value' THEN 'MEDIUM: Up to 8% of annual value'
            WHEN crs.ChurnRiskCategory IN ('Critical Risk', 'High Risk') THEN 'LOW-MEDIUM: Up to 5% of annual value'
            ELSE 'LOW: Standard retention budget'
        END AS RetentionInvestmentRecommendation,
        -- Recommended retention action
        CASE
            WHEN crs.ChurnRiskCategory = 'Critical Risk' AND crs.ValueTier = 'High Value' THEN 'URGENT: Executive outreach, personalized offer, satisfaction survey'
            WHEN crs.ChurnRiskCategory = 'Critical Risk' THEN 'URGENT: Personal call/email, win-back offer, identify issues'
            WHEN crs.ChurnRiskCategory = 'High Risk' AND crs.ValueTier IN ('High Value', 'Medium-High Value') THEN 'HIGH PRIORITY: Personalized re-engagement, special incentive'
            WHEN crs.ChurnRiskCategory = 'High Risk' THEN 'Re-engagement campaign, limited-time offer'
            WHEN crs.ChurnRiskCategory = 'Moderate Risk' THEN 'Proactive outreach, engagement content, reminder campaign'
            WHEN crs.ChurnRiskCategory = 'Low Risk' THEN 'Standard nurture campaign'
            ELSE 'Continue standard customer communications'
        END AS RetentionAction,
        RANK() OVER (ORDER BY
            (crs.TotalChurnRiskScore * 0.6) + (crs.ValueQuartile * 10 * 0.4) DESC,
            crs.LifetimeRevenue DESC
        ) AS RetentionPriorityRank
    FROM ChurnRiskScoring crs
    WHERE crs.TotalChurnRiskScore >= 15  -- Only customers with some risk
)

SELECT
    rp.CustomerKey,
    rp.CustomerName,
    rp.ValueTier,
    rp.ChurnRiskCategory,
    rp.TotalChurnRiskScore,
    rp.RetentionPriorityScore,
    rp.RetentionPriorityRank,
    rp.Country,
    rp.SalesTerritoryRegion,
    rp.YearlyIncome,
    rp.DaysSinceLastPurchase,
    rp.CustomerTenureYears,
    rp.TotalOrders,
    rp.LifetimeRevenue,
    rp.LifetimeGrossProfit,
    rp.ExpectedAnnualRevenue,
    rp.TwoYearValueAtRisk,
    rp.OrdersLast90Days,
    rp.OrdersPrevious90Days,
    rp.RevenueLast90Days,
    rp.RevenuePrevious90Days,
    rp.RecencyRiskScore,
    rp.ActivityDeclineScore,
    rp.RevenueDeclineScore,
    rp.LowEngagementScore,
    rp.RetentionInvestmentRecommendation,
    rp.RetentionAction
FROM RetentionPrioritization rp
WHERE rp.ChurnRiskCategory IN ('Critical Risk', 'High Risk', 'Moderate Risk')
ORDER BY rp.RetentionPriorityScore DESC, rp.LifetimeRevenue DESC;
"""
db.sql_print(sql)

┌─────────────┬────────────────────┬────────────┬───────────────────┬─────────────────────┬────────────────────────┬───────────────────────┬────────────────┬──────────────────────┬───────────────┬───────────────────────┬─────────────────────┬─────────────┬─────────────────┬─────────────────────┬───────────────────────┬────────────────────┬──────────────────┬──────────────────────┬───────────────────┬───────────────────────┬──────────────────┬──────────────────────┬─────────────────────┬────────────────────┬────────────────────────────────────────┬───────────────────────────────────────────────────────────┐
│ CustomerKey │    CustomerName    │ ValueTier  │ ChurnRiskCategory │ TotalChurnRiskScore │ RetentionPriorityScore │ RetentionPriorityRank │    Country     │ SalesTerritoryRegion │ YearlyIncome  │ DaysSinceLastPurchase │ CustomerTenureYears │ TotalOrders │ LifetimeRevenue │ LifetimeGrossProfit │ ExpectedAnnualRevenue │ TwoYearValueAtRisk │ OrdersLast90Days │ OrdersPrevious90Days │ Re

# xxx

## 01

In [None]:
sql = """

"""
db.sql_print(sql)

## 02

In [None]:
sql = """

"""
db.sql_print(sql)

## 03

In [None]:
sql = """

"""
db.sql_print(sql)

## 04

In [None]:
sql = """

"""
db.sql_print(sql)

## 05

In [None]:
sql = """

"""
db.sql_print(sql)

# xxx

## 01

In [None]:
sql = """

"""
db.sql_print(sql)

## 02

In [None]:
sql = """

"""
db.sql_print(sql)

## 03

In [None]:
sql = """

"""
db.sql_print(sql)

## 04

In [None]:
sql = """

"""
db.sql_print(sql)

## 05

In [None]:
sql = """

"""
db.sql_print(sql)

# xxx

## 01

In [None]:
sql = """

"""
db.sql_print(sql)

## 02

In [None]:
sql = """

"""
db.sql_print(sql)

## 03

In [None]:
sql = """

"""
db.sql_print(sql)

## 04

In [None]:
sql = """

"""
db.sql_print(sql)

## 05

In [None]:
sql = """

"""
db.sql_print(sql)

# xxx

## 01

In [None]:
sql = """

"""
db.sql_print(sql)

## 02

In [None]:
sql = """

"""
db.sql_print(sql)

## 03

In [None]:
sql = """

"""
db.sql_print(sql)

## 04

In [None]:
sql = """

"""
db.sql_print(sql)

## 05

In [None]:
sql = """

"""
db.sql_print(sql)

# xxx

## 01

In [None]:
sql = """

"""
db.sql_print(sql)

## 02

In [None]:
sql = """

"""
db.sql_print(sql)

## 03

In [None]:
sql = """

"""
db.sql_print(sql)

## 04

In [None]:
sql = """

"""
db.sql_print(sql)

## 05

In [None]:
sql = """

"""
db.sql_print(sql)

# Internet Sales

## 1

In [5]:
sql = """
WITH CurrentYearSales AS (
    SELECT
        pc.EnglishProductCategoryName AS CategoryName,
        psc.EnglishProductSubcategoryName AS SubcategoryName,
        dd.CalendarYear,
        SUM(fis.SalesAmount) AS TotalRevenue,
        SUM(fis.OrderQuantity) AS TotalQuantity,
        COUNT(DISTINCT fis.SalesOrderNumber) AS OrderCount
    FROM FactInternetSales fis
    INNER JOIN DimProduct p ON fis.ProductKey = p.ProductKey
    INNER JOIN DimProductSubcategory psc ON p.ProductSubcategoryKey = psc.ProductSubcategoryKey
    INNER JOIN DimProductCategory pc ON psc.ProductCategoryKey = pc.ProductCategoryKey
    INNER JOIN DimDate dd ON fis.OrderDateKey = dd.DateKey
    WHERE dd.CalendarYear >= (SELECT MAX(CalendarYear) - 1 FROM DimDate WHERE DateKey IN (SELECT DISTINCT OrderDateKey FROM FactInternetSales))
    GROUP BY pc.EnglishProductCategoryName, psc.EnglishProductSubcategoryName, dd.CalendarYear
),
YoYComparison AS (
    SELECT
        curr.CategoryName,
        curr.SubcategoryName,
        curr.CalendarYear AS CurrentYear,
        curr.TotalRevenue AS CurrentYearRevenue,
        curr.TotalQuantity AS CurrentYearQuantity,
        curr.OrderCount AS CurrentYearOrders,
        prev.TotalRevenue AS PreviousYearRevenue,
        prev.TotalQuantity AS PreviousYearQuantity,
        CASE
            WHEN prev.TotalRevenue IS NOT NULL AND prev.TotalRevenue > 0
            THEN ((curr.TotalRevenue - prev.TotalRevenue) / prev.TotalRevenue) * 100
            ELSE NULL
        END AS RevenueGrowthPct,
        CASE
            WHEN prev.TotalQuantity IS NOT NULL AND prev.TotalQuantity > 0
            THEN ((curr.TotalQuantity - prev.TotalQuantity) / CAST(prev.TotalQuantity AS FLOAT)) * 100
            ELSE NULL
        END AS QuantityGrowthPct
    FROM CurrentYearSales curr
    LEFT JOIN CurrentYearSales prev
        ON curr.CategoryName = prev.CategoryName
        AND curr.SubcategoryName = prev.SubcategoryName
        AND curr.CalendarYear = prev.CalendarYear + 1
)
SELECT
    CategoryName,
    SubcategoryName,
    CurrentYear,
    ROUND(CurrentYearRevenue, 2) AS CurrentYearRevenue,
    CurrentYearQuantity,
    CurrentYearOrders,
    ROUND(PreviousYearRevenue, 2) AS PreviousYearRevenue,
    ROUND(RevenueGrowthPct, 2) AS RevenueGrowthPct,
    ROUND(QuantityGrowthPct, 2) AS QuantityGrowthPct,
    RANK() OVER (PARTITION BY CurrentYear ORDER BY CurrentYearRevenue DESC) AS RevenueRank
FROM YoYComparison
WHERE CurrentYear = (SELECT MAX(CurrentYear) FROM YoYComparison)
ORDER BY CurrentYearRevenue DESC;
"""
db.sql_print(sql)

┌──────────────┬───────────────────┬─────────────┬────────────────────┬─────────────────────┬───────────────────┬─────────────────────┬──────────────────┬───────────────────┬─────────────┐
│ CategoryName │  SubcategoryName  │ CurrentYear │ CurrentYearRevenue │ CurrentYearQuantity │ CurrentYearOrders │ PreviousYearRevenue │ RevenueGrowthPct │ QuantityGrowthPct │ RevenueRank │
│   varchar    │      varchar      │    int32    │   decimal(38,2)    │       int128        │       int64       │    decimal(38,2)    │      double      │       float       │    int64    │
├──────────────┼───────────────────┼─────────────┼────────────────────┼─────────────────────┼───────────────────┼─────────────────────┼──────────────────┼───────────────────┼─────────────┤
│ Accessories  │ Tires and Tubes   │        2014 │           12675.06 │                 925 │               532 │           232276.42 │           -94.54 │            -94.35 │           1 │
│ Accessories  │ Helmets           │        2014 │     

## 2

In [6]:
sql = """
WITH CustomerRevenue AS (
    SELECT
        c.CustomerKey,
        CONCAT(c.FirstName, ' ', c.LastName) AS CustomerName,
        c.YearlyIncome,
        g.City,
        g.StateProvinceName,
        g.EnglishCountryRegionName AS Country,
        st.SalesTerritoryRegion,
        st.SalesTerritoryCountry,
        st.SalesTerritoryGroup,
        SUM(fis.SalesAmount) AS TotalRevenue,
        SUM(fis.OrderQuantity) AS TotalQuantity,
        COUNT(DISTINCT fis.SalesOrderNumber) AS TotalOrders,
        AVG(fis.SalesAmount) AS AvgOrderValue,
        MIN(dd.FullDateAlternateKey) AS FirstPurchaseDate,
        MAX(dd.FullDateAlternateKey) AS LastPurchaseDate
    FROM FactInternetSales fis
    INNER JOIN DimCustomer c ON fis.CustomerKey = c.CustomerKey
    INNER JOIN DimGeography g ON c.GeographyKey = g.GeographyKey
    INNER JOIN DimSalesTerritory st ON g.SalesTerritoryKey = st.SalesTerritoryKey
    INNER JOIN DimDate dd ON fis.OrderDateKey = dd.DateKey
    GROUP BY
        c.CustomerKey, c.FirstName, c.LastName, c.YearlyIncome,
        g.City, g.StateProvinceName, g.EnglishCountryRegionName,
        st.SalesTerritoryRegion, st.SalesTerritoryCountry, st.SalesTerritoryGroup
),
TerritoryMetrics AS (
    SELECT
        SalesTerritoryGroup,
        SalesTerritoryCountry,
        SalesTerritoryRegion,
        COUNT(DISTINCT CustomerKey) AS CustomerCount,
        SUM(TotalRevenue) AS TerritoryRevenue,
        SUM(TotalOrders) AS TerritoryOrders,
        AVG(TotalRevenue) AS AvgRevenuePerCustomer,
        AVG(AvgOrderValue) AS AvgOrderValue,
        MAX(TotalRevenue) AS TopCustomerRevenue
    FROM CustomerRevenue
    GROUP BY SalesTerritoryGroup, SalesTerritoryCountry, SalesTerritoryRegion
)
SELECT
    tm.SalesTerritoryGroup,
    tm.SalesTerritoryCountry,
    tm.SalesTerritoryRegion,
    tm.CustomerCount,
    ROUND(tm.TerritoryRevenue, 2) AS TerritoryRevenue,
    tm.TerritoryOrders,
    ROUND(tm.AvgRevenuePerCustomer, 2) AS AvgRevenuePerCustomer,
    ROUND(tm.AvgOrderValue, 2) AS AvgOrderValue,
    ROUND(tm.TopCustomerRevenue, 2) AS TopCustomerRevenue,
    ROUND((tm.TerritoryRevenue / SUM(tm.TerritoryRevenue) OVER ()) * 100, 2) AS PctOfTotalRevenue,
    RANK() OVER (ORDER BY tm.TerritoryRevenue DESC) AS TerritoryRevenueRank
FROM TerritoryMetrics tm
ORDER BY tm.TerritoryRevenue DESC;
"""
db.sql_print(sql)

┌─────────────────────┬───────────────────────┬──────────────────────┬───────────────┬──────────────────┬─────────────────┬───────────────────────┬───────────────┬────────────────────┬───────────────────┬──────────────────────┐
│ SalesTerritoryGroup │ SalesTerritoryCountry │ SalesTerritoryRegion │ CustomerCount │ TerritoryRevenue │ TerritoryOrders │ AvgRevenuePerCustomer │ AvgOrderValue │ TopCustomerRevenue │ PctOfTotalRevenue │ TerritoryRevenueRank │
│       varchar       │        varchar        │       varchar        │     int64     │  decimal(38,2)   │     int128      │        double         │    double     │   decimal(38,2)    │      double       │        int64         │
├─────────────────────┼───────────────────────┼──────────────────────┼───────────────┼──────────────────┼─────────────────┼───────────────────────┼───────────────┼────────────────────┼───────────────────┼──────────────────────┤
│ Pacific             │ Australia             │ Australia            │          3591 │  

## 3

In [7]:
sql = """
WITH PromotionSales AS (
    SELECT
        p.EnglishPromotionName AS PromotionName,
        p.EnglishPromotionType AS PromotionType,
        p.EnglishPromotionCategory AS PromotionCategory,
        p.DiscountPct AS PromotionDiscountPct,
        dd.CalendarYear,
        dd.CalendarQuarter,
        COUNT(DISTINCT fis.SalesOrderNumber) AS OrderCount,
        SUM(fis.OrderQuantity) AS TotalQuantity,
        SUM(fis.SalesAmount) AS TotalRevenue,
        SUM(fis.DiscountAmount) AS TotalDiscountAmount,
        AVG(fis.UnitPriceDiscountPct) AS AvgLineDiscountPct,
        SUM(fis.SalesAmount + fis.DiscountAmount) AS RevenueBeforeDiscount,
        AVG(fis.SalesAmount) AS AvgOrderValue
    FROM FactInternetSales fis
    INNER JOIN DimPromotion p ON fis.PromotionKey = p.PromotionKey
    INNER JOIN DimDate dd ON fis.OrderDateKey = dd.DateKey
    WHERE p.PromotionKey > 1  -- Exclude "No Discount" promotion
    GROUP BY
        p.EnglishPromotionName, p.EnglishPromotionType,
        p.EnglishPromotionCategory, p.DiscountPct,
        dd.CalendarYear, dd.CalendarQuarter
),
NoPromotionSales AS (
    SELECT
        dd.CalendarYear,
        dd.CalendarQuarter,
        COUNT(DISTINCT fis.SalesOrderNumber) AS OrderCount,
        SUM(fis.OrderQuantity) AS TotalQuantity,
        SUM(fis.SalesAmount) AS TotalRevenue,
        AVG(fis.SalesAmount) AS AvgOrderValue
    FROM FactInternetSales fis
    INNER JOIN DimPromotion p ON fis.PromotionKey = p.PromotionKey
    INNER JOIN DimDate dd ON fis.OrderDateKey = dd.DateKey
    WHERE p.PromotionKey = 1  -- "No Discount" promotion
    GROUP BY dd.CalendarYear, dd.CalendarQuarter
),
PromotionEffectiveness AS (
    SELECT
        ps.PromotionName,
        ps.PromotionType,
        ps.PromotionCategory,
        ps.PromotionDiscountPct,
        ps.CalendarYear,
        ps.CalendarQuarter,
        ps.OrderCount AS PromotionOrders,
        ps.TotalRevenue AS PromotionRevenue,
        ps.TotalDiscountAmount,
        ps.AvgOrderValue AS PromotionAvgOrderValue,
        nps.OrderCount AS NoPromotionOrders,
        nps.TotalRevenue AS NoPromotionRevenue,
        nps.AvgOrderValue AS NoPromotionAvgOrderValue,
        CASE
            WHEN nps.AvgOrderValue > 0
            THEN ((ps.AvgOrderValue - nps.AvgOrderValue) / nps.AvgOrderValue) * 100
            ELSE NULL
        END AS AvgOrderValueLiftPct,
        ps.RevenueBeforeDiscount - ps.TotalRevenue AS RevenueImpactOfDiscount,
        CASE
            WHEN ps.TotalDiscountAmount > 0
            THEN (ps.TotalRevenue - ps.TotalDiscountAmount) / ps.TotalDiscountAmount
            ELSE NULL
        END AS PromotionROI
    FROM PromotionSales ps
    LEFT JOIN NoPromotionSales nps
        ON ps.CalendarYear = nps.CalendarYear
        AND ps.CalendarQuarter = nps.CalendarQuarter
)
SELECT
    PromotionName,
    PromotionType,
    PromotionCategory,
    ROUND(PromotionDiscountPct * 100, 2) AS PromotionDiscountPct,
    CalendarYear,
    CalendarQuarter,
    PromotionOrders,
    ROUND(PromotionRevenue, 2) AS PromotionRevenue,
    ROUND(TotalDiscountAmount, 2) AS TotalDiscountAmount,
    ROUND(PromotionAvgOrderValue, 2) AS PromotionAvgOrderValue,
    ROUND(NoPromotionAvgOrderValue, 2) AS NoPromotionAvgOrderValue,
    ROUND(AvgOrderValueLiftPct, 2) AS AvgOrderValueLiftPct,
    ROUND(RevenueImpactOfDiscount, 2) AS RevenueImpactOfDiscount,
    ROUND(PromotionROI, 2) AS PromotionROI,
    RANK() OVER (PARTITION BY CalendarYear ORDER BY PromotionRevenue DESC) AS PromotionRevenueRank
FROM PromotionEffectiveness
WHERE CalendarYear = (SELECT MAX(CalendarYear) FROM PromotionEffectiveness)
ORDER BY PromotionRevenue DESC;
"""
db.sql_print(sql)

┌──────────────────────────┬─────────────────┬───────────────────┬──────────────────────┬──────────────┬─────────────────┬─────────────────┬──────────────────┬─────────────────────┬────────────────────────┬──────────────────────────┬──────────────────────┬─────────────────────────┬──────────────┬──────────────────────┐
│      PromotionName       │  PromotionType  │ PromotionCategory │ PromotionDiscountPct │ CalendarYear │ CalendarQuarter │ PromotionOrders │ PromotionRevenue │ TotalDiscountAmount │ PromotionAvgOrderValue │ NoPromotionAvgOrderValue │ AvgOrderValueLiftPct │ RevenueImpactOfDiscount │ PromotionROI │ PromotionRevenueRank │
│         varchar          │     varchar     │      varchar      │        float         │    int32     │      int32      │      int64      │  decimal(38,2)   │       double        │         double         │          double          │        double        │         double          │    double    │        int64         │
├──────────────────────────┼─────────

## 4

In [8]:
sql = """
WITH MonthlySales AS (
    SELECT
        dd.CalendarYear,
        dd.CalendarQuarter,
        dd.MonthNumberOfYear,
        dd.EnglishMonthName AS MonthName,
        dd.FiscalYear,
        dd.FiscalQuarter,
        SUM(fis.SalesAmount) AS MonthlyRevenue,
        SUM(fis.OrderQuantity) AS MonthlyQuantity,
        COUNT(DISTINCT fis.SalesOrderNumber) AS MonthlyOrders,
        COUNT(DISTINCT fis.CustomerKey) AS UniqueCustomers,
        AVG(fis.SalesAmount) AS AvgOrderValue
    FROM FactInternetSales fis
    INNER JOIN DimDate dd ON fis.OrderDateKey = dd.DateKey
    GROUP BY
        dd.CalendarYear, dd.CalendarQuarter, dd.MonthNumberOfYear,
        dd.EnglishMonthName, dd.FiscalYear, dd.FiscalQuarter
),
MonthlyTrends AS (
    SELECT
        CalendarYear,
        CalendarQuarter,
        MonthNumberOfYear,
        MonthName,
        FiscalYear,
        FiscalQuarter,
        MonthlyRevenue,
        MonthlyQuantity,
        MonthlyOrders,
        UniqueCustomers,
        AvgOrderValue,
        LAG(MonthlyRevenue, 1) OVER (ORDER BY CalendarYear, MonthNumberOfYear) AS PrevMonthRevenue,
        LAG(MonthlyRevenue, 12) OVER (ORDER BY CalendarYear, MonthNumberOfYear) AS SameMonthPrevYearRevenue,
        AVG(MonthlyRevenue) OVER (
            ORDER BY CalendarYear, MonthNumberOfYear
            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
        ) AS ThreeMonthMovingAvg,
        AVG(MonthlyRevenue) OVER (PARTITION BY MonthNumberOfYear) AS HistoricalMonthAvg,
        SUM(MonthlyRevenue) OVER (
            PARTITION BY CalendarYear
            ORDER BY MonthNumberOfYear
        ) AS YearToDateRevenue
    FROM MonthlySales
),
SeasonalityMetrics AS (
    SELECT
        CalendarYear,
        CalendarQuarter,
        MonthNumberOfYear,
        MonthName,
        FiscalYear,
        FiscalQuarter,
        MonthlyRevenue,
        MonthlyOrders,
        UniqueCustomers,
        ROUND(AvgOrderValue, 2) AS AvgOrderValue,
        ROUND(ThreeMonthMovingAvg, 2) AS ThreeMonthMovingAvg,
        CASE
            WHEN PrevMonthRevenue > 0
            THEN ROUND(((MonthlyRevenue - PrevMonthRevenue) / PrevMonthRevenue) * 100, 2)
            ELSE NULL
        END AS MoMGrowthPct,
        CASE
            WHEN SameMonthPrevYearRevenue > 0
            THEN ROUND(((MonthlyRevenue - SameMonthPrevYearRevenue) / SameMonthPrevYearRevenue) * 100, 2)
            ELSE NULL
        END AS YoYGrowthPct,
        ROUND((MonthlyRevenue / HistoricalMonthAvg) * 100, 2) AS SeasonalityIndex,
        ROUND(YearToDateRevenue, 2) AS YearToDateRevenue,
        RANK() OVER (PARTITION BY CalendarYear ORDER BY MonthlyRevenue DESC) AS MonthRevenueRank
    FROM MonthlyTrends
)
SELECT
    CalendarYear,
    CalendarQuarter,
    MonthNumberOfYear,
    MonthName,
    FiscalYear,
    FiscalQuarter,
    ROUND(MonthlyRevenue, 2) AS MonthlyRevenue,
    MonthlyOrders,
    UniqueCustomers,
    AvgOrderValue,
    ThreeMonthMovingAvg,
    MoMGrowthPct,
    YoYGrowthPct,
    SeasonalityIndex,
    YearToDateRevenue,
    MonthRevenueRank
FROM SeasonalityMetrics
WHERE CalendarYear >= (SELECT MAX(CalendarYear) - 2 FROM SeasonalityMetrics)
ORDER BY CalendarYear DESC, MonthNumberOfYear;
"""
db.sql_print(sql)

┌──────────────┬─────────────────┬───────────────────┬───────────┬────────────┬───────────────┬────────────────┬───────────────┬─────────────────┬───────────────┬─────────────────────┬──────────────┬──────────────┬──────────────────┬───────────────────┬──────────────────┐
│ CalendarYear │ CalendarQuarter │ MonthNumberOfYear │ MonthName │ FiscalYear │ FiscalQuarter │ MonthlyRevenue │ MonthlyOrders │ UniqueCustomers │ AvgOrderValue │ ThreeMonthMovingAvg │ MoMGrowthPct │ YoYGrowthPct │ SeasonalityIndex │ YearToDateRevenue │ MonthRevenueRank │
│    int32     │      int32      │       int32       │  varchar  │   int32    │     int32     │ decimal(38,2)  │     int64     │      int64      │    double     │       double        │    double    │    double    │      double      │   decimal(38,2)   │      int64       │
├──────────────┼─────────────────┼───────────────────┼───────────┼────────────┼───────────────┼────────────────┼───────────────┼─────────────────┼───────────────┼───────────────────

## 5

rewrite in md -  error julianday

In [10]:
sql = """
WITH CustomerPurchases AS (
    SELECT
        c.CustomerKey,
        CONCAT(c.FirstName, ' ', c.LastName) AS CustomerName,
        c.EmailAddress,
        c.YearlyIncome,
        c.EnglishEducation AS Education,
        c.EnglishOccupation AS Occupation,
        c.Gender,
        c.MaritalStatus,
        c.TotalChildren,
        g.City,
        g.StateProvinceName,
        g.EnglishCountryRegionName AS Country,
        MIN(dd.FullDateAlternateKey) AS FirstPurchaseDate,
        MAX(dd.FullDateAlternateKey) AS LastPurchaseDate,
        COUNT(DISTINCT fis.SalesOrderNumber) AS TotalOrders,
        SUM(fis.SalesAmount) AS TotalRevenue,
        AVG(fis.SalesAmount) AS AvgOrderValue,
        SUM(fis.OrderQuantity) AS TotalQuantity,
        COUNT(DISTINCT p.ProductSubcategoryKey) AS ProductCategoryDiversity,
        COUNT(DISTINCT CAST(dd.CalendarYear AS VARCHAR) || '-' || CAST(dd.CalendarQuarter AS VARCHAR)) AS ActiveQuarters
    FROM FactInternetSales fis
    INNER JOIN DimCustomer c ON fis.CustomerKey = c.CustomerKey
    INNER JOIN DimGeography g ON c.GeographyKey = g.GeographyKey
    INNER JOIN DimDate dd ON fis.OrderDateKey = dd.DateKey
    INNER JOIN DimProduct p ON fis.ProductKey = p.ProductKey
    GROUP BY
        c.CustomerKey, c.FirstName, c.LastName, c.EmailAddress,
        c.YearlyIncome, c.EnglishEducation, c.EnglishOccupation,
        c.Gender, c.MaritalStatus, c.TotalChildren,
        g.City, g.StateProvinceName, g.EnglishCountryRegionName
),
CustomerMetrics AS (
    SELECT
        *,
        -- Days since last purchase
        CURRENT_DATE - LastPurchaseDate AS DaysSinceLastPurchase,

        -- Customer lifespan in days
        LastPurchaseDate - FirstPurchaseDate AS CustomerLifespanDays,

        -- Orders per year (preserves original logic)
        CASE
            WHEN (LastPurchaseDate - FirstPurchaseDate) > 0
            THEN TotalOrders / ((LastPurchaseDate - FirstPurchaseDate) / 365.25)
            ELSE TotalOrders
        END AS OrdersPerYear
    FROM CustomerPurchases
),
RFMScores AS (
    SELECT
        *,
        NTILE(5) OVER (ORDER BY DaysSinceLastPurchase DESC) AS RecencyScore,
        NTILE(5) OVER (ORDER BY TotalOrders) AS FrequencyScore,
        NTILE(5) OVER (ORDER BY TotalRevenue) AS MonetaryScore
    FROM CustomerMetrics
),
CustomerSegmentation AS (
    SELECT
        *,
        RecencyScore + FrequencyScore + MonetaryScore AS RFMScore,
        CASE
            WHEN RecencyScore >= 4 AND FrequencyScore >= 4 AND MonetaryScore >= 4 THEN 'VIP Champions'
            WHEN RecencyScore >= 4 AND FrequencyScore >= 3 THEN 'Loyal Customers'
            WHEN RecencyScore >= 4 AND MonetaryScore >= 4 THEN 'Big Spenders'
            WHEN RecencyScore >= 3 AND FrequencyScore >= 3 AND MonetaryScore >= 3 THEN 'Potential Loyalists'
            WHEN RecencyScore <= 2 AND FrequencyScore >= 4 THEN 'At Risk'
            WHEN RecencyScore <= 2 AND FrequencyScore <= 2 THEN 'Lost Customers'
            WHEN RecencyScore >= 4 AND FrequencyScore <= 2 THEN 'New Customers'
            ELSE 'Regular Customers'
        END AS CustomerSegment
    FROM RFMScores
)
SELECT
    CustomerName,
    EmailAddress,
    Country,
    StateProvinceName,
    City,
    CustomerSegment,
    RFMScore,
    RecencyScore,
    FrequencyScore,
    MonetaryScore,
    ROUND(TotalRevenue, 2) AS LifetimeValue,
    TotalOrders,
    ROUND(AvgOrderValue, 2) AS AvgOrderValue,
    TotalQuantity,
    ProductCategoryDiversity,
    ActiveQuarters,
    DaysSinceLastPurchase,
    ROUND(OrdersPerYear, 2) AS OrdersPerYear,
    FirstPurchaseDate,
    LastPurchaseDate,
    YearlyIncome,
    Education,
    Occupation,
    Gender,
    MaritalStatus,
    TotalChildren,
    RANK() OVER (ORDER BY TotalRevenue DESC) AS RevenueRank,
    RANK() OVER (PARTITION BY CustomerSegment ORDER BY TotalRevenue DESC) AS SegmentRevenueRank
FROM CustomerSegmentation
ORDER BY TotalRevenue DESC;
"""
db.sql_print(sql)

┌───────────────────┬─────────────────────────────────┬───────────────┬───────────────────┬────────────────────┬─────────────────────┬──────────┬──────────────┬────────────────┬───────────────┬───────────────┬─────────────┬───────────────┬───────────────┬──────────────────────────┬────────────────┬───────────────────────┬───────────────┬───────────────────┬──────────────────┬───────────────┬─────────────────────┬────────────────┬─────────┬───────────────┬───────────────┬─────────────┬────────────────────┐
│   CustomerName    │          EmailAddress           │    Country    │ StateProvinceName │        City        │   CustomerSegment   │ RFMScore │ RecencyScore │ FrequencyScore │ MonetaryScore │ LifetimeValue │ TotalOrders │ AvgOrderValue │ TotalQuantity │ ProductCategoryDiversity │ ActiveQuarters │ DaysSinceLastPurchase │ OrdersPerYear │ FirstPurchaseDate │ LastPurchaseDate │ YearlyIncome  │      Education      │   Occupation   │ Gender  │ MaritalStatus │ TotalChildren │ RevenueRank │

# xxx

## 01

In [None]:
sql = """

"""
db.sql_print(sql)

## 02

In [None]:
sql = """

"""
db.sql_print(sql)

## 03

In [None]:
sql = """

"""
db.sql_print(sql)

## 04

In [None]:
sql = """

"""
db.sql_print(sql)

## 05

In [None]:
sql = """

"""
db.sql_print(sql)

# xxx

## 01

In [None]:
sql = """

"""
db.sql_print(sql)

## 02

In [None]:
sql = """

"""
db.sql_print(sql)

## 03

In [None]:
sql = """

"""
db.sql_print(sql)

## 04

In [None]:
sql = """

"""
db.sql_print(sql)

## 05

In [None]:
sql = """

"""
db.sql_print(sql)

# xxx

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)

In [None]:
sql = """

"""
db.sql_print(sql)