### Cumulative Analysis

Purpose:
- To calculate running totals or moving averages for key metrics.
- To track performance over time cumulatively.
- Useful for growth analysis or identifying long-term trends.

SQL Functions Used:
    - Window Functions: SUM() OVER(), AVG() OVER()

In [2]:
# Import required libraries
import pandas as pd
from sqlalchemy import create_engine
%load_ext sql
from IPython.display import Image, display

# Configure pandas display format
pd.options.display.float_format = '{:.2f}'.format

# Connect to PostgreSQL database with password
%sql postgresql://postgres:legacy@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Test the connection with a simple query
%sql SELECT version();

Unnamed: 0,version
0,"PostgreSQL 17.4 on x86_64-windows, compiled by..."


In [3]:
%%sql

WITH monthly_stats AS (
    -- Collapse the sales rows into one row per calendar month (text key YYYY-MM).
    -- NOTE(review) avg_sales is computed here but the outer query never selects it.
    SELECT
        TO_CHAR(orderdate, 'YYYY-MM')           AS order_month,
        SUM(quantity * netprice * exchangerate) AS total_sales,
        COUNT(DISTINCT customerkey)             AS num_customers,
        AVG(quantity * netprice * exchangerate) AS avg_sales
    FROM sales
    GROUP BY 1
)
SELECT
    order_month,
    total_sales,
    num_customers,
    -- Both windows run from the first month up to the current row, so
    -- moving_average is the average of all months so far, not a fixed-width one.
    AVG(total_sales) OVER month_order AS moving_average,
    SUM(total_sales) OVER month_order AS running_total_sales
FROM monthly_stats
WINDOW month_order AS (ORDER BY order_month)
ORDER BY order_month

Unnamed: 0,order_month,total_sales,num_customers,moving_average,running_total_sales
0,2015-01,384092.66,200,384092.66,384092.66
1,2015-02,706374.12,291,545233.39,1090466.78
2,2015-03,332961.59,139,474476.12,1423428.37
3,2015-04,160767.00,78,396048.84,1584195.37
4,2015-05,548632.63,236,426565.60,2132828.00
...,...,...,...,...,...
107,2023-12,2928550.93,1484,1833435.29,198011011.20
108,2024-01,2677498.55,1340,1841178.99,200688509.75
109,2024-02,3542322.55,1718,1856643.93,204230832.30
110,2024-03,1692854.89,877,1855168.35,205923687.19


In [None]:
%%sql

WITH monthly_stats AS (
    -- One row per calendar month, restricted to 2023 onward.
    -- The date filter sits here, on the raw column, so earlier years are never
    -- aggregated. The window functions below still only see the surviving
    -- months, so the running figures restart at 2023-01 exactly as before.
    -- NOTE(review) avg_sales is computed here but the outer query never selects it.
    SELECT
        TO_CHAR(orderdate, 'YYYY-MM') AS order_month,
        SUM(quantity * netprice * exchangerate) AS total_sales,
        COUNT(DISTINCT customerkey) AS num_customers,
        AVG(quantity * netprice * exchangerate) AS avg_sales
    FROM sales
    WHERE orderdate >= DATE '2023-01-01'
    GROUP BY TO_CHAR(orderdate, 'YYYY-MM')
)
SELECT
    order_month,
    total_sales,
    num_customers,
    -- Default frame with ORDER BY covers the first row through the current row,
    -- so moving_average is the cumulative average since 2023-01.
    AVG(total_sales) OVER (ORDER BY order_month) AS moving_average,
    SUM(total_sales) OVER (ORDER BY order_month) AS running_total_sales
FROM monthly_stats
ORDER BY order_month

Unnamed: 0,order_month,total_sales,num_customers,moving_average,running_total_sales
0,2023-01,3664431.34,1579,3664431.34,3664431.34
1,2023-02,4465204.57,1946,4064817.96,8129635.91
2,2023-03,2244316.52,1056,3457984.14,10373952.43
3,2023-04,1162796.16,551,2884187.15,11536748.6
4,2023-05,2943005.99,1363,2895950.92,14479754.59
5,2023-06,2864500.03,1301,2890709.1,17344254.61
6,2023-07,2337639.34,1126,2811699.14,19681893.96
7,2023-08,2623919.79,1198,2788226.72,22305813.75
8,2023-09,2622774.85,1255,2769843.18,24928588.59
9,2023-10,2551322.61,1217,2747991.12,27479911.2


#### Formatting order_month as text (YYYY-MM) to enable plotting

In [15]:
%%sql
WITH monthly_stats AS (
    -- order_month is produced by TO_CHAR, so it is a text value such as 2015-01.
    -- NOTE(review) avg_sales is computed here but the outer query never selects it.
    SELECT
        TO_CHAR(orderdate, 'YYYY-MM')           AS order_month,
        SUM(quantity * netprice * exchangerate) AS total_sales,
        COUNT(DISTINCT customerkey)             AS num_customers,
        AVG(quantity * netprice * exchangerate) AS avg_sales
    FROM sales
    GROUP BY 1
)
SELECT
    order_month,
    total_sales,
    num_customers,
    -- cumulative_by_month accumulates from the earliest month to the current one
    AVG(total_sales) OVER cumulative_by_month AS moving_average,
    SUM(total_sales) OVER cumulative_by_month AS running_total_sales
FROM monthly_stats
WINDOW cumulative_by_month AS (ORDER BY order_month)
ORDER BY order_month

Unnamed: 0,order_month,total_sales,num_customers,moving_average,running_total_sales
0,2015-01,384092.66,200,384092.66,384092.66
1,2015-02,706374.12,291,545233.39,1090466.78
2,2015-03,332961.59,139,474476.12,1423428.37
3,2015-04,160767.00,78,396048.84,1584195.37
4,2015-05,548632.63,236,426565.60,2132828.00
...,...,...,...,...,...
107,2023-12,2928550.93,1484,1833435.29,198011011.20
108,2024-01,2677498.55,1340,1841178.99,200688509.75
109,2024-02,3542322.55,1718,1856643.93,204230832.30
110,2024-03,1692854.89,877,1855168.35,205923687.19


### Starting from 2022 to enable plot

In [27]:
%%sql
WITH monthly_stats AS (
    -- One row per calendar month for the years 2022 through 2024.
    -- A half-open range is used instead of BETWEEN so that rows dated
    -- 2024-12-31 are kept even if orderdate carries a time of day.
    -- NOTE(review) avg_sales is computed here but the outer query never selects it.
    SELECT
        TO_CHAR(orderdate, 'YYYY-MM') AS order_month,
        SUM(quantity * netprice * exchangerate) AS total_sales,
        COUNT(DISTINCT customerkey) AS num_customers,
        AVG(quantity * netprice * exchangerate) AS avg_sales
    FROM sales
    WHERE orderdate >= DATE '2022-01-01'
      AND orderdate < DATE '2025-01-01'
    GROUP BY TO_CHAR(orderdate, 'YYYY-MM')
)
SELECT
    order_month,
    total_sales,
    num_customers,
    -- Default frame with ORDER BY covers the first row through the current row,
    -- so moving_average is the cumulative average since 2022-01.
    AVG(total_sales) OVER (ORDER BY order_month) AS moving_average,
    SUM(total_sales) OVER (ORDER BY order_month) AS running_total_sales
FROM monthly_stats
ORDER BY order_month;

Unnamed: 0,order_month,total_sales,num_customers,moving_average,running_total_sales
0,2022-01,3647525.92,1362,3647525.92,3647525.92
1,2022-02,4840124.87,1871,4243825.4,8487650.8
2,2022-03,2801554.72,1105,3763068.51,11289205.52
3,2022-04,1746624.57,696,3258957.52,13035830.09
4,2022-05,4430652.19,1652,3493296.45,17466482.27
5,2022-06,4777313.11,1741,3707299.23,22243795.38
6,2022-07,3395262.66,1564,3662722.58,25639058.05
7,2022-08,3698942.66,1647,3667250.09,29338000.71
8,2022-09,3854509.88,1731,3688056.73,33192510.59
9,2022-10,3913434.52,1705,3710594.51,37105945.11


In [31]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# `_` is IPython's "most recent cell output". It is only the monthly sales
# DataFrame when the 2022-2024 SQL cell was the last cell to display a result,
# so capture it once under a real name and fail loudly if it is something else.
monthly_sales = _
required_columns = {"order_month", "running_total_sales", "moving_average"}
missing_columns = required_columns - set(getattr(monthly_sales, "columns", ()))
if missing_columns:
    raise ValueError(
        "Expected the previous cell output to be the monthly sales DataFrame; "
        f"missing columns: {sorted(missing_columns)}. "
        "Re-run the 2022-2024 SQL cell immediately before this cell."
    )

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (column, legend label, line colour, plotted on the secondary y-axis?)
# The running total goes on the primary axis and the average on the secondary
# one because the two series are on very different scales.
trace_specs = (
    ("running_total_sales", "Running Total Sales", "royalblue", False),
    ("moving_average", "Moving Average", "red", True),
)
for column, label, color, on_secondary in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=monthly_sales["order_month"],
            y=monthly_sales[column],
            name=label,
            mode='lines+markers',
            line=dict(color=color, width=2),
            marker=dict(size=8),
        ),
        secondary_y=on_secondary,
    )

# Update layout
fig.update_layout(
    title="Sales Analysis: Running Total and Moving Average for 2022 - 2024",
    title_x=0.5,  # Center the title
    title_font_size=20,  # Make title more prominent
    xaxis_title="Month",
    plot_bgcolor='rgba(240, 245, 250, 1)',
    paper_bgcolor='rgba(240, 245, 250, 1)',
    xaxis=dict(
        showgrid=True,
        gridcolor='white',
        gridwidth=1,
        dtick="M1",  # one tick per month
        tickformat="%Y-%m",
    ),
    # The primary y-axis title is set once, in update_yaxes below.
    yaxis=dict(
        showgrid=True,
        gridcolor='white',
        gridwidth=1,
    ),
    legend=dict(
        x=1,
        y=1,
        orientation='v',
    ),
    margin=dict(l=50, r=50, t=50, b=50),  # top margin leaves room for the title
)

# Set y-axes titles and formats
fig.update_yaxes(
    title_text="Running Total Sales ($)",
    secondary_y=False,
    tickformat=",.0f",
)
fig.update_yaxes(
    title_text="Moving Average ($)",
    secondary_y=True,
    tickformat=",",
)

fig.show()