

### Change Over Time Analysis

Purpose:
    - To track trends, growth, and changes in key metrics over time.
    - For time-series analysis and identifying seasonality.
    - To measure growth or decline over specific periods.

SQL Functions Used:
    - Date Functions: EXTRACT(), DATE_PART(), DATE_TRUNC(), TO_CHAR() (the PostgreSQL counterparts of SQL Server's DATEPART(), DATETRUNC(), FORMAT())
    - Aggregate Functions: SUM(), COUNT(), AVG()



- Analyse sales performance over time
- Quick Date Functions

In [14]:
# Import required libraries
import pandas as pd
from sqlalchemy import create_engine
%load_ext sql
from IPython.display import Image, display

# Configure pandas display format
pd.options.display.float_format = '{:.2f}'.format

# Connect to PostgreSQL database with password
%sql postgresql://postgres:legacy@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Test the connection with a simple query
%sql SELECT version();

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


Unnamed: 0,version
0,"PostgreSQL 17.4 on x86_64-windows, compiled by..."


In [16]:
%%sql
-- Sales, distinct customers and units sold for every (year, month) pair.
SELECT
    EXTRACT(YEAR FROM orderdate)            AS order_year,
    EXTRACT(MONTH FROM orderdate)           AS order_month,
    SUM(quantity * netprice * exchangerate) AS total_sales,
    COUNT(DISTINCT customerkey)             AS num_customers,
    SUM(quantity)                           AS total_quantity
FROM sales
GROUP BY order_year, order_month
-- Sort on the month as well: ordering by year alone leaves the order of the
-- months inside each year unspecified, which breaks the time series.
ORDER BY order_year ASC, order_month ASC

Unnamed: 0,order_year,order_month,total_sales,num_customers,total_quantity
0,2015,1,384092.66,200,1469
1,2015,2,706374.12,291,2047
2,2015,3,332961.59,139,977
3,2015,4,160767.00,78,530
4,2015,5,548632.63,236,1635
...,...,...,...,...,...
107,2023,12,2928550.93,1484,11226
108,2024,1,2677498.55,1340,10204
109,2024,2,3542322.55,1718,13234
110,2024,3,1692854.89,877,6696


In [17]:
%%sql
-- Sales, distinct customers and units sold per calendar month (1-12),
-- pooled across all years.
SELECT
    -- Alternative ways to derive a month value from the order date:
    --   DATE_PART('month', orderdate)     AS order_date,
    --   TO_CHAR(orderdate, 'yyyy-Month')  AS order_month,
    EXTRACT(MONTH FROM orderdate)           AS order_month,
    SUM(quantity * netprice * exchangerate) AS total_sales,
    COUNT(DISTINCT customerkey)             AS num_customers,
    SUM(quantity)                           AS total_quantity
FROM sales
GROUP BY order_month
ORDER BY order_month ASC

Unnamed: 0,order_month,total_sales,num_customers,total_quantity
0,1,19765401.22,7632,61311
1,2,25980857.73,9741,78994
2,3,13538465.09,5388,42319
3,4,7056402.33,2835,21478
4,5,17245023.74,6466,50399
5,6,18740856.31,6714,54283
6,7,14589241.23,5792,45172
7,8,16161387.57,6212,49074
8,9,16717883.77,6547,51538
9,10,17653586.65,6733,54676


In [18]:
import calendar  # needed for month_abbr below; it is not imported in any cell shown above

import plotly.graph_objects as go
from plotly.subplots import make_subplots

# `_` is IPython's "most recent cell output". This cell expects it to be the
# per-month aggregate (columns order_month, total_sales, num_customers).
# Bind it once to a descriptive name so the plotting code reads a stable
# variable instead of `_` in several places.
# NOTE(review): this still depends on the monthly query being the last
# displayed output; capturing that result in a named variable would be safer.
monthly_sales = _

# Convert numeric months to abbreviated month names once; both traces share them.
month_labels = monthly_sales.order_month.apply(
    lambda month: calendar.month_abbr[int(month)]
)

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Total sales on the primary (left) y-axis
fig.add_trace(
    go.Scatter(
        x=month_labels,
        y=monthly_sales.total_sales,
        name="Total Sales",
        mode='lines+markers'
    ),
    secondary_y=False
)

# Customer count on the secondary (right) y-axis
fig.add_trace(
    go.Scatter(
        x=month_labels,
        y=monthly_sales.num_customers,
        name="Number of Customers",
        mode='lines+markers'
    ),
    secondary_y=True
)

# Add figure title
fig.update_layout(
    title="Sales amount and Customer Count by Month",
    xaxis_title="Month"
)

# Set y-axes titles
fig.update_yaxes(title_text="Total Sales ($)", secondary_y=False)
fig.update_yaxes(title_text="Number of Customers", secondary_y=True)

fig.show()