<a target="_blank" href="https://colab.research.google.com/github/lukebarousse/Int_SQL_Data_Analytics_Course/blob/main/Resources/Blank_SQL_Notebook.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Blank SQL Notebook

#### Import Libraries & Database

In [2]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Environment setup.
# Only when the notebook runs inside Google Colab does this cell install and
# start a local PostgreSQL server and restore the course database into it.
# Outside Colab nothing here creates the server, so the connection further
# down expects one to be reachable on localhost:5432 already.
if 'google.colab' in sys.modules:
    # Refresh the apt package index
    !sudo apt-get update -qq > /dev/null 2>&1

    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output)
    # This must match the password used in the connection string below.
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'contoso_100k' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    # NOTE(review): all output is discarded, so a failed download or restore
    # only shows up later as missing tables.
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Swap ipython-sql for jupysql
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
# Re-running this cell prints an "already loaded" notice, which is harmless.
%load_ext sql

# Connect to the PostgreSQL database
# NOTE(review): credentials are hardcoded; they match the password set in the
# Colab branch above.
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Display pandas floats with two decimal places
pd.options.display.float_format = '{:.2f}'.format

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [None]:
%%sql
-- List every table in the public schema of the connected database.
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'public';

Unnamed: 0,table_name
0,currencyexchange
1,customer
2,sales
3,date
4,product
5,store


In [None]:
%%sql
-- Order lines dated after 2020-01-01 with customer and product details.
-- Each line is flagged HIGH when its net revenue exceeds 1000, LOW otherwise.
SELECT
  s.orderdate,
  s.quantity * s.netprice * s.exchangerate AS net_revenue,
  c.givenname,
  c.surname,
  c.countryfull,
  c.continent,
  p.productkey,
  p.productname,
  p.categoryname,
  p.subcategoryname,
  CASE
    WHEN s.quantity * s.netprice * s.exchangerate > 1000 THEN 'HIGH'
    ELSE 'LOW'
  END AS high_low
FROM sales AS s
LEFT JOIN customer AS c ON c.customerkey = s.customerkey
LEFT JOIN product AS p ON p.productkey = s.productkey
WHERE s.orderdate::Date > '2020-01-01';

In [None]:
%%sql
-- Distinct purchasing customers per order date in 2023, one column per continent.
SELECT
  s.orderdate,
  COUNT(DISTINCT s.customerkey) FILTER (WHERE c.continent = 'Europe') AS eu_customers,
  COUNT(DISTINCT s.customerkey) FILTER (WHERE c.continent = 'North America') AS na_customers,
  COUNT(DISTINCT s.customerkey) FILTER (WHERE c.continent = 'Australia') AS au_customers
FROM sales s
LEFT JOIN customer c ON c.customerkey = s.customerkey
WHERE s.orderdate BETWEEN '2023-01-01' AND '2023-12-31'
GROUP BY s.orderdate
ORDER BY s.orderdate;

In [None]:
%%sql
-- Average, minimum and maximum order-line net revenue per product category,
-- computed separately for calendar years 2022 and 2023.
SELECT
  p.categoryname,
  AVG(s.quantity * s.netprice * s.exchangerate)
    FILTER (WHERE s.orderdate BETWEEN '2022-01-01' AND '2022-12-31') AS avg_net_revenue_2022,
  AVG(s.quantity * s.netprice * s.exchangerate)
    FILTER (WHERE s.orderdate BETWEEN '2023-01-01' AND '2023-12-31') AS avg_net_revenue_2023,
  MIN(s.quantity * s.netprice * s.exchangerate)
    FILTER (WHERE s.orderdate BETWEEN '2022-01-01' AND '2022-12-31') AS min_net_revenue_2022,
  MAX(s.quantity * s.netprice * s.exchangerate)
    FILTER (WHERE s.orderdate BETWEEN '2022-01-01' AND '2022-12-31') AS max_net_revenue_2022,
  MIN(s.quantity * s.netprice * s.exchangerate)
    FILTER (WHERE s.orderdate BETWEEN '2023-01-01' AND '2023-12-31') AS min_net_revenue_2023,
  MAX(s.quantity * s.netprice * s.exchangerate)
    FILTER (WHERE s.orderdate BETWEEN '2023-01-01' AND '2023-12-31') AS max_net_revenue_2023
FROM sales s
LEFT JOIN product p ON p.productkey = s.productkey
GROUP BY p.categoryname
ORDER BY p.categoryname;

Unnamed: 0,categoryname,avg_net_revenue_2022,avg_net_revenue_2023,min_net_revenue_2022,max_net_revenue_2022,min_net_revenue_2023,max_net_revenue_2023
0,Audio,392.3,425.38,9.31,3473.36,10.85,2730.87
1,Cameras and camcorders,1210.02,1210.96,6.74,15008.39,5.98,13572.0
2,Cell phones,722.2,623.28,2.53,7692.37,2.28,8912.22
3,Computers,1565.62,1292.39,0.83,38082.66,0.75,27611.6
4,Games and Toys,81.29,80.83,2.83,5202.01,3.49,3357.3
5,Home Appliances,1755.36,1886.55,4.04,31654.55,4.54,32915.59
6,"Music, Movies and Audio Books",386.61,334.58,7.29,5415.19,6.91,3804.91
7,TV and Video,1535.61,1687.9,41.3,30259.41,42.3,27503.12


In [None]:
%%sql
-- Median order-line net revenue per product category for 2022 and for 2023.
SELECT
  p.categoryname,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY s.quantity * s.netprice * s.exchangerate)
    FILTER (WHERE s.orderdate BETWEEN '2022-01-01' AND '2022-12-31') AS median_net_revenue_2022,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY s.quantity * s.netprice * s.exchangerate)
    FILTER (WHERE s.orderdate BETWEEN '2023-01-01' AND '2023-12-31') AS median_net_revenue_2023
FROM sales s
LEFT JOIN product p ON p.productkey = s.productkey
GROUP BY p.categoryname
ORDER BY p.categoryname;

Unnamed: 0,categoryname,median_net_revenue_2022,median_net_revenue_2023
0,Audio,257.21,266.59
1,Cameras and camcorders,651.46,672.6
2,Cell phones,418.6,375.88
3,Computers,809.7,657.18
4,Games and Toys,33.78,32.62
5,Home Appliances,791.0,825.25
6,"Music, Movies and Audio Books",186.58,159.63
7,TV and Video,730.46,790.79


In [None]:
%%sql
-- Label ten sample order lines by price tier (netprice of 100 or more is
-- high value) and by whether more than one unit was bought.
SELECT
  orderdate,
  netprice,
  quantity,
  CASE
    WHEN netprice >= 100 THEN
      CASE
        WHEN quantity >= 2 THEN 'Multiple High Value Item'
        ELSE 'Single High Value Item'
      END
    WHEN quantity >= 2 THEN 'Multiple Standard Items'
    ELSE 'Single Standard Item'
  END AS order_type
FROM sales
LIMIT 10;

Unnamed: 0,orderdate,netprice,quantity,order_type
0,2015-01-01,98.97,1,Single Standard Item
1,2015-01-01,659.78,1,Single High Value Item
2,2015-01-01,54.38,2,Multiple Standard Items
3,2015-01-01,286.69,4,Multiple High Value Item
4,2015-01-01,135.75,7,Multiple High Value Item
5,2015-01-01,434.3,3,Multiple High Value Item
6,2015-01-01,58.73,1,Single Standard Item
7,2015-01-01,74.99,3,Multiple Standard Items
8,2015-01-01,113.57,2,Multiple High Value Item
9,2015-01-01,499.45,1,Single High Value Item


In [None]:
%%sql
-- Net revenue per product category for 2022 and 2023, split into order lines
-- below the median (low) and at or above the median (high). The median is the
-- order-line net revenue median taken over both years together.
WITH median_value AS (
    SELECT
      PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (s.quantity * s.netprice * s.exchangerate)) AS median
    FROM
      sales s
    WHERE
      orderdate BETWEEN '2022-01-01' AND '2023-12-31'
)

SELECT
  p.categoryname AS category,
  SUM(CASE WHEN (s.quantity * s.netprice * s.exchangerate) < mv.median AND
                s.orderdate BETWEEN '2022-01-01' AND '2022-12-31' THEN
                (s.quantity * s.netprice * s.exchangerate) END ) AS low_new_revenue_2022,
  SUM(CASE WHEN (s.quantity * s.netprice * s.exchangerate) < mv.median AND
                s.orderdate BETWEEN '2023-01-01' AND '2023-12-31' THEN
                (s.quantity * s.netprice * s.exchangerate) END ) AS low_new_revenue_2023,
  SUM(CASE WHEN (s.quantity * s.netprice * s.exchangerate) >= mv.median AND
                s.orderdate BETWEEN '2022-01-01' AND '2022-12-31' THEN
                (s.quantity * s.netprice * s.exchangerate) END ) AS high_new_revenue_2022,
  -- This column covers 2023, so it gets its own 2023 alias instead of
  -- repeating the 2022 name and producing a duplicate column.
  SUM(CASE WHEN (s.quantity * s.netprice * s.exchangerate) >= mv.median AND
                s.orderdate BETWEEN '2023-01-01' AND '2023-12-31' THEN
                (s.quantity * s.netprice * s.exchangerate) END ) AS high_new_revenue_2023
FROM
  sales s
LEFT JOIN
  product p
ON s.productkey = p.productkey
-- The CTE returns a single row, so the cross join attaches the median to every order line.
CROSS JOIN
  median_value mv
WHERE
  s.orderdate BETWEEN '2022-01-01' AND '2023-12-31'
GROUP BY
  p.categoryname
ORDER BY
  p.categoryname;

Unnamed: 0,category,low_new_revenue_2022,low_new_revenue_2023,high_new_revenue_2022,high_new_revenue_2022.1
0,Audio,222337.83,180251.13,544600.39,508439.06
1,Cameras and camcorders,133004.54,104869.46,2249528.02,1878676.83
2,Cell phones,814449.53,729699.39,7305215.55,5272448.24
3,Computers,624340.42,590790.31,17237873.07,11060076.9
4,Games and Toys,231979.63,206103.36,84147.67,64271.6
5,Home Appliances,219797.07,176261.35,6392649.61,5743731.52
6,"Music, Movies and Audio Books",685808.49,574958.76,2303488.8,1605809.37
7,TV and Video,272338.29,164275.35,5542998.32,4247902.87


In [None]:
%%sql
-- Total 2022-2023 net revenue per product category and revenue tier.
-- An order line is Low below the 25th percentile, High at or above the 75th
-- percentile, and Medium in between. Percentiles span both years.
WITH percentiles AS (
  SELECT
    PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY s.quantity * s.netprice * s.exchangerate) AS revenue_25th_percentile,
    PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY s.quantity * s.netprice * s.exchangerate) AS revenue_75th_percentile
  FROM sales s
  WHERE s.orderdate BETWEEN '2022-01-01' AND '2023-12-31'
)
SELECT
  p.categoryname AS category,
  CASE
    WHEN s.quantity * s.netprice * s.exchangerate < prct.revenue_25th_percentile THEN 'Low'
    WHEN s.quantity * s.netprice * s.exchangerate >= prct.revenue_75th_percentile THEN 'High'
    ELSE 'Medium'
  END AS revenue_category,
  SUM(s.quantity * s.netprice * s.exchangerate) AS total_revenue
FROM sales s
LEFT JOIN product p ON p.productkey = s.productkey
CROSS JOIN percentiles prct
WHERE s.orderdate BETWEEN '2022-01-01' AND '2023-12-31'
GROUP BY
  p.categoryname,
  revenue_category
ORDER BY p.categoryname;

Unnamed: 0,category,revenue_category,total_revenue
0,Audio,High,453108.9
1,Audio,Low,49819.44
2,Audio,Medium,952700.06
3,Cameras and camcorders,High,3414876.61
4,Cameras and camcorders,Low,21787.96
5,Cameras and camcorders,Medium,929414.28
6,Cell phones,High,8557888.89
7,Cell phones,Low,206223.79
8,Cell phones,Medium,5357700.03
9,Computers,High,24192945.36


In [None]:
%%sql
-- Net revenue and distinct customer count per calendar month (YYYY-MM).
SELECT
  TO_CHAR(orderdate, 'YYYY-MM') AS order_month,
  SUM(quantity * netprice * exchangerate) AS total_revenue,
  COUNT(DISTINCT customerkey) AS unique_customers
FROM
  sales
GROUP BY
  order_month
-- Without an explicit ORDER BY the row order of a grouped result is unspecified.
-- The YYYY-MM text sorts chronologically.
ORDER BY
  order_month;

Unnamed: 0,order_month,total_revenue,unique_customers
0,2015-01,384092.66,200
1,2015-02,706374.12,291
2,2015-03,332961.59,139
3,2015-04,160767.00,78
4,2015-05,548632.63,236
...,...,...,...
107,2023-12,2928550.93,1484
108,2024-01,2677498.55,1340
109,2024-02,3542322.55,1718
110,2024-03,1692854.89,877


In [None]:
%%sql
-- Net revenue and distinct customer count per order year and month number.
SELECT
  EXTRACT(YEAR FROM orderdate) AS order_year,
  EXTRACT(MONTH FROM orderdate) AS order_month,
  SUM(quantity * netprice * exchangerate) AS total_revenue,
  COUNT(DISTINCT customerkey) AS unique_customers
FROM
  sales
GROUP BY
  order_year,
  order_month
-- Without an explicit ORDER BY the row order of a grouped result is unspecified.
ORDER BY
  order_year,
  order_month;

Unnamed: 0,order_year,order_month,total_revenue,unique_customers
0,2015,1,384092.66,200
1,2015,2,706374.12,291
2,2015,3,332961.59,139
3,2015,4,160767.00,78
4,2015,5,548632.63,236
...,...,...,...,...
107,2023,12,2928550.93,1484
108,2024,1,2677498.55,1340
109,2024,2,3542322.55,1718
110,2024,3,1692854.89,877


In [None]:
%%sql
-- Ten sample orders placed within the five years leading up to today,
-- shown next to the current date for comparison.
SELECT
  CURRENT_DATE,
  s.orderdate
FROM sales s
WHERE s.orderdate >= CURRENT_DATE - INTERVAL '5 years'
LIMIT 10;

Unnamed: 0,current_date,orderdate
0,2025-09-19,2020-09-19
1,2025-09-19,2020-09-19
2,2025-09-19,2020-09-19
3,2025-09-19,2020-09-19
4,2025-09-19,2020-09-19
5,2025-09-19,2020-09-19
6,2025-09-19,2020-09-19
7,2025-09-19,2020-09-19
8,2025-09-19,2020-09-19
9,2025-09-19,2020-09-19


In [None]:
%%sql
-- Average days between order and delivery, and total net revenue, per order year.
SELECT
  EXTRACT(YEAR FROM orderdate) AS order_year,
  -- Subtracting two dates yields the whole number of elapsed days. The earlier
  -- EXTRACT(DAY FROM AGE(...)) form read only the day field of a year, month
  -- and day interval, so a span of a month or more was under-counted.
  -- Rows with no delivery date stay NULL and are ignored by AVG.
  ROUND(AVG(deliverydate::date - orderdate::date), 2) AS processing_time,
  SUM(quantity * netprice * exchangerate)::INTEGER AS total_revenue
FROM
  sales
GROUP BY
  order_year
ORDER BY
  order_year;

Unnamed: 0,order_year,processing_time,total_revenue
0,2015,1.1,7370979
1,2016,1.08,10383614
2,2017,0.83,13221339
3,2018,0.86,24667448
4,2019,0.81,31818096
5,2020,0.93,11218436
6,2021,1.36,21357977
7,2022,1.62,44864557
8,2023,1.75,33108566
9,2024,1.67,8396527


In [None]:
%%sql
-- Each order line with the average net revenue of all lines from the same
-- customer, for the first ten rows ordered by customer and order date.
-- The select list must not end with a comma before FROM, PostgreSQL rejects it.
SELECT
  customerkey AS customer,
  orderdate,
  AVG(quantity * netprice * exchangerate) OVER(PARTITION BY customerkey) AS avg_net_revenue_customer
FROM
  sales
ORDER BY customerkey, orderdate
LIMIT 10;

Unnamed: 0,customer,orderdate,avg_net_revenue_customer
0,15,2021-03-08,2217.41
1,180,2018-07-28,836.74
2,180,2023-08-28,836.74
3,180,2023-08-28,836.74
4,185,2019-06-01,1395.52
5,243,2016-05-19,287.67
6,387,2018-12-21,517.32
7,387,2018-12-21,517.32
8,387,2018-12-21,517.32
9,387,2018-12-21,517.32


In [None]:
%%sql
-- Share of each order line in the total net revenue of its order date,
-- listed per day from the largest share to the smallest.
SELECT
  orderdate,
  orderkey * 10 + linenumber AS order_line_number,
  quantity * netprice * exchangerate AS net_revenue,
  (quantity * netprice * exchangerate) * 100 / SUM(quantity * netprice * exchangerate) OVER same_day AS percentage_of_daily_revenue
FROM sales
WINDOW same_day AS (PARTITION BY orderdate)
ORDER BY
  orderdate,
  percentage_of_daily_revenue DESC;

Unnamed: 0,orderdate,order_line_number,net_revenue,percentage_of_daily_revenue
0,2015-01-01,10043,2395.10,20.58
1,2015-01-01,10061,1552.32,13.34
2,2015-01-01,10022,1302.91,11.19
3,2015-01-01,10020,1146.75,9.85
4,2015-01-01,10050,975.16,8.38
...,...,...,...,...
199868,2024-04-20,33980141,12.00,0.01
199869,2024-04-20,33980074,9.29,0.01
199870,2024-04-20,33980080,8.35,0.01
199871,2024-04-20,33980142,8.34,0.01


In [None]:
%%sql
-- Net revenue by customer cohort (year of first purchase) and purchase year.
-- One row per customer: the year of the earliest order defines the cohort.
WITH yearly_cohort AS (
  SELECT
    customerkey,
    EXTRACT(YEAR FROM MIN(orderdate)) AS cohort_year
  FROM
    sales
  GROUP BY
    customerkey
)

SELECT
  y.cohort_year,
  EXTRACT (YEAR FROM orderdate) AS purchase_year,
  SUM(quantity * netprice * exchangerate) AS net_revenue
FROM
  sales s
LEFT JOIN
  yearly_cohort y
USING
  (customerkey)
GROUP BY
  y.cohort_year,
  purchase_year
-- LIMIT without ORDER BY would return an arbitrary ten rows, so the preview
-- is pinned to the earliest cohort and purchase years.
ORDER BY
  y.cohort_year,
  purchase_year
LIMIT 10;

Unnamed: 0,cohort_year,purchase_year,net_revenue
0,2015,2015,7370979.48
1,2015,2016,392623.48
2,2015,2017,479841.31
3,2015,2018,1069850.87
4,2015,2019,1235991.48
5,2015,2020,386489.6
6,2015,2021,872845.99
7,2015,2022,1569787.72
8,2015,2023,1157633.91
9,2015,2024,356186.62


In [None]:
%%sql
-- Number of distinct customers per cohort year and purchase year.
-- The CTE holds one row per customer and year in which that customer bought.
WITH yearly_cohort AS (
  SELECT DISTINCT
    customerkey,
    EXTRACT(YEAR FROM MIN(orderdate) OVER (PARTITION BY customerkey)) AS cohort_year,
    EXTRACT(YEAR FROM orderdate) AS purchase_year
  FROM sales
)
SELECT
  cohort_year,
  purchase_year,
  COUNT(customerkey) AS num_customers
FROM yearly_cohort
GROUP BY
  cohort_year,
  purchase_year
ORDER BY
  cohort_year,
  purchase_year;

Unnamed: 0,cohort_year,purchase_year,num_customers
0,2015,2015,2825
1,2015,2016,126
2,2015,2017,149
3,2015,2018,348
4,2015,2019,388
5,2015,2020,171
6,2015,2021,295
7,2015,2022,600
8,2015,2023,499
9,2015,2024,146


In [3]:
%%sql
-- Lifetime net revenue per customer next to the average lifetime revenue of
-- the cohort (year of first purchase) the customer belongs to. Ten sample rows.
WITH customer_ltv_cohort AS (
  SELECT
    customerkey,
    EXTRACT(YEAR FROM MIN(orderdate)) AS cohort_year,
    SUM(quantity * netprice * exchangerate) AS customer_ltv
  FROM sales
  GROUP BY customerkey
)
SELECT
  customerkey,
  cohort_year,
  customer_ltv,
  AVG(customer_ltv) OVER same_cohort AS avg_customer_ltv
FROM customer_ltv_cohort
WINDOW same_cohort AS (PARTITION BY cohort_year)
LIMIT 10;

Unnamed: 0,customerkey,cohort_year,customer_ltv,avg_customer_ltv
0,1926957,2015,1180.38,5271.59
1,547171,2015,2119.69,5271.59
2,909571,2015,2199.26,5271.59
3,1782482,2015,1038.3,5271.59
4,1546041,2015,3226.97,5271.59
5,894672,2015,2383.76,5271.59
6,994575,2015,3796.99,5271.59
7,319567,2015,5007.93,5271.59
8,732791,2015,439.73,5271.59
9,1488502,2015,756.42,5271.59


In [None]:
%%sql
-- Running order-line count and running average net revenue per customer,
-- accumulated in order-date order. Lines sharing an order date are peers in
-- the default window frame, so they all show the same running values.
SELECT
  customerkey,
  orderdate,
  quantity * netprice * exchangerate AS net_revenue,
  COUNT(*) OVER customer_history AS running_order_count,
  AVG(quantity * netprice * exchangerate) OVER customer_history AS running_avg_revenue
FROM sales
WINDOW customer_history AS (PARTITION BY customerkey ORDER BY orderdate);

Unnamed: 0,customerkey,orderdate,net_revenue,running_order_count,running_avg_revenue
0,15,2021-03-08,2217.41,1,2217.41
1,180,2018-07-28,525.31,1,525.31
2,180,2023-08-28,71.36,3,836.74
3,180,2023-08-28,1913.55,3,836.74
4,185,2019-06-01,1395.52,1,1395.52
...,...,...,...,...,...
199868,2099711,2016-08-13,2067.75,1,2067.75
199869,2099711,2017-08-14,3940.92,2,3004.34
199870,2099743,2022-03-17,375.57,2,234.81
199871,2099743,2022-03-17,94.05,2,234.81


In [None]:
%%sql
-- Compare ROW_NUMBER, RANK and DENSE_RANK on customers ranked by their number
-- of order lines, showing the ten customers with the most lines.
SELECT
  customerkey,
  COUNT(*) AS order_count,
  ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) AS row_num,
  RANK() OVER (ORDER BY COUNT(*) DESC) AS rank,
  DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) AS dense_rank
FROM
  sales
GROUP BY
  customerkey
-- The ORDER BY inside OVER only drives the ranking values. The result rows
-- need their own ORDER BY for LIMIT to keep the top ten.
ORDER BY
  row_num
LIMIT 10;

Unnamed: 0,customerkey,order_count,row_num,rank,dense_rank
0,1834524,31,1,1,1
1,1375597,30,2,2,2
2,249557,27,3,3,3
3,459519,26,4,4,4
4,1495941,26,5,4,4
5,1801215,26,6,4,4
6,1219056,25,7,7,5
7,759419,24,8,8,6
8,1427444,24,9,8,6
9,1876222,24,10,8,6


In [None]:
%%sql
-- 2023 monthly net revenue alongside the first, last and third month values
-- and the previous and next month values.
WITH monthly_revenue AS (
  SELECT
    TO_CHAR(orderdate, 'YYYY-MM') AS order_month,
    SUM(quantity * netprice * exchangerate) AS net_revenue
  FROM sales
  WHERE EXTRACT(YEAR FROM orderdate) = 2023
  GROUP BY order_month
  ORDER BY order_month
)
SELECT
  order_month,
  net_revenue,
  FIRST_VALUE(net_revenue) OVER by_month AS first_month_revenue,
  LAST_VALUE(net_revenue) OVER whole_year AS last_month_revenue,
  NTH_VALUE(net_revenue, 3) OVER whole_year AS third_month_revenue,
  LAG(net_revenue) OVER by_month AS previous_month_revenue,
  LEAD(net_revenue) OVER by_month AS next_month_revenue
FROM monthly_revenue
WINDOW
  -- Default frame, which ends at the current row and its peers.
  by_month AS (ORDER BY order_month),
  -- Explicit frame over all rows, needed for LAST_VALUE and NTH_VALUE to see every month.
  whole_year AS (ORDER BY order_month ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING);

Unnamed: 0,order_month,net_revenue,first_month_revenue,last_month_revenue,third_month_revenue,previous_month_revenue,next_month_revenue
0,2023-01,3664431.34,3664431.34,2928550.93,2244316.52,,4465204.57
1,2023-02,4465204.57,3664431.34,2928550.93,2244316.52,3664431.34,2244316.52
2,2023-03,2244316.52,3664431.34,2928550.93,2244316.52,4465204.57,1162796.16
3,2023-04,1162796.16,3664431.34,2928550.93,2244316.52,2244316.52,2943005.99
4,2023-05,2943005.99,3664431.34,2928550.93,2244316.52,1162796.16,2864500.03
5,2023-06,2864500.03,3664431.34,2928550.93,2244316.52,2943005.99,2337639.34
6,2023-07,2337639.34,3664431.34,2928550.93,2244316.52,2864500.03,2623919.79
7,2023-08,2623919.79,3664431.34,2928550.93,2244316.52,2337639.34,2622774.85
8,2023-09,2622774.85,3664431.34,2928550.93,2244316.52,2623919.79,2551322.61
9,2023-10,2551322.61,3664431.34,2928550.93,2244316.52,2622774.85,2700103.38


In [9]:
%%sql
-- Average customer lifetime net revenue per cohort year, with the percentage
-- change against the previous cohort year.
WITH yearly_cohort AS (
  -- One row per customer: cohort year and lifetime net revenue.
  SELECT
    customerkey,
    EXTRACT(YEAR FROM MIN(orderdate)) AS cohort_year,
    SUM(quantity * netprice * exchangerate) AS customer_ltv
  FROM sales
  GROUP BY customerkey
), cohort_summary AS (
  -- Attach the cohort average to every customer row.
  SELECT
    cohort_year,
    customerkey,
    customer_ltv,
    AVG(customer_ltv) OVER same_cohort AS avg_cohort_ltv
  FROM yearly_cohort
  WINDOW same_cohort AS (PARTITION BY cohort_year)
  ORDER BY cohort_year, customerkey
), cohort_final AS (
  -- Collapse to one row per cohort year.
  SELECT DISTINCT
    cohort_year,
    avg_cohort_ltv
  FROM cohort_summary
  ORDER BY cohort_year
), cohort_ltv_perc AS (
  -- Bring in the average of the preceding cohort year.
  SELECT
    cohort_year,
    avg_cohort_ltv,
    LAG(avg_cohort_ltv) OVER (ORDER BY cohort_year) AS prev_cohort_ltv
  FROM cohort_final
)
SELECT
  cohort_year,
  avg_cohort_ltv,
  prev_cohort_ltv,
  (avg_cohort_ltv - prev_cohort_ltv) * 100 / prev_cohort_ltv AS ltv_growth
FROM cohort_ltv_perc;

Unnamed: 0,cohort_year,avg_cohort_ltv,prev_cohort_ltv,ltv_growth
0,2015,5271.59,,
1,2016,5404.92,5271.59,2.53
2,2017,5403.08,5404.92,-0.03
3,2018,4896.64,5403.08,-9.37
4,2019,4731.95,4896.64,-3.36
5,2020,3933.32,4731.95,-16.88
6,2021,3943.33,3933.32,0.25
7,2022,3315.52,3943.33,-15.92
8,2023,2543.18,3315.52,-23.29
9,2024,2037.55,2543.18,-19.88


In [18]:
%%sql
-- 2023 monthly net revenue with a centered moving average over the previous,
-- current and next month. Edge months average over the months available.
WITH monthly_sales AS (
  SELECT
    TO_CHAR(orderdate, 'YYYY-MM') AS month,
    SUM(quantity * netprice * exchangerate) AS net_revenue
  FROM sales
  WHERE EXTRACT(YEAR FROM orderdate) = 2023
  GROUP BY month
  ORDER BY month
)
SELECT
  month,
  net_revenue,
  AVG(net_revenue) OVER three_month_span AS net_revenue_current
FROM monthly_sales
WINDOW three_month_span AS (ORDER BY month ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)

Unnamed: 0,month,net_revenue,net_revenue_current
0,2023-01,3664431.34,4064817.96
1,2023-02,4465204.57,3457984.14
2,2023-03,2244316.52,2624105.75
3,2023-04,1162796.16,2116706.22
4,2023-05,2943005.99,2323434.06
5,2023-06,2864500.03,2715048.45
6,2023-07,2337639.34,2608686.39
7,2023-08,2623919.79,2528111.33
8,2023-09,2622774.85,2599339.08
9,2023-10,2551322.61,2624733.61
