In [1]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Environment setup: when running in Google Colab, install PostgreSQL, create and
# restore the contoso_100k database, and swap ipython-sql for jupysql. Then (in any
# environment) load the SQL magic, connect, and configure result display.
# Every shell command below redirects stdout and stderr to /dev/null, so a failing
# step produces no message; remove the redirection when troubleshooting.
if 'google.colab' in sys.modules:
    # Refresh the apt package index
    !sudo apt-get update -qq > /dev/null 2>&1

    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output)
    # This password must match the one in the connection string further down.
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'contoso_100k' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump into the working directory
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Replace ipython-sql with jupysql (the uninstall runs first, then the install)
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
%load_ext sql

# Connect to the local PostgreSQL database as 'postgres' on port 5432
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
# NOTE(review): presumably so a ':' inside SQL text is not parsed as a variable
# placeholder — confirm against the JupySQL documentation.
%config SqlMagic.named_parameters = "disabled"

# Display pandas floats with two decimal places
pd.options.display.float_format = '{:.2f}'.format

### INTERVAL
#### Net Revenue Last 5 Years

In [2]:
%%sql

SELECT
    CURRENT_DATE,                    -- date the query runs, repeated on every row
    s.orderdate,
    p.categoryname,
    SUM(s.quantity * s.netprice * s.exchangerate) AS net_revenue
FROM sales AS s
LEFT OUTER JOIN product AS p         -- keep sales rows even without a matching product
    ON p.productkey = s.productkey
WHERE s.orderdate >= CURRENT_DATE - INTERVAL '5 years'   -- only the trailing five years
GROUP BY
    s.orderdate,
    p.categoryname
ORDER BY
    s.orderdate,
    p.categoryname

Unnamed: 0,current_date,orderdate,categoryname,net_revenue
0,2026-01-15,2021-01-15,Audio,668.11
1,2026-01-15,2021-01-15,Cameras and camcorders,1847.82
2,2026-01-15,2021-01-15,Computers,25203.64
3,2026-01-15,2021-01-15,Home Appliances,9561.31
4,2026-01-15,2021-01-15,"Music, Movies and Audio Books",253.07
...,...,...,...,...
8869,2026-01-15,2024-04-20,Computers,58353.68
8870,2026-01-15,2024-04-20,Games and Toys,1744.30
8871,2026-01-15,2024-04-20,Home Appliances,1562.04
8872,2026-01-15,2024-04-20,"Music, Movies and Audio Books",4949.43


### AGE() and EXTRACT()
#### Average Processing Time

In [11]:
%%sql

-- Per order year (orders from the trailing five years): average days between
-- order and delivery, and total net revenue truncated/rounded to an integer.
SELECT
  -- DATE_PART returns double precision; cast so the year reads 2021 rather than 2021.0
  CAST(DATE_PART('year', orderdate) AS INTEGER) AS order_year,
  -- NOTE: EXTRACT(DAYS ...) reads only the days field of the interval produced by
  -- AGE(); any whole months or years in that interval are not counted. This is only
  -- accurate while every delivery happens less than a month after its order.
  ROUND(AVG(EXTRACT (DAYS FROM AGE(deliverydate, orderdate))), 2) AS avg_processing_time,
  CAST(SUM(quantity * netprice * exchangerate) AS INTEGER) AS net_revenue
FROM
  sales
WHERE
  orderdate >= CURRENT_DATE - INTERVAL '5 years'
GROUP BY
  order_year
ORDER BY
  order_year

Unnamed: 0,order_year,avg_processing_time,net_revenue
0,2021.0,1.36,21109906
1,2022.0,1.62,44864557
2,2023.0,1.75,33108566
3,2024.0,1.67,8396527
