In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sqlite3

conn = sqlite3.connect('example.db')


def rsq(query: str) -> pd.DataFrame:
    return pd.read_sql_query(query, conn)


In [2]:
rsq("""SELECT OrderID, TerritoryName,
       -- Total price using the partition
       SUM(OrderPrice)
       -- Create the window and partitions
       OVER(PARTITION BY TerritoryName) AS TotalPrice
FROM Orders""")

Unnamed: 0,Orderid,territoryName,TotalPrice
0,43706,Australia,1469.0
1,43722,Australia,1469.0
2,43729,Australia,1469.0
3,47622,Australia,1469.0
4,47722,Australia,1469.0
...,...,...,...
194,43697,United Kingdom,645.0
195,47688,United Kingdom,645.0
196,48629,United Kingdom,645.0
197,50374,United Kingdom,645.0


In [3]:
rsq("""SELECT OrderID, TerritoryName,
       -- Number of rows per partition
       COUNT(*)
       -- Create the window and partitions
       OVER(PARTITION BY TerritoryName) AS TotalOrders
FROM Orders""")

Unnamed: 0,Orderid,territoryName,TotalOrders
0,43706,Australia,13
1,43722,Australia,13
2,43729,Australia,13
3,47622,Australia,13
4,47722,Australia,13
...,...,...,...
194,43697,United Kingdom,6
195,47688,United Kingdom,6
196,48629,United Kingdom,6
197,50374,United Kingdom,6


In [4]:
rsq("""SELECT TerritoryName, OrderDate,
       -- Select the first value in each partition
       FIRST_VALUE(OrderDate)
       -- Create the partitions and arrange the rows
       OVER(PARTITION BY TerritoryName ORDER BY OrderDate) AS FirstOrder
FROM Orders""")

Unnamed: 0,territoryName,OrderDate,FirstOrder
0,Australia,2015-02-23T09:00:00Z,2015-02-23T09:00:00Z
1,Australia,2015-02-23T11:00:00Z,2015-02-23T09:00:00Z
2,Australia,2015-02-23T12:00:00Z,2015-02-23T09:00:00Z
3,Australia,2015-04-23T02:00:00Z,2015-02-23T09:00:00Z
4,Australia,2015-04-24T02:00:00Z,2015-02-23T09:00:00Z
...,...,...,...
194,United Kingdom,2015-02-23T07:00:00Z,2015-02-19T02:00:00Z
195,United Kingdom,2015-04-23T12:00:00Z,2015-02-19T02:00:00Z
196,United Kingdom,2015-05-07T06:00:00Z,2015-02-19T02:00:00Z
197,United Kingdom,2015-06-03T08:00:00Z,2015-02-19T02:00:00Z


In [5]:
rsq("""SELECT TerritoryName, OrderDate,
       -- Specify the previous OrderDate in the window
       LAG(OrderDate)
       -- Over the window, partition by territory & order by order date
       OVER(PARTITION BY TerritoryName ORDER BY OrderDate) AS PreviousOrder,
       -- Specify the next OrderDate in the window
       LEAD(OrderDate)
       -- Create the partitions and arrange the rows
       OVER(PARTITION BY TerritoryName ORDER BY OrderDate) AS NextOrder
FROM Orders""")

Unnamed: 0,territoryName,OrderDate,PreviousOrder,NextOrder
0,Australia,2015-02-23T09:00:00Z,,2015-02-23T11:00:00Z
1,Australia,2015-02-23T11:00:00Z,2015-02-23T09:00:00Z,2015-02-23T12:00:00Z
2,Australia,2015-02-23T12:00:00Z,2015-02-23T11:00:00Z,2015-04-23T02:00:00Z
3,Australia,2015-04-23T02:00:00Z,2015-02-23T12:00:00Z,2015-04-24T02:00:00Z
4,Australia,2015-04-24T02:00:00Z,2015-04-23T02:00:00Z,2015-05-06T03:00:00Z
...,...,...,...,...
194,United Kingdom,2015-02-23T07:00:00Z,2015-02-19T02:00:00Z,2015-04-23T12:00:00Z
195,United Kingdom,2015-04-23T12:00:00Z,2015-02-23T07:00:00Z,2015-05-07T06:00:00Z
196,United Kingdom,2015-05-07T06:00:00Z,2015-04-23T12:00:00Z,2015-06-03T08:00:00Z
197,United Kingdom,2015-06-03T08:00:00Z,2015-05-07T06:00:00Z,2015-07-21T02:00:00Z


In [6]:
rsq("""SELECT TerritoryName, OrderDate,
       -- Create a running total
       SUM(OrderPrice)
       -- Create the partitions and arrange the rows
       OVER(PARTITION BY TerritoryName ORDER BY OrderDate) AS TerritoryTotal	  
FROM Orders""")

Unnamed: 0,territoryName,OrderDate,TerritoryTotal
0,Australia,2015-02-23T09:00:00Z,48.0
1,Australia,2015-02-23T11:00:00Z,83.0
2,Australia,2015-02-23T12:00:00Z,313.0
3,Australia,2015-04-23T02:00:00Z,543.0
4,Australia,2015-04-24T02:00:00Z,568.0
...,...,...,...
194,United Kingdom,2015-02-23T07:00:00Z,377.0
195,United Kingdom,2015-04-23T12:00:00Z,464.0
196,United Kingdom,2015-05-07T06:00:00Z,563.0
197,United Kingdom,2015-06-03T08:00:00Z,595.0


In [7]:
rsq("""SELECT TerritoryName, OrderDate,
       -- Assign a row number
       ROW_NUMBER()
       -- Create the partitions and arrange the rows
       OVER(PARTITION BY TerritoryName ORDER BY OrderDate) AS OrderCount
FROM Orders""")

Unnamed: 0,territoryName,OrderDate,OrderCount
0,Australia,2015-02-23T09:00:00Z,1
1,Australia,2015-02-23T11:00:00Z,2
2,Australia,2015-02-23T12:00:00Z,3
3,Australia,2015-04-23T02:00:00Z,4
4,Australia,2015-04-24T02:00:00Z,5
...,...,...,...
194,United Kingdom,2015-02-23T07:00:00Z,2
195,United Kingdom,2015-04-23T12:00:00Z,3
196,United Kingdom,2015-05-07T06:00:00Z,4
197,United Kingdom,2015-06-03T08:00:00Z,5


In [None]:
rsq("""SELECT OrderDate, TerritoryName,
       -- Calculate the standard deviation
	   STDEV(OrderPrice)
       OVER(PARTITION BY TerritoryName ORDER BY OrderDate) AS StdDevPrice	  
FROM Orders""")

In [14]:
rsq("""SELECT OrderDate, TerritoryName,
       -- Calculate the standard deviation
	   STDEV(OrderPrice)
       OVER(PARTITION BY TerritoryName ORDER BY OrderDate) AS StdDevPrice	  
FROM Orders""")

DatabaseError: Execution failed on sql 'SELECT OrderDate, TerritoryName,
       -- Calculate the standard deviation
	   STDEV(OrderPrice)
       OVER(PARTITION BY TerritoryName ORDER BY OrderDate) AS StdDevPrice	  
FROM Orders': no such function: STDEV

In [15]:
rsq("""-- Create a CTE Called ModePrice which contains two columns
WITH ModePrice (OrderPrice, UnitPriceFrequency)
AS
(
	SELECT OrderPrice,
	ROW_NUMBER()
	OVER(PARTITION BY OrderPrice ORDER BY OrderPrice) AS UnitPriceFrequency
	FROM Orders
)

-- Select everything from the CTE
SELECT *
FROM ModePrice""")

Unnamed: 0,OrderPrice,UnitPriceFrequency
0,3.5,1
1,3.5,2
2,3.7,1
3,3.7,2
4,4.1,1
...,...,...
194,285.0,2
195,285.0,3
196,345.0,1
197,345.0,2


In [16]:
rsq("""-- CTE from the previous exercise
WITH ModePrice (OrderPrice, UnitPriceFrequency)
AS
(
	SELECT OrderPrice,
	ROW_NUMBER()
    OVER (PARTITION BY OrderPrice ORDER BY OrderPrice) AS UnitPriceFrequency
	FROM Orders
)

-- Select the order price from the CTE
SELECT OrderPrice AS ModeOrderPrice
FROM ModePrice
-- Select the maximum UnitPriceFrequency from the CTE
WHERE UnitPriceFrequency IN (SELECT MAX(UnitPriceFrequency) From ModePrice)""")

Unnamed: 0,ModeOrderPrice
0,32.0
