In [1]:
import sqlite3
import pandas as pd

In [2]:
# Show every column when a DataFrame is rendered (None lifts the column cap).
# The row cap is deliberately left at the pandas default.
pd.set_option('display.max_columns', None)

In [3]:
# Connection to the SQLite database file; later cells pass it to their queries.
conn = sqlite3.connect(database='parch-and-posey.db')

In [4]:
# List every table recorded in SQLite's schema catalogue (sqlite_master).
# Leaves `columns` (the result's column names) and `data` (the fetched rows)
# available for the next cell to display.
cursor = conn.cursor()
try:
    # 'table' is single-quoted: in SQL, double quotes denote identifiers, and
    # SQLite only treats "table" as a string through a legacy fallback that
    # some builds disable.
    cursor.execute('''
    select * from sqlite_master where type = 'table';
    ''')
    # The first item of each cursor.description entry is the column name.
    columns = [col[0] for col in cursor.description]
    data = cursor.fetchall()
finally:
    # Release the cursor even if the query or the fetch raises.
    cursor.close()

In [5]:
# Display the catalogue rows as a table, labelled with the cursor's column names.
pd.DataFrame(columns=columns, data=data)

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,web_events,web_events,2,"CREATE TABLE web_events (\tid integer,\taccoun..."
1,table,sales_reps,sales_reps,7,"CREATE TABLE sales_reps (\tid integer,\tname b..."
2,table,region,region,222,"CREATE TABLE region (\tid integer,\tname bpchar)"
3,table,orders,orders,223,"CREATE TABLE orders (\tid integer,\taccount_id..."
4,table,accounts,accounts,583,"CREATE TABLE accounts (\tid integer,\tname bpc..."


PostgreSQL’s documentation does an excellent job of introducing the concept of window functions:
https://www.postgresql.org/docs/9.1/tutorial-window.html
> A window function performs a calculation across a set of table rows that are somehow related to the current row. This is comparable to the type of calculation that can be done with an aggregate function. But unlike regular aggregate functions, use of a window function does not cause rows to become grouped into a single output row — the rows retain their separate identities. Behind the scenes, the window function is able to access more than just the current row of the query result.

In introducing window functions, we have also introduced two clauses that you may not be familiar with: OVER and PARTITION BY. These are key to window functions. Not every window function uses PARTITION BY; inside OVER we can also use ORDER BY, or leave the parentheses empty, depending on the query we want to run. You will practice using these clauses in the upcoming quizzes. If you want more details right now, this resource from Pinal Dave https://blog.sqlauthority.com/2015/11/04/sql-server-what-is-the-over-clause-notes-from-the-field-101/ is helpful.

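Note: Window functions and standard aggregations are not interchangeable. More specifically, you can’t include window functions in a GROUP BY clause (nor in WHERE or HAVING): they are permitted only in the SELECT list and the ORDER BY clause, because they are evaluated after grouping and aggregation have taken place.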

In [22]:
# Cumulative standard_qty over all orders: the window has no PARTITION BY,
# so the sum accumulates across the whole table in occurred_at order.
running_total_sql = '''
SELECT occurred_at,standard_qty,
SUM(standard_qty) OVER (ORDER BY occurred_at) running_total
FROM orders;
'''
pd.read_sql_query(running_total_sql, conn)

Unnamed: 0,occurred_at,standard_qty,running_total
0,2013-12-04 04:22:44,0,0
1,2013-12-04 04:45:54,490,490
2,2013-12-04 04:53:25,528,1018
3,2013-12-05 20:29:16,0,1018
4,2013-12-05 20:33:56,492,1510
...,...,...,...
6907,2017-01-01 21:04:25,0,1937478
6908,2017-01-01 22:17:26,497,1937975
6909,2017-01-01 22:29:50,38,1938013
6910,2017-01-01 23:50:16,291,1938304


In [23]:
# Running total that restarts for each '%Y-%m' month bucket: PARTITION BY
# splits the rows by month and ORDER BY accumulates within each partition.
monthly_running_sql = '''
SELECT strftime('%Y-%m', occurred_at) month, standard_qty,
SUM(standard_qty) OVER (PARTITION BY strftime('%Y-%m', occurred_at) ORDER BY occurred_at) running_total
FROM orders;
'''
# Only the first 102 rows are displayed.
pd.read_sql_query(monthly_running_sql, conn).head(102)

Unnamed: 0,month,standard_qty,running_total
0,2013-12,0,0
1,2013-12,490,490
2,2013-12,528,1018
3,2013-12,0,1018
4,2013-12,492,1510
...,...,...,...
97,2013-12,119,26069
98,2013-12,485,26554
99,2014-01,515,515
100,2014-01,0,515


In [25]:
# With no ORDER BY inside OVER, the window covers the entire partition, so
# every row carries its month's plain sum rather than an accumulating total.

monthly_total_sql = '''
SELECT strftime('%Y-%m', occurred_at) month, standard_qty,
SUM(standard_qty) OVER (PARTITION BY strftime('%Y-%m', occurred_at)) running_total
FROM orders;
'''
# Only the first 102 rows are displayed.
pd.read_sql_query(monthly_total_sql, conn).head(102)

Unnamed: 0,month,standard_qty,running_total
0,2013-12,339,26554
1,2013-12,86,26554
2,2013-12,492,26554
3,2013-12,85,26554
4,2013-12,299,26554
...,...,...,...
97,2013-12,0,26554
98,2013-12,43,26554
99,2014-01,511,26744
100,2014-01,126,26744


In [32]:
# Collapse the per-row window sums to one row per month: the inner query tags
# every order with its month's total (no ORDER BY in OVER, so it is a plain
# per-partition sum) and DISTINCT removes the repeats.
# The outer ORDER BY makes the row order deterministic; without it SQL leaves
# the order of the result unspecified.

pd.read_sql_query(sql='''
SELECT DISTINCT month, running_total
FROM(SELECT strftime('%Y-%m', occurred_at) month,
SUM(standard_qty) OVER (PARTITION BY strftime('%Y-%m', occurred_at)) running_total
FROM orders) t1
ORDER BY month;
''', con=conn)

Unnamed: 0,month,running_total
0,2013-12,26554
1,2014-01,26744
2,2014-02,28459
3,2014-03,27165
4,2014-04,26390
5,2014-05,28772
6,2014-06,28240
7,2014-07,29875
8,2014-08,30142
9,2014-09,28562


In [30]:
# Same monthly totals computed with a regular aggregate: GROUP BY collapses
# each month to a single row.
# ORDER BY 1 pins the row order to the month column; GROUP BY alone does not
# guarantee any particular order.
pd.read_sql_query(sql='''
SELECT strftime('%Y-%m', occurred_at) month, SUM(standard_qty)
FROM orders
GROUP BY 1
ORDER BY 1;
''', con=conn)

Unnamed: 0,month,SUM(standard_qty)
0,2013-12,26554
1,2014-01,26744
2,2014-02,28459
3,2014-03,27165
4,2014-04,26390
5,2014-05,28772
6,2014-06,28240
7,2014-07,29875
8,2014-08,30142
9,2014-09,28562
