# Chapter 8: Grouping and Aggregates


In [1]:
import os

from dotenv import load_dotenv
from sqlalchemy import create_engine, URL, select, func
from sqlalchemy.orm import Session
import pandas as pd

from utils import print_sql_statement


load_dotenv()

url_object = URL.create(
    os.environ["DB_ENGINE"],
    username=os.environ["DB_USER"],
    password=os.environ["DB_PASSWD"],
    host=os.environ["DB_HOST"],
    database=os.environ["DB_NAME"],
)

engine = create_engine(url_object)

## Grouping by expressions

Count the number of employees hired by branch and year

In [2]:
from collections import Counter

from sqlalchemy import literal

from model import Branch, Employee


with Session(engine) as session:

    # Using a raw query
    df = pd.read_sql_query(
        """
        SELECT
            b.name branch
            , EXTRACT(YEAR FROM e.start_date) year
            , COUNT(*) how_many
        FROM employee
        e JOIN branch b ON e.assigned_branch_id = b.branch_id
        GROUP BY EXTRACT(YEAR FROM e.start_date), branch
        ORDER BY EXTRACT(YEAR FROM e.start_date) ASC
        """,
        con=session.connection()
    )

    # Using SQLAlchemy functions
    statement = (
        select(
            Branch.name.label("branch"),
            func.extract("YEAR", Employee.start_date).label("year"),
            func.count(literal("*")).label("how_many")
        )
        .select_from(Employee)
        .join(
            Branch,
            Branch.branch_id == Employee.assigned_branch_id
        )
        .group_by(
            func.extract("YEAR", Employee.start_date),
            Branch.name
        )
        .order_by(
            func.extract("YEAR", Employee.start_date),
            Branch.name
        )
    )
    print_sql_statement(statement)
    results = session.execute(statement).all()

    # Using the SQLalchemy ORM relationships with native Python
    new_results = sorted(
        [
            # Each row is name, year, count
            (*key, count) for (key, count) in
            # Count the group (name, year)
            Counter(
                (emp.employee_branch.name, emp.start_date.year)
                for emp in session.query(Employee)
            )
            .items()
        ],
        # Order by year then name
        key=lambda tup: tuple(reversed(tup[:2]))
    )

print(df)
print(results)
assert results == new_results

"""SELECT branch.name AS branch, EXTRACT(YEAR FROM employee.start_date) AS year, count(:param_1) AS how_many 
FROM employee JOIN branch ON branch.branch_id = employee.assigned_branch_id GROUP BY EXTRACT(YEAR FROM employee.start_date), branch.name ORDER BY EXTRACT(YEAR FROM employee.start_date), branch.name"""
           branch  year  how_many
0    Headquarters  2000         1
1   Quincy Branch  2000         1
2   Woburn Branch  2000         1
3    Headquarters  2001         1
4   So. NH Branch  2001         1
5    Headquarters  2002         4
6   Quincy Branch  2002         1
7   So. NH Branch  2002         2
8   Woburn Branch  2002         1
9    Headquarters  2003         1
10  Quincy Branch  2003         1
11  Woburn Branch  2003         1
12   Headquarters  2004         2
[('Headquarters', 2000, 1), ('Quincy Branch', 2000, 1), ('Woburn Branch', 2000, 1), ('Headquarters', 2001, 1), ('So. NH Branch', 2001, 1), ('Headquarters', 2002, 4), ('Quincy Branch', 2002, 1), ('So. NH Branch', 2

# Rollup

Find the total balance of every product per opening branch and the sum total of the product itself.

Using the rollup functionality, a column can be aggregated per grouping.

In [20]:
from typing import Final

from model import Account, Branch


with Session(engine) as session:

    df = pd.read_sql_query(
        """
        SELECT
            a.product_cd
            , b.name
            , SUM(a.avail_balance) tot_balance
        FROM
            account
        a JOIN branch b ON a.open_branch_id = b.branch_id
        GROUP BY a.product_cd, b.name WITH ROLLUP;
        """,
        con=session.connection()
    )

    statement = (
        select(
            Account.product_cd,
            Branch.name,
            func.sum(Account.avail_balance).label("tot_balance")
        )
        .select_from(Account)
        .join(Branch, Account.open_branch_id == Branch.branch_id)
        .group_by(
            func.rollup(
                Account.product_cd,
                Branch.name
            )
        )
    )
    print_sql_statement(statement)
    results = session.execute(statement).all()

    # Using the SQLalchemy ORM relationships with native Python
    key_all_products_rollup: Final[tuple] = (None, None)
    total_balance_data: dict[tuple, float] = {
        key_all_products_rollup: 0.
    }
    open_branch: Branch
    for acct in session.query(Account):
        open_branch = acct.account_open_branch
        product_cd = acct.product_cd
        key = (product_cd, open_branch.name)
        key_rollup = (product_cd, None)
        if key not in total_balance_data:
            total_balance_data[key] = 0.
        if key_rollup not in total_balance_data:
            total_balance_data[key_rollup] = 0.
        total_balance_data[key] += acct.avail_balance
        total_balance_data[key_rollup] += acct.avail_balance
        total_balance_data[key_all_products_rollup] += acct.avail_balance
    new_results = set(
        (*key, round(value, 2))
        for (key, value) in total_balance_data.items()
    )

print(df)
print(results)
assert set(results) == new_results

"""SELECT account.product_cd, branch.name, sum(account.avail_balance) AS tot_balance 
FROM account JOIN branch ON account.open_branch_id = branch.branch_id GROUP BY ROLLUP(account.product_cd, branch.name)"""
   product_cd           name  tot_balance
0         BUS  So. NH Branch         0.00
1         BUS  Woburn Branch      9345.55
2         BUS           None      9345.55
3          CD   Headquarters     11500.00
4          CD  Woburn Branch      8000.00
5          CD           None     19500.00
6         CHK   Headquarters       782.16
7         CHK  Quincy Branch      1057.75
8         CHK  So. NH Branch     67852.33
9         CHK  Woburn Branch      3315.77
10        CHK           None     73008.01
11         MM   Headquarters     14832.64
12         MM  Quincy Branch      2212.50
13         MM           None     17045.14
14        SAV   Headquarters       767.77
15        SAV  So. NH Branch       387.99
16        SAV  Woburn Branch       700.00
17        SAV           None      18