# Chapter 8: Grouping and Aggregates


In [4]:
import os

from dotenv import load_dotenv
from sqlalchemy import create_engine, URL, select, func
from sqlalchemy.orm import Session
import pandas as pd

from utils import print_sql_statement


load_dotenv()

url_object = URL.create(
    os.environ["DB_ENGINE"],
    username=os.environ["DB_USER"],
    password=os.environ["DB_PASSWD"],
    host=os.environ["DB_HOST"],
    database=os.environ["DB_NAME"],
)

engine = create_engine(url_object)

## Grouping by expressions

Count the number of employees hired by branch and year

In [23]:
from collections import Counter

from sqlalchemy import literal

from model import Branch, Employee


with Session(engine) as session:

    # Using a raw query
    df = pd.read_sql_query(
        """
        SELECT
            b.name branch
            , EXTRACT(YEAR FROM e.start_date) year
            , COUNT(*) how_many
        FROM employee
        e JOIN branch b ON e.assigned_branch_id = b.branch_id
        GROUP BY EXTRACT(YEAR FROM e.start_date), branch
        ORDER BY EXTRACT(YEAR FROM e.start_date) ASC
        """,
        con=session.connection()
    )

    # Using SQLAlchemy functions
    statement = (
        select(
            Branch.name.label("branch"),
            func.extract("YEAR", Employee.start_date).label("year"),
            func.count(literal("*")).label("how_many")
        )
        .select_from(Employee)
        .join(
            Branch,
            Branch.branch_id == Employee.assigned_branch_id
        )
        .group_by(
            func.extract("YEAR", Employee.start_date),
            Branch.name
        )
        .order_by(
            func.extract("YEAR", Employee.start_date),
            Branch.name
        )
    )
    print_sql_statement(statement)
    results = session.execute(statement).all()

    # Using the SQLalchemy ORM relationships with native Python
    new_results = sorted(
        [
            # Each row is name, year, count
            (*key, count) for (key, count) in
            # Count the group (name, year)
            Counter(
                (emp.employee_branch.name, emp.start_date.year)
                for emp in session.query(Employee)
            )
            .items()
        ],
        # Order by year then name
        key=lambda tup: tuple(reversed(tup[:2]))
    )

print(df)
print(results)
assert results == new_results

"""SELECT branch.name AS branch, EXTRACT(YEAR FROM employee.start_date) AS year, count(:param_1) AS how_many 
FROM employee JOIN branch ON branch.branch_id = employee.assigned_branch_id GROUP BY EXTRACT(YEAR FROM employee.start_date), branch.name ORDER BY EXTRACT(YEAR FROM employee.start_date), branch.name"""
           branch  year  how_many
0    Headquarters  2000         1
1   Quincy Branch  2000         1
2   Woburn Branch  2000         1
3    Headquarters  2001         1
4   So. NH Branch  2001         1
5    Headquarters  2002         4
6   Quincy Branch  2002         1
7   So. NH Branch  2002         2
8   Woburn Branch  2002         1
9    Headquarters  2003         1
10  Quincy Branch  2003         1
11  Woburn Branch  2003         1
12   Headquarters  2004         2
[('Headquarters', 2000, 1), ('Quincy Branch', 2000, 1), ('Woburn Branch', 2000, 1), ('Headquarters', 2001, 1), ('So. NH Branch', 2001, 1), ('Headquarters', 2002, 4), ('Quincy Branch', 2002, 1), ('So. NH Branch', 2