# Chapter 5: Querying Multiple Tables

As seen in the previous chapters, joining tables is often or primarily required for information gathering

In [1]:
import os

from dotenv import load_dotenv
from sqlalchemy import create_engine, URL, select, func
from sqlalchemy.orm import Session
import pandas as pd

from utils import print_sql_statement


load_dotenv()

url_object = URL.create(
    os.environ["DB_ENGINE"],
    username=os.environ["DB_USER"],
    password=os.environ["DB_PASSWD"],
    host=os.environ["DB_HOST"],
    database=os.environ["DB_NAME"],
)

engine = create_engine(url_object)

# Joining Three Tables

Find all the accounts opened by 'experienced' (hired prior to 2003) tellers currently assigned to the 'Woburn Branch'

This is a little complicated, but the logic is
    * Get all accounts
    * Join all accounts with all the 'experienced' tellers
    * Join all experienced tellers at the Woburn branch

In [2]:
from datetime import date

from sqlalchemy import and_
from sqlalchemy.orm import aliased

from model import Account, Branch, Employee


with Session(engine) as session:
    df = pd.read_sql_query(
        """
        SELECT
            a.account_id
            , a.cust_id
            , a.open_date
            , a.product_cd
        FROM
            account a
        -- ASSOICATE ALL EXPERIENCED TELLERS WITH ACCOUNTS
        JOIN (
            SELECT
                emp_id
                , assigned_branch_id
            FROM
                employee
            WHERE
                employee.start_date <= '2003-01-01'
                AND
                employee.title LIKE '%Teller%'
        ) e ON e.emp_id = a.open_emp_id
        -- ASSOCIATED ALL EMPLOYEES AT THE WOBURN BRANCH
        JOIN (
            SELECT
                branch_id
            FROM
                branch
            WHERE
                branch.name = 'Woburn Branch'
        ) b ON b.branch_id = e.assigned_branch_id
        ;
        """,
        con=session.connection()
    )

    # Get the employee sub-query
    employee_subquery = (
        select(
            Employee.emp_id,
            Employee.assigned_branch_id
        )
        .select_from(Employee)
        .where(
            and_(
                Employee.title.like("%Teller%"),
                Employee.start_date <= date(2003, 1, 1)
            )
        )
        .subquery("e")
    )
    superior_employee_alias: Employee = aliased(Employee, employee_subquery)

    # Get the branch sub-query
    branch_subquery = (
        select(
            Branch.branch_id
        )
        .select_from(Branch)
        .where(Branch.name == "Woburn Branch")
        .subquery("b")
    )
    branch_alias: Branch = aliased(Branch, branch_subquery)

    statement = (
        # Get all accounts
        select(
            Account.account_id,
            Account.cust_id,
            Account.open_date,
            Account.product_cd
        )
        .select_from(Account)
        # Associate all accounts with the experienced teller
        .join(
            employee_subquery,
            Account.open_emp_id == superior_employee_alias.emp_id
        )
        # Associated all the tellers at the Woburn Branch
        .join(
            branch_subquery,
            branch_alias.branch_id == superior_employee_alias.assigned_branch_id
        )
    )
    print_sql_statement(statement)
    results = session.execute(statement).all()

print(df)
print(results)

"""SELECT account.account_id, account.cust_id, account.open_date, account.product_cd 
FROM account JOIN (SELECT employee.emp_id AS emp_id, employee.assigned_branch_id AS assigned_branch_id 
FROM employee 
WHERE employee.title LIKE :title_1 AND employee.start_date <= :start_date_1) AS e ON account.open_emp_id = e.emp_id JOIN (SELECT branch.branch_id AS branch_id 
FROM branch 
WHERE branch.name = :name_1) AS b ON b.branch_id = e.assigned_branch_id"""
   account_id  cust_id   open_date product_cd
0           1        1  2000-01-15        CHK
1           2        1  2000-01-15        SAV
2           3        1  2004-06-30         CD
3           4        2  2001-03-12        CHK
4           5        2  2001-03-12        SAV
5          17        7  2004-01-12         CD
6          27       11  2004-03-22        BUS
[(1, 1, datetime.date(2000, 1, 15), 'CHK'), (2, 1, datetime.date(2000, 1, 15), 'SAV'), (3, 1, datetime.date(2004, 6, 30), 'CD'), (4, 2, datetime.date(2001, 3, 12), 'CHK'), (5, 2, 

# Repeated Table Joins

List all checking (product_cd == 'CHK') account IDs with the account opening branch, opening employee, and the opening employee's currently assigned branch.

The branch ID is a foreign key in both employee and account. But we need to use the branch table in both. So we need to use two different aliases for the branch table.

In [42]:

with Session(engine) as session:
    df = pd.read_sql_query(
        """
        SELECT
            a.account_id account_id
            , e.emp_id emp_id
            , b_a.name open_branch
            , b_e.name emp_branch
        FROM
            account
        a JOIN (
            SELECT
                branch_id
                , name
            FROM branch
        )
        b_a ON b_a.branch_id = a.open_branch_id
        JOIN (
            SELECT
                emp_id
                , assigned_branch_id
            FROM
                employee
        )
        e ON e.emp_id = a.open_emp_id
        JOIN (
            SELECT
                branch_id
                , name
            FROM
                branch
        )
        b_e ON b_e.branch_id = e.assigned_branch_id
        WHERE
            a.product_cd = 'CHK'
        ORDER BY e.emp_id
        ;
        """,
        con=session.connection()
    )

    employee_subquery = (
        select(
            Employee.emp_id,
            Employee.assigned_branch_id
        )
        .select_from(Employee)
    ).subquery("e")
    superior_employee_alias: Employee = aliased(Employee, employee_subquery)

    branch_for_account_subquery = (
        select(
            Branch.branch_id,
            Branch.name
        )
        .select_from(Branch)
    ).subquery("b_a")
    branch_for_account_alias: Branch = aliased(Branch, branch_for_account_subquery)

    branch_for_employee_subquery = (
        select(
            Branch.branch_id,
            Branch.name
        )
        .select_from(Branch)
    ).subquery("b_e")
    branch_for_employee_alias: Branch = aliased(Branch, branch_for_employee_subquery)

    statement = (
        select(
            Account.account_id.label("account_id"),
            superior_employee_alias.emp_id.label("emp_id"),
            branch_for_account_alias.name.label("open_branch"),
            branch_for_employee_alias.name.label("emp_branch")
        )
        .select_from(Account)
        .join(
            employee_subquery,
            superior_employee_alias.emp_id == Account.open_emp_id
        )
        .join(
            branch_for_account_subquery,
            branch_for_account_alias.branch_id == Account.open_branch_id
        )
        .join(
            branch_for_employee_subquery,
            branch_for_employee_alias.branch_id == superior_employee_alias.assigned_branch_id
        )
        .where(Account.product_cd == "CHK")
        .order_by(superior_employee_alias.emp_id)
    )
    print_sql_statement(statement)
    results = session.execute(statement).all()

print(df)
print(results)

"""SELECT account.account_id AS account_id, e.emp_id AS emp_id, b_a.name AS open_branch, b_e.name AS emp_branch 
FROM account JOIN (SELECT employee.emp_id AS emp_id, employee.assigned_branch_id AS assigned_branch_id 
FROM employee) AS e ON e.emp_id = account.open_emp_id JOIN (SELECT branch.branch_id AS branch_id, branch.name AS name 
FROM branch) AS b_a ON b_a.branch_id = account.open_branch_id JOIN (SELECT branch.branch_id AS branch_id, branch.name AS name 
FROM branch) AS b_e ON b_e.branch_id = e.assigned_branch_id 
WHERE account.product_cd = :product_cd_1 ORDER BY e.emp_id"""
   account_id  emp_id    open_branch     emp_branch
0          10       1   Headquarters   Headquarters
1          14       1   Headquarters   Headquarters
2          21       1   Headquarters   Headquarters
3           1      10  Woburn Branch  Woburn Branch
4           4      10  Woburn Branch  Woburn Branch
5           7      13  Quincy Branch  Quincy Branch
6          13      16  So. NH Branch  So. NH Branc

# Self Joins

Find all employees and their associated superiors first and last names

In order to do this, we need to join the employee table to itself

In [45]:

with Session(engine) as session:
    df = pd.read_sql_query(
        """
        SELECT
            e.fname emp_fname
            , e.lname emp_lname
            , e_s.fname sup_fname
            , e_s.lname sup_lname
        FROM
            employee
        e JOIN (
            SELECT
                emp_id
                , fname
                , lname
            FROM
                employee
        )
        e_s ON e.superior_emp_id = e_s.emp_id
        ORDER BY e.lname, e.fname;
        """,
        con=session.connection()
    )

    superior_employee_subquery = (
        select(
            Employee.emp_id,
            Employee.fname,
            Employee.lname
        )
        .select_from(Employee)
    ).subquery("e_s")
    superior_employee_alias: Employee = aliased(Employee, superior_employee_subquery)

    statement = (
        select(
            Employee.fname,
            Employee.lname,
            superior_employee_alias.fname,
            superior_employee_alias.lname
        )
        .select_from(Employee)
        .join(
            superior_employee_subquery,
            superior_employee_alias.emp_id == Employee.superior_emp_id
        )
        .order_by(Employee.lname, Employee.fname)
    )
    print_sql_statement(statement)
    results = session.execute(statement).all()

print(df)
print(results)

"""SELECT employee.fname, employee.lname, e_s.fname AS fname_1, e_s.lname AS lname_1 
FROM employee JOIN (SELECT employee.emp_id AS emp_id, employee.fname AS fname, employee.lname AS lname 
FROM employee) AS e_s ON e_s.emp_id = employee.superior_emp_id ORDER BY employee.lname, employee.fname"""
   emp_fname  emp_lname sup_fname  sup_lname
0      Susan     Barker   Michael      Smith
1       John      Blake     Susan  Hawthorne
2      Helen    Fleming     Susan  Hawthorne
3       Beth     Fowler   Theresa    Markham
4       John    Gooding     Susan  Hawthorne
5       Jane   Grossman     Helen    Fleming
6      Susan  Hawthorne    Robert      Tyler
7   Samantha    Jameson     Paula    Roberts
8    Theresa    Markham     Susan  Hawthorne
9      Cindy      Mason      John      Blake
10     Sarah     Parker     Helen    Fleming
11     Frank    Portman      John      Blake
12     Paula    Roberts     Susan  Hawthorne
13     Chris     Tucker     Helen    Fleming
14      Rick     Tulman   The

# Non-Equi-Joins

Find all possible pairs of 'Teller' title employees where the pair (e1, e2) is the same as (e2, e1) for all employees e.

To perform this query, we are going to join on non-foreign keys. Also shown is that a JOIN condition can have conjunctions.

In [56]:
from sqlalchemy import and_


with Session(engine) as session:
    df = pd.read_sql_query(
        """
        SELECT
            e1.fname e1_fname
            , e1.lname e1_lname
            , e2.fname e2_fname
            , e2.lname e2_lname
        FROM
            employee e1
        JOIN (
            SELECT
                emp_id
                , title
                , fname
                , lname
            FROM
                employee
        )
        e2 ON (
            e1.emp_id < e2.emp_id
            AND e1.title = 'Teller'
            AND e2.title = 'Teller'
        )
        ;
        """,
        con=session.connection()
    )

    teller_employee_subquery = (
        select(
            Employee.emp_id,
            Employee.fname,
            Employee.lname,
            Employee.title
        )
        .select_from(Employee)
    ).subquery("e2")
    teller_employee_alias: Employee = aliased(
        Employee,
        teller_employee_subquery
    )

    statement = (
        select(
            Employee.fname,
            Employee.lname,
            teller_employee_alias.fname,
            teller_employee_alias.lname
        )
        .select_from(Employee)
        .join(
            teller_employee_subquery,
            and_(
                Employee.emp_id < teller_employee_alias.emp_id,
                Employee.title == "Teller",
                teller_employee_alias.title == "Teller"
            )
        )
    )
    print_sql_statement(statement)
    results = session.execute(statement).all()

print(df)
print(results)

"""SELECT employee.fname, employee.lname, e2.fname AS fname_1, e2.lname AS lname_1 
FROM employee JOIN (SELECT employee.emp_id AS emp_id, employee.fname AS fname, employee.lname AS lname, employee.title AS title 
FROM employee) AS e2 ON employee.emp_id < e2.emp_id AND employee.title = :title_1 AND e2.title = :title_2"""
    e1_fname  e1_lname  e2_fname  e2_lname
0      Chris    Tucker     Sarah    Parker
1      Chris    Tucker      Jane  Grossman
2      Chris    Tucker    Thomas   Ziegler
3      Chris    Tucker  Samantha   Jameson
4      Chris    Tucker     Cindy     Mason
5      Chris    Tucker     Frank   Portman
6      Chris    Tucker      Beth    Fowler
7      Chris    Tucker      Rick    Tulman
8      Sarah    Parker      Jane  Grossman
9      Sarah    Parker    Thomas   Ziegler
10     Sarah    Parker  Samantha   Jameson
11     Sarah    Parker     Cindy     Mason
12     Sarah    Parker     Frank   Portman
13     Sarah    Parker      Beth    Fowler
14     Sarah    Parker      Rick 

# Exercises

## 5-2
Write a query that returns the account ID for each non-business customer (customer.cust_type_cd = 'I') along with the customer's federal ID (customer.fed_id) and the name of the product on which the account is based (product.name)

## 5-3
Construct a query that finals all employees whose supervisor is assigned to a different department. Retrieve the employees' ID first name, and last name.

In [64]:
## Q: 5-2
from model import Customer, CustomerTypeEnum, Product


with Session(engine) as session:
    df = pd.read_sql_query(
        """
            SELECT
                a.account_id
                , c.fed_id
                , p.name
            FROM account
            -- JOIN accounts with customers
            a JOIN (
                SELECT
                    cust_id
                    , fed_id
                    , cust_type_cd
                FROM customer
            ) c
            ON (a.cust_id = c.cust_id)
            -- JOIN products with account
            JOIN (
                SELECT
                    name
                    , product_cd
                FROM
                    product
            ) p
            ON p.product_cd = a.product_cd
            WHERE (
                c.cust_type_cd = 'I'
            )
            ;
        """,
        con=session.connection()
    )

    customer_subquery = (
        select(
            Customer.cust_id,
            Customer.fed_id,
            Customer.cust_type_cd
        )
        .select_from(Customer)
    ).subquery("c")
    customer_alias: Customer = aliased(
        Customer,
        customer_subquery
    )

    product_subquery = (
        select(
            Product.name,
            Product.product_cd
        )
        .select_from(Product)
    ).subquery("p")
    product_alias: Product = aliased(Product, product_subquery)

    statement = (
        select(
            Account.account_id,
            customer_alias.fed_id,
            product_alias.name
        )
        .select_from(Account)
        .join(
            customer_subquery,
            Account.cust_id == customer_alias.cust_id
        )
        .join(
            product_subquery,
            Account.product_cd == product_alias.product_cd
        )
        .where(
            customer_alias.cust_type_cd == CustomerTypeEnum.I
        )
    )
    print_sql_statement(statement)
    results = session.execute(statement).all()

print(df)
print(results, len(results))

"""SELECT account.account_id, c.fed_id, p.name 
FROM account JOIN (SELECT customer.cust_id AS cust_id, customer.fed_id AS fed_id, customer.cust_type_cd AS cust_type_cd 
FROM customer) AS c ON account.cust_id = c.cust_id JOIN (SELECT product.name AS name, product.product_cd AS product_cd 
FROM product) AS p ON account.product_cd = p.product_cd 
WHERE c.cust_type_cd = :cust_type_cd_1"""
    account_id       fed_id                    name
0            1  111-11-1111        checking account
1            2  111-11-1111         savings account
2            3  111-11-1111  certificate of deposit
3            4  222-22-2222        checking account
4            5  222-22-2222         savings account
5            7  333-33-3333        checking account
6            8  333-33-3333    money market account
7           10  444-44-4444        checking account
8           11  444-44-4444         savings account
9           12  444-44-4444    money market account
10          13  555-55-5555        check

We can also use the established ORM relationships to programmatically solve the same problem

In [8]:
from model import Customer, Product, CustomerTypeEnum


with Session(engine) as session:
    account: Account
    customer: Customer
    product: Product
    counter: int = 0
    for account in session.scalars(select(Account)):
        customer = account.account_customer
        if customer.cust_type_cd != CustomerTypeEnum.I:
            continue
        product = account.account_product
        print(counter, account.account_id, customer.fed_id, product.name)
        counter += 1

0 1 111-11-1111 checking account
1 2 111-11-1111 savings account
2 3 111-11-1111 certificate of deposit
3 4 222-22-2222 checking account
4 5 222-22-2222 savings account
5 7 333-33-3333 checking account
6 8 333-33-3333 money market account
7 10 444-44-4444 checking account
8 11 444-44-4444 savings account
9 12 444-44-4444 money market account
10 13 555-55-5555 checking account
11 14 666-66-6666 checking account
12 15 666-66-6666 certificate of deposit
13 17 777-77-7777 certificate of deposit
14 18 888-88-8888 checking account
15 19 888-88-8888 savings account
16 21 999-99-9999 checking account
17 22 999-99-9999 money market account
18 23 999-99-9999 certificate of deposit


In [66]:
## Q: 5-3


with Session(engine) as session:
    df = pd.read_sql_query(
        """
        SELECT
            e.fname
            , e.lname
        FROM
            employee e
        JOIN (
            SELECT
                emp_id
                , dept_id
            FROM
                employee
        )
        e_s ON (
            e.superior_emp_id = e_s.emp_id
        )
        WHERE (
            e.dept_id != e_s.dept_id
        )
        ;
        """,
        con=session.connection()
    )

    superior_employee_subquery = (
        select(
            Employee.emp_id,
            Employee.dept_id
        )
        .select_from(Employee)
    ).subquery("e_s")
    superior_employee_alias: Employee = aliased(
        Employee,
        superior_employee_subquery
    )

    statement = (
        select(
            Employee.fname,
            Employee.lname
        )
        .select_from(Employee)
        .join(
            superior_employee_subquery,
            Employee.superior_emp_id == superior_employee_alias.emp_id
        )
        .where(
            Employee.dept_id != superior_employee_alias.dept_id
        )
    )
    print_sql_statement(statement)
    results = session.execute(statement).all()

print(df)
print(results)


"""SELECT employee.fname, employee.lname 
FROM employee JOIN (SELECT employee.emp_id AS emp_id, employee.dept_id AS dept_id 
FROM employee) AS e_s ON employee.superior_emp_id = e_s.emp_id 
WHERE employee.dept_id != e_s.dept_id"""
   fname      lname
0  Susan  Hawthorne
1   John    Gooding
[('Susan', 'Hawthorne'), ('John', 'Gooding')]


We can also use the established ORM relationships to programmatically solve the same problem

In [9]:

with Session(engine) as session:
    employee: Employee
    superior: Employee
    counter: int = 0
    for employee in session.scalars(select(Employee)):
        if employee.superior_emp is None:
            continue
        superior = employee.superior_emp
        if superior.dept_id == employee.dept_id:
            continue
        print(counter, employee.fname, employee.lname)
        counter += 1

0 Susan Hawthorne
1 John Gooding
