# Chapter 4: Filtering

SQL offers flexible and powerful ways to filter rows. Let's take a look.

In [1]:
import os

from dotenv import load_dotenv
from sqlalchemy import create_engine, URL, select, func
from sqlalchemy.orm import Session
import pandas as pd

from model import Customer, Employee, Department, Branch, Account
from utils import print_sql_statement


# Load the DB_* settings into os.environ (python-dotenv reads them from a .env file).
load_dotenv()

# Build the connection URL from its individual parts. os.environ[...] raises
# KeyError immediately if one of the settings is missing.
url_object = URL.create(
    drivername=os.environ["DB_ENGINE"],
    username=os.environ["DB_USER"],
    password=os.environ["DB_PASSWD"],
    host=os.environ["DB_HOST"],
    database=os.environ["DB_NAME"],
)

# One engine for the whole notebook; every cell below opens its Session on it.
engine = create_engine(url_object)

# Range (Equality and Inequality)

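The '=' and '!=' operators in a WHERE expression compare values for equality and inequality, respectively. Range conditions use '<', '<=', '>' and '>='; the 'BETWEEN' operator expresses an inclusive range succinctly (`x BETWEEN a AND b` is equivalent to `x >= a AND x <= b`).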


Find all the employees whose start date is between 2001-01-01 and 2003-01-01 (both bounds inclusive)

In [5]:
from datetime import date


# Inclusive bounds of the start-date window; they mirror the literals in the raw SQL below.
window_start = date(2001, 1, 1)
window_end = date(2003, 1, 1)

# ORM counterpart of the raw SQL query. Building it does not touch the database.
statement = (
    select(Employee.emp_id, Employee.start_date)
    .select_from(Employee)
    .where(Employee.start_date.between(window_start, window_end))
)

with Session(engine) as session:
    # Raw SQL version, loaded straight into a DataFrame.
    df = pd.read_sql_query(
        """
        SELECT
            e.emp_id
            , e.start_date
        FROM
            employee e
        WHERE
            e.start_date BETWEEN '2001-01-01' AND '2003-01-01'
        """,
        con=session.connection()
    )

    # Show the SQL generated for the ORM statement, then run it on the same session.
    print_sql_statement(statement)
    results = session.execute(statement).all()

# Both approaches are printed so they can be compared side by side.
print(df)
print(results)

"""SELECT employee.emp_id, employee.start_date 
FROM employee 
WHERE employee.start_date BETWEEN :start_date_1 AND :start_date_2"""
   emp_id  start_date
0       1  2001-06-22
1       2  2002-09-12
2       4  2002-04-24
3       8  2002-12-02
4       9  2002-05-03
5      10  2002-07-27
6      14  2002-08-09
7      16  2001-03-15
8      17  2002-06-29
9      18  2002-12-12
[(1, datetime.date(2001, 6, 22)), (2, datetime.date(2002, 9, 12)), (4, datetime.date(2002, 4, 24)), (8, datetime.date(2002, 12, 2)), (9, datetime.date(2002, 5, 3)), (10, datetime.date(2002, 7, 27)), (14, datetime.date(2002, 8, 9)), (16, datetime.date(2001, 3, 15)), (17, datetime.date(2002, 6, 29)), (18, datetime.date(2002, 12, 12))]


'BETWEEN' also works with numbers and strings

Find all accounts where the available balance is between 3,000 and 5,000 and the federal ID of the customer is between '500-00-0000' and '999-99-9999'

In [7]:
from sqlalchemy import and_


# The two range predicates, named so the WHERE clause reads like the task statement.
balance_in_range = Account.avail_balance.between(3000, 5000)
fed_id_in_range = Customer.fed_id.between("500-00-0000", "999-99-9999")

# ORM counterpart of the raw SQL query: join accounts to their customer and
# keep only rows satisfying both range predicates.
statement = (
    select(Account.cust_id, Account.avail_balance, Customer.fed_id)
    .select_from(Account)
    .join(Customer, Account.cust_id == Customer.cust_id)
    .where(and_(balance_in_range, fed_id_in_range))
)

with Session(engine) as session:
    # Raw SQL version, loaded straight into a DataFrame.
    df = pd.read_sql_query(
        """
        SELECT
            a.cust_id
            , a.avail_balance
            , c.fed_id
        FROM
            account a JOIN customer c ON a.cust_id = c.cust_id
        WHERE (
            (a.avail_balance BETWEEN 3000 AND 5000)
            AND
            (c.fed_id BETWEEN '500-00-0000' AND '999-99-9999')
        );
        """,
        con=session.connection()
    )

    # Show the SQL generated for the ORM statement, then run it on the same session.
    print_sql_statement(statement)
    results = session.execute(statement).all()

# Both approaches are printed so they can be compared side by side.
print(df)
print(results)

"""SELECT account.cust_id, account.avail_balance, customer.fed_id 
FROM account JOIN customer ON account.cust_id = customer.cust_id 
WHERE account.avail_balance BETWEEN :avail_balance_1 AND :avail_balance_2 AND customer.fed_id BETWEEN :fed_id_1 AND :fed_id_2"""
   cust_id  avail_balance       fed_id
0        7        5000.00  777-77-7777
1        8        3487.19  888-88-8888
[(7, 5000.0, '777-77-7777'), (8, 3487.19, '888-88-8888')]


# Membership

When testing the same column against a set of candidate values, the 'IN' operator (and its negation, 'NOT IN') is useful

Find all accounts whose product is neither business ('BUS') nor small business loan ('SBL')

In [34]:

# Product codes to exclude. The column is upper-cased before the comparison,
# so the codes are written in upper case.
excluded_codes = ("BUS", "SBL")

# ORM counterpart of the raw SQL query, ordered the same way so the two
# result sets line up row for row.
statement = (
    select(
        Account.account_id,
        Account.product_cd,
        Account.cust_id,
        Account.avail_balance,
    )
    .select_from(Account)
    .where(func.upper(Account.product_cd).not_in(excluded_codes))
    .order_by(Account.account_id.asc(), Account.cust_id.asc())
)

with Session(engine) as session:
    # Raw SQL version, loaded straight into a DataFrame.
    df = pd.read_sql_query(
        """
        SELECT
            a.account_id
            , a.product_cd
            , a.cust_id
            , a.avail_balance
        FROM
            account a
        WHERE
            UPPER(a.product_cd) NOT IN ('BUS', 'SBL')
        ORDER BY a.account_id ASC, a.cust_id ASC
        """,
        con=session.connection()
    )

    # Show the SQL generated for the ORM statement, then run it on the same session.
    print_sql_statement(statement)
    results = session.execute(statement).all()

# Both approaches are printed so they can be compared side by side.
print(df)
print(results)

"""SELECT account.account_id, account.product_cd, account.cust_id, account.avail_balance 
FROM account 
WHERE (upper(account.product_cd) NOT IN (__[POSTCOMPILE_upper_1])) ORDER BY account.account_id ASC, account.cust_id ASC"""
    account_id product_cd  cust_id  avail_balance
0            1        CHK        1        1057.75
1            2        SAV        1         500.00
2            3         CD        1        3000.00
3            4        CHK        2        2258.02
4            5        SAV        2         200.00
5            7        CHK        3        1057.75
6            8         MM        3        2212.50
7           10        CHK        4         534.12
8           11        SAV        4         767.77
9           12         MM        4        5487.09
10          13        CHK        5        2237.97
11          14        CHK        6         122.37
12          15         CD        6       10000.00
13          17         CD        7        5000.00
14          18        C

# Filtering with Subqueries

Find the same accounts again, this time with a subquery, knowing that all of their products have the product type code 'ACCOUNT'

In [35]:
from copy import deepcopy

from model import Product


# Snapshot the `results` left by the previously executed cell (presumably the
# NOT IN query above) before this cell rebinds the name.
old_results = deepcopy(results)

# Subquery: the product codes whose product type is 'ACCOUNT'.
account_product_codes = (
    select(Product.product_cd)
    .select_from(Product)
    .where(Product.product_type_cd == "ACCOUNT")
)

# ORM counterpart of the raw SQL query: accounts whose product code appears
# in the subquery, ordered the same way as the earlier result set.
statement = (
    select(
        Account.account_id,
        Account.product_cd,
        Account.cust_id,
        Account.avail_balance,
    )
    .select_from(Account)
    .where(Account.product_cd.in_(account_product_codes))
    .order_by(Account.account_id.asc(), Account.cust_id.asc())
)

with Session(engine) as session:
    # Raw SQL version, loaded straight into a DataFrame.
    df = pd.read_sql_query(
        """
        SELECT
            a.account_id
            , a.product_cd
            , a.cust_id
            , a.avail_balance
        FROM
            account a
        WHERE a.product_cd IN (
            SELECT
                p.product_cd
            FROM
                product p
            WHERE p.product_type_cd = 'ACCOUNT'
        )
        ORDER BY a.account_id ASC, a.cust_id ASC
        ;
        """,
        con=session.connection()
    )

    # Show the SQL generated for the ORM statement, then run it on the same session.
    print_sql_statement(statement)
    results = session.execute(statement).all()

print(df)
print(results)
# The subquery filter must select exactly the rows captured in old_results.
assert old_results == results

"""SELECT account.account_id, account.product_cd, account.cust_id, account.avail_balance 
FROM account 
WHERE account.product_cd IN (SELECT product.product_cd 
FROM product 
WHERE product.product_type_cd = :product_type_cd_1) ORDER BY account.account_id ASC, account.cust_id ASC"""
    account_id product_cd  cust_id  avail_balance
0            1        CHK        1        1057.75
1            2        SAV        1         500.00
2            3         CD        1        3000.00
3            4        CHK        2        2258.02
4            5        SAV        2         200.00
5            7        CHK        3        1057.75
6            8         MM        3        2212.50
7           10        CHK        4         534.12
8           11        SAV        4         767.77
9           12         MM        4        5487.09
10          13        CHK        5        2237.97
11          14        CHK        6         122.37
12          15         CD        6       10000.00
13          17    