# Chapter 10: Joins Revisited

The first join types covered were the Cartesian (cross) join and the inner join. This chapter adds two more: the LEFT/RIGHT OUTER JOIN and the NATURAL JOIN.

In [1]:
import os

from dotenv import load_dotenv
from sqlalchemy import create_engine, URL
from sqlalchemy.orm import Session
import pandas as pd


# Read the connection settings from a local .env file into the environment.
load_dotenv()

# Build the database URL piece by piece from environment variables; a missing
# variable raises KeyError here rather than failing later at connect time.
url_object = URL.create(
    drivername=os.environ["DB_ENGINE"],
    username=os.environ["DB_USER"],
    password=os.environ["DB_PASSWD"],
    host=os.environ["DB_HOST"],
    database=os.environ["DB_NAME"],
)

# Engine shared by every query cell in this notebook.
engine = create_engine(url_object)

# Left Versus Right Outer Join

List all accounts and the first and last name of the customer, regardless of being a business or individual.

With `account LEFT OUTER JOIN individual`, every account is returned, and the name columns are NULL for accounts owned by business customers. If LEFT is replaced with RIGHT, every individual is returned instead, so the accounts owned by business customers are excluded altogether.

In [4]:
# Base statement: every account, with the owner's first and last name taken
# from `individual` when a matching row exists.
query = """
SELECT
    a.account_id
    , a.cust_id
    , i.fname
    , i.lname
FROM
    account a
    LEFT OUTER JOIN individual i
    ON a.cust_id = i.cust_id
;
"""

# Same statement with the join direction flipped, to compare the two results.
right_query = query.replace("LEFT", "RIGHT")

with Session(engine) as session:
    conn = session.connection()
    df_left = pd.read_sql_query(query, con=conn)
    df_right = pd.read_sql_query(right_query, con=conn)

print(df_left)
print(df_right)

    account_id  cust_id     fname    lname
0            1        1     James   Hadley
1            2        1     James   Hadley
2            3        1     James   Hadley
3            4        2     Susan  Tingley
4            5        2     Susan  Tingley
5            7        3     Frank   Tucker
6            8        3     Frank   Tucker
7           10        4      John  Hayward
8           11        4      John  Hayward
9           12        4      John  Hayward
10          13        5   Charles  Frasier
11          14        6      John  Spencer
12          15        6      John  Spencer
13          17        7  Margaret    Young
14          18        8     Louis    Blake
15          19        8     Louis    Blake
16          21        9   Richard   Farley
17          22        9   Richard   Farley
18          23        9   Richard   Farley
19          24       10      None     None
20          25       10      None     None
21          27       11      None     None
22         

# Three-way Outer Join

Update the previous query to list the individual and business names.

MySQL permits more than two tables to be outer-joined in a single query, but other database servers may not. So let's use a server-agnostic query that outer-joins a subquery instead.

In [6]:
# The inner query pairs each account with the individual's full name (when one
# exists); the outer query then attaches the business name with a second
# LEFT OUTER JOIN, so no single SELECT needs more than one outer join.
with Session(engine) as session:
    three_way_sql = (
        """
        SELECT
            act_ind.account_id
            , act_ind.cust_id
            , act_ind.person_name
            , bus.name business_name
        FROM (
            SELECT
                a.account_id
                , a.cust_id
                , CONCAT(i.fname, ' ', i.lname) person_name
            FROM
                account a LEFT OUTER JOIN individual i
                  ON a.cust_id = i.cust_id
        ) act_ind LEFT OUTER JOIN business bus
            ON act_ind.cust_id = bus.cust_id
        ;
        """
    )
    df = pd.read_sql_query(three_way_sql, con=session.connection())

print(df)

    account_id  cust_id      person_name           business_name
0            1        1     James Hadley                    None
1            2        1     James Hadley                    None
2            3        1     James Hadley                    None
3            4        2    Susan Tingley                    None
4            5        2    Susan Tingley                    None
5            7        3     Frank Tucker                    None
6            8        3     Frank Tucker                    None
7           10        4     John Hayward                    None
8           11        4     John Hayward                    None
9           12        4     John Hayward                    None
10          13        5  Charles Frasier                    None
11          14        6     John Spencer                    None
12          15        6     John Spencer                    None
13          17        7   Margaret Young                    None
14          18        8  

# Self Outer Joins

Find all superiors and their subordinates.



In [14]:
# Self join on `employee`: alias `e` is the subordinate side and `e_sup` the
# superior side. The RIGHT OUTER JOIN keeps every `e_sup` row, so employees
# with no subordinates still appear, with NULL in the e_* columns. Titles are
# cut to 11 characters to keep the printed frame narrow.
with Session(engine) as session:
    self_join_sql = (
        """
        SELECT
            e.fname e_fname
            , e.lname e_lname
            , SUBSTRING(e.title, 1, 11) e_title
            , e_sup.fname s_fname
            , e_sup.lname s_lname
            , SUBSTRING(e_sup.title, 1, 11) s_title
        FROM employee e RIGHT OUTER JOIN employee e_sup
          ON e.superior_emp_id = e_sup.emp_id
        ORDER BY e_sup.lname, e_sup.fname
        ;
        """
    )
    df = pd.read_sql_query(self_join_sql, con=session.connection())

print(df)

     e_fname    e_lname      e_title   s_fname    s_lname      s_title
0       None       None         None     Susan     Barker  Vice Presid
1      Cindy      Mason       Teller      John      Blake  Head Teller
2      Frank    Portman       Teller      John      Blake  Head Teller
3      Chris     Tucker       Teller     Helen    Fleming  Head Teller
4      Sarah     Parker       Teller     Helen    Fleming  Head Teller
5       Jane   Grossman       Teller     Helen    Fleming  Head Teller
6       None       None         None      Beth     Fowler       Teller
7       None       None         None      John    Gooding  Loan Manage
8       None       None         None      Jane   Grossman       Teller
9       John    Gooding  Loan Manage     Susan  Hawthorne  Operations 
10     Helen    Fleming  Head Teller     Susan  Hawthorne  Operations 
11     Paula    Roberts  Head Teller     Susan  Hawthorne  Operations 
12      John      Blake  Head Teller     Susan  Hawthorne  Operations 
13   T

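With the RIGHT OUTER JOIN above, every employee appears on the superior side, and those without any subordinate get NULL in the employee columns. Changing RIGHT to LEFT instead lists every employee together with their superior; only the president, who has no superior, gets NULLs: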

| e\_fname | e\_lname | e\_title | s\_fname | s\_lname | s\_title |
| :--- | :--- | :--- | :--- | :--- | :--- |
| Michael | Smith | President | null | null | null |
| Cindy | Mason | Teller | John | Blake | Head Teller |
| Frank | Portman | Teller | John | Blake | Head Teller |
| Chris | Tucker | Teller | Helen | Fleming | Head Teller |
| Sarah | Parker | Teller | Helen | Fleming | Head Teller |
| Jane | Grossman | Teller | Helen | Fleming | Head Teller |
| John | Gooding | Loan Manage | Susan | Hawthorne | Operations  |
| Helen | Fleming | Head Teller | Susan | Hawthorne | Operations  |
| Paula | Roberts | Head Teller | Susan | Hawthorne | Operations  |
| John | Blake | Head Teller | Susan | Hawthorne | Operations  |
| Theresa | Markham | Head Teller | Susan | Hawthorne | Operations  |
| Beth | Fowler | Teller | Theresa | Markham | Head Teller |
| Rick | Tulman | Teller | Theresa | Markham | Head Teller |
| Thomas | Ziegler | Teller | Paula | Roberts | Head Teller |
| Samantha | Jameson | Teller | Paula | Roberts | Head Teller |
| Susan | Barker | Vice Presid | Michael | Smith | President |
| Robert | Tyler | Treasurer | Michael | Smith | President |
| Susan | Hawthorne | Operations  | Robert | Tyler | Treasurer |


# Cartesian Product Revisited

Show every day of 2004 and the number of accounts opened on each day.

This will be an interesting one and long to show.

In [25]:
# Count the accounts opened on each day of 2004, including days with none.
#
# The derived table `dts` builds the calendar: `ones` (0-9), `tens` (0-90) and
# `hundreds` (0-300) are cross joined so that their sum enumerates each day
# offset 0..399 exactly once, and the WHERE clause keeps only the offsets that
# still fall inside 2004. The RIGHT OUTER JOIN keeps every generated date, and
# COUNT(a.open_date) ignores the NULLs of dates without a matching account.
#
# Each value must appear only once in its digit list: a repeated value emits
# the same dates twice, and although GROUP BY hides the duplicate rows, the
# count of any account opened on such a date would be doubled.
with Session(engine) as session:

    df = pd.read_sql_query(
        """
        SELECT
            dts.dt
            , COUNT(a.open_date) cnt
        FROM account a RIGHT OUTER JOIN (
            SELECT
                DATE_ADD('2004-01-01', INTERVAL (ones.num + tens.num + hundreds.num) DAY) dt
            FROM (
                SELECT 0 num UNION ALL
                SELECT 1 num UNION ALL
                SELECT 2 num UNION ALL
                SELECT 3 num UNION ALL
                SELECT 4 num UNION ALL
                SELECT 5 num UNION ALL
                SELECT 6 num UNION ALL
                SELECT 7 num UNION ALL
                SELECT 8 num UNION ALL
                SELECT 9 num
            ) ones
            CROSS JOIN (
                SELECT  0 num UNION ALL
                SELECT 10 num UNION ALL
                SELECT 20 num UNION ALL
                SELECT 30 num UNION ALL
                SELECT 40 num UNION ALL
                SELECT 50 num UNION ALL
                SELECT 60 num UNION ALL
                SELECT 70 num UNION ALL
                SELECT 80 num UNION ALL
                SELECT 90 num
            ) tens
            CROSS JOIN (
                SELECT   0 num UNION ALL
                SELECT 100 num UNION ALL
                SELECT 200 num UNION ALL
                SELECT 300 num
            ) hundreds
            WHERE (
                DATE_ADD('2004-01-01', INTERVAL (ones.num + tens.num + hundreds.num) DAY)
                    < '2005-01-01'
            )
        ) dts ON dts.dt = a.open_date
        GROUP BY dts.dt
        ORDER BY dts.dt
        ;
        """,
        con=session.connection()
    )

# Show only the last few days; the full frame has one row per day of the year.
df.tail()

Unnamed: 0,dt,cnt
361,2004-12-27,0
362,2004-12-28,1
363,2004-12-29,0
364,2004-12-30,0
365,2004-12-31,0


This query is much nicer to read, but it uses an advanced feature not discussed yet (a recursive common table expression):

```mysql
-- Count the accounts opened on each day between start_dt (inclusive) and
-- end_dt (exclusive); days without any account get a count of 0.
SELECT
    dts.dt
    , COUNT(a.open_date) cnt
FROM account a RIGHT OUTER JOIN (
    -- date_range: a single row holding the bounds of the calendar.
    WITH RECURSIVE date_range AS (
        SELECT
            DATE('2005-01-01') end_dt
            , DATE('2004-01-01') start_dt
    ),
    -- numbers: day offsets starting at 0; each step adds 1 while the previous
    -- offset is still below the number of days between the two bounds.
    numbers AS (
        SELECT 0 delta
        UNION ALL
        SELECT delta + 1
        FROM numbers CROSS JOIN date_range
        WHERE delta < DATEDIFF(date_range.end_dt, date_range.start_dt)
    )
    -- Turn each offset into a date. The recursion's last offset equals the
    -- full day difference and therefore lands on end_dt itself; the WHERE
    -- below drops it so the range stays exclusive of end_dt.
    SELECT
        DATE_ADD(date_range.start_dt, INTERVAL delta DAY) dt
    FROM numbers CROSS JOIN date_range
    WHERE (
        DATE_ADD(date_range.start_dt, INTERVAL delta DAY) < date_range.end_dt
    )
) dts ON dts.dt = a.open_date
GROUP BY dts.dt
ORDER BY dts.dt;
```