# Practice for Join Statements with SQL

Import the necessary packages and connect to the database 'data.sqlite'.

In [8]:
# Run without change

# Libraries and database

import sqlite3
import pandas as pd

conn = sqlite3.connect('data (3).sqlite')

1. Select the names (frist and last) of all employees in Boston

In [9]:
cur = conn.cursor()
cur.execute("""SELECT name FROM sqlite_master WHERE type = 'table';""")
cur.fetchall()

[('productlines',),
 ('offices',),
 ('customers',),
 ('employees',),
 ('orderdetails',),
 ('orders',),
 ('payments',),
 ('products',)]

In [18]:
# Your code here
# city is 

q = """
SELECT lastName, firstName, city
      FROM employees
      JOIN offices
      USING(officeCode)
      WHERE city = 'Boston'
"""
pd.read_sql(q, conn)

Unnamed: 0,lastName,firstName,city
0,Firrelli,Julie,Boston
1,Patterson,Steve,Boston


2. Are there any offices that have zero employees?

In [14]:
# Your code here
q = """
SELECT city, COUNT(*)
      FROM employees
      JOIN offices
      USING(officeCode)
      GROUP BY city
"""
pd.read_sql(q, conn)

Unnamed: 0,city,COUNT(*)
0,Boston,2
1,London,2
2,NYC,2
3,Paris,5
4,San Francisco,6
5,Sydney,4
6,Tokyo,2


In [25]:
# Your code here
# NOT BOSTON, it is worlwide

q = """
SELECT
    o.officeCode,
    o.city,
    COUNT(e.employeeNumber) AS n_employees
FROM offices AS o
LEFT JOIN employees AS e
    USING(officeCode)
GROUP BY officeCode
HAVING n_employees = 0
;
"""
pd.read_sql(q, conn)

Unnamed: 0,officeCode,city,n_employees
0,27,Boston,0


In [27]:
# Your code here
# Aliases in tables are optionals but it is recommended for reading better

q = """
SELECT
    officeCode,
    city,
    COUNT(employeeNumber) AS n_employees
FROM offices
LEFT JOIN employees
    USING(officeCode)
GROUP BY officeCode
HAVING n_employees = 0
;
"""
pd.read_sql(q, conn)

Unnamed: 0,officeCode,city,n_employees
0,27,Boston,0


3. How many customers are there per office?

In [35]:
# Your code here
# ON e.officeCode = o.officeCode is equivalent as USING(officeCode)
# MINE !!

q = """
SELECT
    o.officeCode,
    o.city,
    COUNT(c.customerNumber) AS n_customers
FROM employees AS e
LEFT JOIN customers AS c
    ON e.employeeNumber = c.salesRepEmployeeNumber
LEFT JOIN offices AS o
    ON e.officeCode = o.officeCode
GROUP BY o.officeCode
;
"""
pd.read_sql(q, conn)


Unnamed: 0,officeCode,city,n_customers
0,1,San Francisco,12
1,2,Boston,12
2,3,NYC,15
3,4,Paris,29
4,5,Tokyo,5
5,6,Sydney,10
6,7,London,17


In [36]:
# From The solution

q = """
SELECT
    o.officeCode,
    o.city,
    COUNT(c.customerNumber) AS n_customers
FROM offices AS o
JOIN employees AS e
    USING(officeCode)
JOIN customers AS c
    ON e.employeeNumber = c.salesRepEmployeeNumber
GROUP BY officeCode
;
"""
pd.read_sql(q, conn)

Unnamed: 0,officeCode,city,n_customers
0,1,San Francisco,12
1,2,Boston,12
2,3,NYC,15
3,4,Paris,29
4,5,Tokyo,5
5,6,Sydney,10
6,7,London,17


| Criterion                 | Query 1 (INNER JOIN)           | Query 2 (LEFT JOIN)                   |
| ------------------------- | ------------------------------ | ------------------------------------- |
| Simpler logic             | ✅ Yes                          | ❌ Slightly more complex               |
| Shows only active offices | ✅ Yes                          | ❌ No (includes zero-customer offices) |
| Shows inactive employees  | ❌ No                           | ✅ Yes (if needed)                     |
| Accurate customer count   | ✅ Yes                          | ✅ Yes (nulls ignored by `COUNT`)      |
| Safer against data gaps   | ❌ Might drop offices/employees | ✅ Keeps all, even with missing links  |


4. Display the names of every individual product that each employee has sold as a dataframe.

In [49]:
# Your code here
# I included distinct because I want every individual product

q = """
SELECT DISTINCT
    e.employeeNumber,
    CONCAT(e.firstName, ' ', e.lastName) AS employee_name,
    p.productName
FROM employees AS e
LEFT JOIN customers AS c
    ON e.employeeNumber = c.salesRepEmployeeNumber
LEFT JOIN orders AS o
    ON c.customerNumber = o.customerNumber
LEFT JOIN orderdetails AS d
    ON o.orderNumber = d.orderNumber
LEFT JOIN products AS p
    USING(productCode)
GROUP BY e.employeeNumber, p.productname
;
"""
pd.read_sql(q, conn)

Unnamed: 0,employeeNumber,employee_name,productName
0,1002,Diane Murphy,
1,1056,Mary Patterson,
2,1076,Jeff Firrelli,
3,1088,William Patterson,
4,1102,Gerard Bondur,
...,...,...,...
1373,1702,Martin Gerard,The Mayflower
1374,1702,Martin Gerard,The Queen Mary
1375,1702,Martin Gerard,The Schooner Bluenose
1376,1702,Martin Gerard,The Titanic


In [51]:
# FROM SOLUTION
# They exclode NONE because it's an inner Join
# We don't need to use aliases for the columns since they
# are conveniently already labeled as different kinds of
# names (firstName, lastName, productName)
q = """
SELECT firstName, lastName, productName
FROM employees AS e
JOIN customers AS c
    ON e.employeeNumber = c.salesRepEmployeeNumber
JOIN orders
    USING(customerNumber)
JOIN orderdetails
    USING(orderNumber)
JOIN products
    USING(productCode)
;
"""
df = pd.read_sql(q, conn)
df

Unnamed: 0,firstName,lastName,productName
0,Leslie,Jennings,1958 Setra Bus
1,Leslie,Jennings,1940 Ford Pickup Truck
2,Leslie,Jennings,1939 Cadillac Limousine
3,Leslie,Jennings,1996 Peterbilt 379 Stake Bed with Outrigger
4,Leslie,Jennings,1968 Ford Mustang
...,...,...,...
2991,Martin,Gerard,1954 Greyhound Scenicruiser
2992,Martin,Gerard,1950's Chicago Surface Lines Streetcar
2993,Martin,Gerard,Diamond T620 Semi-Skirted Tanker
2994,Martin,Gerard,1911 Ford Town Car


| Join Type | Keeps Left-only Rows? | Keeps Right-only Rows? | Keeps Only Matching Rows? |
| --------- | --------------------- | ---------------------- | ------------------------- |
| **INNER** | No                    | No                     | Yes                       |
| **LEFT**  | Yes                   | No                     | Yes                       |
| **RIGHT** | No                    | Yes                    | Yes                       |
| **FULL**  | Yes                   | Yes                    | Yes                       |


5. Display the number of products each employee has sold
- Alphabetize the results by employee last name.
- Use the quantityOrdered column from orderDetails.
- Think about how to group the data when some employees might have the same first or last name.


In [60]:
# Your code here
# In reality, thay are asking for the quantity sold by employee (All products together)
# INNER join to remove nulls

q = """
SELECT DISTINCT
    e.employeeNumber,
    CONCAT(e.lastName, ', ', e.firstName) AS employee_name,
    p.productName,
    SUM(quantityOrdered) As sum_orders
FROM employees AS e
JOIN customers AS c
    ON e.employeeNumber = c.salesRepEmployeeNumber
JOIN orders AS o
    ON c.customerNumber = o.customerNumber
JOIN orderdetails AS d
    ON o.orderNumber = d.orderNumber
JOIN products AS p
    USING(productCode)
GROUP BY e.employeeNumber, p.productname
ORDER BY lastName
;
"""
pd.read_sql(q, conn)

Unnamed: 0,employeeNumber,employee_name,productName,sum_orders
0,1337,"Bondur, Loui",18th century schooner,35
1,1337,"Bondur, Loui",1900s Vintage Bi-Plane,28
2,1337,"Bondur, Loui",1900s Vintage Tri-Plane,45
3,1337,"Bondur, Loui",1903 Ford Model A,72
4,1337,"Bondur, Loui",1904 Buick Runabout,45
...,...,...,...,...
1363,1323,"Vanauf, George",The Mayflower,40
1364,1323,"Vanauf, George",The Queen Mary,73
1365,1323,"Vanauf, George",The Schooner Bluenose,117
1366,1323,"Vanauf, George",The Titanic,52


In [66]:
# Your code here
# In reality, thay are asking for the quantity sold by employee (All products together, not each one)
# INNER join to remove nulls

q = """
SELECT
    e.employeeNumber,
    CONCAT(e.lastName, ', ', e.firstName) AS employee_name,
    SUM(d.quantityOrdered) As sum_orders
FROM employees AS e
JOIN customers AS c
    ON e.employeeNumber = c.salesRepEmployeeNumber
JOIN orders AS o
    ON c.customerNumber = o.customerNumber
JOIN orderdetails AS d
    ON o.orderNumber = d.orderNumber
GROUP BY employee_name
;
"""
pd.read_sql(q, conn)

Unnamed: 0,employeeNumber,employee_name,sum_orders
0,1337,"Bondur, Loui",6186
1,1501,"Bott, Larry",8205
2,1401,"Castillo, Pamela",9290
3,1188,"Firrelli, Julie",4227
4,1611,"Fixter, Andy",6246
5,1702,"Gerard, Martin",4180
6,1370,"Hernandez, Gerard",14231
7,1165,"Jennings, Leslie",11854
8,1504,"Jones, Barry",7486
9,1612,"Marsh, Peter",6632


6.  Display the names of employees who have sold more than 200 different products.

In [71]:
# Your codde here
# Inner join because I am not interested in Null
# Different Products, so quantity is not relevant

q = """
SELECT DISTINCT
    e.employeeNumber,
    CONCAT(e.lastName, ', ', e.firstName) AS employee_name,
    COUNT(p.productName) as num_products
FROM employees AS e
JOIN customers AS c
    ON e.employeeNumber = c.salesRepEmployeeNumber
JOIN orders AS o
    ON c.customerNumber = o.customerNumber
JOIN orderdetails AS d
    ON o.orderNumber = d.orderNumber
JOIN products AS p
    USING(productCode)
GROUP BY employee_name
HAVING num_products > 200
ORDER BY employee_name
;
""" 
pd.read_sql(q, conn)

Unnamed: 0,employeeNumber,employee_name,num_products
0,1501,"Bott, Larry",236
1,1401,"Castillo, Pamela",272
2,1370,"Hernandez, Gerard",396
3,1165,"Jennings, Leslie",331
4,1504,"Jones, Barry",220
5,1323,"Vanauf, George",211


In [72]:
conn.close()