# Filtering

In [1]:
import pandas as pd
import sqlite3 

conn = sqlite3.connect('data (2).sqlite')
pd.read_sql("""
SELECT *
  FROM employees;
""", conn).head()

Unnamed: 0,employeeNumber,lastName,firstName,extension,email,officeCode,reportsTo,jobTitle
0,1002,Murphy,Diane,x5800,dmurphy@classicmodelcars.com,1,,President
1,1056,Patterson,Mary,x4611,mpatterso@classicmodelcars.com,1,1002.0,VP Sales
2,1076,Firrelli,Jeff,x9273,jfirrelli@classicmodelcars.com,1,1002.0,VP Marketing
3,1088,Patterson,William,x4871,wpatterson@classicmodelcars.com,6,1056.0,Sales Manager (APAC)
4,1102,Bondur,Gerard,x5408,gbondur@classicmodelcars.com,4,1056.0,Sale Manager (EMEA)


In [2]:
pd.read_sql("""
SELECT *
  FROM employees
 WHERE lastName = "Patterson";
""", conn)

Unnamed: 0,employeeNumber,lastName,firstName,extension,email,officeCode,reportsTo,jobTitle
0,1056,Patterson,Mary,x4611,mpatterso@classicmodelcars.com,1,1002,VP Sales
1,1088,Patterson,William,x4871,wpatterson@classicmodelcars.com,6,1056,Sales Manager (APAC)
2,1216,Patterson,Steve,x4334,spatterson@classicmodelcars.com,2,1143,Sales Rep


In [3]:
# Ding the same in pandas

# Selecting all of the records in the database
result = pd.read_sql("SELECT * FROM employees;", conn)
# Create a list to store the records that match the query
employees_named_patterson = []
# Loop over all of the employees
for _, data in result.iterrows():
    # Check if the last name is "Patterson"
    if data["lastName"] == "Patterson":
        # Add to list
        employees_named_patterson.append(data)

# Display the result list as a DataFrame
pd.DataFrame(employees_named_patterson)

Unnamed: 0,employeeNumber,lastName,firstName,extension,email,officeCode,reportsTo,jobTitle
1,1056,Patterson,Mary,x4611,mpatterso@classicmodelcars.com,1,1002,VP Sales
3,1088,Patterson,William,x4871,wpatterson@classicmodelcars.com,6,1056,Sales Manager (APAC)
9,1216,Patterson,Steve,x4334,spatterson@classicmodelcars.com,2,1143,Sales Rep


In [4]:
#  combine WHERE clauses with SELECT statements other than SELECT * in order to filter rows and columns at the same time.

pd.read_sql("""
SELECT firstName, lastName, email
  FROM employees
 WHERE lastName = "Patterson";
""", conn)

Unnamed: 0,firstName,lastName,email
0,Mary,Patterson,mpatterso@classicmodelcars.com
1,William,Patterson,wpatterson@classicmodelcars.com
2,Steve,Patterson,spatterson@classicmodelcars.com


In [5]:
# Creating the column then filtering from there

pd.read_sql("""
SELECT *, length(firstName) AS name_length
  FROM employees
 WHERE name_length = 5;
""", conn)

Unnamed: 0,employeeNumber,lastName,firstName,extension,email,officeCode,reportsTo,jobTitle,name_length
0,1002,Murphy,Diane,x5800,dmurphy@classicmodelcars.com,1,,President,5
1,1188,Firrelli,Julie,x2173,jfirrelli@classicmodelcars.com,2,1143.0,Sales Rep,5
2,1216,Patterson,Steve,x4334,spatterson@classicmodelcars.com,2,1143.0,Sales Rep,5
3,1501,Bott,Larry,x2311,lbott@classicmodelcars.com,7,1102.0,Sales Rep,5
4,1504,Jones,Barry,x102,bjones@classicmodelcars.com,7,1102.0,Sales Rep,5
5,1612,Marsh,Peter,x102,pmarsh@classicmodelcars.com,6,1088.0,Sales Rep,5


In [6]:
# Anotehr ex

pd.read_sql("""
SELECT *, substr(firstName, 1, 1) AS first_initial
  FROM employees
 WHERE first_initial = "L";
""", conn)

Unnamed: 0,employeeNumber,lastName,firstName,extension,email,officeCode,reportsTo,jobTitle,first_initial
0,1165,Jennings,Leslie,x3291,ljennings@classicmodelcars.com,1,1143,Sales Rep,L
1,1166,Thompson,Leslie,x4065,lthompson@classicmodelcars.com,1,1143,Sales Rep,L
2,1337,Bondur,Loui,x6493,lbondur@classicmodelcars.com,4,1102,Sales Rep,L
3,1501,Bott,Larry,x2311,lbott@classicmodelcars.com,7,1102,Sales Rep,L


In [7]:
# Integer filter

pd.read_sql("""
SELECT *, CAST(round(priceEach) AS INTEGER) AS rounded_price_int
  FROM orderDetails
 WHERE rounded_price_int = 30;
""", conn)

Unnamed: 0,orderNumber,productCode,quantityOrdered,priceEach,orderLineNumber,rounded_price_int
0,10104,S24_2840,44,30.41,10,30
1,10173,S24_1937,31,29.87,9,30
2,10184,S24_2840,42,30.06,7,30
3,10280,S24_1937,20,29.87,12,30
4,10332,S24_1937,45,29.87,6,30
5,10367,S24_1937,23,29.54,13,30
6,10380,S24_1937,32,29.87,4,30


In [8]:
# Datetime

pd.read_sql("""
SELECT *, strftime("%m", orderDate) AS month
  FROM orders
 WHERE month = "01";
""", conn).head()

Unnamed: 0,orderNumber,orderDate,requiredDate,shippedDate,status,comments,customerNumber,month
0,10100,2003-01-06,2003-01-13,2003-01-10,Shipped,,363,1
1,10101,2003-01-09,2003-01-18,2003-01-11,Shipped,Check on availability.,128,1
2,10102,2003-01-10,2003-01-18,2003-01-14,Shipped,,181,1
3,10103,2003-01-29,2003-02-07,2003-02-02,Shipped,,121,1
4,10104,2003-01-31,2003-02-09,2003-02-01,Shipped,,141,1


In [9]:
# Datetime condition

pd.read_sql("""
SELECT *, julianday(shippedDate) - julianday(requiredDate) AS days_late
  FROM orders
 WHERE days_late > 0;
""", conn)

Unnamed: 0,orderNumber,orderDate,requiredDate,shippedDate,status,comments,customerNumber,days_late
0,10165,2003-10-22,2003-10-31,2003-12-26,Shipped,This order was on hold because customers's cre...,148,56.0


!= ("not equal to") - Similar to not combined with == in Python
> ("greater than") - Similar to > in Python
>= ("greater than or equal to") - Similar to >= in Python
< ("less than") - Similar to < in Python
<= ("less than or equal to") - Similar to <= in Python
AND - Similar to and in Python
OR - Similar to or in Python
BETWEEN - Similar to placing a value between two values with <= and and in Python, e.g. (2 <= x) and (x <= 5)
IN - Similar to in in Python
LIKE - Uses wildcards to find similar strings. No direct equivalent in Python, but similar to some Bash terminal commands.

In [10]:
conn.close()

# EXAMPLE 2

In [12]:
import pandas as pd
import sqlite3
conn = sqlite3.connect("data (3).sqlite")

In [13]:
pd.read_sql("""
SELECT *
  FROM products;
""", conn)

Unnamed: 0,productCode,productName,productLine,productScale,productVendor,productDescription,quantityInStock,buyPrice,MSRP
0,S10_1678,1969 Harley Davidson Ultimate Chopper,Motorcycles,1:10,Min Lin Diecast,"This replica features working kickstand, front...",7933,48.81,95.70
1,S10_1949,1952 Alpine Renault 1300,Classic Cars,1:10,Classic Metal Creations,Turnable front wheels; steering function; deta...,7305,98.58,214.30
2,S10_2016,1996 Moto Guzzi 1100i,Motorcycles,1:10,Highway 66 Mini Classics,"Official Moto Guzzi logos and insignias, saddl...",6625,68.99,118.94
3,S10_4698,2003 Harley-Davidson Eagle Drag Bike,Motorcycles,1:10,Red Start Diecast,"Model features, official Harley Davidson logos...",5582,91.02,193.66
4,S10_4757,1972 Alfa Romeo GTA,Classic Cars,1:10,Motor City Art Classics,Features include: Turnable front wheels; steer...,3252,85.68,136.00
...,...,...,...,...,...,...,...,...,...
105,S700_3505,The Titanic,Ships,1:700,Carousel DieCast Legends,"Completed model measures 19 1/2 inches long, 9...",1956,51.09,100.17
106,S700_3962,The Queen Mary,Ships,1:700,Welly Diecast Productions,Exact replica. Wood and Metal. Many extras inc...,5088,53.63,99.31
107,S700_4002,American Airlines: MD-11S,Planes,1:700,Second Gear Diecast,Polished finish. Exact replia with official lo...,8820,36.27,74.03
108,S72_1253,Boeing X-32A JSF,Planes,1:72,Motor City Art Classics,"10"" Wingspan with retractable landing gears.Co...",4857,32.77,49.66


In [16]:
pd.read_sql("""
SELECT *
  FROM products
 ORDER BY productName ASC;
""", conn)

# ASC by default, DESC Descending order

Unnamed: 0,productCode,productName,productLine,productScale,productVendor,productDescription,quantityInStock,buyPrice,MSRP
0,S18_3136,18th Century Vintage Horse Carriage,Vintage Cars,1:18,Red Start Diecast,Hand crafted diecast-like metal horse carriage...,5992,60.74,104.72
1,S24_2011,18th century schooner,Ships,1:24,Carousel DieCast Legends,All wood with canvas sails. Many extras includ...,1898,82.34,122.89
2,S24_2841,1900s Vintage Bi-Plane,Planes,1:24,Autoart Studio Design,Hand crafted diecast-like metal bi-plane is re...,5942,34.25,68.51
3,S24_4278,1900s Vintage Tri-Plane,Planes,1:24,Unimax Art Galleries,Hand crafted diecast-like metal Triplane is Re...,2756,36.23,72.45
4,S18_3140,1903 Ford Model A,Vintage Cars,1:18,Unimax Art Galleries,"Features opening trunk, working steering system",3913,68.30,136.59
...,...,...,...,...,...,...,...,...,...
105,S700_1938,The Mayflower,Ships,1:700,Studio M Art Models,Measures 31 1/2 inches Long x 25 1/2 inches Hi...,737,43.30,86.61
106,S700_3962,The Queen Mary,Ships,1:700,Welly Diecast Productions,Exact replica. Wood and Metal. Many extras inc...,5088,53.63,99.31
107,S700_1138,The Schooner Bluenose,Ships,1:700,Autoart Studio Design,All wood with canvas sails. Measures 31 1/2 in...,1897,34.00,66.67
108,S700_3505,The Titanic,Ships,1:700,Carousel DieCast Legends,"Completed model measures 19 1/2 inches long, 9...",1956,51.09,100.17


In [17]:
# Custom Sorting
# Create a column and sort by that

pd.read_sql("""
SELECT productName, length(productDescription) AS description_length
  FROM products
 ORDER BY description_length;
""", conn)

Unnamed: 0,productName,description_length
0,1928 British Royal Navy Airplane,28
1,P-51-D Mustang,45
2,1903 Ford Model A,48
3,1904 Buick Runabout,48
4,1930 Buick Marquette Phaeton,48
...,...,...
105,1936 Mercedes-Benz 500K Special Roadster,361
106,2002 Suzuki XREO,380
107,The Schooner Bluenose,390
108,1996 Moto Guzzi 1100i,391


In [18]:
# You can also sort by something without selecting it. 

pd.read_sql("""
SELECT productName
  FROM products
 ORDER BY length(productDescription);
""", conn)

Unnamed: 0,productName
0,1928 British Royal Navy Airplane
1,P-51-D Mustang
2,1903 Ford Model A
3,1904 Buick Runabout
4,1930 Buick Marquette Phaeton
...,...
105,1936 Mercedes-Benz 500K Special Roadster
106,2002 Suzuki XREO
107,The Schooner Bluenose
108,1996 Moto Guzzi 1100i


In [19]:
# Sorting By Multiple Columns, most useful if some rows have repeated values in a given and you want a "tiebreaker" value from another column

pd.read_sql("""
SELECT productVendor, productName, MSRP
  FROM products
 ORDER BY productVendor, productName;
""", conn)

Unnamed: 0,productVendor,productName,MSRP
0,Autoart Studio Design,1900s Vintage Bi-Plane,68.51
1,Autoart Studio Design,1932 Model A Ford J-Coupe,127.13
2,Autoart Studio Design,1937 Horch 930V Limousine,65.75
3,Autoart Studio Design,1962 Volkswagen Microbus,127.79
4,Autoart Studio Design,1968 Ford Mustang,194.57
...,...,...,...
105,Welly Diecast Productions,1968 Dodge Charger,117.44
106,Welly Diecast Productions,1969 Corvair Monza,151.08
107,Welly Diecast Productions,1969 Dodge Charger,115.16
108,Welly Diecast Productions,1971 Alpine Renault 1600s,61.23


In [26]:
# Unique
# With this result, we should give priority to vendors

pd.read_sql("""
SELECT COUNT(DISTINCT productVendor) AS num_product_vendors,
       COUNT(DISTINCT productName) AS num_product_names
  FROM products;
""", conn)

Unnamed: 0,num_product_vendors,num_product_names
0,13,110


In [30]:
# UNIQUE / DISTINCT
# Haw many unique products are for each vendor.

pd.read_sql("""
SELECT productVendor, COUNT(DISTINCT productVendor) AS num_product_vendors,
       COUNT(DISTINCT productName) AS num_product_names
  FROM products
  GROUP BY productVendor
;""", conn)

Unnamed: 0,productVendor,num_product_vendors,num_product_names
0,Autoart Studio Design,1,8
1,Carousel DieCast Legends,1,9
2,Classic Metal Creations,1,10
3,Exoto Designs,1,9
4,Gearbox Collectibles,1,9
5,Highway 66 Mini Classics,1,9
6,Min Lin Diecast,1,8
7,Motor City Art Classics,1,9
8,Red Start Diecast,1,7
9,Second Gear Diecast,1,8


In [32]:
# ORDER BY number, we need to make sure the numbers are integer or float, otherwise the result will be non sense

pd.read_sql("""
SELECT productName, quantityInStock
  FROM products
 ORDER BY CAST(quantityInStock AS INTEGER);
""", conn).head(10)

Unnamed: 0,productName,quantityInStock
0,1960 BSA Gold Star DBD34,15
1,1968 Ford Mustang,68
2,1928 Ford Phaeton Deluxe,136
3,1997 BMW F650 ST,178
4,Pont Yacht,414
5,1911 Ford Town Car,540
6,1928 Mercedes-Benz SSK,548
7,F/A 18 Hornet 1/72,551
8,2002 Yamaha YZR M1,600
9,The Mayflower,737


In [33]:
# Limit +  ORDER BY

pd.read_sql("""
SELECT *
  FROM orders
 ORDER BY length(comments) DESC
 LIMIT 10;
""", conn)

Unnamed: 0,orderNumber,orderDate,requiredDate,shippedDate,status,comments,customerNumber
0,10167,2003-10-23,2003-10-30,,Cancelled,Customer called to cancel. The warehouse was n...,448
1,10179,2003-11-11,2003-11-17,2003-11-13,Cancelled,Customer cancelled due to urgent budgeting iss...,496
2,10253,2004-06-01,2004-06-09,2004-06-02,Cancelled,Customer disputed the order and we agreed to c...,201
3,10173,2003-11-05,2003-11-15,2003-11-09,Shipped,Cautious optimism. We have happy customers her...,278
4,10279,2004-08-09,2004-08-19,2004-08-15,Shipped,Cautious optimism. We have happy customers her...,141
5,10377,2005-02-09,2005-02-21,2005-02-12,Shipped,Cautious optimism. We have happy customers her...,186
6,10124,2003-05-21,2003-05-29,2003-05-25,Shipped,Customer very concerned about the exact color ...,112
7,10230,2004-03-15,2004-03-24,2004-03-20,Shipped,Customer very concerned about the exact color ...,128
8,10328,2004-11-12,2004-11-21,2004-11-18,Shipped,Customer very concerned about the exact color ...,278
9,10367,2005-01-12,2005-01-21,2005-01-16,Resolved,This order was disputed and resolved on 2/1/20...,205


In [35]:
# Limit +  WHERE

pd.read_sql("""
SELECT *
  FROM orders
 WHERE status = "Cancelled"
 ORDER BY length(comments) DESC
 LIMIT 10;
""", conn)

Unnamed: 0,orderNumber,orderDate,requiredDate,shippedDate,status,comments,customerNumber
0,10167,2003-10-23,2003-10-30,,Cancelled,Customer called to cancel. The warehouse was n...,448
1,10179,2003-11-11,2003-11-17,2003-11-13,Cancelled,Customer cancelled due to urgent budgeting iss...,496
2,10253,2004-06-01,2004-06-09,2004-06-02,Cancelled,Customer disputed the order and we agreed to c...,201
3,10260,2004-06-16,2004-06-22,,Cancelled,Customer heard complaints from their customers...,357
4,10262,2004-06-24,2004-07-01,,Cancelled,This customer found a better offer from one of...,141
5,10248,2004-05-07,2004-05-14,,Cancelled,Order was mistakenly placed. The warehouse not...,131


In [36]:
pd.read_sql("""
SELECT *
  FROM orders
 WHERE status IN ("Cancelled", "Resolved")
 ORDER BY length(comments) DESC
 LIMIT 10;
""", conn)

Unnamed: 0,orderNumber,orderDate,requiredDate,shippedDate,status,comments,customerNumber
0,10167,2003-10-23,2003-10-30,,Cancelled,Customer called to cancel. The warehouse was n...,448
1,10179,2003-11-11,2003-11-17,2003-11-13,Cancelled,Customer cancelled due to urgent budgeting iss...,496
2,10253,2004-06-01,2004-06-09,2004-06-02,Cancelled,Customer disputed the order and we agreed to c...,201
3,10367,2005-01-12,2005-01-21,2005-01-16,Resolved,This order was disputed and resolved on 2/1/20...,205
4,10327,2004-11-10,2004-11-19,2004-11-13,Resolved,Order was disputed and resolved on 12/1/04. Th...,145
5,10164,2003-10-21,2003-10-30,2003-10-23,Resolved,"This order was disputed, but resolved on 11/1/...",452
6,10260,2004-06-16,2004-06-22,,Cancelled,Customer heard complaints from their customers...,357
7,10262,2004-06-24,2004-07-01,,Cancelled,This customer found a better offer from one of...,141
8,10386,2005-03-01,2005-03-09,2005-03-06,Resolved,Disputed then Resolved on 3/15/2005. Customer ...,141
9,10248,2004-05-07,2004-05-14,,Cancelled,Order was mistakenly placed. The warehouse not...,131


In [37]:
# LIMIT + DATE

pd.read_sql("""
SELECT DISTINCT customerNumber, orderDate
  FROM orders
 ORDER BY orderDate
 LIMIT 5;
""", conn)

Unnamed: 0,customerNumber,orderDate
0,363,2003-01-06
1,128,2003-01-09
2,181,2003-01-10
3,121,2003-01-29
4,141,2003-01-31


In [38]:
# What is the order that took the longest to fulfill, and how long did it take?

pd.read_sql("""
SELECT *,
       julianday(shippedDate) - julianday(orderDate) AS days_to_fulfill
  FROM orders
 WHERE shippedDate != ""
 ORDER BY days_to_fulfill DESC
 LIMIT 1;
""", conn)

Unnamed: 0,orderNumber,orderDate,requiredDate,shippedDate,status,comments,customerNumber,days_to_fulfill
0,10165,2003-10-22,2003-10-31,2003-12-26,Shipped,This order was on hold because customers's cre...,148,65.0


In [39]:
conn.close()