# SQL 2

In [13]:
# Open the SQLite database file that the queries below read from.
import sqlite3

conn = sqlite3.connect(database='data (1).sqlite')

In [10]:
# NOTE: the connection is deliberately left open at this point. Closing it
# here makes every query that follows fail when the notebook is run from top
# to bottom; conn.close() is called further down, after the queries.

In [15]:
# Peek at the final two rows of the employees table.
import pandas as pd

pd.read_sql(sql="""SELECT * FROM employees;""", con=conn).tail(n=2)

Unnamed: 0,employeeNumber,lastName,firstName,extension,email,officeCode,reportsTo,jobTitle
21,1625,Kato,Yoshimi,x102,ykato@classicmodelcars.com,5,1621,Sales Rep
22,1702,Gerard,Martin,x2312,mgerard@classicmodelcars.com,4,1102,Sales Rep


In [None]:
# General shape of a SELECT statement (SQL template, kept as a comment so the
# cell remains valid Python):
#
#   SELECT col1, col2, col3
#   FROM table
#   WHERE records match criteria
#   LIMIT 100;

# Every column of every row in employees.
# Only the last expression of a cell is rendered, so this result is not
# displayed while the orderDetails query follows it in the same cell.
pd.read_sql("""
SELECT *
  FROM employees;
""", conn)

# Or  another table
pd.read_sql("""SELECT * FROM orderDetails;""", conn)

In [None]:
# Select only the last and first names of the employees; the columns come
# back in the order they are listed after SELECT.
pd.read_sql(
    sql="""
SELECT lastName, firstName
  FROM employees;
""",
    con=conn,
).head(5)

In [None]:
# The AS keyword gives a column an alias: firstName is returned as "name".
pd.read_sql(
    sql="""
SELECT firstName AS name
  FROM employees;
""",
    con=conn,
).head(5)

In [None]:
# CASE
# Bin every employee into a role category depending on whether the jobTitle
# is 'Sales Rep'. The CASE expression is computed per row and returned as a
# new column, named "role" by the alias.
# String literals use single quotes (standard SQL); double quotes are meant
# for identifiers and only work as strings through a SQLite fallback.

pd.read_sql("""
SELECT firstName, lastName, jobTitle,
       CASE
       WHEN jobTitle = 'Sales Rep' THEN 'Sales Rep'
       ELSE 'Not Sales Rep'
       END AS role
  FROM employees;
""", conn).head(10)

In [3]:
# CASE
# Translate officeCode into a readable office location. There is no ELSE
# branch, so any code other than 1-5 produces NULL in the "office" column.
# String literals use single quotes (standard SQL).

pd.read_sql("""
SELECT firstName, lastName, officeCode,
       CASE
       WHEN officeCode = '1' THEN 'San Francisco, CA'
       WHEN officeCode = '2' THEN 'Boston, MA'
       WHEN officeCode = '3' THEN 'New York, NY'
       WHEN officeCode = '4' THEN 'Paris, France'
       WHEN officeCode = '5' THEN 'Tokyo, Japan'
       END AS office
  FROM employees;
""", conn).head(10)

In [None]:
# CASE
# Shorter form of the office lookup: naming the column once after CASE lets
# each WHEN list only the value to compare against.
# String literals use single quotes (standard SQL).

pd.read_sql("""
SELECT firstName, lastName, officeCode,
       CASE officeCode
       WHEN '1' THEN 'San Francisco, CA'
       WHEN '2' THEN 'Boston, MA'
       WHEN '3' THEN 'New York, NY'
       WHEN '4' THEN 'Paris, France'
       WHEN '5' THEN 'Tokyo, Japan'
       END AS office
  FROM employees;
""", conn).head(10)

In [None]:
# length()
# Number of characters in each employee's first name.
pd.read_sql(
    sql="""
SELECT length(firstName) AS name_length
  FROM employees;
""",
    con=conn,
).head(5)

In [4]:
# upper()
# Return every employee's first name in all caps.
pd.read_sql(
    sql="""
SELECT upper(firstName) AS name_in_all_caps
  FROM employees;
""",
    con=conn,
).head(5)

In [None]:
# substr()
# First initial of each employee: one character, starting at position 1 of
# the first name.
pd.read_sql(
    sql="""
SELECT substr(firstName, 1, 1) AS first_initial
  FROM employees;
""",
    con=conn,
).head(5)

In [None]:
# To add a "." after each first initial, use the SQLite || (concatenate)
# operator. It works similarly to + with strings in Python.
# The appended literal is single-quoted, as standard SQL expects for strings.

pd.read_sql("""
SELECT substr(firstName, 1, 1) || '.' AS first_initial
  FROM employees;
""", conn).head()

In [None]:
# || also joins column values, not just string literals: here the first and
# last name are glued together with nothing in between.
pd.read_sql(
    sql="""
SELECT firstName || lastName AS full_name
  FROM employees;
""",
    con=conn,
).head(5)

In [None]:
# Same concatenation with a single-quoted space literal between the two
# columns, giving a readable full name.
pd.read_sql("""
SELECT firstName || ' ' || lastName AS full_name
  FROM employees;
""", conn).head()

In [None]:
# round()
# Round each unit price to the nearest dollar.
pd.read_sql(
    sql="""
SELECT round(priceEach) AS rounded_price
  FROM orderDetails;
""",
    con=conn,
)

In [None]:
# CAST ... AS INTEGER
# Round the price as before, then cast the rounded value to an integer.
pd.read_sql(
    sql="""
SELECT CAST(round(priceEach) AS INTEGER) AS rounded_price_int
  FROM orderDetails;
""",
    con=conn,
)

In [None]:
# Math operations
# Arithmetic between columns: unit price times quantity for each order line.
pd.read_sql(
    sql="""
SELECT priceEach * quantityOrdered AS total_price
  FROM orderDetails;
""",
    con=conn,
)

In [None]:
# Datetime
# Plain subtraction of the two date columns. This does not produce a number
# of days; the following cell uses julianday() to get that.
pd.read_sql(
    sql="""
SELECT requiredDate - orderDate
  FROM orders;
""",
    con=conn,
)

In [None]:
# julianday()
# Convert both dates with julianday() before subtracting so the difference
# is expressed in days.
pd.read_sql(
    sql="""
SELECT julianday(requiredDate) - julianday(orderDate) AS days_from_order_to_required
  FROM orders;
""",
    con=conn,
)

In [None]:
# date() with a modifier
# Select each order date together with the date one week after it. The
# '+7 days' modifier is a string literal, so it is single-quoted.

pd.read_sql("""
SELECT orderDate, date(orderDate, '+7 days') AS one_week_later
  FROM orders;
""", conn)

In [None]:
# strftime
# The strftime function is very similar to the Python version. It is useful
# for splitting a date or time value into sub-parts: here the month, year and
# day of each order date.
# The format strings are string literals, so they are single-quoted.

pd.read_sql("""
SELECT orderDate,
       strftime('%m', orderDate) AS month,
       strftime('%Y', orderDate) AS year,
       strftime('%d', orderDate) AS day
  FROM orders;
""", conn)

Result 

#	orderDate	month	year	day
0	2003-01-06	01	2003	06
1	2003-01-09	01	2003	09
2	2003-01-10	01	2003	10

In [None]:
# COUNT(*): number of rows that meet the WHERE condition.
# SQL example only, kept as a comment so the cell remains valid Python
# (the students table is not queried anywhere else in this notebook):
#
#   SELECT COUNT(*) FROM students WHERE grade > 85;

# Module 2

In [None]:
# The WHERE filter goes after FROM and before GROUP BY, so rows are filtered
# first and only the remaining rows are grouped and summed.
# SQL example only, kept as a comment so the cell remains valid Python:
#
#   SELECT product_name, SUM(price * quantity) AS total_sales
#   FROM sales
#   WHERE category = 'Electronics'
#   AND price > 500
#   AND sale_date BETWEEN '2023-10-01' AND '2023-12-31'
#   GROUP BY product_name;

In [None]:
# Switch to the pets database. Close the connection opened earlier before
# rebinding `conn`; otherwise that first connection is never released.
conn.close()

conn = sqlite3.connect('pets_database.db')
cursor = conn.cursor()  # not used by the cells visible here, which go through pd.read_sql
pd.read_sql("SELECT * FROM cats;", conn)

In [None]:
# WHERE with >=
# Keep only the cats whose age is 5 or more.
pd.read_sql(
    sql="""
SELECT *
 FROM cats
WHERE age >= 5;
""",
    con=conn,
)

In [None]:
# BETWEEN
# General syntax (SQL template, kept as a comment so the cell remains valid
# Python):
#
#   SELECT column_name(s)
#     FROM table_name
#    WHERE column_name BETWEEN value1 AND value2;

# Cats aged 1 to 3.
pd.read_sql("""
SELECT *
  FROM cats
 WHERE age BETWEEN 1 AND 3;
""", conn)

In [None]:
# IS NULL / IS NOT NULL
# General syntax (SQL template, kept as a comment so the cell remains valid
# Python):
#
#   SELECT column(s)
#     FROM table_name
#    WHERE column_name IS (NOT) NULL;

# Cats with no owner_id recorded.
pd.read_sql("""
SELECT *
  FROM cats
WHERE owner_id IS NULL;
""", conn)

In [None]:
# LIKE
# Uses wildcards to specify which parts of the string must match exactly and
# which parts can vary.
# General syntax (SQL template, kept as a comment so the cell remains valid
# Python):
#
#   SELECT column(s)
#     FROM table_name
#    WHERE column_name LIKE 'string_with_wildcards';

In [None]:
# LIKE: select all cats with names that start with "M".
# In SQLite, LIKE is case-insensitive for ASCII letters by default, so this
# also matches names that start with "m".

pd.read_sql("""
SELECT *
  FROM cats
 WHERE name LIKE 'M%';
""", conn)

# The substr built-in can express a similar filter, but `=` on text is
# case-sensitive with the default collation, so this matches uppercase "M" only.
pd.read_sql("""
SELECT *
  FROM cats
 WHERE substr(name, 1, 1) = 'M';
""", conn)

# Upper-casing the first letter before comparing makes the substr version
# match both "M" and "m", like the LIKE query above.
pd.read_sql("""
SELECT *
  FROM cats
 WHERE UPPER(substr(name, 1, 1)) = 'M';
""", conn)

In [None]:
# Select all cats whose name is four letters long with "a" as the second
# letter. Each _ wildcard stands for exactly one character.

pd.read_sql("""
SELECT *
  FROM cats
 WHERE name LIKE '_a__';
""", conn)

# Nearly the same filter written with length() and substr(). Unlike LIKE,
# the `=` comparison is case-sensitive with the default collation, so an
# uppercase "A" in second position is not matched here.
pd.read_sql("""
SELECT *
  FROM cats
 WHERE length(name) = 4 AND substr(name, 2, 1) = 'a';
""", conn)

In [None]:
# Number of cats that have an owner_id of 1.
# The WHERE clause filters the rows first; COUNT then runs over what is left.
pd.read_sql(sql="""
SELECT COUNT(owner_id)
  FROM cats
 WHERE owner_id = 1;
""", con=conn)


# The same count done in pandas
# Pull the whole cats table into a DataFrame
cats_df = pd.read_sql(sql="SELECT * FROM cats;", con=conn)

# Comparing against 1 gives a boolean Series; summing it counts the True values
count = cats_df['owner_id'].eq(1).sum()

# Module 3

```sql
SELECT product_name, SUM(quantity) AS total_quantity
FROM sales
WHERE sale_date >= '2023-01-01'
GROUP BY product_name
ORDER BY total_quantity DESC
LIMIT 5;
```

Let’s unpack John’s query:

- `SELECT product_name, SUM(quantity) AS total_quantity`: Selects the product name and calculates the total quantity sold for each product.
- `FROM sales`: Specifies the sales table as the source of data.
- `WHERE sale_date >= '2023-01-01'`: Filters the data to include sales from January 1, 2023, onwards.
- `GROUP BY product_name`: Groups the results by product name to aggregate the quantities.
- `ORDER BY total_quantity DESC`: Sorts the results by total quantity in descending order, so the highest-selling products come first.
- `LIMIT 5`: Limits the results to the top 5 products.


In [16]:
# Release the SQLite connection currently bound to `conn`.
conn.close()