diff --git a/02_activities/Assignment1_Sandbox.sql b/02_activities/Assignment1_Sandbox.sql new file mode 100644 index 000000000..25d46fd04 --- /dev/null +++ b/02_activities/Assignment1_Sandbox.sql @@ -0,0 +1,186 @@ +/* One-to-Many: where a given row within a table can be referenced by multiple rows in +another table */ + +/* Check number of booth numbers available */ +SELECT booth_number +FROM booth -- 12 booth numbers available + +/* Compare how many booth_numbers are in the vendor_booth_assignments, one select with distinct, one without. */ +SELECT booth_number +FROM vendor_booth_assignments -- 921 rows; There are 921 booth_number rows in vendor_booth_assignments. + +SELECT DISTINCT booth_number +FROM vendor_booth_assignments -- 7 rows; There are 7 unique booth numbers assigned to vendor booths. + +/* Compare how many booth_numbers and vendor_id are in vendor_booth_assignments, one select with distinct, one without. */ +SELECT booth_number, vendor_id +FROM vendor_booth_assignments -- 921 rows. There are 921 booth_number and vendor_id rows. + +SELECT DISTINCT booth_number, vendor_id +FROM vendor_booth_assignments -- 11 rows. + +/* Compare how many booth_numbers, vendor_id and market_date are in vendor_booth_assignments, one select with distinct, one without. */ +SELECT booth_number, vendor_id, market_date +FROM vendor_booth_assignments -- 921 rows + +SELECT DISTINCT booth_number, vendor_id, market_date +FROM vendor_booth_assignments -- 921 rows + + +/* Assignment 1 - Section 2 */ + +/* Write a query that returns everything in the customer table. */ + +SELECT customer_id, customer_first_name, customer_last_name, customer_postal_code +FROM customer + +SELECT * +FROM customer + +/* Write a query that displays all of the columns and 10 rows from the customer table, sorted by customer_last_name, then customer_first_ name. 
*/ +SELECT customer_id, customer_first_name, customer_last_name, customer_postal_code +FROM customer +ORDER BY customer_last_name, customer_first_name +LIMIT 10; + +/* 1. Write a query that returns all customer purchases of product IDs 4 and 9. */ +SELECT * +FROM customer_purchases +WHERE product_id = 4 + +SELECT * +FROM customer_purchases +WHERE product_id = 9 + +SELECT * +FROM customer_purchases +WHERE product_id IN (4,9) + +SELECT * +FROM customer_purchases +WHERE product_id = 4 +OR product_id = 9 + +/* 2. Write a query that returns all customer purchases and a new calculated column 'price' (quantity * cost_to_customer_per_qty), filtered by customer IDs between 8 and 10 (inclusive) using either: + 1. two conditions using AND + 2. one condition using BETWEEN */ + +SELECT product_id, vendor_id, market_date, customer_id, quantity, cost_to_customer_per_qty, transaction_time, (quantity*cost_to_customer_per_qty) AS price +FROM customer_purchases +WHERE customer_id BETWEEN 8 AND 10 + +/* CASE - Q1 */ +SELECT product_id, product_name +, CASE WHEN product_qty_type = 'unit' + THEN 'unit' + ELSE 'bulk' +END prod_qty_type_condensed +FROM product; + +/* CASE - Q2 Add a column to the previous query called `pepper_flag` that outputs a 1 if the product_name contains the word “pepper” (regardless of capitalization), and otherwise outputs 0. + */ +SELECT product_id, product_name +, CASE WHEN product_qty_type = 'unit' THEN 'unit' + ELSE 'bulk' + END AS prod_qty_type_condensed +, CASE WHEN product_name LIKE '%pepper%' + THEN 1 + ELSE 0 + END AS pepper_flag +FROM product + +/* Section 2 - JOIN 1. Write a query that `INNER JOIN`s the `vendor` table to the `vendor_booth_assignments` table on the `vendor_id` field they both have in common, +and sorts the result by `vendor_name`, then `market_date`. 
*/ + +SELECT +v.vendor_id, +vendor_name, +vendor_type, +vendor_owner_first_name, +vendor_owner_last_name, +booth_number, +market_date +FROM vendor AS v +INNER JOIN vendor_booth_assignments AS vb + ON v.vendor_id = vb.vendor_id + ORDER BY vendor_name, market_date + +/* Secton 3 - AGGREGATE 1. Write a query that determines how many times each vendor has rented a booth at the farmer’s market by counting the vendor booth assignments per `vendor_id`. */ + +SELECT +COUNT(booth_number) +, vendor_id +FROM vendor_booth_assignments +GROUP BY vendor_id + +/* 2. The Farmer’s Market Customer Appreciation Committee wants to give a bumper sticker to everyone who has ever spent more than $2000 at the market. +Write a query that generates a list of customers for them to give stickers to, sorted by last name, then first name. +**HINT**: This query requires you to join two tables, use an aggregate function, and use the HAVING keyword. */ + +SELECT +cp.customer_id, +product_id, +quantity, +cost_to_customer_per_qty, +customer_first_name, +customer_last_name, +market_date, +transaction_time + FROM customer_purchases AS cp + LEFT JOIN customer AS c + ON cp.customer_id = c.customer_id + ORDER BY customer_last_name, customer_first_name + + +SUM(quantity*cost_to_customer_per_qty) AS purchase_total + , customer_id + GROUP BY customer_id + +/* TEMP TABLE 1. Insert the original vendor table into a temp.new_vendor and then add a 10th vendor: Thomass Superfood Store, a Fresh Focused store, owned by Thomas Rosenthal +**HINT**: This is two total queries -- first create the table from the original, then insert the new 10th vendor. +When inserting the new vendor, you need to appropriately align the columns to be inserted (there are five columns to be inserted, I've given you the details, but not the syntax) +To insert the new row use VALUES, specifying the value you want for each column: +`VALUES(col1,col2,col3,col4,col5)` +*/ + +/* DATE +1. 
Get the customer_id, month, and year (in separate columns) of every purchase in the customer_purchases table. +**HINT**: you might need to search for strfrtime modifers sqlite on the web to know what the modifers for month and year are! + +2. Using the previous query as a base, determine how much money each customer spent in April 2022. Remember that money spent is `quantity*cost_to_customer_per_qty`. +**HINTS**: you will need to AGGREGATE, GROUP BY, and filter...but remember, STRFTIME returns a STRING for your WHERE statement!! +*/ + + + + + + + + + + +-- Number of unique vendor_id values +SELECT COUNT(DISTINCT vendor_id) +FROM vendor_booth_assignments + +-- Number of rows with unique combinations of vendor_id and booth_number +SELECT COUNT(*) +From ( + SELECT DISTINCT vendor_id, booth_number + FROM vendor_booth_assignments +) AS number_vendor_booths + +-- Sum of rows with unique combinations of vendor_id and booth_number +SELECT COUNT(*) FROM vendor_booth_assignments + + + + +FROM ( + SELECT DISTINCT vendor_id +vendor_booth_assignments + + + + diff --git a/02_activities/assignments/DC_Cohort/Assignment1.md b/02_activities/assignments/DC_Cohort/Assignment1.md index f78778f5b..b29353bf1 100644 --- a/02_activities/assignments/DC_Cohort/Assignment1.md +++ b/02_activities/assignments/DC_Cohort/Assignment1.md @@ -206,4 +206,5 @@ Consider, for example, concepts of fariness, inequality, social structures, marg ``` Your thoughts... +When I consider a database that I have spent some time exploring, and the value systems embedded in it, I think of the Ontario Ministry of Education's datasets stored in the Government of Ontario's Data Catalogue: https://data.ontario.ca/dataset/?keywords_en=Education+and+Training. In the two datasets that I've recently downloaded and linked on the variable "district school board" the following: 1. 
School board financial reports, https://data.ontario.ca/dataset/school-board-financial-reports-estimates-revised-estimates-and-financial-statements, and 2. School information and student demographics, https://data.ontario.ca/dataset/school-information-and-student-demographics, political, economic, and social value systems are evident. My interest in the datasets is primarily centred on special education identification and resourcing for students with special education needs. Data variable definitions can be vague, and student special education counts vary across school boards, in part because board policies depend not only on the provincial legal framework and government policy, but also on local political, economic, and community pressures. This can have the effect of marginalizing some student learning needs and can reinforce inequality of learning opportunity across Ontario's public education system. Moreover, data definitions do not necessarily track changes in how special education programs are actually administered, generating, in the case of Ontario, essentially a bifurcation in how special education need is measured between "formally" and "informally" identified students and the corresponding funding they receive. Here, the Ontario education data system fails to capture the dynamic nature of public education policy development across the province over time. ``` diff --git a/02_activities/assignments/DC_Cohort/Assignment1_ERD.png b/02_activities/assignments/DC_Cohort/Assignment1_ERD.png new file mode 100644 index 000000000..9e5daee75 Binary files /dev/null and b/02_activities/assignments/DC_Cohort/Assignment1_ERD.png differ diff --git a/02_activities/assignments/DC_Cohort/assignment1.sql b/02_activities/assignments/DC_Cohort/assignment1.sql index c992e3205..b6dee4333 100644 --- a/02_activities/assignments/DC_Cohort/assignment1.sql +++ b/02_activities/assignments/DC_Cohort/assignment1.sql @@ -4,67 +4,113 @@ --SELECT /* 1. 
Write a query that returns everything in the customer table. */ - - +SELECT customer_id, customer_first_name, customer_last_name, customer_postal_code +FROM customer; /* 2. Write a query that displays all of the columns and 10 rows from the cus- tomer table, sorted by customer_last_name, then customer_first_ name. */ - - +SELECT customer_id, customer_last_name, customer_first_name, customer_postal_code +FROM customer +ORDER BY customer_last_name, customer_first_name +LIMIT 10; --WHERE /* 1. Write a query that returns all customer purchases of product IDs 4 and 9. */ - - +SELECT * +FROM customer_purchases +WHERE product_id IN (4,9); /*2. Write a query that returns all customer purchases and a new calculated column 'price' (quantity * cost_to_customer_per_qty), filtered by customer IDs between 8 and 10 (inclusive) using either: 1. two conditions using AND 2. one condition using BETWEEN */ --- option 1 +-- option 1: two conditions joined by AND (inclusive lower and upper bound) +SELECT product_id, vendor_id, market_date, customer_id, quantity, cost_to_customer_per_qty, transaction_time, (quantity*cost_to_customer_per_qty) AS price +FROM customer_purchases +WHERE customer_id >= 8 +AND customer_id <= 10; -- option 2 - - +SELECT product_id, vendor_id, market_date, customer_id, quantity, cost_to_customer_per_qty, transaction_time, (quantity*cost_to_customer_per_qty) AS price +FROM customer_purchases +WHERE customer_id BETWEEN 8 AND 10; --CASE /* 1. Products can be sold by the individual unit or by bulk measures like lbs. or oz. Using the product table, write a query that outputs the product_id and product_name columns and add a column called prod_qty_type_condensed that displays the word “unit” if the product_qty_type is “unit,” and otherwise displays the word “bulk.” */ - +SELECT product_id, product_name +, CASE WHEN product_qty_type = 'unit' + THEN 'unit' + ELSE 'bulk' +END AS prod_qty_type_condensed +FROM product; /* 2. 
We want to flag all of the different types of pepper products that are sold at the market. add a column to the previous query called pepper_flag that outputs a 1 if the product_name contains the word “pepper” (regardless of capitalization), and otherwise outputs 0. */ - - +SELECT product_id, product_name +, CASE WHEN product_qty_type = 'unit' THEN 'unit' + ELSE 'bulk' + END AS prod_qty_type_condensed +, CASE WHEN product_name LIKE '%pepper%' + THEN 1 + ELSE 0 + END AS pepper_flag +FROM product; --JOIN /* 1. Write a query that INNER JOINs the vendor table to the vendor_booth_assignments table on the vendor_id field they both have in common, and sorts the result by vendor_name, then market_date. */ - - - +SELECT +v.vendor_id, +vendor_name, +vendor_type, +vendor_owner_first_name, +vendor_owner_last_name, +booth_number, +market_date +FROM vendor AS v +INNER JOIN vendor_booth_assignments AS vb + ON v.vendor_id = vb.vendor_id +ORDER BY vendor_name, market_date; /* SECTION 3 */ -- AGGREGATE /* 1. Write a query that determines how many times each vendor has rented a booth at the farmer’s market by counting the vendor booth assignments per vendor_id. */ - - +SELECT +COUNT(booth_number) +, vendor_id +FROM vendor_booth_assignments +GROUP BY vendor_id; /* 2. The Farmer’s Market Customer Appreciation Committee wants to give a bumper sticker to everyone who has ever spent more than $2000 at the market. Write a query that generates a list of customers for them to give stickers to, sorted by last name, then first name. HINT: This query requires you to join two tables, use an aggregate function, and use the HAVING keyword. 
*/ - - +SELECT +cp.customer_id +--,product_id +--,quantity +--,cost_to_customer_per_qty +,customer_last_name +,customer_first_name +--,market_date +--,transaction_time +,SUM(quantity*cost_to_customer_per_qty) AS purchase_total +FROM customer_purchases AS cp +INNER JOIN customer AS c + ON cp.customer_id = c.customer_id +GROUP BY cp.customer_id, customer_last_name, customer_first_name +HAVING SUM(quantity*cost_to_customer_per_qty) > 2000 +ORDER BY customer_last_name, customer_first_name; --Temp Table /* 1. Insert the original vendor table into a temp.new_vendor and then add a 10th vendor: @@ -77,7 +123,30 @@ When inserting the new vendor, you need to appropriately align the columns to be -> To insert the new row use VALUES, specifying the value you want for each column: VALUES(col1,col2,col3,col4,col5) */ +-- if a table named new_vendor exists, delete it, otherwise do nothing +DROP TABLE IF EXISTS temp.new_vendor; + +--make the temporary new_vendor table +CREATE TABLE temp.new_vendor AS + +-- define the table +SELECT * +FROM vendor; + +-- keep an untouched copy of temp.new_vendor in temp.new_new_vendor (taken before the insert below) + +-- if a table named new_new_vendor exists, delete it, otherwise do nothing +DROP TABLE IF EXISTS temp.new_new_vendor; + +-- make the temporary new_new_vendor table +CREATE TABLE temp.new_new_vendor AS + +SELECT * +FROM temp.new_vendor; +-- add the 10th vendor to temp.new_vendor, naming each target column explicitly (see: https://www.w3schools.com/sql/sql_insert.asp) +INSERT INTO temp.new_vendor (vendor_id, vendor_name, vendor_type, vendor_owner_first_name, vendor_owner_last_name) +VALUES (10,'Thomass Superfood Store','Fresh Focused','Thomas','Rosenthal'); -- Date diff --git a/04_this_cohort/live_code/DC/module_2/MOD2_SQL2.sql b/04_this_cohort/live_code/DC/module_2/MOD2_SQL2.sql new file mode 100644 index 000000000..54199b345 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_2/MOD2_SQL2.sql @@ -0,0 +1,29 @@ +/* MODULE 2 */ +/* SELECT */ + + +/* 1. Select everything in the customer table */ +SELECT * +FROM customer; + +/* 2. Use sql as a calculator */ +SELECT 1 + 1 as somethingelse, 10*5 as somethingmore; + + +/* 3. 
Add order by and limit clauses */ +SELECT * +FROM customer +ORDER BY customer_first_name ASC +LIMIT 10; + + +/* 4. Select multiple specific columns */ +SELECT customer_id, customer_first_name +FROM customer; + + + +/* 5. Add a static value in a column */ +SELECT 2025 AS this_year, 'October' AS this_month, customer_id +FROM customer; + diff --git a/04_this_cohort/live_code/DC/module_2/module_2.sqbpro b/04_this_cohort/live_code/DC/module_2/module_2.sqbpro index 73ace631b..c5d015156 100644 --- a/04_this_cohort/live_code/DC/module_2/module_2.sqbpro +++ b/04_this_cohort/live_code/DC/module_2/module_2.sqbpro @@ -1,94 +1,147 @@ -
/* MODULE 2 */ +
/* MODULE 2 */ /* SELECT */ /* 1. Select everything in the customer table */ -SELECT +SELECT * +FROM customer; /* 2. Use sql as a calculator */ - +SELECT 1 + 1 as somethingelse, 10*5 as somethingmore; /* 3. Add order by and limit clauses */ +SELECT * +FROM customer +ORDER BY customer_first_name +LIMIT 10; +/* 4. Select multiple specific columns */ +SELECT customer_id, customer_first_name +FROM customer; -/* 4. Select multiple specific columns * / +/* 5. Add a static value in a column */ +SELECT 2025 as this_year, 'October' as this_month, customer_id +FROM customer; -/* 5. Add a static value in a column *//* MODULE 2 */ +/* MODULE 2 */ /* WHERE */ /* 1. Select only customer 1 from the customer table */ SELECT * FROM customer -WHERE +WHERE customer_id = 1; /* 2. Differentiate between AND and OR */ - +SELECT * +FROM customer +WHERE customer_id = 1 +AND customer_id = 2; -- AND needs both conditions true on the same row, so this matches nothing /* 3. IN */ - +SELECT * +FROM customer +WHERE customer_id IN (3, 4, 5) +OR customer_postal_code IN ('M4M', 'M1L'); -- customers in these postal codes (one quoted literal per code) /* 4. LIKE */ +-- all the peppers +SELECT * FROM product +WHERE product_name LIKE '%pepper%'; +-- customer with a last name starting with a +SELECT * FROM customer +WHERE customer_last_name LIKE 'a%'; -- customers with last names that start with letter a +/* 5. Nulls and Blanks */ +SELECT * FROM product +WHERE product_size IS NULL +OR product_size = ''; -- empty string -/* 5. Nulls and Blanks* / - - +/* 6. BETWEEN x AND y */ +SELECT * +FROM customer +WHERE customer_id BETWEEN 1 AND 20; -- between is inclusive -/* 6. BETWEEN x AND y *//* MODULE 2 */ +SELECT market_date +FROM market_date_info +WHERE market_date BETWEEN '2022-10-01' AND '2022-10-31';/* MODULE 2 */ /* CASE */ SELECT * /* 1. Add a CASE statement declaring which days vendors should come */ - +, CASE WHEN vendor_type = 'Fresh Focused' THEN 'Wednesday' + WHEN vendor_type = 'Prepared Foods' THEN 'Thursday' + ELSE 'Saturday' +END as day_of_specialty /* 2. 
Add another CASE statement for Pie Day */ - - +, case WHEN vendor_name = "Annie's Pies" -- double quotes will work here! + THEN 'Annie is great' + END as pie_day /* 3. Add another CASE statement with an ELSE clause to handle rows evaluating to False */ +, CASE WHEN vendor_name LIKE '%pie%' + THEN 'Wednesday' + ELSE 'Friday' + END as also_pie_day - - +FROM vendor; + /* 4. Experiment with selecting a different column instead of just a string value */ +SELECT * +, CASE WHEN cost_to_customer_per_qty<'1.00' -- decimal values need to be in strings; but integers can be without string quotes +THEN cost_to_customer_per_qty*5 +ELSE cost_to_customer_per_qty +END AS inflation +FROM customer_purchases -FROM vendor/* MODULE 2 */ +/* MODULE 2 */ /* DISTINCT */ /* 1. Compare how many customer_ids are the customer_purchases table, one select with distinct, one without */ -- 4221 rows -SELECT customer_id FROM customer_purchases - +SELECT customer_id FROM customer_purchases; +SELECT DISTINCT customer_id FROM customer_purchases; /* 2. Compare the difference between selecting market_day in market_date_info, with and without distinct: what do these difference mean?*/ + -- market is open for 150 days +SELECT market_day +FROM market_date_info; +-- market is only open 2 days/week +SELECT DISTINCT market_day +FROM market_date_info; /* 3. Which vendor has sold products to a customer */ +-- 3 vendors have sold products +SELECT DISTINCT vendor_id +FROM customer_purchases; - -/* 4. Which vendor has sold products to a customer ... and which product was it * / - +/* 4. Which vendor has sold products to a customer ... and which product was it */ +SELECT DISTINCT vendor_id, product_id +FROM customer_purchases; /* 5. Which vendor has sold products to a customer ... and which product was it? ... AND to whom was it sold*/ - +SELECT DISTINCT vendor_id, product_id, customer_id +FROM customer_purchases; /* MODULE 2 */ /* INNER JOIN */ @@ -97,7 +150,15 @@ SELECT customer_id FROM customer_purchases ... 
use an INNER JOIN to see only products that have been purchased */ -- without table aliases +SELECT product_name, -- coming from the product TABLE +vendor_id, -- coming from customer_purchases TABLE +market_date, +customer_id, +customer_purchases.product_id -- MUST specify which table product_id to be used in the inner join relationship!! +FROM product +INNER JOIN customer_purchases + ON customer_purchases.product_id = product.product_id @@ -107,6 +168,16 @@ SELECT customer_id FROM customer_purchases Add customers' first and last names with an INNER JOIN */ -- using table aliases +SELECT DISTINCT +vendor_id, +product_id, +customer_first_name, -- coming from customer +customer_last_name, +c.customer_id -- disimbiguate which table customer_id is to be used in the inner join relationship +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id + /* MODULE 2 */ @@ -116,45 +187,104 @@ SELECT customer_id FROM customer_purchases /* 1. There are products that have been bought ... but are there products that have not been bought? Use a LEFT JOIN to find out*/ +SELECT DISTINCT +p.product_id, +cp.product_id as [cp.product_id], -- name it, so we can see it (in this example) +product_name +FROM product as p +LEFT JOIN customer_purchases as cp + ON p.product_id = cp.product_id; /* 2. Directions of LEFT JOINs matter ...*/ +-- only products that have been sold ... becuase there are no product_id's in customer purchases that aren't in product +SELECT DISTINCT +p.product_id, +cp.product_id as [cp.product_id], -- name it, so we can see it (in this example) +product_name - - +FROM customer_purchases as p -- switch product and customer purchases (all else same) +LEFT JOIN product as cp + ON p.product_id = cp.product_id; /* 3. As do which values you filter on ... 
*/ +SELECT DISTINCT +p.product_id, +cp.product_id as [cp.product_id], -- name it, so we can see it (in this example) +product_name +FROM product as p +LEFT JOIN customer_purchases as cp + ON p.product_id = cp.product_id +WHERE cp.product_id BETWEEN 1 AND 6; /* 4. Without using a RIGHT JOIN, make this query return the RIGHT JOIN result set -...**Hint, flip the order of the joins** ... +...**Hint, flip the order of the joins** ... */ +-- Original code SELECT * FROM product_category AS pc LEFT JOIN product AS p ON pc.product_category_id = p.product_category_id - ORDER by pc.product_category_id + ORDER by pc.product_category_id; + +/* BUT if use right join, now filtering on the left table for observations only present on the right: */ -...Note how the row count changed from 24 to 23 -*/ +-- right join +/*...Note how the row count changes from 24 to 23 using the RIGHT JOIN */ +SELECT * -/* MODULE 2 */ +FROM product_category as pc +RIGHT JOIN product p + ON pc.product_category_id = p.product_category_id + ORDER BY pc.product_category_id; + +-- left join to return the same result +SELECT * + +FROM product AS p +LEFT JOIN product_category AS pc + ON pc.product_category_id = p.product_category_id/* MODULE 2 */ /* Multiple Table JOINs */ /* 1. Using the Query #4 from DISTINCT earlier (Which vendor has sold products to a customer AND which product was it AND to whom was it sold) - Replace all the IDs (customer, vendor, and product) with the names instead*/ - - + Replace all the IDs (customer, vendor, and product) with the names instead */ + +SELECT DISTINCT +cp.vendor_id -- could also be v.vendor_id +,vendor_name +--, product_id +, product_name +--, customer_id +, customer_first_name +, customer_last_name + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id +INNER JOIN vendor as v + ON v.vendor_id = cp.vendor_id +INNER JOIN product as p + ON p.product_id = cp.product_id; + /* 2. 
Select product_category_name, everything from the product table, and then LEFT JOIN the customer_purchases table ... how does this LEFT JOIN affect the number of rows? Why do we have more rows now?*/ -
+SELECT product_category_name,p.*, cp.product_id as product_id_in_purchases_table + +FROM product_category as pc +INNER JOIN product as p + ON p.product_category_id = pc.product_category_id +LEFT JOIN customer_purchases as cp + ON cp.product_id = p.product_id + +ORDER BY cp.product_id
-- Reference to file "C:/Users/CECi/Desktop/DSI/sql/02_activities/Assignment1_Sandbox.sql" (not supported by this version) ---- Reference to file "C:/Users/CECi/Desktop/DSI/sql/02_activities/assignments/DC_Cohort/assignment1.sql" (not supported by this version) --
diff --git a/04_this_cohort/live_code/DC/module_3/codeshare_mod3 b/04_this_cohort/live_code/DC/module_3/codeshare_mod3 new file mode 100644 index 000000000..d190d6d74 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_3/codeshare_mod3 @@ -0,0 +1,331 @@ +/* MODULE 2 */ +/* Multiple Table JOINs */ + + +/* 1. Using the Query #4 from DISTINCT earlier + (Which vendor has sold products to a customer AND which product was it AND to whom was it sold) + + Replace all the IDs (customer, vendor, and product) with the names instead*/ +SELECT DISTINCT +--v.vendor_id +vendor_name +--, product_id +,product_name +--,customer_id -- first/last name +,customer_first_name +,customer_last_name + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id +INNER JOIN vendor as v + ON v.vendor_id = cp.vendor_id +INNER JOIN product as p + ON p.product_id = cp.product_id; + + +/* 2. Select product_category_name, everything from the product table, +and then LEFT JOIN the customer_purchases table +... how does this LEFT JOIN affect the number of rows? + +Why do we have more rows now?*/ + +SELECT product_category_name, p.*, cp.product_id as productid_in_purchases_table + +FROM product_category as pc +INNER JOIN product as p + ON p.product_category_id = pc.product_category_id +LEFT JOIN customer_purchases as cp + ON cp.product_id = p.product_id + +ORDER BY cp.product_id + + + +/* MODULE 3 */ +/* COUNT */ + + +/* 1. Count the number of products */ + SELECT COUNT(product_id) as num_of_product + FROM product; + + +/* 2. How many products per product_qty_type */ +SELECT product_qty_type, COUNT(product_id) as num_of_product +FROM product +GROUP BY product_qty_type; + +/* 3. How many products per product_qty_type and per their product_size */ +SELECT product_size +,product_qty_type +, COUNT(product_id) as num_of_product +FROM product +GROUP BY product_size, product_qty_type + +ORDER BY product_qty_type; + +/* COUNT DISTINCT + 4. 
How many unique products were bought */ +SELECT COUNT(DISTINCT product_id) as bought_prods +FROM customer_purchases + +/* MODULE 3 */ +/* SUM & AVG */ + + +/* 1. How much did customers spend each (per) day */ +SELECT +market_date +,customer_id +,SUM(quantity*cost_to_customer_per_qty) as total_spend + +FROM customer_purchases +GROUP BY market_date,customer_id; + + +/* 2. How much does each customer spend on average */ +SELECT +customer_first_name +,customer_last_name +,ROUND(AVG(quantity*cost_to_customer_per_qty),2) as avg_spend + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id + +GROUP BY c.customer_id + + +/* MODULE 3 */ +/* MIN & MAX */ + + +/* 1. What is the most expensive product +...pay attention to how it doesn't handle ties very well +*/ +SELECT product_name, max(original_price) as most_expensive + +FROM vendor_inventory as vi +INNER JOIN product as p + ON p.product_id = vi.product_id; + + +/* 2. Prove that max is working */ +SELECT DISTINCT +product_name, +original_price + +FROM vendor_inventory as vi +INNER JOIN product as p + ON p.product_id = vi.product_id; + + +/* 3. Find the minimum price per each product_qty_type */ +SELECT product_name +,product_qty_type +,min(original_price) + +FROM vendor_inventory as vi +INNER JOIN product as p + ON p.product_id = vi.product_id + +GROUP BY product_qty_type; + +/* 4. Prove that min is working */ +SELECT DISTINCT product_name +,product_qty_type +--,min(original_price) +,original_price + +FROM vendor_inventory as vi +INNER JOIN product as p + ON p.product_id = vi.product_id; + +/* 5. Min/max on a string +... not particularly useful? */ +SELECT max(product_name) +FROM product + +/* Arithmitic */ + + +/* 1. power, pi(), ceiling, division, integer division, etc */ +SELECT power(4,2), pi(); + +SELECT 10.0 / 3.0 as division, +CAST(10.0 as INT) / CAST(3.0 as INT) as integer_division; + +/* 2. 
Every even vendor_id with modulo */ +SELECT * FROM vendor +WHERE vendor_id % 2 = 0; + +/* 3. What about every third? */ +SELECT * FROM vendor +WHERE vendor_id % 3 = 0; + +/* MODULE 3 */ +/* HAVING */ + + +/* 1. How much did a customer spend on each day? +Filter to customer_id between 1 and 5 and total_spend > 50 +... What order of execution occurs?*/ +SELECT +market_date +,customer_id +,SUM(quantity*cost_to_customer_per_qty) as total_spend + +FROM customer_purchases +WHERE customer_id BETWEEN 1 AND 5 + +GROUP BY market_date, customer_id +HAVING total_spend > 50; + +/* 2. How many products were bought? +Filter to number of purchases between 300 and 500 */ +SELECT count(product_id) as num_of_prod, product_id +FROM customer_purchases +GROUP BY product_id +HAVING count(product_id) BETWEEN 300 AND 500 + +/* MODULE 3 */ +/* Subquery FROM */ + + +/*1. Simple subquery in a FROM statement, e.g. for inflation +...we could imagine joining this to a more complex query perhaps */ +SELECT DISTINCT +product_id, inflation + +FROM ( + SELECT product_id, cost_to_customer_per_qty, + CASE WHEN cost_to_customer_per_qty < '1.00' THEN cost_to_customer_per_qty*5 + ELSE cost_to_customer_per_qty END AS inflation + + FROM customer_purchases +); + + +/* 2. What is the single item that has been bought in the greatest quantity?*/ +--outer query +SELECT product_name, MAX(quantity_purchased) + +FROM product AS p +INNER JOIN ( +-- inner query + SELECT product_id + ,count(quantity) as quantity_purchased + + FROM customer_purchases + GROUP BY product_id +) AS x ON p.product_id = x.product_id + + +/* MODULE 3 */ +/* Subquery WHERE */ + + +/* 1. 
How much did each customer spend at each vendor for each day at the market WHEN IT RAINS */ + +SELECT +market_date, +customer_id, +vendor_id, +SUM(quantity*cost_to_customer_per_qty) as total_spend + +FROM customer_purchases + +-- filter by rain_flag +-- "what dates was it raining" +WHERE market_date IN ( + SELECT market_date + FROM market_date_info + WHERE market_rain_flag = 1 +) + +GROUP BY market_date, vendor_id, customer_id; + + + +/* 2. What is the name of the vendor who sells pie */ + +SELECT DISTINCT vendor_name + +FROM customer_purchases as cp +INNER JOIN vendor as v + ON cp.vendor_id = v.vendor_id + +WHERE product_id IN ( + SELECT product_id + FROM product + WHERE product_name LIKE '%pie%' +) + +/* MODULE 3 */ +/* Temp Tables */ + + +/* 1. Put our inflation query into a temp table, e.g. as temp.new_vendor_inventory*/ + +/* some structural code */ +/* ...heads up, sometimes this query can be finnicky -- it's good to try highlighting different sections to help it succeed...*/ + +-- if a table named new_vendor_inventory exists, delete it, other do NOTHING +DROP TABLE IF EXISTS temp.new_vendor_inventory; + +--make the table +CREATE TABLE temp.new_vendor_inventory AS + +-- definition of the table +SELECT *, +original_price*5 as inflation +FROM vendor_inventory; + + +/* 2. put the previous table into another temp table, e.g. as temp.new_new_vendor_inventory */ +DROP TABLE IF EXISTS temp.new_new_vendor_inventory; +CREATE TABLE temp.new_new_vendor_inventory AS + +SELECT * +,inflation*2 as super_inflation +FROM temp.new_vendor_inventory + +/* MODULE 3 */ +/* Common Table Expression (CTE) */ + + +/* 1. 
Calculate sales per vendor per day */ +WITH vendor_daily_sales AS ( + SELECT + md.market_date + ,market_day + ,market_week + ,market_year + ,vendor_name + ,SUM(quantity*cost_to_customer_per_qty) as sales + + FROM customer_purchases cp + INNER JOIN vendor v -- we want the vendor_name + ON v.vendor_id = cp.vendor_id + INNER JOIN market_date_info md -- all the date columns + ON cp.market_date = md.market_date + + GROUP BY md.market_date, v.vendor_id + +) + + +/* ... re-aggregate the daily sales for each WEEK instead now */ + +SELECT +market_year +,market_week +,vendor_name +,SUM(sales) as sales + +FROM vendor_daily_sales +GROUP BY market_year,market_week, vendor_name + + +FROM: https://codeshare.io/uoft-dsi-sql \ No newline at end of file diff --git a/04_this_cohort/live_code/DC/module_3/module_3.sqbpro b/04_this_cohort/live_code/DC/module_3/module_3.sqbpro index 3d421003d..0327fa20f 100644 --- a/04_this_cohort/live_code/DC/module_3/module_3.sqbpro +++ b/04_this_cohort/live_code/DC/module_3/module_3.sqbpro @@ -1,166 +1,178 @@ -
/* MODULE 3 */ -/* COUNT */ - - -/* 1. Count the number of products */ - - - -/* 2. How many products per product_qty_type */ - - - -/* 3. How many products per product_qty_type and per their product_size */ - - - -/* COUNT DISTINCT - 4. How many unique products were bought */ - - -/* MODULE 3 */ -/* SUM & AVG */ - - -/* 1. How much did customers spend each day */ - - - -/* 2. How much does each customer spend on average */ - - -/* MODULE 3 */ -/* MIN & MAX */ - - -/* 1. What is the most expensive product -...pay attention to how it doesn't handle ties very well -*/ - - -/* 2. Prove that max is working */ - - - -/* 3. Find the minimum price per each product_qty_type */ - - - -/* 4. Prove that min is working */ - - - -/* 5. Min/max on a string -... not particularly useful? */ - - -/* MODULE 3 */ -/* Arithmitic */ - - -/* 1. power, pi(), ceiling, division, integer division, etc */ -SELECT - - -/* 2. Every even vendor_id with modulo */ - - - -/* 3. What about every third? */ - -/* MODULE 3 */ -/* HAVING */ - - -/* 1. How much did a customer spend on each day? -Filter to customer_id between 1 and 5 and total_cost > 50 -... What order of execution occurs?*/ - - - -/* 2. How many products were bought? -Filter to number of purchases between 300 and 500 */ - -/* MODULE 3 */ -/* Subquery FROM */ - - -/*1. Simple subquery in a FROM statement, e.g. for inflation -...we could imagine joining this to a more complex query perhaps */ - - - - -/* 2. What is the single item that has been bought in the greatest quantity?*/ - - -/* MODULE 3 */ -/* Subquery WHERE */ - - -/* 1. How much did each customer spend at each vendor for each day at the market WHEN IT RAINS */ - - - - -/* 2. What is the name of the vendor who sells pie */ - -/* MODULE 3 */ -/* Common Table Expression (CTE) */ - - -/* 1. Calculate sales per vendor per day */ -SELECT - - - - - -/* ... re-aggregate the daily sales for each WEEK instead now */ - -/* MODULE 3 */ -/* Temp Tables */ - - -/* 1. 
Put our inflation query into a temp table, e.g. as temp.new_vendor_inventory*/ - -/* some structural code */ -/* ...heads up, sometimes this query can be finnicky -- it's good to try highlighting different sections to help it succeed...*/ - --- if a table named new_vendor_inventory exists, delete it, other do NOTHING -DROP TABLE IF EXISTS temp.new_vendor_inventory; - ---make the table -CREATE TABLE temp.new_vendor_inventory AS - --- definition of the table - - - - - -/* 2. put the previous table into another temp table, e.g. as temp.new_new_vendor_inventory */ - - -/* MODULE 3 */ -/* Date functions */ - - -/* 1. now */ -SELECT - - -/* 2. strftime */ - - - -/* 3. adding dates, e.g. last date of the month */ - - - -/* 4. difference between dates, - a. number of days between now and each market_date - b. number of YEARS between now and market_date - c. number of HOURS bewtween now and market_date - */ -
+/* MODULE 3 */
+/* COUNT */
+
+
+/* 1. Count the number of products */
+SELECT COUNT(product_id) AS num_of_product
+FROM product;
+
+
+/* 2. How many products per product_qty_type */
+SELECT product_qty_type, COUNT(product_id) AS num_of_product
+FROM product
+GROUP BY product_qty_type;
+
+
+/* 3. How many products per product_qty_type and per their product_size */
+SELECT product_size
+    ,product_qty_type
+    ,COUNT(product_id) AS num_of_product
+FROM product
+GROUP BY product_size, product_qty_type
+-- product_size is the tie-breaker so rows within one product_qty_type come back in a stable order
+ORDER BY product_qty_type, product_size;
+
+
+/* COUNT DISTINCT
+ 4. How many unique products were bought */
+SELECT COUNT(DISTINCT product_id) AS bought_prods
+FROM customer_purchases;
+
+
+
+/* MODULE 3 */
+/* SUM & AVG */
+
+
+/* 1. How much did customers spend each day */
+
+
+
+/* 2. How much does each customer spend on average */
+
+
+/* MODULE 3 */
+/* MIN & MAX */
+
+
+/* 1. What is the most expensive product
+...pay attention to how it doesn't handle ties very well
+*/
+
+
+/* 2. Prove that max is working */
+
+
+
+/* 3. Find the minimum price per each product_qty_type */
+
+
+
+/* 4. Prove that min is working */
+
+
+
+/* 5. Min/max on a string
+... not particularly useful? */
+
+
+/* MODULE 3 */
+/* Arithmetic */
+
+
+/* 1. power, pi(), ceiling, division, integer division, etc */
+SELECT
+
+
+/* 2. Every even vendor_id with modulo */
+
+
+
+/* 3. What about every third? */
+
+/* MODULE 3 */
+/* HAVING */
+
+
+/* 1. How much did a customer spend on each day?
+Filter to customer_id between 1 and 5 and total_cost > 50
+... What order of execution occurs?*/
+
+
+
+/* 2. How many products were bought?
+Filter to number of purchases between 300 and 500 */
+
+/* MODULE 3 */
+/* Subquery FROM */
+
+
+/*1. Simple subquery in a FROM statement, e.g. for inflation
+...we could imagine joining this to a more complex query perhaps */
+
+
+
+
+/* 2. What is the single item that has been bought in the greatest quantity?*/
+
+
+/* MODULE 3 */
+/* Subquery WHERE */
+
+
+/* 1. 
How much did each customer spend at each vendor for each day at the market WHEN IT RAINS */
+
+
+
+
+/* 2. What is the name of the vendor who sells pie */
+
+/* MODULE 3 */
+/* Common Table Expression (CTE) */
+
+
+/* 1. Calculate sales per vendor per day */
+SELECT
+
+
+
+
+
+/* ... re-aggregate the daily sales for each WEEK instead now */
+
+/* MODULE 3 */
+/* Temp Tables */
+
+
+/* 1. Put our inflation query into a temp table, e.g. as temp.new_vendor_inventory*/
+
+/* some structural code */
+/* ...heads up, sometimes this query can be finicky -- it's good to try highlighting different sections to help it succeed...*/
+
+-- if a table named new_vendor_inventory exists, delete it; otherwise do nothing
+DROP TABLE IF EXISTS temp.new_vendor_inventory;
+
+-- make the table
+CREATE TABLE temp.new_vendor_inventory AS
+
+-- definition of the table
+-- NOTE(review): no SELECT follows here yet, so CREATE TABLE ... AS is incomplete as written
+
+
+
+
+/* 2. put the previous table into another temp table, e.g. as temp.new_new_vendor_inventory */
+
+
+/* MODULE 3 */
+/* Date functions */
+
+
+/* 1. now */
+SELECT
+
+
+/* 2. strftime */
+
+
+
+/* 3. adding dates, e.g. last date of the month */
+
+
+
+/* 4. difference between dates,
+	a. number of days between now and each market_date
+	b. number of YEARS between now and market_date
+	c. number of HOURS between now and market_date
+	*/
+
diff --git a/sql b/sql
new file mode 100644
index 000000000..e69de29bb