## <span style='color:Blue'> CREATE DATABASE & TABLE </span>

In [0]:
%sql

-- Create the course database (a no-op when it already exists).
CREATE DATABASE IF NOT EXISTS dbacademy;
-- Optional: the GRANT below gives the learner SELECT, CREATE, MODIFY, READ_METADATA, and CREATE_NAMED_FUNCTION
-- privileges on this database. It is commented out; to use it, uncomment it and
-- **change the username to the learner's username**.
--GRANT ALL PRIVILEGES ON DATABASE dbacademy TO `jaime.vera.palomino@gmail.com`;
USE dbacademy;
-- Drop every table this setup cell creates (including the temporary *_csv staging tables),
-- so that re-running the cell rebuilds everything from the latest data set instead of
-- failing on a CREATE TABLE for a table that already exists.
DROP TABLE IF EXISTS basic_sql_for_databricks_sql_customers_csv;
DROP TABLE IF EXISTS basic_sql_for_databricks_sql_customers;
DROP TABLE IF EXISTS basic_sql_for_databricks_sql_loyalty_segments;
DROP TABLE IF EXISTS basic_sql_for_databricks_sql_loyalty_segments_csv;
DROP TABLE IF EXISTS basic_sql_for_databricks_sql_sales_gold;
DROP TABLE IF EXISTS basic_sql_for_databricks_sql_silver_promo_prices;
DROP TABLE IF EXISTS basic_sql_for_databricks_sql_silver_purchase_orders;
DROP TABLE IF EXISTS basic_sql_for_databricks_sql_silver_sales_orders;
-- This table is created further down in the cell and must be dropped here as well.
DROP TABLE IF EXISTS basic_sql_for_databricks_sql_silver_suppliers;
DROP TABLE IF EXISTS basic_sql_for_databricks_sql_source_silver_suppliers;
DROP TABLE IF EXISTS intro_to_databricks_sql_gym_logs;
-- Gym Logs table (used in the current course).
-- The table is defined directly over the JSON files at the LOCATION below;
-- the source is described as a read-only object store.
CREATE TABLE intro_to_databricks_sql_gym_logs
USING JSON
LOCATION 'wasbs://courseware@dbacademy.blob.core.windows.net/introduction-to-databricks-sql/v01/gym-logs';

-- Sanity check: read the new table back to confirm the setup worked.
SELECT *
FROM intro_to_databricks_sql_gym_logs;

-- Customers table
-- Step 1: expose the source CSV files as a staging table.
--         The first row is read as the header and column types are inferred.
CREATE TABLE basic_sql_for_databricks_sql_customers_csv
USING CSV
OPTIONS (
  path = 'wasbs://courseware@dbacademy.blob.core.windows.net/basic-sql-for-databricks-sql/v01/retail-org/customers',
  header = 'true',
  inferSchema = 'true'
);

-- Step 2: copy the staged rows into the table the course queries.
CREATE TABLE basic_sql_for_databricks_sql_customers
AS
SELECT *
FROM basic_sql_for_databricks_sql_customers_csv;

-- Step 3: the staging table is no longer needed.
DROP TABLE basic_sql_for_databricks_sql_customers_csv;

-- Loyalty Segments table
-- Step 1: expose the source CSV files as a staging table.
--         The first row is read as the header and column types are inferred.
CREATE TABLE basic_sql_for_databricks_sql_loyalty_segments_csv
USING CSV
OPTIONS (
  path = 'wasbs://courseware@dbacademy.blob.core.windows.net/basic-sql-for-databricks-sql/v01/retail-org/loyalty_segments',
  header = 'true',
  inferSchema = 'true'
);

-- Step 2: copy the staged rows into the table the course queries.
CREATE TABLE basic_sql_for_databricks_sql_loyalty_segments
AS
SELECT *
FROM basic_sql_for_databricks_sql_loyalty_segments_csv;

-- Step 3: the staging table is no longer needed.
DROP TABLE basic_sql_for_databricks_sql_loyalty_segments_csv;

-- Gold and silver tables.
-- Each table below is created as a copy (CREATE TABLE ... AS SELECT) of a Delta table
-- that is read directly by its storage path.

-- Sales (gold)
CREATE TABLE basic_sql_for_databricks_sql_sales_gold
AS
SELECT *
FROM delta.`wasbs://courseware@dbacademy.blob.core.windows.net/basic-sql-for-databricks-sql/v01/retail-org/solutions/gold/sales`;

-- Promo prices (silver)
CREATE TABLE basic_sql_for_databricks_sql_silver_promo_prices
AS
SELECT *
FROM delta.`wasbs://courseware@dbacademy.blob.core.windows.net/basic-sql-for-databricks-sql/v01/retail-org/solutions/silver/promo_prices`;

-- Purchase orders (silver)
CREATE TABLE basic_sql_for_databricks_sql_silver_purchase_orders
AS
SELECT *
FROM delta.`wasbs://courseware@dbacademy.blob.core.windows.net/basic-sql-for-databricks-sql/v01/retail-org/solutions/silver/purchase_orders.delta`;

-- Sales orders (silver)
CREATE TABLE basic_sql_for_databricks_sql_silver_sales_orders
AS
SELECT *
FROM delta.`wasbs://courseware@dbacademy.blob.core.windows.net/basic-sql-for-databricks-sql/v01/retail-org/solutions/silver/sales_orders`;

-- Suppliers (silver): the next two tables are both loaded from the same suppliers path.
CREATE TABLE basic_sql_for_databricks_sql_silver_suppliers
AS
SELECT *
FROM delta.`wasbs://courseware@dbacademy.blob.core.windows.net/basic-sql-for-databricks-sql/v01/retail-org/solutions/silver/suppliers`;

CREATE TABLE basic_sql_for_databricks_sql_source_silver_suppliers
AS
SELECT *
FROM delta.`wasbs://courseware@dbacademy.blob.core.windows.net/basic-sql-for-databricks-sql/v01/retail-org/solutions/silver/suppliers`;


## <span style='color:Blue'> Retrieving Data </span>

In [0]:
%sql
-- SELECT ... FROM
-- SELECT * FROM dbacademy.basic_sql_for_databricks_sql_customers;

-- SELECT specific columns and AS
-- SELECT customer_name AS Customer FROM dbacademy.basic_sql_for_databricks_sql_customers;

-- DISTINCT
-- SELECT DISTINCT state FROM dbacademy.basic_sql_for_databricks_sql_customers;

-- WHERE
-- SELECT * FROM dbacademy.basic_sql_for_databricks_sql_customers WHERE loyalty_segment = 3;

-- GROUP BY
-- This query will fail, because SELECT * returns columns that are neither listed in GROUP BY nor aggregated:
-- SELECT * FROM dbacademy.basic_sql_for_databricks_sql_customers GROUP BY loyalty_segment;

-- GROUP BY (cont.)
-- SELECT loyalty_segment, count(loyalty_segment) from dbacademy.basic_sql_for_databricks_sql_customers GROUP BY loyalty_segment;

-- ORDER BY
-- SELECT loyalty_segment, count(loyalty_segment) from dbacademy.basic_sql_for_databricks_sql_customers GROUP BY loyalty_segment ORDER BY loyalty_segment;

-- HAVING
-- SELECT loyalty_segment, count(loyalty_segment) AS loyalty_count from dbacademy.basic_sql_for_databricks_sql_customers GROUP BY loyalty_segment HAVING loyalty_count > 4000 ORDER BY loyalty_segment;

-- SELECT on Delta tables
-- DESCRIBE HISTORY dbacademy.basic_sql_for_databricks_sql_customers;

-- UPDATE dbacademy.basic_sql_for_databricks_sql_customers SET loyalty_segment = 10 WHERE loyalty_segment = 0;
-- DESCRIBE HISTORY dbacademy.basic_sql_for_databricks_sql_customers;

-- UPDATE dbacademy.basic_sql_for_databricks_sql_customers SET loyalty_segment = 0 WHERE loyalty_segment = 10;
-- DESCRIBE HISTORY dbacademy.basic_sql_for_databricks_sql_customers;

-- TIMESTAMP AS OF
-- You will need to change the timestamp to a time that works with your table's history
-- SELECT loyalty_segment FROM dbacademy.basic_sql_for_databricks_sql_customers TIMESTAMP AS OF '2021-11-18T01:39:30.013Z';

-- VERSION AS OF
-- You may need to change the version number to a number that works with your table's history
-- SELECT loyalty_segment FROM dbacademy.basic_sql_for_databricks_sql_customers VERSION AS OF 1;

## <span style='color:Blue'> Column Expressions </span>

In [0]:
%sql
-- Mathematical expressions of two columns
-- SELECT * FROM dbacademy.basic_sql_for_databricks_sql_silver_promo_prices;
-- SELECT sales_price - sales_price * promo_disc AS Calculated_Discount, discounted_price AS Discounted_Price FROM dbacademy.basic_sql_for_databricks_sql_silver_promo_prices;

-- Built-in Functions
-- https://docs.databricks.com/sql/language-manual/sql-ref-functions-builtin.html

-- String column manipulation
-- SELECT * FROM dbacademy.basic_sql_for_databricks_sql_customers;
-- SELECT lower(city) AS City FROM dbacademy.basic_sql_for_databricks_sql_customers;
-- SELECT initcap(lower(city)) AS City FROM dbacademy.basic_sql_for_databricks_sql_customers;

-- Date Functions
-- SELECT * FROM dbacademy.basic_sql_for_databricks_sql_silver_promo_prices;
-- SELECT from_unixtime(promo_began) FROM dbacademy.basic_sql_for_databricks_sql_silver_promo_prices;
-- Datetime Patterns: https://docs.databricks.com/spark/latest/spark-sql/language-manual/sql-ref-datetime-pattern.html
-- SELECT from_unixtime(promo_began, "d MMM, y") AS Beginning_Date FROM dbacademy.basic_sql_for_databricks_sql_silver_promo_prices;

-- Date Calculations
-- SELECT * FROM dbacademy.basic_sql_for_databricks_sql_silver_promo_prices;
-- SELECT current_date() - to_date(from_unixtime(promo_began)) FROM dbacademy.basic_sql_for_databricks_sql_silver_promo_prices;

-- CASE WHEN
-- SELECT customer_name, loyalty_segment,
--  CASE 
--     WHEN loyalty_segment = 0 THEN 'Rare'
--     WHEN loyalty_segment = 1 THEN 'Occasional'
--     WHEN loyalty_segment = 2 THEN 'Frequent'
--     WHEN loyalty_segment = 3 THEN 'Daily'
--  END AS Loyalty 
--  FROM dbacademy.basic_sql_for_databricks_sql_customers;

-- CASE WHEN in ORDER BY
-- SELECT * FROM dbacademy.basic_sql_for_databricks_sql_customers WHERE state = 'UT'
-- ORDER BY
-- (CASE 
--     WHEN city IS NULL THEN state
--     ELSE city
-- END);


## <span style='color:Blue'> Manipulating Stored Data </span>

In [0]:
%sql
-- UPDATE
-- SELECT city FROM dbacademy.basic_sql_for_databricks_sql_customers;
-- SELECT initcap(lower(city)) AS City FROM dbacademy.basic_sql_for_databricks_sql_customers;
-- UPDATE dbacademy.basic_sql_for_databricks_sql_customers SET city = initcap(lower(city));
-- SELECT city FROM dbacademy.basic_sql_for_databricks_sql_customers;

-- INSERT INTO
-- SELECT * FROM dbacademy.basic_sql_for_databricks_sql_loyalty_segments;
-- INSERT INTO dbacademy.basic_sql_for_databricks_sql_loyalty_segments 
-- (loyalty_segment_id, loyalty_segment_description, unit_threshold, valid_from, valid_to)
-- VALUES
-- (4, 'level_4', 100, current_date(), Null);

-- INSERT TABLE
-- SELECT * FROM dbacademy.basic_sql_for_databricks_sql_silver_suppliers where password_hash = 'f6899b07c3868a5975438ee0caea6623';
-- INSERT INTO dbacademy.basic_sql_for_databricks_sql_silver_suppliers TABLE dbacademy.basic_sql_for_databricks_sql_source_silver_suppliers; 
-- SELECT * FROM dbacademy.basic_sql_for_databricks_sql_silver_suppliers where password_hash = 'f6899b07c3868a5975438ee0caea6623';

-- INSERT OVERWRITE
-- select count(*) from  dbacademy.basic_sql_for_databricks_sql_silver_suppliers;
-- INSERT OVERWRITE dbacademy.basic_sql_for_databricks_sql_silver_suppliers TABLE dbacademy.basic_sql_for_databricks_sql_source_silver_suppliers; 
-- select count(*) from  dbacademy.basic_sql_for_databricks_sql_silver_suppliers;

-- MERGE INTO
-- MERGE INTO dbacademy.basic_sql_for_databricks_sql_silver_suppliers
--   USING dbacademy.basic_sql_for_databricks_sql_source_silver_suppliers
--   ON dbacademy.basic_sql_for_databricks_sql_silver_suppliers.EAN13 = dbacademy.basic_sql_for_databricks_sql_source_silver_suppliers.EAN13
--   WHEN NOT MATCHED THEN INSERT *;
-- UPDATE dbacademy.basic_sql_for_databricks_sql_source_silver_suppliers SET EAN13 = EAN13 + 1 WHERE EAN13 = 2198122549911;
-- MERGE INTO dbacademy.basic_sql_for_databricks_sql_silver_suppliers
--   USING dbacademy.basic_sql_for_databricks_sql_source_silver_suppliers
--   ON dbacademy.basic_sql_for_databricks_sql_silver_suppliers.EAN13 = dbacademy.basic_sql_for_databricks_sql_source_silver_suppliers.EAN13
--   WHEN NOT MATCHED THEN INSERT *;

## <span style='color:Blue'> Subsetting Data Using Subqueries</span>

In [0]:
%sql
-- Subqueries

-- Creating a table using a subquery
-- CREATE TABLE dbacademy.high_loyalty_customers AS
--     SELECT * FROM dbacademy.basic_sql_for_databricks_sql_customers WHERE loyalty_segment = 3;

-- DROP TABLE dbacademy.high_loyalty_customers;

-- Creating a view using a subquery
-- CREATE VIEW dbacademy.high_loyalty_customers AS
--     SELECT * FROM dbacademy.basic_sql_for_databricks_sql_customers WHERE loyalty_segment = 3;
    
-- DROP VIEW dbacademy.high_loyalty_customers;

## <span style='color:Blue'>Working With Joins</span>

In [0]:
%sql
-- INNER JOIN
-- SELECT
--   customer_name,
--   loyalty_segment_description,
--   unit_threshold
-- FROM
--   dbacademy.basic_sql_for_databricks_sql_customers
--   INNER JOIN dbacademy.basic_sql_for_databricks_sql_loyalty_segments
--     ON basic_sql_for_databricks_sql_customers.loyalty_segment = basic_sql_for_databricks_sql_loyalty_segments.loyalty_segment_id;

-- LEFT [OUTER] JOIN
-- SELECT
--   basic_sql_for_databricks_sql_customers.customer_name,
--   product_category,
--   total_price
-- FROM
--   dbacademy.basic_sql_for_databricks_sql_customers
--   LEFT JOIN dbacademy.basic_sql_for_databricks_sql_sales_gold 
--     ON basic_sql_for_databricks_sql_customers.customer_id = basic_sql_for_databricks_sql_sales_gold.customer_id
-- WHERE
--   state = 'NC'
-- ORDER BY product_category DESC;

-- RIGHT [OUTER] JOIN
-- SELECT
--   region,
--   product_category,
--   total_price
-- FROM
--   dbacademy.basic_sql_for_databricks_sql_customers
--   RIGHT JOIN dbacademy.basic_sql_for_databricks_sql_sales_gold 
--     ON basic_sql_for_databricks_sql_customers.customer_id = basic_sql_for_databricks_sql_sales_gold.customer_id
-- WHERE
--     product_category = 'Sioneer'
-- ORDER BY product_category DESC;

-- FULL [OUTER] JOIN
-- SELECT
--   region,
--   product_category,
--   total_price
-- FROM
--   dbacademy.basic_sql_for_databricks_sql_customers
--   FULL JOIN dbacademy.basic_sql_for_databricks_sql_sales_gold 
--     ON basic_sql_for_databricks_sql_customers.customer_id = basic_sql_for_databricks_sql_sales_gold.customer_id
-- ORDER BY product_category DESC;

-- [LEFT] SEMI JOIN
-- SELECT
--   *
-- FROM
--   dbacademy.basic_sql_for_databricks_sql_customers
--   LEFT SEMI JOIN dbacademy.basic_sql_for_databricks_sql_sales_gold 
--     ON basic_sql_for_databricks_sql_customers.customer_id = basic_sql_for_databricks_sql_sales_gold.customer_id
-- WHERE
--   state = 'NC';
  
-- [LEFT] ANTI JOIN
-- SELECT
--   *
-- FROM
--   dbacademy.basic_sql_for_databricks_sql_customers
--   LEFT ANTI JOIN dbacademy.basic_sql_for_databricks_sql_sales_gold 
--     ON basic_sql_for_databricks_sql_customers.customer_id = basic_sql_for_databricks_sql_sales_gold.customer_id
-- WHERE
--   state = 'NC';

-- CROSS JOIN
-- SELECT
--   count(*)
-- FROM
--   dbacademy.basic_sql_for_databricks_sql_sales_gold;
  
-- SELECT
--   count(*)
-- FROM
--   dbacademy.basic_sql_for_databricks_sql_customers;

-- SELECT
--   count(*)
-- FROM
--   dbacademy.basic_sql_for_databricks_sql_customers
--   CROSS JOIN dbacademy.basic_sql_for_databricks_sql_sales_gold;

## <span style='color:Blue'>Using Aggregations</span>

In [0]:
%sql
-- count()
-- SELECT count(*) AS Number_of_Customers FROM dbacademy.basic_sql_for_databricks_sql_customers;

-- sum()
-- SELECT sum(units_purchased) AS Total_California_Units FROM dbacademy.basic_sql_for_databricks_sql_customers WHERE state = 'CA';

-- min(), max()
-- SELECT min(discounted_price) AS Lowest_Discounted_Price, max(discounted_price) AS Highest_Discounted_Price FROM dbacademy.basic_sql_for_databricks_sql_silver_promo_prices;

-- avg(), mean()
-- SELECT avg(total_price) AS Mean_Total_Price from dbacademy.basic_sql_for_databricks_sql_sales_gold;

-- std(), stddev()
-- SELECT std(total_price) AS SD_Total_Price from dbacademy.basic_sql_for_databricks_sql_sales_gold;

-- var_samp(), variance()
-- SELECT variance(total_price) AS Variance_Total_Price from dbacademy.basic_sql_for_databricks_sql_sales_gold;

-- Combining built in functions
-- SELECT price FROM dbacademy.basic_sql_for_databricks_sql_silver_purchase_orders;
-- SELECT
--   price AS Price,
--   int(regexp_replace(price, '(\\$\\s)|(\\$)|(USD\\s)|(USD)', '')) AS Cleaned_USD_Price
-- FROM
--   dbacademy.basic_sql_for_databricks_sql_silver_purchase_orders
-- WHERE
--   price like '\$%'
--   OR price like 'USD%';

-- SELECT
--   corr(
--     int(
--       regexp_replace(price, '(\\$\\s)|(\\$)|(USD\\s)|(USD)', '')
--     ),
--     quantity
--   ) AS Correlation_USD_Price_Quantity
-- FROM
--   dbacademy.basic_sql_for_databricks_sql_silver_purchase_orders
-- WHERE
--   price like '\$%'
--   OR price like 'USD%';