In [None]:
# The Operations team would like to know which countries we currently operate 
# in and which country now has the most stores. Perform a query on the database 
# to get the information, it should return the following information:

+----------+-----------------+
| country  | total_no_stores |
+----------+-----------------+
| GB       |             265 |
| DE       |             141 |
| US       |              34 |
+----------+-----------------+

# Note: DE is short for Deutschland (Germany).

In [None]:
-- Store count per country code, largest first.
-- Rows whose store_type is 'Web Portal' are left out of the count.
SELECT
    s.country_code,
    COUNT(s.country_code) AS total_no_stores
FROM dim_store_details AS s
WHERE s.store_type <> 'Web Portal'
GROUP BY s.country_code
ORDER BY total_no_stores DESC;


In [None]:
# The business stakeholders would like to know which locations currently have the most stores.

# They would like to close some stores before opening more in other locations.

# Find out which locations have the most stores currently. The query should return the following:

+-------------------+-----------------+
|     locality      | total_no_stores |
+-------------------+-----------------+
| Chapletown        |              14 |
| Belper            |              13 |
| Bushley           |              12 |
| Exeter            |              11 |
| High Wycombe      |              10 |
| Arbroath          |              10 |
| Rutherglen        |              10 |
+-------------------+-----------------+

In [None]:
-- The seven localities with the highest store count, largest first.
SELECT
    s.locality,
    COUNT(s.locality) AS total_no_stores
FROM dim_store_details AS s
GROUP BY s.locality
ORDER BY total_no_stores DESC
LIMIT 7;

In [None]:
#Query the database to find out which months have produced the most sales. 
#The query should return the following information:

+-------------+-------+
| total_sales | month |
+-------------+-------+
|   673295.68 |     8 |
|   668041.45 |     1 |
|   657335.84 |    10 |
|   650321.43 |     5 |
|   645741.70 |     7 |
|   645463.00 |     3 |
+-------------+-------+

In [None]:
-- Total sales value per calendar month (all years combined), top six months.
--
-- Each order row is priced through dim_products and dated through
-- dim_date_times. dim_date_times is joined straight onto the order rows:
-- routing it through a second copy of orders_table adds no information and
-- would multiply rows if a date_uuid ever appeared on more than one order.
WITH order_sales AS (
    SELECT
        orders_table.date_uuid,
        (dim_products.product_price * orders_table.product_quantity) AS product_sales
    FROM
        orders_table
    -- LEFT JOIN keeps orders with no matching product; their NULL
    -- product_sales is simply ignored by SUM below.
    LEFT JOIN
        dim_products ON orders_table.product_code = dim_products.product_code
),
monthly_sales AS (
    SELECT
        dim_date_times.month,
        ROUND(SUM(order_sales.product_sales)::numeric, 2) AS total_sales
    FROM
        order_sales
    INNER JOIN
        dim_date_times ON order_sales.date_uuid = dim_date_times.date_uuid
    GROUP BY
        dim_date_times.month
)

-- Columns are listed explicitly so the result is (total_sales, month),
-- the order shown in the expected output.
SELECT
    total_sales,
    month
FROM
    monthly_sales
ORDER BY
    total_sales DESC
LIMIT
    6;

In [None]:
# The company is looking to increase its online sales.

# They want to know how many sales are happening online vs offline.

# Calculate how many products were sold and the amount of sales made for online and offline purchases.

# You should get the following information:

+------------------+-------------------------+----------+
| numbers_of_sales | product_quantity_count  | location |
+------------------+-------------------------+----------+
|            26957 |                  107739 | Web      |
|            93166 |                  374047 | Offline  |
+------------------+-------------------------+----------+

Originally I tried using multiple CTEs, but the counts came out completely wrong.

It was much simpler to create a new column and then use a single CTE for the calculations.

In [None]:
-- Add a "location" column to dim_store_details and fill it for every row.
-- IF NOT EXISTS makes this cell safe to re-run: a plain ADD COLUMN raises
-- an error the second time because the column already exists.
ALTER TABLE dim_store_details
ADD COLUMN IF NOT EXISTS location VARCHAR(20);

-- The UPDATE has no WHERE clause, so every row is (re)labelled on each run:
-- store_type 'Web Portal' becomes 'Web', anything else becomes 'Offline'.
UPDATE dim_store_details
SET location = CASE
    WHEN store_type = 'Web Portal' THEN 'Web'
    ELSE 'Offline'
END;

In [None]:
-- Number of order rows and total units sold, split by dim_store_details.location.
-- Only orders whose store_code matches a store are counted (inner join).
SELECT
    COUNT(*) AS number_of_sales,
    SUM(o.product_quantity) AS product_quantity_count,
    s.location
FROM orders_table AS o
INNER JOIN dim_store_details AS s
    ON o.store_code = s.store_code
GROUP BY s.location
ORDER BY s.location DESC;

In [None]:
# The sales team wants to know which of the different store types is generating the most revenue so they know where to focus.
# Find out the total and percentage of sales coming from each of the different store types.
# The query should return:

+-------------+-------------+---------------------+
| store_type  | total_sales | percentage_total(%) |
+-------------+-------------+---------------------+
| Local       |  3440896.52 |               44.87 |
| Web portal  |  1726547.05 |               22.44 |
| Super Store |  1224293.65 |               15.63 |
| Mall Kiosk  |   698791.61 |                8.96 |
| Outlet      |   631804.81 |                8.10 |
+-------------+-------------+---------------------+

In [None]:
-- Sales value per store type, plus each type's share of all joined rows.
-- Note: percentage_total is derived from row counts (COUNT of store_type),
-- not from total_sales.
WITH type_totals AS (
    SELECT
        s.store_type,
        COUNT(s.store_type) AS row_count,
        ROUND(SUM(p.product_price * o.product_quantity)::numeric, 2) AS total_sales
    FROM dim_store_details AS s
    -- Starting from the store table keeps stores that have no orders at all.
    LEFT JOIN orders_table AS o
        ON s.store_code = o.store_code
    LEFT JOIN dim_products AS p
        ON o.product_code = p.product_code
    GROUP BY s.store_type
)

SELECT
    store_type,
    total_sales,
    -- The empty OVER () makes SUM a window over every group, i.e. the grand total of rows.
    ROUND((row_count / SUM(row_count) OVER ())::numeric * 100, 2) AS percentage_total
FROM type_totals
ORDER BY percentage_total DESC;

In [None]:
# The company stakeholders want assurances that the company has been doing well recently.

# Find which months in which years have had the most sales historically.

# The query should return the following information:

+-------------+------+-------+
| total_sales | year | month |
+-------------+------+-------+
|    27936.77 | 1994 |     3 |
|    27356.14 | 2019 |     1 |
|    27091.67 | 2009 |     8 |
|    26679.98 | 1997 |    11 |
|    26310.97 | 2018 |    12 |
|    26277.72 | 2019 |     8 |
|    26236.67 | 2017 |     9 |
|    25798.12 | 2010 |     5 |
|    25648.29 | 1996 |     8 |
|    25614.54 | 2000 |     1 |
+-------------+------+-------+

In [None]:
-- Total sales value for each (year, month) pair, ten highest first.
--
-- Each order row is priced through dim_products and dated through
-- dim_date_times. dim_date_times is joined straight onto the order rows:
-- routing it through a second copy of orders_table adds no information and
-- would multiply rows if a date_uuid ever appeared on more than one order.
WITH order_sales AS (
    SELECT
        orders_table.date_uuid,
        (dim_products.product_price * orders_table.product_quantity) AS product_sales
    FROM
        orders_table
    -- LEFT JOIN keeps orders with no matching product; their NULL
    -- product_sales is simply ignored by SUM below.
    LEFT JOIN
        dim_products ON orders_table.product_code = dim_products.product_code
),
monthly_sales AS (
    SELECT
        dim_date_times.month,
        dim_date_times.year,
        ROUND(SUM(order_sales.product_sales)::numeric, 2) AS total_sales
    FROM
        order_sales
    INNER JOIN
        dim_date_times ON order_sales.date_uuid = dim_date_times.date_uuid
    GROUP BY
        dim_date_times.month,
        dim_date_times.year
)

SELECT
    total_sales,
    year,
    month
FROM
    monthly_sales
ORDER BY
    total_sales DESC
LIMIT
    10;

In [None]:
# The operations team would like to know the overall staff numbers in each location around the world. Perform a query to determine the staff numbers in each of the countries the company sells in.

# The query should return the values:

+---------------------+--------------+
| total_staff_numbers | country_code |
+---------------------+--------------+
|               13307 | GB           |
|                6123 | DE           |
|                1384 | US           |
+---------------------+--------------+

In [None]:
-- Sum of staff_numbers per country code, largest first.
SELECT
    s.country_code,
    SUM(s.staff_numbers) AS total_staff_numbers
FROM dim_store_details AS s
GROUP BY s.country_code
ORDER BY total_staff_numbers DESC;

In [None]:
# The sales team is looking to expand their territory in Germany. Determine which type of store is generating the most sales in Germany.

# The query will return:

+--------------+-------------+--------------+
| total_sales  | store_type  | country_code |
+--------------+-------------+--------------+
|   198373.57  | Outlet      | DE           |
|   247634.20  | Mall Kiosk  | DE           |
|   384625.03  | Super Store | DE           |
|  1109909.59  | Local       | DE           |
+--------------+-------------+--------------+

In [None]:
-- Sales value per store type for stores with country_code 'DE', smallest first.
SELECT
    ROUND(SUM(p.product_price * o.product_quantity)::numeric, 2) AS total_sales,
    s.store_type,
    s.country_code
FROM dim_store_details AS s
-- Starting from the store table keeps stores that have no orders at all.
LEFT JOIN orders_table AS o
    ON s.store_code = o.store_code
LEFT JOIN dim_products AS p
    ON o.product_code = p.product_code
WHERE s.country_code = 'DE'
GROUP BY
    s.store_type,
    s.country_code
ORDER BY total_sales;

In [None]:
# Sales would like to get an accurate metric for how quickly 
#the company is making sales.

# Determine the average time taken between each sale grouped by year, 
#the query should return the following information:

 +------+-------------------------------------------------------+
 | year |                           actual_time_taken           |
 +------+-------------------------------------------------------+
 | 2013 | "hours": 2, "minutes": 17, "seconds": 12, "millise... |
 | 1993 | "hours": 2, "minutes": 15, "seconds": 35, "millise... |
 | 2002 | "hours": 2, "minutes": 13, "seconds": 50, "millise... | 
 | 2022 | "hours": 2, "minutes": 13, "seconds": 6,  "millise... |
 | 2008 | "hours": 2, "minutes": 13, "seconds": 2,  "millise... |
 +------+-------------------------------------------------------+

#First get all timestamps with their day, month and year (group by date?), then calculate the time
#between consecutive rows, i.e. between the sales for that date.

#Once the gaps between sales are calculated, take the average time between sales for each year.
#

## My Solution

In [None]:

-- Average gap between consecutive sales within each year; the five years
-- with the longest average gap are returned, longest first.
WITH order_timing AS (
    SELECT
        dim_date_times.year,
        -- Build one full timestamp from the separate year / month / day
        -- columns and the time-of-day column.
        TO_TIMESTAMP(
            CONCAT(
                dim_date_times.year, '/',
                dim_date_times.month, '/',
                dim_date_times.day, '/',
                dim_date_times.timestamp
            ),
            'YYYY/MM/DD/HH24:MI:ss'
        ) AS times
    FROM
        dim_date_times
),
sale_times AS (
    SELECT
        order_timing.year,
        -- Order by the full timestamp, not the time-of-day column: ordering by
        -- time of day alone would pair rows from unrelated dates.
        -- PARTITION BY year means the first sale of each year has no
        -- predecessor, so its gap is NULL and AVG ignores it.
        times - LAG(times) OVER (PARTITION BY year ORDER BY times) AS time_between_sales
    FROM
        order_timing
)  -- no comma after the last CTE; a trailing comma here is a syntax error

SELECT
    year,
    AVG(time_between_sales) AS total_time_between_sales
FROM
    sale_times
GROUP BY
    year
ORDER BY
    total_time_between_sales DESC
LIMIT
    5;




## AI Core solution

In [None]:
-- Average gap between consecutive sales, grouped by year; five longest first.
WITH date_times AS (
    SELECT
        d.year,
        d.timestamp,
        -- Build one full timestamp from the separate date-part columns and
        -- the time-of-day column.
        TO_TIMESTAMP(
            CONCAT(d.year, '/', d.month, '/', d.day, '/', d.timestamp),
            'YYYY/MM/DD/HH24:MI:ss'
        ) AS times
    FROM
        dim_date_times d
    -- NOTE(review): no column from o or s is selected; these inner joins only
    -- restrict the rows to dates with a matching order and store.
    JOIN orders_table o
        ON d.date_uuid = o.date_uuid
    JOIN dim_store_details s
        ON o.store_code = s.store_code
),
next_times AS (
    SELECT
        year,
        timestamp,
        times,
        -- With a descending sort, LEAD returns the previous (earlier) sale,
        -- so times - next_times is a positive gap.
        LEAD(times) OVER (ORDER BY times DESC) AS next_times
    FROM
        date_times
),
avg_times AS (
    SELECT
        year,
        AVG(times - next_times) AS avg_times
    FROM
        next_times
    GROUP BY
        year
)

SELECT
    year,
    -- Seconds are floored (not rounded) so they agree with the fractional
    -- part, and the fraction is scaled by 1000 to give real milliseconds.
    CONCAT(
        '"hours": ', EXTRACT(HOUR FROM avg_times),
        ', "minutes": ', EXTRACT(MINUTE FROM avg_times),
        ', "seconds": ', FLOOR(EXTRACT(SECOND FROM avg_times)::numeric),
        ', "milliseconds": ',
        FLOOR(
            (EXTRACT(SECOND FROM avg_times)::numeric
                - FLOOR(EXTRACT(SECOND FROM avg_times)::numeric)) * 1000
        )
    ) AS actual_time_taken
FROM
    avg_times
ORDER BY
    avg_times DESC
LIMIT
    5;

## My adjustment to AI Core solution

In [None]:
-- Average gap between consecutive sales, grouped by year; five longest first.
-- Works from dim_date_times alone, with no joins to other tables.
WITH date_times AS (
    SELECT
        d.year,
        d.timestamp,
        -- Build one full timestamp from the separate date-part columns and
        -- the time-of-day column.
        TO_TIMESTAMP(
            CONCAT(d.year, '/', d.month, '/', d.day, '/', d.timestamp),
            'YYYY/MM/DD/HH24:MI:ss'
        ) AS times
    FROM
        dim_date_times d
),
next_times AS (
    SELECT
        year,
        timestamp,
        times,
        -- With a descending sort, LEAD returns the previous (earlier) sale,
        -- so times - next_times is a positive gap.
        LEAD(times) OVER (ORDER BY times DESC) AS next_times
    FROM
        date_times
),
avg_times AS (
    SELECT
        year,
        AVG(times - next_times) AS avg_times
    FROM
        next_times
    GROUP BY
        year
)

SELECT
    year,
    -- Seconds are floored (not rounded) so they agree with the fractional
    -- part, and the fraction is scaled by 1000 to give real milliseconds.
    CONCAT(
        '"hours": ', EXTRACT(HOUR FROM avg_times),
        ', "minutes": ', EXTRACT(MINUTE FROM avg_times),
        ', "seconds": ', FLOOR(EXTRACT(SECOND FROM avg_times)::numeric),
        ', "milliseconds": ',
        FLOOR(
            (EXTRACT(SECOND FROM avg_times)::numeric
                - FLOOR(EXTRACT(SECOND FROM avg_times)::numeric)) * 1000
        )
    ) AS actual_time_taken
FROM
    avg_times
ORDER BY
    avg_times DESC
LIMIT
    5;