## Importing Libraries

In [1]:
import pandas as pd
import mysql.connector
from credentials import db_password

## Connecting to **gdb023** database

In [2]:
mydb = mysql.connector.connect(host = 'localhost', user = 'root', password = db_password, database = 'gdb023')
cursorObject = mydb.cursor()

query = '''
        SHOW TABLES
        '''
cursorObject.execute(query)
tables_list = cursorObject.fetchall()

for table in tables_list:
    print(table[0])

dim_customer
dim_product
fact_gross_price
fact_manufacturing_cost
fact_pre_invoice_deductions
fact_sales_monthly


## Requests :

### Q1. Provide the list of markets in which customer **"Atliq Exclusive"** operates its business in the **APAC** region.

In [3]:
QUERY = '''
        SELECT DISTINCT(market) FROM dim_customer
            WHERE region = 'APAC' AND customer = 'Atliq Exclusive'
        '''
cursorObject.execute(QUERY)
pd.DataFrame(cursorObject.fetchall(), columns = cursorObject.column_names)

Unnamed: 0,market
0,India
1,Indonesia
2,Japan
3,Philiphines
4,South Korea
5,Australia
6,Newzealand
7,Bangladesh


### Q2. What is the percentage of unique product increase in 2021 vs. 2020?
The final output contains these fields,
- unique_products_2020
- unique_products_2021
- percentage_chg

In [4]:
QUERY = '''
        WITH fy20 AS (
                SELECT COUNT(DISTINCT(product_code)) AS up_20 FROM fact_sales_monthly
                    WHERE fiscal_year = 2020),
                    
            fy21 AS (
                SELECT COUNT(DISTINCT(product_code)) AS up_21 FROM fact_sales_monthly
                    WHERE fiscal_year = 2021)
                    
        SELECT fy20.up_20 AS unique_products_2020,
            fy21.up_21 AS unique_products_2021,
            ROUND((fy21.up_21-fy20.up_20) * 100/fy20.up_20, 2) as percentage_chg
            FROM fy20, fy21
        '''
cursorObject.execute(QUERY)
pd.DataFrame(cursorObject.fetchall(), columns = cursorObject.column_names)

Unnamed: 0,unique_products_2020,unique_products_2021,percentage_chg
0,245,334,36.33


### Q3. Provide a report with all the unique product counts for each **segment** and sort them in descending order of product counts.
The final output contains 2 fields,
- segment
- product_count

In [5]:
QUERY = '''
        SELECT segment, count(product) AS product_count FROM dim_product
            GROUP BY segment
            ORDER BY product_count DESC
        '''
cursorObject.execute(QUERY)
pd.DataFrame(cursorObject.fetchall(), columns = cursorObject.column_names)

Unnamed: 0,segment,product_count
0,Notebook,129
1,Accessories,116
2,Peripherals,84
3,Desktop,32
4,Storage,27
5,Networking,9


### Q4. Follow-up: Which segment had the most increase in unique products in 2021 vs 2020?
The final output contains these fields,
- segment
- product_count_2020
- product_count_2021
- difference

In [6]:
QUERY = '''
    WITH fy20 AS(
            SELECT segment, COUNT(DISTINCT(fm.product_code)) AS seg20 FROM fact_sales_monthly fm
                JOIN dim_product dp
                ON fm.product_code = dp.product_code
                WHERE fiscal_year = 2020
                GROUP BY dp.segment),
                
        fy21 AS(
            SELECT segment, COUNT(DISTINCT(fm.product_code)) AS seg21 FROM fact_sales_monthly fm
                JOIN dim_product dp
                ON fm.product_code = dp.product_code
                WHERE fiscal_year = 2021
                GROUP BY dp.segment)
                
    SELECT fy20.segment, seg20 AS product_count_2020, seg21 AS product_count_2021, seg21-seg20 AS difference FROM fy20
        JOIN fy21
        ON fy20.segment = fy21.segment
        ORDER BY difference DESC
        '''
cursorObject.execute(QUERY)
pd.DataFrame(cursorObject.fetchall(), columns = cursorObject.column_names)

Unnamed: 0,segment,product_count_2020,product_count_2021,difference
0,Accessories,69,103,34
1,Notebook,92,108,16
2,Peripherals,59,75,16
3,Desktop,7,22,15
4,Storage,12,17,5
5,Networking,6,9,3


### Q5. Get the products that have the highest and lowest manufacturing costs.
The final output should contain these fields,
- product_code
- product
- manufacturing_cost

In [7]:
QUERY = '''
        SELECT fc.product_code, product, manufacturing_cost FROM fact_manufacturing_cost as fc
            JOIN dim_product as dp
            ON fc.product_code = dp.product_code
            WHERE fc.manufacturing_cost = (SELECT max(manufacturing_cost) FROM fact_manufacturing_cost) OR
                fc.manufacturing_cost = (SELECT min(manufacturing_cost) FROM fact_manufacturing_cost)
            ORDER BY manufacturing_cost DESC
        '''
cursorObject.execute(QUERY)
pd.DataFrame(cursorObject.fetchall(), columns = cursorObject.column_names)

Unnamed: 0,product_code,product,manufacturing_cost
0,A6120110206,AQ HOME Allin1 Gen 2,240.5364
1,A2118150101,AQ Master wired x1 Ms,0.892


### Q6. Generate a report which contains the top 5 customers who received an average high pre_invoice_discount_pct for the **fiscal year 2021** and in the **Indian** market.
The final output contains these fields,
- customer_code
- customer
- average_discount_percentage

In [8]:
QUERY = '''
        SELECT fd.customer_code, customer, pre_invoice_discount_pct FROM fact_pre_invoice_deductions fd
            JOIN dim_customer dc
            ON fd.customer_code = dc.customer_code
            WHERE market = "India"
            ORDER BY pre_invoice_discount_pct DESC
            LIMIT 5
        '''
cursorObject.execute(QUERY)
pd.DataFrame(cursorObject.fetchall(), columns = cursorObject.column_names)

Unnamed: 0,customer_code,customer,pre_invoice_discount_pct
0,90002009,Flipkart,0.3083
1,90002006,Viveks,0.3038
2,90002003,Ezone,0.3028
3,90002002,Croma,0.3025
4,90002016,Amazon,0.2933
