## Load Libraries

In [1]:
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

In [2]:
# Define connection URL
conn_url = 'postgresql+psycopg2://postgres:123@localhost/foodmart'

# Create an engine that connects to PostgreSQL
engine = create_engine(conn_url)

## Sales Report by Departments

In [3]:
def query_sales_report_data(engine):
    """
    Queries Sales data and returns the results as a Pandas DataFrame.

    Args:
        engine (str): PostgreSQL connection string.

    Returns:
        pd.DataFrame: DataFrame containing the query results.
    """
    query = """
    SELECT
        t.store_id,
        DATE_TRUNC('month', t.transaction_date)::date AS sale_month,
        d.dept_name,
        SUM(s.quantity) AS total_quantity_sold,
        COUNT(DISTINCT t.transaction_id) AS total_transactions
    FROM
        sales s
    INNER JOIN transactions t
        ON s.transaction_id = t.transaction_id
    INNER JOIN products p
        ON s.product_id = p.product_id
    INNER JOIN department d
        ON p.dept_id = d.dept_id
    WHERE
        t.transaction_type = 0 -- Include only sales (exclude returns)
    GROUP BY
        t.store_id, sale_month, d.dept_name
    ORDER BY
        t.store_id, sale_month, d.dept_name;

    """
    # Execute the query and return results as DataFrame
    with engine.connect() as connection:
        df = pd.read_sql_query(query, connection)
    
    return df

In [4]:
store_sales = query_sales_report_data(engine)

store_sales

Unnamed: 0,store_id,sale_month,dept_name,total_quantity_sold,total_transactions
0,1,2024-10-01,Bakery,34,9
1,1,2024-10-01,Beverages,25,6
2,1,2024-10-01,Dairy,27,7
3,1,2024-10-01,Frozen Foods,30,8
4,1,2024-10-01,Health & Beauty,49,14
...,...,...,...,...,...
145,5,2024-12-01,Household Supplies,14,3
146,5,2024-12-01,Meat & Seafood,9,4
147,5,2024-12-01,Pantry,8,1
148,5,2024-12-01,Produce,7,2


## Sales Report Top Products from Each Store

In [5]:
def query_top_worst_products(engine):
    """
    Queries best and worst products and returns the results as a Pandas DataFrame.

    Args:
        engine (str): PostgreSQL connection string.

    Returns:
        pd.DataFrame: DataFrame containing the query results.
    """
    query = """
WITH product_sales AS (
    SELECT
        t.store_id,
        s.store_name,
        p.product_id,
        p.product_name,
        SUM(sales.quantity) AS total_quantity_sold
    FROM
        sales
    INNER JOIN transactions t
        ON sales.transaction_id = t.transaction_id
    INNER JOIN store s
        ON t.store_id = s.store_id
    INNER JOIN products p
        ON sales.product_id = p.product_id
    WHERE
        t.transaction_type = 0 -- Include only sales (exclude returns)
    GROUP BY
        t.store_id, s.store_name, p.product_id, p.product_name
),
ranked_sales AS (
    SELECT
        ps.store_id,
        ps.store_name,
        ps.product_id,
        ps.product_name,
        ps.total_quantity_sold,
        RANK() OVER (PARTITION BY ps.store_id ORDER BY ps.total_quantity_sold DESC) AS rank_best,
        RANK() OVER (PARTITION BY ps.store_id ORDER BY ps.total_quantity_sold ASC) AS rank_least
    FROM
        product_sales ps
)
SELECT
    store_id,
    store_name,
    product_id,
    product_name,
    total_quantity_sold,
    CASE
        WHEN rank_best <= 5 THEN 'Top-Selling'
        WHEN rank_least <= 5 THEN 'Least-Selling'
    END AS product_category
FROM
    ranked_sales
WHERE
    rank_best <= 5 OR rank_least <= 5
ORDER BY
    store_id, product_category, total_quantity_sold DESC;


    """
    # Execute the query and return results as DataFrame
    with engine.connect() as connection:
        df = pd.read_sql_query(query, connection)
    
    return df

In [6]:
products = query_top_worst_products(engine)

# Display the DataFrame
products

Unnamed: 0,store_id,store_name,product_id,product_name,total_quantity_sold,product_category
0,1,ABC Food Mart Queens 1,56,DailyEssentials Spaghetti Pasta,2,Least-Selling
1,1,ABC Food Mart Queens 1,25,GoodFood Sparkling Water,2,Least-Selling
2,1,ABC Food Mart Queens 1,48,GreenValley Spaghetti Pasta,2,Least-Selling
3,1,ABC Food Mart Queens 1,70,OrganicChoice Pretzels,1,Least-Selling
4,1,ABC Food Mart Queens 1,23,BudgetBuy Green Tea,1,Least-Selling
...,...,...,...,...,...,...
66,5,ABC Food Mart Brooklyn 3,37,BudgetBuy Baguette,13,Top-Selling
67,5,ABC Food Mart Brooklyn 3,10,NatureDelight Chicken Breast,13,Top-Selling
68,5,ABC Food Mart Brooklyn 3,50,GoodFood Dish Soap,13,Top-Selling
69,5,ABC Food Mart Brooklyn 3,28,GreenValley Toilet Paper,13,Top-Selling


## Vendor Delivery Tracking

In [7]:
def query_delivery_data(engine):
    """
    Queries deliveries data and returns the results as a Pandas DataFrame.

    Args:
        engine (str): PostgreSQL connection string.

    Returns:
        pd.DataFrame: DataFrame containing the query results.
    """
    query = """
    SELECT
        v.vendor_id,
        v.vendor_name,
        p.product_id,
        p.product_name,
        s.store_id,
        s.store_name,
        d.delivery_date,
        d.quantity
    FROM
        deliveries d
    INNER JOIN products p
        ON d.product_id = p.product_id
    INNER JOIN vendors v
        ON d.vendor_id = v.vendor_id
    INNER JOIN store s
        ON d.store_id = s.store_id
    WHERE 
        v.vendor_id = 3
    AND d.delivery_date BETWEEN '2024-11-01' AND '2024-12-01'
    ORDER BY
        v.vendor_id, d.delivery_date DESC;

    """
    
    # Execute the query and return results as DataFrame
    with engine.connect() as connection:
        df = pd.read_sql_query(query, connection)
    
    return df

In [8]:
delivery = query_delivery_data(engine)

# Display the DataFrame
delivery

Unnamed: 0,vendor_id,vendor_name,product_id,product_name,store_id,store_name,delivery_date,quantity
0,3,Mckay-Hughes,30,GreenValley Salmon Fillet,2,ABC Food Mart Queens 2,2024-11-30,267
1,3,Mckay-Hughes,37,BudgetBuy Baguette,3,ABC Food Mart Brooklyn 1,2024-11-24,653
2,3,Mckay-Hughes,89,OrganicChoice Grapes,5,ABC Food Mart Brooklyn 3,2024-11-24,322
3,3,Mckay-Hughes,68,BudgetBuy Cola,3,ABC Food Mart Brooklyn 1,2024-11-23,628
4,3,Mckay-Hughes,27,BudgetBuy Cheddar Cheese,4,ABC Food Mart Brooklyn 2,2024-11-21,602
5,3,Mckay-Hughes,67,PureTaste Strawberries,3,ABC Food Mart Brooklyn 1,2024-11-17,555
6,3,Mckay-Hughes,2,PureTaste Potato Chips,2,ABC Food Mart Queens 2,2024-11-16,693
7,3,Mckay-Hughes,92,GreenValley Hand Soap,3,ABC Food Mart Brooklyn 1,2024-11-16,320
8,3,Mckay-Hughes,61,FreshFarm Ice Cream,1,ABC Food Mart Queens 1,2024-11-14,864
9,3,Mckay-Hughes,75,GreenValley Orange Juice,2,ABC Food Mart Queens 2,2024-11-13,931


## Employee Pay Periods

In [9]:
def query_employee_payment_data(engine):
    """
    Queries employee payment data and returns the results as a Pandas DataFrame.

    Args:
        engine (str): PostgreSQL connection string.

    Returns:
        pd.DataFrame: DataFrame containing the query results.
    """
    query = """
    SELECT
        ett.employee_id,
        p.pay_period_start,
        p.pay_period_end,
        SUM(EXTRACT(EPOCH FROM (ett.end_time - ett.start_time)) / 3600) AS total_hours_worked,
        p.hour_wage,
        SUM(EXTRACT(EPOCH FROM (ett.end_time - ett.start_time)) / 3600) * p.hour_wage AS total_amount_paid
    FROM
        employee_time_tracking ett
    INNER JOIN payroll p
        ON ett.employee_id = p.employee_id
        AND ett.start_time::date BETWEEN p.pay_period_start AND p.pay_period_end
    WHERE
        ett.start_time::date >= '2023-11-01' AND ett.start_time::date <= CURRENT_DATE
    GROUP BY
        ett.employee_id, p.pay_period_start, p.pay_period_end, p.hour_wage
    ORDER BY
        ett.employee_id, p.pay_period_start;
    """
    
    # Execute the query and return results as DataFrame
    with engine.connect() as connection:
        df = pd.read_sql_query(query, connection)
    
    return df

In [10]:
employee_payment_df = query_employee_payment_data(engine)

# Display the DataFrame
employee_payment_df

Unnamed: 0,employee_id,pay_period_start,pay_period_end,total_hours_worked,hour_wage,total_amount_paid
0,1,2024-11-15,2024-11-28,93.0,40.88,3801.84
1,1,2024-11-29,2024-12-12,79.0,40.88,3229.52
2,2,2024-11-15,2024-11-28,81.0,36.04,2919.24
3,2,2024-11-29,2024-12-12,74.0,36.04,2666.96
4,3,2024-11-15,2024-11-28,84.0,44.81,3764.04
...,...,...,...,...,...,...
175,88,2024-11-29,2024-12-12,80.0,36.14,2891.20
176,89,2024-11-15,2024-11-28,91.0,21.51,1957.41
177,89,2024-11-29,2024-12-12,83.0,21.51,1785.33
178,90,2024-11-15,2024-11-28,80.0,42.40,3392.00


## Inventory Level Report

In [11]:
def query_inventory(engine):
    """
    Queries Inventory and returns the results as a Pandas DataFrame.

    Args:
        engine (str): PostgreSQL connection string.

    Returns:
        pd.DataFrame: DataFrame containing the query results.
    """
    query = """
    SELECT
        i.store_id,
        s.store_name,
        i.product_id,
        p.product_name,
        i.quantity AS inventory_level,
        i.last_updated
    FROM
        inventory i
    INNER JOIN store s
        ON i.store_id = s.store_id
    INNER JOIN products p
        ON i.product_id = p.product_id
    ORDER BY
        i.store_id, i.product_id;

    """
    
    # Execute the query and return results as DataFrame
    with engine.connect() as connection:
        df = pd.read_sql_query(query, connection)
    
    return df

In [12]:
inventory = query_inventory(engine)

inventory.tail(10)

Unnamed: 0,store_id,store_name,product_id,product_name,inventory_level,last_updated
488,5,ABC Food Mart Brooklyn 3,91,BudgetBuy Granola Bars,920,2024-12-12 11:34:39.416181
489,5,ABC Food Mart Brooklyn 3,92,GreenValley Hand Soap,1033,2024-12-12 11:34:39.050681
490,5,ABC Food Mart Brooklyn 3,93,FamilyFare Hand Soap,3909,2024-12-12 11:34:38.882414
491,5,ABC Food Mart Brooklyn 3,94,HarvestBest Shrimp,1279,2024-12-12 11:34:39.050681
492,5,ABC Food Mart Brooklyn 3,95,FreshFarm Green Tea,1252,2024-12-12 11:34:39.050681
493,5,ABC Food Mart Brooklyn 3,96,OrganicChoice Granola Bars,1547,2024-12-12 11:34:39.050681
494,5,ABC Food Mart Brooklyn 3,97,DailyEssentials Toilet Paper,-5,2024-12-12 11:34:39.050681
495,5,ABC Food Mart Brooklyn 3,98,GoodFood Frozen Vegetables,755,2024-12-12 11:34:39.050681
496,5,ABC Food Mart Brooklyn 3,99,DailyEssentials Green Tea,551,2024-12-12 11:34:39.050681
497,5,ABC Food Mart Brooklyn 3,100,GoodFood Salmon Fillet,971,2024-12-12 11:34:39.050681


## Top Selling Products by Location

In [13]:
def query_top_products_overtime(engine):
    """
    Queries top product and transactions over time and returns the results as a Pandas DataFrame.

    Args:
        engine (str): PostgreSQL connection string.

    Returns:
        pd.DataFrame: DataFrame containing the query results.
    """
    query = """
WITH TopProducts AS (
    SELECT 
        s.product_id,
        p.product_name,
        t.store_id,
        st.store_name,
        SUM(s.quantity) AS total_quantity
    FROM 
        sales s
    JOIN 
        products p ON s.product_id = p.product_id
    JOIN 
        transactions t ON s.transaction_id = t.transaction_id
    JOIN 
        store st ON t.store_id = st.store_id
    GROUP BY 
        s.product_id, p.product_name, t.store_id, st.store_name
    ORDER BY 
        total_quantity DESC
    LIMIT 10 -- Adjust this limit for more/less top products
),
ProductSalesOverTime AS (
    SELECT 
        s.product_id,
        t.store_id,
        DATE(t.transaction_date) AS transaction_date,
        SUM(s.quantity) AS daily_quantity
    FROM 
        sales s
    JOIN 
        transactions t ON s.transaction_id = t.transaction_id
    GROUP BY 
        s.product_id, t.store_id, DATE(t.transaction_date)
)
SELECT 
    tp.product_name,
    tp.store_name,
    ps.transaction_date,
    ps.daily_quantity
FROM 
    ProductSalesOverTime ps
JOIN 
    TopProducts tp ON ps.product_id = tp.product_id AND ps.store_id = tp.store_id
ORDER BY 
    tp.store_name, tp.product_name, ps.transaction_date;
    """
    
    # Execute the query and return results as DataFrame
    with engine.connect() as connection:
        df = pd.read_sql_query(query, connection)
    
    return df

In [14]:
top_products = query_top_products_overtime(engine)

top_products.head(10)

Unnamed: 0,product_name,store_name,transaction_date,daily_quantity
0,GoodFood Dish Soap,ABC Food Mart Brooklyn 1,2024-10-02,5
1,GoodFood Dish Soap,ABC Food Mart Brooklyn 1,2024-10-31,4
2,GoodFood Dish Soap,ABC Food Mart Brooklyn 1,2024-11-15,2
3,GoodFood Dish Soap,ABC Food Mart Brooklyn 1,2024-11-26,7
4,GoodFood Dish Soap,ABC Food Mart Brooklyn 1,2024-11-29,4
5,GoodFood Dish Soap,ABC Food Mart Brooklyn 1,2024-12-01,1
6,GreenValley Toilet Paper,ABC Food Mart Brooklyn 2,2024-10-14,4
7,GreenValley Toilet Paper,ABC Food Mart Brooklyn 2,2024-10-19,1
8,GreenValley Toilet Paper,ABC Food Mart Brooklyn 2,2024-10-28,1
9,GreenValley Toilet Paper,ABC Food Mart Brooklyn 2,2024-10-29,3


In [29]:
engine.close()

AttributeError: 'Engine' object has no attribute 'close'