# Walmart - Everyday Necessities Price Optimization for Stores

```SQL

-- Fact table of product sales; each row carries a product, a quantity, a date
-- and a unit price.
--   sale_id       identifier of the sale row
--   product_id    joined to dim_products.product_id by the queries in this file
--   quantity_sold number of units sold
--   sale_date     calendar date of the sale (DATE, no time component)
--   unit_price    price per unit, stored with two decimal places
-- No primary key, foreign key or NOT NULL constraints are declared here.
CREATE TABLE fct_sales (
    sale_id INT,
    product_id INT,
    quantity_sold INT,
    sale_date DATE,
    unit_price DECIMAL(10, 2)
);

-- Product dimension: name and category for each product_id.
--   product_id    joined from fct_sales.product_id by the queries in this file
--   product_name  display name of the product
--   category      product grouping; the queries in this file filter on
--                 'Essential Household'
-- No primary key or NOT NULL constraints are declared here.
CREATE TABLE dim_products (
    product_id INT,
    product_name VARCHAR(255),
    category VARCHAR(255)
);

-- Sample data: 17 sales dated 2024-07-05 through 2024-07-21.
-- NOTE(review): sale_id 2 prices product 2 at 65, while the other two sales of
-- product 2 (sale_id 6 and 14) use 7.5 and 6.5. This may be a deliberate
-- outlier or a typo for 6.5 -- confirm before relying on product 2's average.
INSERT INTO fct_sales (sale_id, product_id, quantity_sold, sale_date, unit_price)
VALUES
    (1, 1, 10, '2024-07-05', 4.5),
    (2, 2, 3, '2024-07-06', 65),
    (3, 3, 7, '2024-07-07', 16),
    (4, 4, 12, '2024-07-08', 3.99),
    (5, 1, 8, '2024-07-10', 4.75),
    (6, 2, 6, '2024-07-11', 7.5),
    (7, 3, 4, '2024-07-12', 15.5),
    (8, 7, 15, '2024-07-13', 5),
    (9, 7, 9, '2024-07-14', 17),
    (10, 4, 11, '2024-07-15', 3.5),
    (11, 5, 20, '2024-07-16', 8),
    (12, 6, 30, '2024-07-17', 2.5),
    (13, 8, 5, '2024-07-18', 12),
    (14, 2, 4, '2024-07-19', 6.5),
    (15, 1, 3, '2024-07-20', 4.25),
    (16, 9, 10, '2024-07-20', 4),
    (17, 10, 6, '2024-07-21', 16.5);

-- Sample data: 10 products. Seven are 'Essential Household'
-- (ids 1, 2, 3, 4, 7, 9, 10) and three are 'Other' (ids 5, 6, 8).
INSERT INTO dim_products (product_id, product_name, category)
VALUES
    (1, 'Paper Towels', 'Essential Household'),
    (2, 'Toilet Paper', 'Essential Household'),
    (3, 'Laundry Detergent', 'Essential Household'),
    (4, 'Dish Soap', 'Essential Household'),
    (5, 'Batteries', 'Other'),
    (6, 'Snacks', 'Other'),
    (7, 'Cleaning Spray', 'Essential Household'),
    (8, 'Shampoo', 'Other'),
    (9, 'Hand Soap', 'Essential Household'),
    (10, 'Face Mask', 'Essential Household');

-- Preview every loaded sales row, all columns in table order.
SELECT
    sale_id,
    product_id,
    quantity_sold,
    sale_date,
    unit_price
FROM fct_sales;

-- Preview every loaded product row, all columns in table order.
SELECT
    product_id,
    product_name,
    category
FROM dim_products;
```

In [1]:
import pandas as pd
import numpy as np

In [4]:
# Load both CSV extracts; sale_date is converted to datetimes while reading.
df_sales = pd.read_csv(filepath_or_buffer='Data/024/fct_sales.csv', parse_dates=['sale_date'])
df_product = pd.read_csv(filepath_or_buffer='Data/024/dim_products.csv')

# Display the first five product rows.
df_product.head(n=5)

Unnamed: 0,product_id,product_name,category
0,1,Paper Towels,Essential Household
1,2,Toilet Paper,Essential Household
2,3,Laundry Detergent,Essential Household
3,4,Dish Soap,Essential Household
4,5,Batteries,Other


In [5]:
# Display the first five sales rows.
df_sales.head(n=5)

Unnamed: 0,sale_id,product_id,quantity_sold,sale_date,unit_price
0,1,1,10,2024-07-05,4.5
1,2,2,3,2024-07-06,65.0
2,3,3,7,2024-07-07,16.0
3,4,4,12,2024-07-08,3.99
4,5,1,8,2024-07-10,4.75


# Pregunta 1

### ¿Cuál es el volumen total de ventas (es decir, la cantidad total vendida) para los artículos domésticos esenciales (essential household items) en julio de 2024? Proporcione el resultado con una columna llamada 'Total_Sales_Volume'.

In [9]:
# Attach product attributes to every sale (inner join on product_id).
df_merge = pd.merge(df_product, df_sales, on='product_id')

# Row masks: sold within July 2024 (both end dates inclusive) and belonging
# to the essential household category.
in_july = (df_merge['sale_date'] >= '2024-07-01') & (df_merge['sale_date'] <= '2024-07-31')
is_essential = df_merge['category'] == 'Essential Household'

df_july = df_merge.loc[in_july & is_essential]

# Total units sold across the selected rows.
total = df_july['quantity_sold'].sum()

# One-row frame with the column name the question asks for.
resultado1 = pd.DataFrame({'Total_Sales_Volume': [total]})

resultado1

Unnamed: 0,Total_Sales_Volume
0,108


```SQL
-- Total units of essential household items sold in July 2024.
SELECT
    -- SUM over zero matching rows yields NULL; report 0 so an empty month
    -- still produces a numeric volume.
    COALESCE(SUM(s.quantity_sold), 0) AS Total_Sales_Volume
FROM fct_sales AS s
INNER JOIN dim_products AS p
    ON s.product_id = p.product_id
WHERE p.category = 'Essential Household'
  -- Half-open range: covers all of July even if sale_date ever carries a time.
  AND s.sale_date >= '2024-07-01'
  AND s.sale_date < '2024-08-01';
```

# Pregunta 2

### Para los artículos domésticos esenciales (essential household items) vendidos en julio de 2024, clasifique los artículos en rangos de precio 'Bajo' (Low), 'Medio' (Medium) y 'Alto' (High) basándose en su precio promedio. Utilice los siguientes criterios: 'Low' para precios por debajo de $5, 'Medium' para precios entre $5 y $15 (ambos inclusive), y 'High' para precios por encima de $15.

In [10]:
# 1. Join sales to products and keep essential household sales from July 2024
#    (both end dates inclusive).
df_merged = df_product.merge(df_sales, on='product_id')
df_essential = df_merged[
    (df_merged['category'] == 'Essential Household') &
    (df_merged['sale_date'].between('2024-07-01', '2024-07-31'))
]

# 2. Average unit price per product.
df_avg = df_essential.groupby('product_name')['unit_price'].mean().reset_index()

# 3. Label each average: Low < 5, Medium 5..15 (inclusive), High > 15.
#    Left-closed bins from pd.cut(..., right=False) would form [5, 15) and put
#    an average of exactly 15 into 'High', so the thresholds are written out
#    explicitly. Anything not matched by the first two tests falls to 'High'.
labels = ['Low', 'Medium', 'High']
avg_price = df_avg['unit_price']
range_names = np.select(
    [avg_price < 5, avg_price <= 15],  # evaluated in order; first match wins
    ['Low', 'Medium'],
    default='High',
)
# Keep an ordered categorical column, as pd.cut with labels would produce.
df_avg['price_range'] = pd.Categorical(range_names, categories=labels, ordered=True)

df_avg

Unnamed: 0,product_name,unit_price,price_range
0,Cleaning Spray,11.0,Medium
1,Dish Soap,3.745,Low
2,Face Mask,16.5,High
3,Hand Soap,4.0,Low
4,Laundry Detergent,15.75,High
5,Paper Towels,4.5,Low
6,Toilet Paper,26.333333,High


```SQL
-- Average July 2024 unit price of each essential household product, together
-- with the price tier that average falls into.
WITH product_avg_prices AS (
    -- One row per product name: mean unit price over the filtered sales.
    SELECT
        p.product_name,
        AVG(s.unit_price) AS avg_price
    FROM fct_sales AS s
    INNER JOIN dim_products AS p
        ON p.product_id = s.product_id
    WHERE s.sale_date BETWEEN '2024-07-01' AND '2024-07-31'
      AND p.category = 'Essential Household'
    GROUP BY p.product_name
)
SELECT
    pap.product_name,
    pap.avg_price,
    -- Branches are tested in order: below 5, then up to and including 15.
    CASE
        WHEN pap.avg_price < 5 THEN 'Low'
        WHEN pap.avg_price <= 15 THEN 'Medium'
        ELSE 'High'
    END AS price_range
FROM product_avg_prices AS pap;
```

# Pregunta 3

### Identifique el rango de precio con el mayor volumen total de ventas (cantidad total vendida) para los artículos domésticos esenciales en julio de 2024. Utilice los mismos criterios de la pregunta anterior: 'Low' para precios por debajo de $5, 'Medium' para precios entre $5 y $15 (ambos inclusive), y 'High' para precios por encima de $15.

In [11]:
# 1. Join sales to products and keep essential household sales from July 2024
#    (both end dates inclusive).
df_merged = df_product.merge(df_sales, on='product_id')
df_filtered = df_merged[
    (df_merged['category'] == 'Essential Household') &
    (df_merged['sale_date'].between('2024-07-01', '2024-07-31'))
].copy()  # copy so the new column is added to an independent frame

# 2. Label every sale by its own unit price: Low < 5, Medium 5..15 (inclusive),
#    High > 15. The thresholds are written out explicitly instead of nudging a
#    bin edge to 15.01, which would label prices between 15 and 15.01 'Medium'.
labels = ['Low', 'Medium', 'High']
unit_price = df_filtered['unit_price']
range_names = np.select(
    [unit_price < 5, unit_price <= 15],  # evaluated in order; first match wins
    ['Low', 'Medium'],
    default='High',
)
# Ordered categorical so the groupby below still lists all three ranges.
df_filtered['price_range'] = pd.Categorical(range_names, categories=labels, ordered=True)

# 3. Total units per range (observed=False keeps ranges with no sales), then
#    keep only the range with the largest total.
resultado = df_filtered.groupby('price_range', observed=False)['quantity_sold'].sum().reset_index()
resultado = resultado.sort_values(by='quantity_sold', ascending=False).head(1)

print(resultado)

  price_range  quantity_sold
0         Low             54


```SQL
-- Price range with the highest total units sold among essential household
-- sales in July 2024. Each sale is classified by its own unit price.
WITH sales_by_range AS (
    -- One row per qualifying sale, tagged with its price range.
    SELECT
        CASE
            WHEN s.unit_price < 5 THEN 'Low'
            WHEN s.unit_price <= 15 THEN 'Medium'  -- 5 through 15 inclusive
            ELSE 'High'
        END AS price_range,
        s.quantity_sold
    FROM fct_sales AS s
    INNER JOIN dim_products AS p
        ON s.product_id = p.product_id
    WHERE p.category = 'Essential Household'
      -- Half-open range: covers all of July even if sale_date ever carries a time.
      AND s.sale_date >= '2024-07-01'
      AND s.sale_date < '2024-08-01'
)
SELECT
    price_range,
    SUM(quantity_sold) AS Total_Sales_Volume
FROM sales_by_range
GROUP BY price_range
-- Secondary sort key makes the winner deterministic when totals tie.
ORDER BY Total_Sales_Volume DESC, price_range
LIMIT 1;
```