# Amazon - Reorder Patterns for Amazon Fresh

```SQL

-- Order fact table. No keys or constraints are declared; customer_id and
-- product_id are the columns the queries below join on.
CREATE TABLE fct_orders (
    order_id     INTEGER,  -- order identifier
    customer_id  INTEGER,  -- matches dim_customers.customer_id
    product_id   INTEGER,  -- matches dim_products.product_id
    reorder_flag INTEGER,  -- 1 = reorder, 0 = not a reorder (the only values seeded)
    order_date   DATE      -- calendar date of the order
);

-- Product dimension. No keys or constraints are declared.
CREATE TABLE dim_products (
    product_id   INTEGER,  -- matched against fct_orders.product_id
    product_code VARCHAR,  -- e.g. 'FRU001'; the first three characters are used as a category code
    category     VARCHAR   -- category name, e.g. 'Fruits'
);

-- Customer dimension. No keys or constraints are declared.
CREATE TABLE dim_customers (
    customer_id   INTEGER,  -- matched against fct_orders.customer_id
    customer_name VARCHAR   -- display name of the customer
);

-- Seed data: 20 orders (order_id 1001-1020), all dated within Q4 2024
-- (2024-10-05 through 2024-12-25). customer_id and product_id both range
-- over 1-10, and reorder_flag is either 0 or 1 in every row.
INSERT INTO fct_orders (order_id, customer_id, product_id, reorder_flag, order_date)
VALUES
    (1001, 1, 1, 1, '2024-10-05'),
    (1002, 2, 2, 0, '2024-10-12'),
    (1003, 3, 3, 1, '2024-10-20'),
    (1004, 4, 4, 0, '2024-11-01'),
    (1005, 5, 5, 1, '2024-11-15'),
    (1006, 6, 6, 1, '2024-11-20'),
    (1007, 7, 7, 0, '2024-12-05'),
    (1008, 8, 8, 1, '2024-12-10'),
    (1009, 9, 9, 0, '2024-12-15'),
    (1010, 10, 10, 1, '2024-12-20'),
    (1011, 1, 1, 1, '2024-10-25'),
    (1012, 2, 3, 1, '2024-11-05'),
    (1013, 3, 2, 0, '2024-11-18'),
    (1014, 4, 4, 1, '2024-12-02'),
    (1015, 5, 5, 0, '2024-12-08'),
    (1016, 6, 6, 1, '2024-12-12'),
    (1017, 7, 7, 1, '2024-10-30'),
    (1018, 8, 2, 0, '2024-11-22'),
    (1019, 9, 9, 1, '2024-11-28'),
    (1020, 10, 10, 0, '2024-12-25');

-- Seed data: 10 products, two per category. The first three characters of
-- each product_code (FRU, VEG, MIL, BEV, BAK) line up one-to-one with the
-- five category names.
INSERT INTO dim_products (product_id, product_code, category)
VALUES
    (1, 'FRU001', 'Fruits'),
    (2, 'VEG002', 'Vegetables'),
    (3, 'MIL003', 'Dairy'),
    (4, 'BEV004', 'Beverages'),
    (5, 'BAK005', 'Bakery'),
    (6, 'FRU006', 'Fruits'),
    (7, 'VEG007', 'Vegetables'),
    (8, 'MIL008', 'Dairy'),
    (9, 'BEV009', 'Beverages'),
    (10, 'BAK010', 'Bakery');

-- Seed data: 10 customers with customer_id 1-10, the same id range that
-- fct_orders.customer_id uses.
INSERT INTO dim_customers (customer_id, customer_name)
VALUES
    (1, 'Alice Smith'),
    (2, 'Bob Johnson'),
    (3, 'Charlie Davis'),
    (4, 'Diana Garcia'),
    (5, 'Ethan Martinez'),
    (6, 'Fiona Clark'),
    (7, 'George Lewis'),
    (8, 'Hannah Walker'),
    (9, 'Ian Hall'),
    (10, 'Julia Allen');

-- Sanity-check the seeded tables. Columns are listed explicitly and rows are
-- ordered by each table's id column so the output does not depend on the
-- engine's physical row order or on later schema changes.
SELECT
    order_id,
    customer_id,
    product_id,
    reorder_flag,
    order_date
FROM fct_orders
ORDER BY order_id;

SELECT
    product_id,
    product_code,
    category
FROM dim_products
ORDER BY product_id;

SELECT
    customer_id,
    customer_name
FROM dim_customers
ORDER BY customer_id;
```

In [1]:
import pandas as pd
import numpy as np

In [2]:
df_customers, df_products, df_orders = (
    # Read the three exercise tables; the unpacking order on the left matches
    # the order of the paths listed below.
    pd.read_csv(csv_path)
    for csv_path in (
        'Data/013/dim_customers.csv',
        'Data/013/dim_products.csv',
        'Data/013/fct_orders.csv',
    )
)

df_customers.head()  # preview the first rows of the customer table

Unnamed: 0,customer_id,customer_name
0,1,Alice Smith
1,2,Bob Johnson
2,3,Charlie Davis
3,4,Diana Garcia
4,5,Ethan Martinez


In [3]:
df_products.head()  # preview the first rows of the product table

Unnamed: 0,product_id,product_code,category
0,1,FRU001,Fruits
1,2,VEG002,Vegetables
2,3,MIL003,Dairy
3,4,BEV004,Beverages
4,5,BAK005,Bakery


In [4]:
df_orders.head()  # preview the first rows of the orders table

Unnamed: 0,order_id,customer_id,product_id,reorder_flag,order_date
0,1001,1,1,1,2024-10-05
1,1002,2,2,0,2024-10-12
2,1003,3,3,1,2024-10-20
3,1004,4,4,0,2024-11-01
4,1005,5,5,1,2024-11-15


# Pregunta 1

### The product team wants to analyze the most frequently reordered product categories. Can you provide a list of the product category codes (the first three letters of each product code) and their reorder counts for Q4 2024?

In [6]:
df_merge = (
    df_products
    .merge(df_orders, on='product_id')
    # Category code = first three characters of the product code.
    .assign(cat_code=lambda frame: frame['product_code'].str[:3])
)

df_q4 = df_merge[
    # Q4 2024 window, both bounds inclusive. order_date is compared against
    # ISO-formatted date strings.
    (df_merge['order_date'] >= '2024-10-01')
    & (df_merge['order_date'] <= '2024-12-31')
]

resultado = df_q4.groupby('cat_code', as_index=False)['reorder_flag'].sum()  # reorders per category code

resultado_q4 = resultado.sort_values('reorder_flag', ascending=False)  # most reordered first

resultado_q4

Unnamed: 0,cat_code,reorder_flag
2,FRU,4
3,MIL,3
0,BAK,2
1,BEV,2
4,VEG,1


```SQL
-- Reorder counts per category code (first three characters of product_code)
-- for orders dated in Q4 2024. reorder_flag is summed, so rows with flag 0
-- add nothing to a category's count.
SELECT
    LEFT(d.product_code, 3) AS category_code,
    SUM(o.reorder_flag) AS total_reorders
FROM dim_products AS d
INNER JOIN fct_orders AS o
    ON d.product_id = o.product_id
-- Half-open range: covers the whole quarter and stays correct even if
-- order_date ever carries a time component.
WHERE o.order_date >= DATE '2024-10-01'
  AND o.order_date < DATE '2025-01-01'
-- Group by the expression itself; referencing a SELECT alias in GROUP BY is
-- not accepted by every SQL engine.
GROUP BY LEFT(d.product_code, 3)
-- category_code breaks ties so equal counts always come back in the same order.
ORDER BY
    total_reorders DESC,
    category_code;
```

# Pregunta 2

### Para entender mejor las preferencias de los clientes, el equipo necesita conocer los detalles de los clientes que recompran productos específicos. ¿Puedes recuperar la información del cliente junto con los códigos de los productos que recompró durante el cuarto trimestre (Q4) de 2024?

In [15]:
df_full = pd.merge(
    pd.merge(df_orders, df_customers, on='customer_id'),
    df_products,
    on='product_id',
)

df_reorders = df_full.loc[
    # Keep rows inside the Q4 2024 window (bounds inclusive) that are
    # flagged as reorders.
    df_full['order_date'].between('2024-10-01', '2024-12-31')
    & df_full['reorder_flag'].eq(1)
]

resultado = df_reorders.loc[:, ['customer_name', 'product_code', 'order_date']]  # who reordered what, and when

resultado



Unnamed: 0,customer_name,product_code,order_date
0,Alice Smith,FRU001,2024-10-05
2,Charlie Davis,MIL003,2024-10-20
4,Ethan Martinez,BAK005,2024-11-15
5,Fiona Clark,FRU006,2024-11-20
7,Hannah Walker,MIL008,2024-12-10
9,Julia Allen,BAK010,2024-12-20
10,Alice Smith,FRU001,2024-10-25
11,Bob Johnson,MIL003,2024-11-05
13,Diana Garcia,BEV004,2024-12-02
15,Fiona Clark,FRU006,2024-12-12


```SQL
-- Customers and the product codes they reordered during Q4 2024,
-- one row per reorder.
SELECT
    c.customer_name,
    p.product_code,
    o.reorder_flag,  -- always 1 here because of the filter below
    o.order_date     -- lets repeat reorders of the same product be told apart
FROM dim_customers AS c
INNER JOIN fct_orders AS o
    ON c.customer_id = o.customer_id
INNER JOIN dim_products AS p
    ON o.product_id = p.product_id
-- Half-open range: covers the whole quarter and stays correct even if
-- order_date ever carries a time component.
WHERE o.order_date >= DATE '2024-10-01'
  AND o.order_date < DATE '2025-01-01'
  AND o.reorder_flag = 1
-- order_date and order_id make the row order deterministic within a customer.
ORDER BY
    c.customer_name,
    o.order_date,
    o.order_id;
```

# Pregunta 3

### Al calcular la frecuencia promedio de recompra, es importante manejar los casos donde los conteos de recompra pueden faltar o ser cero. ¿Puedes calcular la frecuencia de recompra promedio a través de las categorías de productos, asegurándote de que cualquier valor faltante o nulo sea gestionado apropiadamente para el cuarto trimestre (Q4) de 2024?

In [18]:
df_orders_q4 = df_orders[
    # Restrict orders to Q4 2024 BEFORE joining. Filtering after the left join
    # would drop products whose only orders fall outside the quarter, instead
    # of keeping them with a reorder count of zero.
    df_orders['order_date'].between('2024-10-01', '2024-12-31')
]

df_merged = pd.merge(df_products, df_orders_q4, on='product_id', how='left')

df_q4 = df_merged.assign(
    # Products without a Q4 order leave the left join with NaN in
    # reorder_flag; count them as 0 reorders. assign() builds a new frame, so
    # nothing is written into a filtered slice (no SettingWithCopyWarning).
    reorder_flag=df_merged['reorder_flag'].fillna(0)
)

avg_freq = df_q4.groupby('category')['reorder_flag'].sum().mean()  # mean of the per-category reorder totals

avg_freq

np.float64(2.4)

```SQL
-- Average, across product categories, of each category's Q4 2024 reorder total.
WITH category_totals AS (
    -- The date window sits in the join condition, not in WHERE, so a category
    -- with no Q4 orders still keeps a row (NULL order columns) and counts as 0.
    SELECT
        prod.category,
        COALESCE(SUM(ord.reorder_flag), 0) AS total_reorders
    FROM dim_products AS prod
    LEFT JOIN fct_orders AS ord
        ON prod.product_id = ord.product_id
        AND ord.order_date >= '2024-10-01'
        AND ord.order_date <= '2024-12-31'
    GROUP BY prod.category
)
SELECT AVG(category_totals.total_reorders) AS average_reorder_frequency
FROM category_totals;
```