# Walmart - Physical Store Queue Management at Checkout

```SQL
-- Store dimension: one row per store, keyed by store_id.
CREATE TABLE dim_stores (
    store_id INTEGER NOT NULL,
    store_name VARCHAR NOT NULL,
    location VARCHAR NOT NULL,
    CONSTRAINT dim_stores_pk PRIMARY KEY (store_id)
);

-- Checkout fact: one row per transaction, holding the start and end
-- timestamps of that checkout and the store it happened in.
CREATE TABLE fct_checkout_times (
    store_id INTEGER NOT NULL,
    transaction_id INTEGER NOT NULL,
    checkout_start_time TIMESTAMP NOT NULL,
    checkout_end_time TIMESTAMP NOT NULL,
    CONSTRAINT fct_checkout_times_pk PRIMARY KEY (transaction_id),
    -- Every checkout must belong to a known store, so joins never drop rows.
    CONSTRAINT fct_checkout_times_store_id_fk
        FOREIGN KEY (store_id) REFERENCES dim_stores (store_id),
    -- A checkout cannot end before it starts; protects duration averages
    -- from negative values.
    CONSTRAINT fct_checkout_times_interval_check
        CHECK (checkout_end_time >= checkout_start_time)
);

-- Seed the store dimension with ten sample stores (ids 1 through 10).
INSERT INTO dim_stores
    (store_id, store_name, location)
VALUES
    ( 1, 'Walmart Supercenter - Downtown', 'New York, NY'),
    ( 2, 'Walmart Neighborhood Market',    'Los Angeles, CA'),
    ( 3, 'Walmart Supercenter - Suburb',   'Chicago, IL'),
    ( 4, 'Walmart Discount',               'Houston, TX'),
    ( 5, 'Walmart Express',                'Phoenix, AZ'),
    ( 6, 'Walmart Supercenter - West',     'San Francisco, CA'),
    ( 7, 'Walmart Extra',                  'Dallas, TX'),
    ( 8, 'Walmart Market',                 'Miami, FL'),
    ( 9, 'Walmart Supercenter - East',     'Boston, MA'),
    (10, 'Walmart Outlet',                 'Detroit, MI');


-- Seed twenty sample checkout transactions: two per store, all dated July 2024.
INSERT INTO fct_checkout_times
    (store_id, transaction_id, checkout_start_time, checkout_end_time)
VALUES
    ( 1,  1, '2024-07-05 10:00:00', '2024-07-05 10:05:00'),
    ( 1,  2, '2024-07-12 15:30:00', '2024-07-12 15:40:00'),
    ( 2,  3, '2024-07-06 11:15:00', '2024-07-06 11:20:00'),
    ( 2,  4, '2024-07-20 12:00:00', '2024-07-20 12:08:00'),
    ( 3,  5, '2024-07-07 13:00:00', '2024-07-07 13:15:00'),
    ( 3,  6, '2024-07-21 14:30:00', '2024-07-21 14:45:00'),
    ( 4,  7, '2024-07-08 16:00:00', '2024-07-08 16:12:00'),
    ( 4,  8, '2024-07-22 09:30:00', '2024-07-22 09:43:00'),
    ( 5,  9, '2024-07-09 08:00:00', '2024-07-09 08:06:00'),
    ( 5, 10, '2024-07-23 17:45:00', '2024-07-23 17:50:00'),
    ( 6, 11, '2024-07-10 19:00:00', '2024-07-10 19:20:00'),
    ( 6, 12, '2024-07-24 20:00:00', '2024-07-24 20:15:00'),
    ( 7, 13, '2024-07-11 10:30:00', '2024-07-11 10:36:00'),
    ( 7, 14, '2024-07-25 11:00:00', '2024-07-25 11:05:00'),
    ( 8, 15, '2024-07-12 12:00:00', '2024-07-12 12:14:00'),
    ( 8, 16, '2024-07-26 13:30:00', '2024-07-26 13:45:00'),
    ( 9, 17, '2024-07-13 14:00:00', '2024-07-13 14:10:00'),
    ( 9, 18, '2024-07-27 15:00:00', '2024-07-27 15:12:00'),
    (10, 19, '2024-07-14 17:00:00', '2024-07-14 17:05:00'),
    (10, 20, '2024-07-28 13:00:00', '2024-07-28 13:05:00');

-- Inspect the seeded stores. Explicit columns and an ORDER BY keep the
-- output stable if the schema grows or the planner changes scan order.
SELECT
    store_id,
    store_name,
    location
FROM dim_stores
ORDER BY store_id;

-- Inspect the seeded checkout transactions, in transaction order.
SELECT
    store_id,
    transaction_id,
    checkout_start_time,
    checkout_end_time
FROM fct_checkout_times
ORDER BY transaction_id;
```

In [1]:
import pandas as pd
import numpy as np

In [11]:
# Load the store dimension exactly as stored in the CSV.
df_store = pd.read_csv('Data/020/dim_stores.csv')

# Load the checkout facts, parsing both timestamp columns while reading so
# that later cells can use the .dt accessor on them directly.
df_checkout = pd.read_csv(
    'Data/020/fct_checkout_times.csv',
    parse_dates=['checkout_start_time', 'checkout_end_time'],
)

# Preview the first rows of the store table.
df_store.head()

Unnamed: 0,store_id,store_name,location
0,1,Walmart Supercenter - Downtown,"New York, NY"
1,2,Walmart Neighborhood Market,"Los Angeles, CA"
2,3,Walmart Supercenter - Suburb,"Chicago, IL"
3,4,Walmart Discount,"Houston, TX"
4,5,Walmart Express,"Phoenix, AZ"


In [3]:
# Preview the first five checkout rows.
df_checkout.head(n=5)

Unnamed: 0,store_id,transaction_id,checkout_start_time,checkout_end_time
0,1,1,2024-07-05 10:00:00.000000,2024-07-05 10:05:00.000000
1,1,2,2024-07-12 15:30:00.000000,2024-07-12 15:40:00.000000
2,2,3,2024-07-06 11:15:00.000000,2024-07-06 11:20:00.000000
3,2,4,2024-07-20 12:00:00.000000,2024-07-20 12:08:00.000000
4,3,5,2024-07-07 13:00:00.000000,2024-07-07 13:15:00.000000


# Pregunta 1

### ¿Cuál es el tiempo promedio de espera en caja en minutos para cada tienda Walmart durante julio de 2024? Incluya el nombre de la tienda de la tabla dim_stores para identificar los impactos específicos por ubicación. Esta métrica ayudará a determinar qué tiendas tienen los tiempos de espera más largos para los clientes.

In [7]:
# Question 1: average checkout duration in minutes per store for July 2024.

# Attach store attributes to every checkout so results can be labelled by name.
df_merge = df_checkout.merge(df_store, on='store_id')

# Coerce both timestamp columns to datetime. This is a no-op when they were
# already parsed at load time and keeps the cell working if they arrive as text.
time_cols = ['checkout_start_time', 'checkout_end_time']
df_merge[time_cols] = df_merge[time_cols].apply(pd.to_datetime)

# Checkout duration: timedelta -> seconds -> minutes.
df_merge['duration_min'] = (df_merge['checkout_end_time'] - df_merge['checkout_start_time']).dt.total_seconds() / 60

# Half-open window [2024-07-01, 2024-08-01). Filtering on `.dt.month == 7`
# alone would also match July of any other year present in the data.
july_start = pd.Timestamp('2024-07-01')
july_end = pd.Timestamp('2024-08-01')
in_july_2024 = (df_merge['checkout_start_time'] >= july_start) & (df_merge['checkout_start_time'] < july_end)
df_july = df_merge[in_july_2024]

# One row per store name with its mean checkout duration.
resultado = df_july.groupby('store_name')['duration_min'].mean().reset_index()

resultado

Unnamed: 0,store_name,duration_min
0,Walmart Discount,12.5
1,Walmart Express,5.5
2,Walmart Extra,5.5
3,Walmart Market,14.5
4,Walmart Neighborhood Market,6.5
5,Walmart Outlet,5.0
6,Walmart Supercenter - Downtown,7.5
7,Walmart Supercenter - East,11.0
8,Walmart Supercenter - Suburb,15.0
9,Walmart Supercenter - West,17.5


```SQL
-- Question 1: average checkout duration in minutes per store for July 2024,
-- slowest stores first.
SELECT
    s.store_name,
    -- interval -> seconds (EPOCH) -> minutes
    AVG(EXTRACT(EPOCH FROM (c.checkout_end_time - c.checkout_start_time)) / 60) AS avg_wait_minutes
FROM fct_checkout_times AS c
INNER JOIN dim_stores AS s
    ON c.store_id = s.store_id
-- Half-open range so the whole of July 31 is included. The upper bound
-- '2024-07-31' in a BETWEEN is read as 2024-07-31 00:00:00 and would drop
-- every checkout that started later on that day.
WHERE c.checkout_start_time >= TIMESTAMP '2024-07-01'
  AND c.checkout_start_time < TIMESTAMP '2024-08-01'
GROUP BY s.store_name
ORDER BY avg_wait_minutes DESC;
```

# Pregunta 2

### Para las tiendas que superaron un tiempo promedio de espera en caja de 10 minutos en julio de 2024, ¿cuáles son los tiempos de espera promedio en minutos desglosados por cada hora del día? Use la información de dim_stores para asegurar la identificación correcta de cada tienda. Este detalle ayudará a identificar las horas específicas en las que los tiempos de espera son particularmente largos.

In [12]:
# Question 2: hourly average checkout duration for stores whose July 2024
# average exceeded 10 minutes.

# 1. Join checkouts to stores and compute each checkout's duration in minutes
df_merge = df_checkout.merge(df_store, on='store_id')
df_merge['duration'] = (df_merge['checkout_end_time'] - df_merge['checkout_start_time']).dt.total_seconds() / 60

# 2. Restrict to July 2024 with a half-open window [2024-07-01, 2024-08-01),
#    so both the store average and the hourly breakdown use the same period
july_start = pd.Timestamp('2024-07-01')
july_end = pd.Timestamp('2024-08-01')
in_july_2024 = (df_merge['checkout_start_time'] >= july_start) & (df_merge['checkout_start_time'] < july_end)
df_merge_july = df_merge[in_july_2024]

# 3. Identify stores whose average duration is above 10 minutes
avg_store = df_merge_july.groupby('store_name')['duration'].mean()
slow_stores = avg_store[avg_store > 10].index

# 4. Keep only those stores and extract the hour of day of each checkout start
#    (.copy() so adding the 'hour' column does not write into a slice)
df_slow = df_merge_july[df_merge_july['store_name'].isin(slow_stores)].copy()
df_slow['hour'] = df_slow['checkout_start_time'].dt.hour

# 5. Final result: mean duration per store and hour
reporte_horas = df_slow.groupby(['store_name', 'hour'])['duration'].mean().reset_index()

reporte_horas

Unnamed: 0,store_name,hour,duration
0,Walmart Discount,9,13.0
1,Walmart Discount,16,12.0
2,Walmart Market,12,14.0
3,Walmart Market,13,15.0
4,Walmart Supercenter - East,14,10.0
5,Walmart Supercenter - East,15,12.0
6,Walmart Supercenter - Suburb,13,15.0
7,Walmart Supercenter - Suburb,14,15.0
8,Walmart Supercenter - West,19,20.0
9,Walmart Supercenter - West,20,15.0


```SQL
-- Question 2: hourly average checkout duration in minutes for stores whose
-- July 2024 average exceeded 10 minutes.
WITH july_checkouts AS (
    -- July 2024 checkouts with their duration in minutes. The half-open range
    -- includes all of July 31; BETWEEN ... AND '2024-07-31' would stop at
    -- midnight at the start of that day.
    SELECT
        store_id,
        checkout_start_time,
        EXTRACT(EPOCH FROM (checkout_end_time - checkout_start_time)) / 60 AS wait_minutes
    FROM fct_checkout_times
    WHERE checkout_start_time >= TIMESTAMP '2024-07-01'
      AND checkout_start_time < TIMESTAMP '2024-08-01'
),
slow_stores AS (
    -- Stores whose average July 2024 checkout took longer than 10 minutes.
    SELECT store_id
    FROM july_checkouts
    GROUP BY store_id
    HAVING AVG(wait_minutes) > 10
)
SELECT
    s.store_name,
    EXTRACT(HOUR FROM jc.checkout_start_time) AS hour_of_day,
    AVG(jc.wait_minutes) AS avg_wait_min
FROM july_checkouts AS jc
-- slow_stores has one row per store_id, so this join only filters rows.
INNER JOIN slow_stores AS ss
    ON jc.store_id = ss.store_id
INNER JOIN dim_stores AS s
    ON jc.store_id = s.store_id
GROUP BY s.store_name, EXTRACT(HOUR FROM jc.checkout_start_time)
ORDER BY s.store_name, hour_of_day;
```

# Pregunta 3

### En todas las tiendas durante julio de 2024, ¿qué horas presentan los tiempos promedio de espera en caja más largos en minutos? Este análisis servirá de guía para recomendar estrategias óptimas de asignación de personal.

In [14]:
# Question 3: across all stores, which hours of the day have the longest
# average checkout duration in July 2024.

# 1. Compute each checkout's duration in minutes (timedelta -> seconds -> minutes)
df_checkout['duration_min'] = (df_checkout['checkout_end_time'] - df_checkout['checkout_start_time']).dt.total_seconds() / 60

# 2. Keep July 2024 only, using a half-open window [2024-07-01, 2024-08-01);
#    `.dt.month == 7` alone would also match July of any other year.
#    Then extract the hour of day of each checkout start.
july_start = pd.Timestamp('2024-07-01')
july_end = pd.Timestamp('2024-08-01')
in_july_2024 = (df_checkout['checkout_start_time'] >= july_start) & (df_checkout['checkout_start_time'] < july_end)
df_july = df_checkout[in_july_2024].copy()
df_july['hour'] = df_july['checkout_start_time'].dt.hour

# 3. Average duration per hour, longest first
staffing_guide = df_july.groupby('hour')['duration_min'].mean().sort_values(ascending=False)

staffing_guide

hour
19    20.000000
20    15.000000
9     13.000000
14    12.500000
16    12.000000
13    11.666667
15    11.000000
12    11.000000
8      6.000000
10     5.500000
11     5.000000
17     5.000000
Name: duration_min, dtype: float64

```SQL
-- Question 3: across all stores, average checkout duration in minutes per
-- hour of day for July 2024, longest first.
SELECT
    EXTRACT(HOUR FROM checkout_start_time) AS hour_of_day,
    -- interval -> seconds (EPOCH) -> minutes
    AVG(EXTRACT(EPOCH FROM (checkout_end_time - checkout_start_time)) / 60) AS avg_wait_minutes
FROM fct_checkout_times
-- Half-open range so the whole of July 31 is included; BETWEEN with an upper
-- bound of '2024-07-31' stops at midnight at the start of that day.
WHERE checkout_start_time >= TIMESTAMP '2024-07-01'
  AND checkout_start_time < TIMESTAMP '2024-08-01'
GROUP BY EXTRACT(HOUR FROM checkout_start_time)
-- hour_of_day breaks ties between equal averages so row order is deterministic.
ORDER BY avg_wait_minutes DESC, hour_of_day;
```