# Amazon - Third-Party Seller Fees and Performance Metrics

```SQL
-- Seller dimension: one row per seller, identified by seller_id.
CREATE TABLE dim_seller (
    seller_id INTEGER NOT NULL,
    seller_name VARCHAR NOT NULL,
    CONSTRAINT dim_seller_pk PRIMARY KEY (seller_id)
);

-- Sales fact table: one row per sale transaction, identified by sale_id and
-- linked to dim_seller through seller_id.
-- NOTE(review): in the seed rows fee_amount_percentage is roughly 5% of
-- sale_amount (e.g. 25 for a sale of 500), so it looks like a fee amount
-- rather than a percentage -- confirm the intended unit.
CREATE TABLE fct_seller_sales (
    sale_id INTEGER NOT NULL,
    seller_id INTEGER NOT NULL,
    sale_amount DECIMAL NOT NULL,
    fee_amount_percentage DECIMAL NOT NULL,
    sale_date DATE NOT NULL,
    CONSTRAINT fct_seller_sales_pk PRIMARY KEY (sale_id),
    CONSTRAINT fct_seller_sales_seller_id_fk
        FOREIGN KEY (seller_id) REFERENCES dim_seller (seller_id)
);

-- Seed the seller dimension with five sellers (seller_id 1 through 5).
INSERT INTO dim_seller (seller_id, seller_name)
VALUES
    (1, 'Alpha Sellers'),
    (2, 'Beta Traders'),
    (3, 'Gamma Goods'),
    (4, 'Delta Deals'),
    (5, 'Epsilon Emporium');

-- Seed 34 sales spread over April, May and June 2024.
-- Every seller_id used below exists in the dim_seller rows inserted above.
INSERT INTO fct_seller_sales (sale_id, seller_id, sale_amount, fee_amount_percentage, sale_date)
VALUES
    -- April 2024 (seller 1 has two sales with the same amount, on 04-05 and 04-28)
    (1, 1, 500, 25, '2024-04-05'),
    (2, 2, 600, 30, '2024-04-07'),
    (3, 3, 700, 35, '2024-04-10'),
    (4, 4, 800, 40, '2024-04-15'),
    (5, 5, 650, 32, '2024-04-20'),
    (6, 1, 500, 27, '2024-04-28'),
    -- May 2024 (two sales per seller)
    (7, 1, 550, 28, '2024-05-03'),
    (8, 1, 580, 29, '2024-05-15'),
    (9, 2, 620, 31, '2024-05-05'),
    (10, 2, 630, 32, '2024-05-20'),
    (11, 3, 710, 36, '2024-05-08'),
    (12, 3, 720, 37, '2024-05-22'),
    (13, 4, 810, 41, '2024-05-10'),
    (14, 4, 805, 40, '2024-05-25'),
    (15, 5, 660, 33, '2024-05-11'),
    (16, 5, 680, 34, '2024-05-28'),
    -- June 2024 (three or four sales per seller)
    (17, 1, 560, 28, '2024-06-03'),
    (18, 1, 570, 29, '2024-06-05'),
    (19, 1, 580, 30, '2024-06-10'),
    (20, 1, 590, 31, '2024-06-13'),
    (21, 2, 640, 32, '2024-06-04'),
    (22, 2, 650, 33, '2024-06-06'),
    (23, 2, 660, 34, '2024-06-11'),
    (24, 2, 670, 35, '2024-06-15'),
    (25, 3, 720, 36, '2024-06-02'),
    (26, 3, 730, 37, '2024-06-05'),
    (27, 3, 740, 38, '2024-06-12'),
    (28, 4, 820, 42, '2024-06-03'),
    (29, 4, 830, 43, '2024-06-08'),
    (30, 4, 840, 44, '2024-06-14'),
    (31, 5, 670, 34, '2024-06-01'),
    (32, 5, 680, 35, '2024-06-06'),
    (33, 5, 690, 36, '2024-06-11'),
    (34, 5, 700, 37, '2024-06-16');

-- Preview the seller dimension; explicit columns and a key ordering keep the
-- output stable if the table definition or storage order changes.
SELECT
    seller_id,
    seller_name
FROM dim_seller
ORDER BY seller_id;

-- Preview the sales fact table, ordered by its transaction id.
SELECT
    sale_id,
    seller_id,
    sale_amount,
    fee_amount_percentage,
    sale_date
FROM fct_seller_sales
ORDER BY sale_id;
```

In [3]:
import pandas as pd
import numpy as np

In [4]:
# Load both CSV files; sale_date is parsed into a datetime column at read time.
df_sales = pd.read_csv(
    'Data/001/fct_seller_sales.csv',
    parse_dates=['sale_date'],
)
df_seller = pd.read_csv('Data/001/dim_seller.csv')

# Display the first five rows of the seller table.
df_seller.head(5)

Unnamed: 0,seller_id,seller_name
0,1,Alpha Sellers
1,2,Beta Traders
2,3,Gamma Goods
3,4,Delta Deals
4,5,Epsilon Emporium


In [4]:
# Display the first five rows of the sales table.
df_sales.head(5)

Unnamed: 0,sale_id,seller_id,sale_amount,fee_amount_percentage,sale_date
0,1,1,500,25,2024-04-05
1,2,2,600,30,2024-04-07
2,3,3,700,35,2024-04-10
3,4,4,800,40,2024-04-15
4,5,5,650,32,2024-04-20


# Pregunta 1

### Para cada vendedor, identifique su transacción de venta más alta en abril de 2024 basándose en el monto de la venta (sale_amount). Si hay varias transacciones con el mismo monto de venta, seleccione la que tenga la fecha de venta (sale_date) más reciente.

In [6]:
# Keep only April 2024 sales. The range is half-open, [Apr 1, May 1), so a
# sale_date that carries a time of day on April 30 is still included.
df_abril = df_sales[
    (df_sales['sale_date'] >= '2024-04-01') & (df_sales['sale_date'] < '2024-05-01')
]

# Within each seller, order sales from highest to lowest amount. Ties on the
# amount go to the most recent sale_date, and sale_id is the final tie-breaker
# so the selected row is deterministic.
df_sorted = df_abril.sort_values(
    by=['seller_id', 'sale_amount', 'sale_date', 'sale_id'],
    ascending=[True, False, False, False]
)

# After the sort, the first row of each seller is that seller's top April sale.
top_sales = df_sorted.drop_duplicates(subset='seller_id', keep='first')

# Attach seller_name from the seller table.
resutlado = top_sales.merge(df_seller, on='seller_id')

resutlado

Unnamed: 0,sale_id,seller_id,sale_amount,fee_amount_percentage,sale_date,seller_name
0,6,1,500,27,2024-04-28,Alpha Sellers
1,2,2,600,30,2024-04-07,Beta Traders
2,3,3,700,35,2024-04-10,Gamma Goods
3,4,4,800,40,2024-04-15,Delta Deals
4,5,5,650,32,2024-04-20,Epsilon Emporium


```SQL
-- Top April 2024 sale per seller: the highest sale_amount, with ties broken
-- by the most recent sale_date.
WITH ranked_sales AS (
    -- Rank each seller's April sales; ranking = 1 marks the top sale.
    SELECT
        seller_id,
        sale_id,
        sale_amount,
        sale_date,
        ROW_NUMBER() OVER (
            PARTITION BY seller_id
            -- sale_id is the last tie-breaker so that two sales with the same
            -- amount and date always resolve to the same row.
            ORDER BY sale_amount DESC, sale_date DESC, sale_id DESC
        ) AS ranking
    FROM fct_seller_sales
    -- sale_date is a DATE column, so this closed range covers all of April.
    WHERE sale_date BETWEEN '2024-04-01' AND '2024-04-30'
)
SELECT
    s.seller_name,
    rs.sale_amount,
    rs.sale_date
FROM ranked_sales AS rs
INNER JOIN dim_seller AS s
    ON rs.seller_id = s.seller_id
WHERE rs.ranking = 1
ORDER BY rs.seller_id;
```

# Pregunta 2

### Durante mayo de 2024, para cada ID de vendedor, genere un resumen semanal que reporte el número total de transacciones de venta y muestre el monto de la comisión (fee) de la venta más reciente de esa semana. Este análisis nos permitirá correlacionar los cambios en las comisiones con las tendencias de rendimiento semanal de los vendedores.

In [5]:
# 1. Keep May 2024 sales and add the ISO week number of each sale.
#    The range is half-open, [May 1, Jun 1), so a sale_date that carries a
#    time of day on May 31 is not dropped.
df_mayo = df_sales[
    (df_sales['sale_date'] >= '2024-05-01') & (df_sales['sale_date'] < '2024-06-01')
].copy()
df_mayo['week'] = df_mayo['sale_date'].dt.isocalendar().week

# 2. Sort so the most recent sale of each seller/week ends up last. sale_id
#    breaks ties between sales on the same date, so the "last" row is
#    deterministic (the highest sale_id wins).
df_mayo = df_mayo.sort_values(['seller_id', 'week', 'sale_date', 'sale_id'])

# 3. One row per seller and week: number of sales, plus the fee value of the
#    final row in the sorted group.
resumen = df_mayo.groupby(['seller_id', 'week']).agg(
    total_transactions=('sale_id', 'count'),
    last_fee_amount=('fee_amount_percentage', 'last')  # 'last' takes the final value after the sort
).reset_index()

resumen

Unnamed: 0,seller_id,week,total_transactions,last_fee_amount
0,1,18,1,28
1,1,20,1,29
2,2,18,1,31
3,2,21,1,32
4,3,19,1,36
5,3,21,1,37
6,4,19,1,41
7,4,21,1,40
8,5,19,1,33
9,5,22,1,34


```SQL
-- Weekly May 2024 summary per seller: number of sales in the week and the
-- fee value of the week's most recent sale.
WITH may_sales AS (
    -- May 2024 sales, each tagged with the week bucket it falls into.
    SELECT
        seller_id,
        sale_id,
        sale_date,
        fee_amount_percentage,
        DATE_TRUNC('week', sale_date) AS sale_week
    FROM fct_seller_sales
    WHERE sale_date BETWEEN '2024-05-01' AND '2024-05-31'
),
ranked_sales AS (
    -- recency_rank = 1 marks the latest sale of each seller/week
    -- (latest sale_date first, then highest sale_id).
    SELECT
        seller_id,
        sale_week,
        fee_amount_percentage,
        ROW_NUMBER() OVER (
            PARTITION BY seller_id, sale_week
            ORDER BY sale_date DESC, sale_id DESC
        ) AS recency_rank
    FROM may_sales
)
SELECT
    seller_id,
    sale_week,
    COUNT(*) AS total_transactions,
    -- Only the rank-1 row contributes a non-NULL value to this MAX.
    MAX(CASE WHEN recency_rank = 1 THEN fee_amount_percentage END) AS last_fee_amount
FROM ranked_sales
GROUP BY seller_id, sale_week
ORDER BY seller_id, sale_week;
```

# Pregunta 3

### Utilizando los datos de junio de 2024, para cada vendedor, cree un reporte diario que calcule el conteo acumulado de transacciones hasta ese día.

In [6]:
# 1. Keep June 2024 sales. The range is half-open, [Jun 1, Jul 1), so a
#    sale_date that carries a time of day on June 30 is not dropped.
df_junio = df_sales[
    (df_sales['sale_date'] >= '2024-06-01') & (df_sales['sale_date'] < '2024-07-01')
].copy()

# 2. Sort by seller and date; sale_id orders sales that share a date so the
#    running count below is deterministic.
df_junio = df_junio.sort_values(['seller_id', 'sale_date', 'sale_id'])

# 3. Running count of sales per seller.
#    cumcount() is zero-based, hence the + 1.
df_junio['cumulative_transactions'] = df_junio.groupby('seller_id').cumcount() + 1

# Keep only the columns needed for the report.
reporte_diario = df_junio[['seller_id', 'sale_date', 'cumulative_transactions']]

reporte_diario

Unnamed: 0,seller_id,sale_date,cumulative_transactions
16,1,2024-06-03,1
17,1,2024-06-05,2
18,1,2024-06-10,3
19,1,2024-06-13,4
20,2,2024-06-04,1
21,2,2024-06-06,2
22,2,2024-06-11,3
23,2,2024-06-15,4
24,3,2024-06-02,1
25,3,2024-06-05,2


```SQL
-- Running count of June 2024 sales per seller, one output row per sale.
SELECT
    seller_id,
    sale_date,
    sale_id,
    COUNT(sale_id) OVER (
        PARTITION BY seller_id
        ORDER BY sale_date, sale_id
        -- An explicit ROWS frame makes each row count itself and the rows
        -- before it, rather than all peers sharing its sort position.
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS cumulative_transactions
FROM fct_seller_sales
-- sale_date is a DATE column, so this closed range covers all of June.
WHERE sale_date BETWEEN '2024-06-01' AND '2024-06-30'
-- The output order matches the window order (including sale_id), so
-- same-day sales are listed in the order they were counted.
ORDER BY seller_id, sale_date, sale_id;
```