# Stripe - Connect Marketplace Payout Performance Insights

```SQL
-- Seller dimension: maps each seller_id to a seller_segment.
-- No primary key or NOT NULL constraint is declared on either column.
CREATE TABLE dim_sellers (
    seller_id integer,
    seller_segment varchar
);

-- Payout fact table: each row records a payout's id, seller, status and date.
-- No foreign key to dim_sellers is declared; the queries below join on seller_id.
CREATE TABLE fct_payouts (
    payout_id integer,
    seller_id integer,
    payout_status varchar,
    payout_date date
);

-- Sample sellers: ids 1-10 spread across the 'bronze', 'silver' and 'gold' segments.
INSERT INTO dim_sellers (seller_id, seller_segment)
VALUES
    (1, 'bronze'),
    (2, 'silver'),
    (3, 'gold'),
    (4, 'silver'),
    (5, 'bronze'),
    (6, 'gold'),
    (7, 'silver'),
    (8, 'bronze'),
    (9, 'gold'),
    (10, 'silver');

-- Sample payouts with status 'successful' or 'failed'.
-- Every row is dated July 2024 except payout_id 8 (2024-06-28), which the
-- July date filters in the analysis queries exclude.
INSERT INTO fct_payouts (payout_id, seller_id, payout_status, payout_date)
VALUES
    (1, 1, 'successful', '2024-07-05'),
    (2, 1, 'failed', '2024-07-18'),
    (3, 5, 'successful', '2024-07-10'),
    (4, 5, 'successful', '2024-07-21'),
    (5, 5, 'successful', '2024-07-29'),
    (6, 8, 'failed', '2024-07-15'),
    (7, 2, 'successful', '2024-07-06'),
    (8, 2, 'successful', '2024-06-28'),
    (9, 2, 'failed', '2024-07-25'),
    (10, 4, 'successful', '2024-07-08'),
    (11, 4, 'successful', '2024-07-22'),
    (12, 7, 'failed', '2024-07-12'),
    (13, 10, 'successful', '2024-07-14'),
    (14, 10, 'failed', '2024-07-24'),
    (15, 3, 'successful', '2024-07-09'),
    (16, 3, 'successful', '2024-07-23'),
    (17, 6, 'successful', '2024-07-11'),
    (18, 6, 'successful', '2024-07-27'),
    (19, 9, 'successful', '2024-07-13'),
    (20, 9, 'failed', '2024-07-26');


-- Dump every row of both tables to check the loaded sample data.
SELECT * FROM dim_sellers;

SELECT * FROM fct_payouts;
```

In [1]:
import pandas as pd
import numpy as np

In [4]:
# Read the seller dimension and the payout fact table from their CSV exports.
df_sellers = pd.read_csv('Data/021/dim_sellers.csv')
df_payouts = pd.read_csv(
    'Data/021/fct_payouts.csv',
    parse_dates=['payout_date'],  # parse payout_date into datetimes at load time
)

# Preview the first rows of the seller table.
df_sellers.head()

Unnamed: 0,seller_id,seller_segment
0,1,bronze
1,2,silver
2,3,gold
3,4,silver
4,5,bronze


In [5]:
# Preview the first five rows of the payout table (5 is head()'s default).
df_payouts.head(5)

Unnamed: 0,payout_id,seller_id,payout_status,payout_date
0,1,1,successful,2024-07-05
1,2,1,failed,2024-07-18
2,3,5,successful,2024-07-10
3,4,5,successful,2024-07-21
4,5,5,successful,2024-07-29


# Pregunta 1

### ¿Cuál es el número total de pagos realizados por cada segmento de vendedor en julio de 2024?

In [7]:
# Attach each payout to its seller's segment (inner join on seller_id).
df_merge = pd.merge(df_sellers, df_payouts, on='seller_id')

# Keep only payouts dated within July 2024; both bounds are inclusive.
df_july = df_merge.loc[
    lambda d: (d['payout_date'] >= '2024-07-01') & (d['payout_date'] <= '2024-07-31')
]

# Count the payouts of each segment into a 'total_payout' column.
repuesta1 = (
    df_july.groupby('seller_segment')
    .agg(total_payout=('payout_id', 'count'))
    .reset_index()
)

repuesta1


Unnamed: 0,seller_segment,total_payout
0,bronze,6
1,gold,6
2,silver,7


```SQL
-- Total number of payouts per seller segment for July 2024 (inclusive bounds).
SELECT
    d.seller_segment,
    COUNT(f.payout_id) AS total_payout
FROM dim_sellers d
INNER JOIN fct_payouts f
    ON f.seller_id = d.seller_id
WHERE f.payout_date >= '2024-07-01'
  AND f.payout_date <= '2024-07-31'
GROUP BY d.seller_segment;
```

# Pregunta 2

### Identifique el segmento de vendedor con la mayor tasa de éxito de pagos (payout success rate) en julio de 2024, comparando los pagos exitosos y fallidos.

In [None]:
# Join every payout to its seller's segment.
df_merge = pd.merge(df_sellers, df_payouts, on='seller_id')

# Make sure payout_date is a datetime column before filtering by range.
df_merge['payout_date'] = pd.to_datetime(df_merge['payout_date'])

# Flag successful payouts; the status is lower-cased first so the match
# does not depend on letter case.
df_merge['is_success'] = df_merge['payout_status'].str.lower().eq('successful')

# Restrict to July 2024 (between() includes both bounds).
df_july = df_merge.loc[
    lambda d: d['payout_date'].between('2024-07-01', '2024-07-31')
]

# Success rate per segment in percent: mean of the boolean flag times 100.
success_analysis = df_july.groupby('seller_segment')['is_success'].mean().mul(100)

# Best-performing segment and its rate; only the rate is displayed below.
mejor_segmento = success_analysis.idxmax()
valor_maximo = success_analysis.max()

valor_maximo


np.float64(83.33333333333334)

```SQL
-- Seller segment with the highest payout success rate in July 2024.
-- success_rate = successful payouts * 100.0 / all payouts of the segment;
-- the 100.0 literal keeps the division from being done in integers.
SELECT
    s.seller_segment,
    COUNT(CASE WHEN p.payout_status = 'successful' THEN 1 END) * 100.0 / COUNT(*) AS success_rate
FROM fct_payouts p
JOIN dim_sellers s ON p.seller_id = s.seller_id
WHERE p.payout_date BETWEEN '2024-07-01' AND '2024-07-31'
GROUP BY s.seller_segment
-- Tie-break on the segment name so LIMIT 1 returns a deterministic row
-- when two segments share the top rate.
ORDER BY success_rate DESC, s.seller_segment ASC
LIMIT 1;
```

# Pregunta 3 

### ¿Qué porcentaje de los pagos fueron exitosos frente a fallidos para cada segmento de vendedor en julio de 2024, y cómo puede utilizarse esta información para recomendar mejoras específicas?

In [13]:
# Payouts dated in July 2024 (inclusive bounds); .copy() detaches the slice from df_merge.
df_july = df_merge[df_merge['payout_date'].between('2024-07-01', '2024-07-31')].copy()

# Share of each payout_status within every segment, as a percentage.
# fill_value=0 makes a status that never occurs in a segment show up as 0
# instead of NaN, which is what the SQL COUNT(CASE ...) version returns.
reporte = (
    df_july.groupby('seller_segment')['payout_status']
    .value_counts(normalize=True)
    .unstack(fill_value=0)
    * 100
)

# Rename the status columns to the rate names used by the SQL query.
reporte = reporte.rename(columns={'successful': 'success_rate', 'failed': 'failed_rate'})

reporte

payout_status,failed_rate,success_rate
seller_segment,Unnamed: 1_level_1,Unnamed: 2_level_1
bronze,33.333333,66.666667
gold,16.666667,83.333333
silver,42.857143,57.142857


```SQL
-- Percentage of successful and failed payouts per seller segment, July 2024.
-- Both rates share the same denominator: all July payouts of the segment.
SELECT
    s.seller_segment,
    COUNT(CASE WHEN f.payout_status = 'successful' THEN 1 END) * 100.0 / COUNT(*) AS success_rate,
    COUNT(CASE WHEN f.payout_status = 'failed' THEN 1 END) * 100.0 / COUNT(*) AS failed_rate
FROM fct_payouts f
INNER JOIN dim_sellers s
    ON s.seller_id = f.seller_id
WHERE f.payout_date >= '2024-07-01'
  AND f.payout_date <= '2024-07-31'
GROUP BY s.seller_segment;
```