In [1]:
import pandas as pd
import funciones.test_func as tf
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

load_dotenv()

host=os.getenv("DB_HOST2")
port=os.getenv("DB_PORT2")
database=os.getenv("DB_NAME2")
user=os.getenv("DB_USER2")
password=os.getenv("DB_PASS2")

engine = create_engine(f'postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}')

tables = tf.excecute_query_tables(engine)

dfs = tf.create_df_variable(tables, engine)
transactions = dfs['transactions']
customers= dfs['customers']
products = dfs['products']

In [2]:
transactions

Unnamed: 0,transaction_id,customer_id,product_id,transaction_date,amount,type,payment_method
0,1,101,201,2023-01-05,200,credit,card
1,2,102,202,2023-01-07,-100,debit,paypal
2,3,101,201,2023-01-15,500,credit,transfer
3,4,103,203,2023-01-20,1200,credit,card
4,5,102,204,2023-02-02,-50,debit,paypal
5,6,101,205,2023-02-14,-300,debit,card
6,7,104,202,2023-02-18,400,credit,cash
7,8,103,206,2023-03-01,-200,debit,cash
8,9,102,207,2023-03-02,1000,credit,transfer
9,10,101,201,2023-03-10,-100,debit,paypal


In [3]:
customers

Unnamed: 0,customer_id,name,age,country,signup_date
0,101,Alice,29,USA,2022-11-10
1,102,Bob,35,Canada,2022-12-15
2,103,Charlie,42,USA,2023-01-01
3,104,Diana,31,UK,2023-02-10


In [4]:
products

Unnamed: 0,product_id,product_name,category,price
0,201,Subscription A,subscription,200
1,202,E-Book,digital,100
2,203,Online Course,education,1200
3,204,Gift Card,gift,50
4,205,Consulting Call,service,300
5,206,Webinar,event,200
6,207,Premium Upgrade,upgrade,1000


🛒 Ejercicio 3 – Producto más comprado por cliente

Contexto

Eres el analista de datos de una tienda virtual que ofrece diversos productos digitales y servicios. La empresa está interesada en conocer qué producto es el más popular para cada cliente, con el fin de mejorar la personalización de las campañas de marketing y sugerencias de compra.

Para ello, cuentas con tres tablas relacionadas:

- transactions: contiene las transacciones de los clientes (incluyendo tipo de transacción, producto, fecha, etc.)
- customers: contiene la información básica de los clientes
- products: contiene el detalle de los productos disponibles en la tienda

Objetivo del análisis:
Identificar el producto más comprado por cada cliente, considerando únicamente las transacciones exitosas de tipo 'credit'.

Requisitos del reto

Usar solo transacciones con type = 'credit'.

El resultado debe contener las siguientes columnas:

customer_id

customer_name

top_product_id

top_product_name

purchase_count (número de veces que el cliente compró ese producto)

Si un cliente tiene varios productos con el mismo número de compras, puedes devolver cualquiera de ellos.

Ordenar el resultado por customer_id en orden ascendente.

Bonus

Si implementas la solución en SQL, intenta usar una Common Table Expression (CTE).

In [7]:
#SQL 

query = """
WITH product_ranking AS (

SELECT c.customer_id, c.name, p.product_id, p.product_name, COUNT(transaction_id) AS purchase_count, ROW_NUMBER() OVER(PARTITION BY c.customer_id ORDER BY COUNT(transaction_id) DESC) AS rn
FROM transactions AS t
JOIN products AS p
ON t.product_id = p.product_id
JOIN customers AS c
ON t.customer_id = c.customer_id
WHERE t.type = 'credit'
GROUP BY c.customer_id, c.name, p.product_id, p.product_name
ORDER BY purchase_count DESC

)

SELECT customer_id, name, product_id AS top_product_id, product_name AS top_product_name, purchase_count
FROM product_ranking AS p
WHERE rn = 1
ORDER BY customer_id

"""

tf.excecute_query(query, engine)

Unnamed: 0,customer_id,name,top_product_id,top_product_name,purchase_count
0,101,Alice,201,Subscription A,2
1,102,Bob,207,Premium Upgrade,1
2,103,Charlie,203,Online Course,1
3,104,Diana,202,E-Book,1


In [19]:
#Python
merged = pd.merge(pd.merge(transactions, customers, on='customer_id'), products, on='product_id')
merged = merged[merged['type'] == 'credit']
merged = merged.groupby(['customer_id','name', 'product_id', 'product_name'])['transaction_id'].count().reset_index().rename(columns={'transaction_id': 'purchase_count'}).sort_values(by='purchase_count', ascending=False)
merged['rn'] = merged.groupby('customer_id').cumcount()+1
merged = merged[merged['rn'] == 1].drop(columns='rn').sort_values(by='customer_id')
merged

Unnamed: 0,customer_id,name,product_id,product_name,purchase_count
0,101,Alice,201,Subscription A,2
1,102,Bob,207,Premium Upgrade,1
2,103,Charlie,203,Online Course,1
3,104,Diana,202,E-Book,1
