# Frecuencia de Compra de Clientes

In [1]:
import pandas as pd
import os
from dotenv import load_dotenv
import numpy as np
import mysql.connector

In [2]:
# Read the database settings from environment variables (load_dotenv() is
# called first) so that no credentials are hard-coded in the notebook.
load_dotenv()
db_host, db_user, db_password, db_name = (
    os.getenv(key) for key in ('DB_HOST', 'DB_USER', 'DB_PASSWORD', 'DB_NAME')
)

In [3]:
# Open the MySQL connection using the settings read from the environment.
connection_settings = {
    'host': db_host,
    'user': db_user,
    'password': db_password,
    'database': db_name,
}
conn = mysql.connector.connect(**connection_settings)

In [4]:
# Rows from order_main created within the last three years.
# NOTE(review): client_name is not used by the visible later cells, only shown
# in displayed outputs — confirm whether it needs to be fetched at all.
query = """
SELECT user_id as usuario, client_name, created_at as fecha
FROM order_main 
WHERE created_at >= DATE_SUB(CURDATE(), INTERVAL 3 YEAR);
"""

# Close the connection even when the query fails, so a failed read does not
# leave the database connection open.
try:
    df = pd.read_sql(query, conn)
finally:
    conn.close()

In [5]:
# Parse the order timestamps and truncate them to midnight so that purchases
# are compared by calendar day.
# No explicit `format` is passed: with format='%Y-%m-%d' and errors='coerce',
# any string value carrying a time component would silently become NaT.
df['fecha'] = pd.to_datetime(df['fecha'], errors='coerce').dt.normalize()
df.head()

Unnamed: 0,usuario,client_name,fecha
0,709.0,Cecilia Jarquín,2021-10-22
1,709.0,Cecilia Jarquín,2021-10-22
2,710.0,luciana palomo,2021-10-24
3,718.0,Karol Flamenco,2021-10-25
4,720.0,Wendy yamilet,2021-10-26


In [6]:
# Check the column dtypes after parsing; `fecha` must be a datetime column for
# the `.dt` accessor used in a later cell.
df.dtypes

usuario               float64
client_name            object
fecha          datetime64[ns]
dtype: object

In [7]:
# Duplicate the purchase date into two helper columns; they are aggregated
# later into each user's first (min) and last (max) purchase date.
for date_column in ('Finicio', 'Ffinal'):
    df[date_column] = df['fecha']
df.head()

Unnamed: 0,usuario,client_name,fecha,Finicio,Ffinal
0,709.0,Cecilia Jarquín,2021-10-22,2021-10-22,2021-10-22
1,709.0,Cecilia Jarquín,2021-10-22,2021-10-22,2021-10-22
2,710.0,luciana palomo,2021-10-24,2021-10-24,2021-10-24
3,718.0,Karol Flamenco,2021-10-25,2021-10-25,2021-10-25
4,720.0,Wendy yamilet,2021-10-26,2021-10-26,2021-10-26


In [8]:
# Express each purchase date as a signed day offset from a fixed reference
# date. Only the gaps between a user's offsets are used further down, so the
# particular reference date chosen does not affect the resulting frequencies.
fecha_referencia = pd.Timestamp('2023-06-01')
df['Rango'] = (df['fecha'] - fecha_referencia).dt.days
df.head()

Unnamed: 0,usuario,client_name,fecha,Finicio,Ffinal,Rango
0,709.0,Cecilia Jarquín,2021-10-22,2021-10-22,2021-10-22,-587
1,709.0,Cecilia Jarquín,2021-10-22,2021-10-22,2021-10-22,-587
2,710.0,luciana palomo,2021-10-24,2021-10-24,2021-10-24,-585
3,718.0,Karol Flamenco,2021-10-25,2021-10-25,2021-10-25,-584
4,720.0,Wendy yamilet,2021-10-26,2021-10-26,2021-10-26,-583


In [9]:
# First and last purchase date per user.
# The built-in 'min'/'max' reducers replace the lambda wrappers: same result,
# but pandas can use its optimized group-wise implementation instead of
# calling a Python function once per group.
grupo1 = (
    df.groupby('usuario')
    .agg({'Finicio': 'min', 'Ffinal': 'max'})
    .reset_index()
)
grupo1.head(100)

Unnamed: 0,usuario,Finicio,Ffinal
0,1.0,2021-10-27,2024-10-08
1,28.0,2022-02-17,2024-07-27
2,41.0,2022-04-15,2024-09-20
3,44.0,2021-11-25,2021-11-25
4,75.0,2023-03-21,2023-03-24
...,...,...,...
95,1433.0,2021-11-17,2021-11-17
96,1444.0,2023-08-30,2023-08-30
97,1461.0,2021-11-19,2021-11-19
98,1470.0,2022-08-24,2022-09-25


In [10]:
# Collect, for every user, the distinct day offsets on which they purchased
# (a set, so several orders on the same day count once).
rango_por_usuario = df.groupby('usuario')['Rango']
grupo2 = rango_por_usuario.apply(set).reset_index()
grupo2.head(100)

Unnamed: 0,usuario,Rango
0,1.0,"{-512, 1, 2, 6, 12, 13, 14, 15, 16, 18, 19, 21..."
1,28.0,"{-64, -63, 128, 256, 422, -469, -20, -434, 398..."
2,41.0,"{130, 258, 264, 269, 15, 18, 405, 410, 284, 28..."
3,44.0,{-553}
4,75.0,"{-72, -69}"
...,...,...
95,1433.0,{-561}
96,1444.0,{90}
97,1461.0,{-559}
98,1470.0,"{-249, -281}"


In [11]:
# Attach each user's set of purchase-day offsets to their first/last dates.
# Join on the 'usuario' key rather than on row position: a positional join is
# only correct while both grouped frames happen to list users in the same
# order, and would silently pair the wrong rows otherwise.
# validate='one_to_one' makes pandas raise if either side has duplicate users.
data = pd.merge(grupo1, grupo2, on='usuario', how='outer', validate='one_to_one')
data.head()

Unnamed: 0,usuario,Finicio,Ffinal,Rango
0,1.0,2021-10-27,2024-10-08,"{-512, 1, 2, 6, 12, 13, 14, 15, 16, 18, 19, 21..."
1,28.0,2022-02-17,2024-07-27,"{-64, -63, 128, 256, 422, -469, -20, -434, 398..."
2,41.0,2022-04-15,2024-09-20,"{130, 258, 264, 269, 15, 18, 405, 410, 284, 28..."
3,44.0,2021-11-25,2021-11-25,{-553}
4,75.0,2023-03-21,2023-03-24,"{-72, -69}"


In [12]:
# For every user, sort the distinct purchase-day offsets and measure the gap
# (in days) between consecutive purchases. Per user we keep:
#   Dif      - the list of gaps
#   Promedio - mean gap (NaN when the user has fewer than two purchase days)
#   Desv     - np.std of the gaps with its default settings (NaN likewise)
#   N        - number of gaps
lista = data['Rango'].tolist()
Promedio = []
Desv = []
Dif = []
N = []
for offsets in lista:
    ordered = sorted(offsets)
    # Pair each day with the next one to get consecutive differences.
    gaps = [later - earlier for earlier, later in zip(ordered, ordered[1:])]
    has_gaps = len(gaps) > 0
    Promedio.append(np.mean(gaps) if has_gaps else np.nan)
    Desv.append(np.std(gaps) if has_gaps else np.nan)
    Dif.append(gaps)
    N.append(len(gaps))
    

In [13]:
# Bundle the per-user result lists; the order here must match the column
# names assigned to the resulting frame in the next cell.
resultados = [Dif, Promedio, Desv, N]

In [14]:
# Build the per-user statistics table column by column so every column gets a
# proper dtype (float for the mean/std, integer for the count). Transposing a
# list of lists instead leaves all four columns as generic `object` dtype,
# which breaks or slows numeric operations on them.
nombres_columnas = ['diferencia_entre_fechas', 'frecuencia_compra', 'desviacion_estandar', 'cantidad_elementos']
calculos = pd.DataFrame(dict(zip(nombres_columnas, resultados)))

In [15]:
# Put the user identifiers and first/last purchase dates next to the computed
# statistics, aligning the two frames on their row index.
resumen_usuarios = data[['usuario', 'Finicio', 'Ffinal']]
resultado_final = resumen_usuarios.join(calculos, how='outer')
resultado_final.head()

Unnamed: 0,usuario,Finicio,Ffinal,diferencia_entre_fechas,frecuencia_compra,desviacion_estandar,cantidad_elementos
0,1.0,2021-10-27,2024-10-08,"[13, 1, 1, 2, 1, 1, 4, 2, 5, 10, 1, 10, 1, 3, ...",4.873303,6.02951,221
1,28.0,2022-02-17,2024-07-27,"[35, 69, 301, 1, 43, 148, 128, 142, 1, 23]",89.1,88.335101,10
2,41.0,2022-04-15,2024-09-20,"[15, 184, 6, 20, 10, 14, 2, 5, 134, 15, 1, 21,...",30.655172,40.702686,29
3,44.0,2021-11-25,2021-11-25,[],,,0
4,75.0,2023-03-21,2023-03-24,[3],3.0,0.0,1


In [16]:
# Write the per-user purchase-frequency table to disk (row index included).
ruta_salida = 'frecuenta_compra.csv'
resultado_final.to_csv(ruta_salida)