## Caso de Negocio

A una empresa vendedora de bebidas le interesa conocer las compras más frecuentes de los consumidores al momento de adquirir un refresco de la marca Coca-Cola. Por ende, nos fijamos en los pedidos que incluyen algún producto o derivado de Coca-Cola, así como en aquellos pedidos que sean mayores a la media de los productos comprados.

### Preparación de Ambiente

In [123]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from collections import defaultdict
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules


### Cargar Dataframe

In [124]:
# Load the raw beverage-sales export; the file is decoded as Latin-1.
df = pd.read_csv(
    filepath_or_buffer='data/ventas_bebidas.csv',
    encoding='latin-1',
)

### Limpieza y Transformación de Datos

In [125]:
# Columns removed from the working frame (listed in lowercase, so headers are
# lowercased before the drop).
columns_to_drop = ['producto', 'producto_desc', 'monto', 'precio_unidad', 'contador', 'segmento']

# Lowercase every header, then discard the unused columns in a single chain.
df = df.rename(columns=str.lower).drop(columns=columns_to_drop)

In [126]:
# Parse the date strings (day-month-two-digit-year) into datetimes.
df = df.assign(fecha=pd.to_datetime(df['fecha'], format='%d-%m-%y'))

def obtener_estacion(fecha):
    """Return the season name for a date.

    Args:
        fecha: Object exposing ``month`` and ``day`` attributes
            (e.g. ``datetime.date`` or ``pandas.Timestamp``).

    Returns:
        'Invierno', 'Primavera', 'Verano' or 'Otoño'; ``None`` when the
        month/day pair matches none of the ranges below.
    """
    mes, dia = fecha.month, fecha.day

    # Each row: (season, opening month, first day in it,
    #            months fully inside the season,
    #            closing month, last day in it)
    limites = (
        ('Invierno', 12, 21, (1, 2), 3, 20),     # 21 Dec - 20 Mar
        ('Primavera', 3, 21, (4, 5), 6, 20),     # 21 Mar - 20 Jun
        ('Verano', 6, 21, (7, 8), 9, 22),        # 21 Jun - 22 Sep
        ('Otoño', 9, 23, (10, 11), 12, 20),      # 23 Sep - 20 Dec
    )

    for estacion, mes_inicio, dia_inicio, meses_completos, mes_fin, dia_fin in limites:
        en_tramo_inicial = mes == mes_inicio and dia >= dia_inicio
        en_tramo_final = mes == mes_fin and dia <= dia_fin
        if en_tramo_inicial or mes in meses_completos or en_tramo_final:
            return estacion

    return None

# Label every row with its season, then discard the raw date column.
df = (
    df
    .assign(estacion=df['fecha'].apply(obtener_estacion))
    .drop(columns=['fecha'])
)

In [127]:
def categorizar_temperatura(temp):
    """Bucket an average temperature into a climate label.

    Args:
        temp: Numeric temperature, or a missing value (``None`` / NaN).
            NOTE(review): thresholds look like degrees Celsius - confirm.

    Returns:
        'Frío' for temp < 15, 'Templado' for 15 <= temp < 25,
        'Cálido' for 25 <= temp < 35, 'Muy Cálido' for temp >= 35,
        and ``None`` when ``temp`` is missing.
    """
    # NaN fails every `<` comparison, so without this guard a missing reading
    # would fall through to the last branch and be labelled 'Muy Cálido'.
    if pd.isna(temp):
        return None

    # Each check only runs when the previous one failed, so the lower bound
    # of every band is implied.
    if temp < 15:
        return 'Frío'
    if temp < 25:
        return 'Templado'
    if temp < 35:
        return 'Cálido'
    return 'Muy Cálido'

# Swap the numeric average temperature for its climate label.
df = (
    df
    .assign(clima=df['prom_temperatura'].apply(categorizar_temperatura))
    .drop(columns=['prom_temperatura'])
)

In [128]:
def categorizar_unidades(unidades):
    """Bucket a unit count into a volume label.

    Args:
        unidades: Numeric quantity, or a missing value (``None`` / NaN).

    Returns:
        'poco' for unidades <= 50, 'moderado' for 50 < unidades <= 200,
        'mucho' for unidades > 200, and ``None`` when ``unidades`` is missing.
    """
    # NaN fails every comparison and would otherwise be labelled 'mucho'.
    if pd.isna(unidades):
        return None

    if unidades <= 50:
        return 'poco'
    # No explicit lower bound: the previous `51 <= unidades` check left a gap,
    # so fractional values such as 50.5 skipped this band and became 'mucho'.
    if unidades <= 200:
        return 'moderado'
    return 'mucho'

# Swap the raw unit count for its volume label.
df = (
    df
    .assign(cat_unidades=df['unidades'].apply(categorizar_unidades))
    .drop(columns=['unidades'])
)

In [129]:
# Map each distinct store value to a sequential label ('Tienda 1', 'Tienda 2', ...),
# numbered in the order the values first appear in the column.
# A plain dict is used on purpose: `defaultdict()` with no factory never creates
# defaults, so it only obscured the intent.
tienda_cat = {
    tienda: f'Tienda {i}'
    for i, tienda in enumerate(df['tienda'].unique(), start=1)
}

# Every value in the column is a key of `tienda_cat` (it was built from the
# column's own unique values), so `map` relabels each row with a direct lookup
# instead of `replace`'s per-key substitution.
df['tienda'] = df['tienda'].map(tienda_cat)

### Selección de Variables

In [130]:
df.head()

Unnamed: 0,pedido,tienda,sabor,marca,segmento_desc,municipio,estacion,clima,cat_unidades
0,1000244507,Tienda 1,MANZANA,DEL VALLE,EXTRA GRANDE BAJO AUDITABLE,GUADALUPE,Verano,Cálido,poco
1,1000242608,Tienda 1,MANGO,DEL VALLE,EXTRA GRANDE BAJO AUDITABLE,GUADALUPE,Verano,Cálido,poco
2,1000250602,Tienda 2,MANGO,DEL VALLE,MEDIANO BAJO AUDITABLE,SAN NICOLAS DE LOS GZA,Verano,Cálido,poco
3,1000239117,Tienda 3,MANGO,DEL VALLE,GRANDE BAJO AUDITABLE,SAN NICOLAS DE LOS GZA,Verano,Cálido,poco
4,1000244507,Tienda 1,MANGO,DEL VALLE,EXTRA GRANDE BAJO AUDITABLE,GUADALUPE,Verano,Cálido,poco


In [157]:
# New item column -> context column that gets appended to the flavour.
columnas_combinadas = {
    'sabor_estacion': 'estacion',
    'sabor_clima': 'clima',
    'sabor_cat': 'cat_unidades',
}

# Build the "<flavour> - <context>" items used as basket columns further down.
df = df.assign(**{
    nueva: df['sabor'] + ' - ' + df[origen]
    for nueva, origen in columnas_combinadas.items()
})

## Apriori


### Situación 1:

In [189]:
# One row per order (pedido), one column per "flavour - season" item;
# each cell counts how many lines of the order carry that item.
basket = df.pivot_table(index=['pedido'], columns='sabor_estacion', aggfunc='size', fill_value=0)

# Keep the presence flags as booleans: mlxtend expects a boolean one-hot frame
# and emits a DeprecationWarning (slower path) for non-bool dtypes such as int.
basket = basket > 0

# Run Apriori, keeping itemsets whose support reaches 0.21
frequent_itemsets = apriori(basket, min_support=0.21, use_colnames=True)



In [190]:
# Derive association rules from the frequent itemsets, filtering on confidence
# with a threshold of 0.5.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.5,
    metric="confidence",
)

In [191]:
rules[:]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(MANZANA - Invierno),(COLA - Invierno),0.214541,0.271752,0.212157,0.988889,3.638938,0.153855,65.542312,0.923275
1,(COLA - Invierno),(MANZANA - Invierno),0.271752,0.214541,0.212157,0.780702,3.638938,0.153855,3.581692,0.995807
2,(MANZANA - Primavera),(COLA - Primavera),0.231625,0.281685,0.225268,0.972556,3.452641,0.160023,26.173619,0.924505
3,(COLA - Primavera),(MANZANA - Primavera),0.281685,0.231625,0.225268,0.799718,3.452641,0.160023,3.836464,0.988934
4,(NARANJA - Primavera),(COLA - Primavera),0.21772,0.281685,0.21176,0.972628,3.452897,0.150432,26.242458,0.908099
5,(COLA - Primavera),(NARANJA - Primavera),0.281685,0.21772,0.21176,0.751763,3.452897,0.150432,3.151346,0.988964


### Situación 2

In [182]:
# One row per order (pedido), one column per "flavour - climate" item;
# each cell counts how many lines of the order carry that item.
basket = df.pivot_table(index=['pedido'], columns='sabor_clima', aggfunc='size', fill_value=0)

# Keep the presence flags as booleans: mlxtend expects a boolean one-hot frame
# and emits a DeprecationWarning (slower path) for non-bool dtypes such as int.
basket = basket > 0

# Run Apriori, keeping itemsets whose support reaches 0.43
frequent_itemsets = apriori(basket, min_support=0.43, use_colnames=True)



In [183]:
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.562177,(COLA - Cálido)
1,0.479539,(MANZANA - Cálido)
2,0.453317,(NARANJA - Cálido)
3,0.468415,"(MANZANA - Cálido, COLA - Cálido)"
4,0.44259,"(COLA - Cálido, NARANJA - Cálido)"
5,0.431863,"(MANZANA - Cálido, NARANJA - Cálido)"


In [184]:
# Derive association rules from the frequent itemsets, filtering on confidence
# with a threshold of 0.5.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.5,
    metric="confidence",
)

In [185]:
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(MANZANA - Cálido),(COLA - Cálido),0.479539,0.562177,0.468415,0.976802,1.737534,0.198829,18.87329,0.815569
1,(COLA - Cálido),(MANZANA - Cálido),0.562177,0.479539,0.468415,0.833216,1.737534,0.198829,3.12056,0.969506
2,(COLA - Cálido),(NARANJA - Cálido),0.562177,0.453317,0.44259,0.787279,1.736706,0.187746,2.569953,0.968879
3,(NARANJA - Cálido),(COLA - Cálido),0.453317,0.562177,0.44259,0.976337,1.736706,0.187746,18.502067,0.775948
4,(MANZANA - Cálido),(NARANJA - Cálido),0.479539,0.453317,0.431863,0.90058,1.986643,0.21448,5.498715,0.954228
5,(NARANJA - Cálido),(MANZANA - Cálido),0.453317,0.479539,0.431863,0.952673,1.986643,0.21448,10.997145,0.908458


### Situación 3

In [194]:
# One row per order (pedido), one column per "flavour - volume category" item;
# each cell counts how many lines of the order carry that item.
basket = df.pivot_table(index=['pedido'], columns='sabor_cat', aggfunc='size', fill_value=0)

# Keep the presence flags as booleans: mlxtend expects a boolean one-hot frame
# and emits a DeprecationWarning (slower path) for non-bool dtypes such as int.
basket = basket > 0

# Run Apriori, keeping itemsets whose support reaches 0.70
frequent_itemsets = apriori(basket, min_support=0.70, use_colnames=True)



In [195]:
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.946365,(COLA - poco)
1,0.787843,(MANZANA - poco)
2,0.741756,(NARANJA - poco)
3,0.704013,(PONCHE - poco)
4,0.769964,"(COLA - poco, MANZANA - poco)"
5,0.720699,"(NARANJA - poco, COLA - poco)"
6,0.705205,"(NARANJA - poco, MANZANA - poco)"


In [196]:
# Derive association rules from the frequent itemsets, filtering on confidence
# with a threshold of 0.5.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.5,
    metric="confidence",
)

In [197]:
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(COLA - poco),(MANZANA - poco),0.946365,0.787843,0.769964,0.813602,1.032696,0.024378,1.138195,0.590299
1,(MANZANA - poco),(COLA - poco),0.787843,0.946365,0.769964,0.977307,1.032696,0.024378,2.363528,0.149233
2,(NARANJA - poco),(COLA - poco),0.741756,0.946365,0.720699,0.971612,1.026678,0.018727,1.889379,0.100623
3,(COLA - poco),(NARANJA - poco),0.946365,0.741756,0.720699,0.761545,1.026678,0.018727,1.082988,0.484479
4,(NARANJA - poco),(MANZANA - poco),0.741756,0.787843,0.705205,0.950723,1.206742,0.120818,4.30541,0.663414
5,(MANZANA - poco),(NARANJA - poco),0.787843,0.741756,0.705205,0.895108,1.206742,0.120818,2.462008,0.807527


## Evaluación de Modelos