In [None]:
# MÁSTER DE CIENCIA DE DATOS
# MÉTODOS DESCRIPTIVOS
# Dr. José Raúl Romero, 2022

# Haz una copia local de este notebook para realizar modificaciones

# Instalamos mlxtend
!pip install mlxtend



In [None]:
# Importamos paquetes
import pandas as pd

from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [None]:
# Load the dataset: read the CSV file (path is relative to the current
# working directory) into a DataFrame and show it as the cell output.
df = pd.read_csv(filepath_or_buffer='bankdata_py.csv')
df


FileNotFoundError: ignored

In [None]:
# Apply whatever preprocessing the dataset needs.
# As an example, for this dataset we do not care how many children the loan
# applicant has, only whether the applicant has children or not: a count of
# exactly 0 becomes 'NO', any other value becomes 'YES'.
# NOTE: this cell is not idempotent. Running it a second time on the already
# converted frame would turn every value into 'YES' (because 'NO' != 0) and the
# drop below would raise KeyError, so re-run the loading cell first.
df["children"] = ['NO' if x == 0 else 'YES' for x in df["children"]]
# The id column is not of interest either (it carries no useful information).
# `columns=` is used because passing the axis positionally to DataFrame.drop
# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
df = df.drop(columns="id")
df

  


Unnamed: 0,age,sex,region,income,married,children,car,save_act,current_act,mortgage,pep
0,adulto,FEMALE,INNER_CITY,'BAJO',NO,YES,NO,NO,NO,NO,YES
1,adulto,MALE,TOWN,'MEDIO',YES,YES,YES,NO,YES,YES,NO
2,mayor,FEMALE,INNER_CITY,'BAJO',YES,NO,YES,YES,YES,NO,NO
3,joven,FEMALE,TOWN,'BAJO',YES,YES,NO,NO,YES,NO,NO
4,mayor,FEMALE,RURAL,'ALTO',YES,NO,NO,YES,NO,NO,NO
...,...,...,...,...,...,...,...,...,...,...,...
595,mayor,FEMALE,INNER_CITY,'ALTO',NO,YES,YES,YES,YES,YES,NO
596,joven,FEMALE,INNER_CITY,'BAJO',YES,NO,YES,YES,YES,NO,NO
597,joven,FEMALE,TOWN,'BAJO',YES,NO,YES,YES,NO,NO,YES
598,joven,MALE,INNER_CITY,'BAJO',YES,NO,NO,YES,NO,YES,NO


In [None]:
# Apriori requires binary variables, so the columns are converted into
# indicator columns with pandas' get_dummies function.
# drop_first=False keeps one indicator column for every category.
# dtype=bool is requested explicitly: the default dtype of get_dummies differs
# between pandas versions, and mlxtend emits a DeprecationWarning (and is
# slower) when it is given a non-boolean DataFrame.
df = pd.get_dummies(df, drop_first=False, dtype=bool)
df

Unnamed: 0,age_adulto,age_joven,age_mayor,sex_FEMALE,sex_MALE,region_INNER_CITY,region_RURAL,region_SUBURBAN,region_TOWN,income_'ALTO',income_'BAJO',income_'MEDIO',married_NO,married_YES,children_NO,children_YES,car_NO,car_YES,save_act_NO,save_act_YES,current_act_NO,current_act_YES,mortgage_NO,mortgage_YES,pep_NO,pep_YES
0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,1,0,1,0,1,0,0,1
1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,1,1,0
2,0,0,1,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0
3,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0
4,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,1,0,1,1,0
596,0,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0
597,0,1,0,1,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1
598,0,1,0,0,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,1,0


In [None]:
# STEP 1: generate the frequent itemsets
# Minimum support threshold (minsup) = 0.2
# Feel free to try other combinations of min_support and min_conf
frequent_itemsets = apriori(
    df,
    use_colnames=True,  # label itemsets with column names instead of indices
    min_support=0.2,
)
frequent_itemsets


Unnamed: 0,support,itemsets
0,0.356667,(age_adulto)
1,0.325000,(age_joven)
2,0.318333,(age_mayor)
3,0.500000,(sex_FEMALE)
4,0.500000,(sex_MALE)
...,...,...
221,0.236667,"(mortgage_NO, save_act_YES, current_act_YES, m..."
222,0.216667,"(save_act_YES, pep_NO, current_act_YES, marrie..."
223,0.200000,"(mortgage_NO, save_act_YES, pep_NO, married_YES)"
224,0.215000,"(mortgage_NO, pep_NO, current_act_YES, married..."


In [None]:
# STEP 2: generate the association rules
# Rules are selected on the confidence metric with min_threshold = 0.8
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.8,
    metric="confidence",
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(age_joven),(income_'BAJO'),0.325,0.475,0.29,0.892308,1.878543,0.135625,4.875
1,"(current_act_YES, age_joven)",(income_'BAJO'),0.255,0.475,0.23,0.901961,1.898865,0.108875,5.355
2,"(children_NO, pep_NO)",(married_YES),0.278333,0.66,0.235,0.844311,1.27926,0.0513,2.183846
3,"(mortgage_NO, pep_NO)",(married_YES),0.348333,0.66,0.285,0.818182,1.239669,0.0551,1.87
4,"(mortgage_NO, car_NO)",(current_act_YES),0.328333,0.758333,0.263333,0.80203,1.057623,0.014347,1.220726
5,"(mortgage_NO, save_act_YES, pep_NO)",(married_YES),0.236667,0.66,0.2,0.84507,1.28041,0.0438,2.194545
6,"(mortgage_NO, pep_NO, current_act_YES)",(married_YES),0.263333,0.66,0.215,0.816456,1.237054,0.0412,1.852414
