In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import pdb # for debugging

import warnings
warnings.filterwarnings('ignore')

In [None]:
df = pd.read_csv('https://raw.githubusercontent.com/renatomaaliw3/public_files/refs/heads/master/Data%20Sets/credit_card_approval.csv')
df.head()


Unnamed: 0,Age,Annual_Income,Credit_Score,Employment_Status,Debt_Income_Ratio,Residence,Marital_Status,Number_of_Dependents,Credit_History_Length,Approval_Status
0,25-34,40-60k,Credit_Excellent,Employed,DIB_Low,Residence_Own,MS_Single,Dep_0,3-5 years,App_Grant
1,25-34,40-60k,Credit_Very Good,Student,DIB_Low,Residence_Own,MS_Married,Dep_0,3-5 years,App_Grant
2,55+,20-40k,Credit_Excellent,Student,DIB_Low,Residence_Rent,MS_Single,Dep_0,10+ years,App_Grant
3,45-54,20-40k,Credit_Fair,Employed,DIB_Low,Residence_Own,MS_Married,Dep_1,5-10 years,App_Denied
4,55+,80-100k,Credit_Excellent,Employed,DIB_Low,Residence_Rent,MS_Single,Dep_1,<1 year,App_Grant


In [None]:
# Data Preprocessing
# Before Applying the Apriori algorithm, we need to preprocess the data
# One-Hot Encoding, Remember get dummies?

from mlxtend.preprocessing import TransactionEncoder

# Consolidate each transaction into a single list of items, removing NaN values
transactions = df.apply(lambda row: row.dropna().tolist(), axis = 1).tolist()

# Initialize TransactionEncoder
encoder = TransactionEncoder()

# Fit and transform the transactions data
transaction_matrix = encoder.fit_transform(transactions)

# Convert to DataFrame
transaction_df = pd.DataFrame(transaction_matrix, columns = encoder.columns_)
transaction_df = transaction_df[transaction_df['App_Grant'] ==True]
transaction_df

Unnamed: 0,1-3 years,10+ years,100k+,20-40k,25-34,3-5 years,35-44,40-60k,45-54,5-10 years,...,MS_Married,MS_Single,MS_Widowed,Residence_Other,Residence_Own,Residence_Rent,Retired,Self-Employed,Student,Unemployed
0,False,False,False,False,True,True,False,True,False,False,...,False,True,False,False,True,False,False,False,False,False
1,False,False,False,False,True,True,False,True,False,False,...,True,False,False,False,True,False,False,False,True,False
2,False,True,False,True,False,False,False,False,False,False,...,False,True,False,False,False,True,False,False,True,False
4,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,True,False,False,False,False
5,False,False,False,False,True,False,False,True,False,False,...,False,True,False,False,True,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2493,True,False,False,False,False,False,False,False,True,False,...,True,False,False,True,False,False,False,False,True,False
2494,False,False,False,True,False,True,False,False,False,False,...,False,True,False,True,False,False,True,False,False,False
2496,False,False,False,False,True,False,False,False,False,True,...,False,True,False,False,False,True,False,False,False,True
2498,False,False,False,True,True,True,False,False,False,False,...,False,True,False,False,True,False,False,False,False,False


In [None]:
# Appying the Apriori Algorithm
# Since data are cleaned and prepared for frequent itemset

from mlxtend.frequent_patterns import apriori, association_rules

# Apply the Apriori algorithm
frequent_itemsets = apriori(transaction_df, min_support = 0.2, use_colnames = True)


In [None]:
if 'support' not in frequent_itemsets.columns:
   frequent_itemsets = frequent_itemsets.rename(columns={'sup': 'support'})

In [None]:
pd.set_option('display.max_columns', 10000)
import warnings
warnings.filterwarnings('ignore', 'all')
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.224380,(1-3 years)
1,0.347622,(25-34)
2,0.257870,(3-5 years)
3,0.237106,(35-44)
4,0.232418,(5-10 years)
...,...,...
72,0.229069,"(MS_Married, Employed, App_Grant)"
73,0.292029,"(Employed, App_Grant, MS_Single)"
74,0.247823,"(Employed, App_Grant, Residence_Own)"
75,0.306095,"(Residence_Rent, Employed, App_Grant)"


In [None]:

rules = association_rules(frequent_itemsets, num_itemsets = len(transaction_df), metric = "confidence", min_threshold = 0.2)

rules.loc[:, :'lift']
#rules.loc[:, :'lift'].to_csv('rules.csv')

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
0,(1-3 years),(App_Grant),0.224380,1.000000,0.224380,1.000000,1.000000
1,(App_Grant),(1-3 years),1.000000,0.224380,0.224380,0.224380,1.000000
2,(25-34),(App_Grant),0.347622,1.000000,0.347622,1.000000,1.000000
3,(App_Grant),(25-34),1.000000,0.347622,0.347622,0.347622,1.000000
4,(25-34),(DIB_Low),0.347622,0.584059,0.200938,0.578035,0.989686
...,...,...,...,...,...,...,...
191,"(Residence_Rent, MS_Single)",(App_Grant),0.245144,1.000000,0.245144,1.000000,1.000000
192,"(App_Grant, MS_Single)",(Residence_Rent),0.474213,0.509042,0.245144,0.516949,1.015533
193,(Residence_Rent),"(App_Grant, MS_Single)",0.509042,0.474213,0.245144,0.481579,1.015533
194,(App_Grant),"(Residence_Rent, MS_Single)",1.000000,0.245144,0.245144,0.245144,1.000000


In [None]:
rules.sort_values(by='confidence', ascending=False).head(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(1-3 years),(App_Grant),0.22438,1.0,0.22438,1.0,1.0,1.0,0.0,inf,0.0,0.22438,0.0,0.61219
25,(DIB_Moderate),(App_Grant),0.339585,1.0,0.339585,1.0,1.0,1.0,0.0,inf,0.0,0.339585,0.0,0.669792
29,(Dep_1),(App_Grant),0.300067,1.0,0.300067,1.0,1.0,1.0,0.0,inf,0.0,0.300067,0.0,0.650033
30,(Employed),(App_Grant),0.618218,1.0,0.618218,1.0,1.0,1.0,0.0,inf,0.0,0.618218,0.0,0.809109
32,(MS_Married),(App_Grant),0.376423,1.0,0.376423,1.0,1.0,1.0,0.0,inf,0.0,0.376423,0.0,0.688212


In [None]:
rules.sort_values(by='support', ascending=False).head(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
31,(App_Denied),(Employed),1.0,0.6286,0.6286,0.6286,1.0,1.0,0.0,1.0,0.0,0.6286,0.0,0.8143
30,(Employed),(App_Denied),0.6286,1.0,0.6286,1.0,1.0,1.0,0.0,inf,0.0,0.6286,0.0,0.8143
17,(App_Denied),(Credit_Fair),1.0,0.529295,0.529295,0.529295,1.0,1.0,0.0,1.0,0.0,0.529295,0.0,0.764647
16,(Credit_Fair),(App_Denied),0.529295,1.0,0.529295,1.0,1.0,1.0,0.0,inf,0.0,0.529295,0.0,0.764647
35,(App_Denied),(MS_Single),1.0,0.519364,0.519364,0.519364,1.0,1.0,0.0,1.0,0.0,0.519364,0.0,0.759682


In [None]:
rules_sorted = rules.sort_values('lift', ascending=False)

rules_sorted.loc[:, :'lift']

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
142,"(Residence_Rent, App_Grant)",(DIB_Low),0.509042,0.584059,0.311453,0.611842,1.047569
144,"(App_Grant, DIB_Low)",(Residence_Rent),0.584059,0.509042,0.311453,0.533257,1.047569
145,(Residence_Rent),"(App_Grant, DIB_Low)",0.509042,0.584059,0.311453,0.611842,1.047569
147,(DIB_Low),"(Residence_Rent, App_Grant)",0.584059,0.509042,0.311453,0.533257,1.047569
58,(Residence_Rent),(DIB_Low),0.509042,0.584059,0.311453,0.611842,1.047569
...,...,...,...,...,...,...,...
129,(DIB_Low),"(MS_Married, App_Grant)",0.584059,0.376423,0.205626,0.352064,0.935288
53,(DIB_Low),(MS_Married),0.584059,0.376423,0.205626,0.352064,0.935288
124,"(MS_Married, App_Grant)",(DIB_Low),0.376423,0.584059,0.205626,0.546263,0.935288
52,(MS_Married),(DIB_Low),0.376423,0.584059,0.205626,0.546263,0.935288


In [None]:
from mlxtend.preprocessing import TransactionEncoder

transactions = df.apply(lambda row: row.dropna().tolist(), axis = 1).tolist()


encoder = TransactionEncoder()
transaction_matrix = encoder.fit_transform(transactions)

transaction_df = pd.DataFrame(transaction_matrix, columns = encoder.columns_)
transaction_df = transaction_df[transaction_df['App_Denied'] == True]
transaction_df

Unnamed: 0,1-3 years,10+ years,100k+,20-40k,25-34,3-5 years,35-44,40-60k,45-54,5-10 years,55+,60-80k,80-100k,<1 year,<20k,<25,App_Denied,App_Grant,Credit_Excellent,Credit_Fair,Credit_Good,Credit_Poor,Credit_Very Good,DIB_High,DIB_Low,DIB_Moderate,Dep_0,Dep_1,Dep_2,Dep_3,Dep_4,Dep_5,Employed,MS_Divorced,MS_Married,MS_Single,MS_Widowed,Residence_Other,Residence_Own,Residence_Rent,Retired,Self-Employed,Student,Unemployed
3,False,False,False,True,False,False,False,False,True,True,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,True,False,False,False,False,True,False,True,False,False,False,True,False,False,False,False,False
6,False,False,False,False,True,True,False,True,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,True,False,False,False,False,False,True,False,False,True,False,False,False,True,False,False,False,False
10,True,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,True,False,False,False,False,True,False,False,False,True,True,False,False,False,False,False,False,False,True,False,False,False,True,False,True,False,False,False
13,True,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,False,True,False,True,False,False,False,False,True,False,False,True,False,False,True,False,False,False,False,False
14,True,False,True,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,True,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2487,True,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,True,False,False,True,False,False,False,False,True,False,False,False,True,False,False,False,True,False,False,True,False,False,False,True,False,False,False,False
2488,True,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,True,False,False,False,False,True,False,True,False,False,False,False,True,False,False,False,False
2492,True,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,True,False,True,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,True,False,True,False,False,False
2495,False,False,False,False,False,True,False,False,True,False,False,False,True,False,False,False,True,False,False,True,False,False,False,False,True,False,True,False,False,False,False,False,True,False,False,True,False,False,False,True,False,False,False,False


In [None]:
from mlxtend.frequent_patterns import apriori, association_rules

frequent_itemsets = apriori(transaction_df, min_support = 0.2, use_colnames = True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.291956,(1-3 years)
1,0.224429,(20-40k)
2,0.326713,(25-34)
3,0.356504,(3-5 years)
4,0.280040,(35-44)
...,...,...
70,0.324727,"(Employed, MS_Single, App_Denied)"
71,0.263158,"(Employed, Residence_Own, App_Denied)"
72,0.300894,"(Residence_Rent, Employed, App_Denied)"
73,0.213505,"(Residence_Own, MS_Single, App_Denied)"


In [None]:
pd.set_option('display.max_columns', 10000)

import warnings
warnings.filterwarnings('ignore', 'all')

rules = association_rules(frequent_itemsets, num_itemsets=frequent_itemsets, metric = "confidence", min_threshold = 0.2)
rules.loc[:, :'lift']

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
0,(1-3 years),(App_Denied),0.291956,1.000000,0.291956,1.000000,1.000000
1,(App_Denied),(1-3 years),1.000000,0.291956,0.291956,0.291956,1.000000
2,(20-40k),(App_Denied),0.224429,1.000000,0.224429,1.000000,1.000000
3,(App_Denied),(20-40k),1.000000,0.224429,0.224429,0.224429,1.000000
4,(25-34),(App_Denied),0.326713,1.000000,0.326713,1.000000,1.000000
...,...,...,...,...,...,...,...
183,"(Residence_Rent, App_Denied)",(MS_Single),0.490566,0.519364,0.259186,0.528340,1.017282
184,"(MS_Single, App_Denied)",(Residence_Rent),0.519364,0.490566,0.259186,0.499044,1.017282
185,(Residence_Rent),"(MS_Single, App_Denied)",0.490566,0.519364,0.259186,0.528340,1.017282
186,(MS_Single),"(Residence_Rent, App_Denied)",0.519364,0.490566,0.259186,0.499044,1.017282


In [None]:
rules_sorted = rules.sort_values('lift', ascending=False)
rules_sorted.loc[:, :'lift']

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
90,(DIB_Low),"(Credit_Fair, App_Denied)",0.387289,0.529295,0.268123,0.692308,1.307981
89,(Credit_Fair),"(App_Denied, DIB_Low)",0.529295,0.387289,0.268123,0.506567,1.307981
41,(DIB_Low),(Credit_Fair),0.387289,0.529295,0.268123,0.692308,1.307981
87,"(Credit_Fair, App_Denied)",(DIB_Low),0.529295,0.387289,0.268123,0.506567,1.307981
88,"(App_Denied, DIB_Low)",(Credit_Fair),0.387289,0.529295,0.268123,0.692308,1.307981
...,...,...,...,...,...,...,...
142,"(Dep_0, App_Denied)",(Employed),0.414101,0.628600,0.245283,0.592326,0.942295
141,"(Employed, App_Denied)",(Dep_0),0.628600,0.414101,0.245283,0.390205,0.942295
58,(Employed),(Dep_0),0.628600,0.414101,0.245283,0.390205,0.942295
59,(Dep_0),(Employed),0.414101,0.628600,0.245283,0.592326,0.942295


In [None]:
rules.sort_values(by='support', ascending=False).head(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
31,(App_Denied),(Employed),1.0,0.6286,0.6286,0.6286,1.0,1.0,0.0,1.0,0.0,0.6286,0.0,0.8143
30,(Employed),(App_Denied),0.6286,1.0,0.6286,1.0,1.0,1.0,0.0,inf,0.0,0.6286,0.0,0.8143
17,(App_Denied),(Credit_Fair),1.0,0.529295,0.529295,0.529295,1.0,1.0,0.0,1.0,0.0,0.529295,0.0,0.764647
16,(Credit_Fair),(App_Denied),0.529295,1.0,0.529295,1.0,1.0,1.0,0.0,inf,0.0,0.529295,0.0,0.764647
35,(App_Denied),(MS_Single),1.0,0.519364,0.519364,0.519364,1.0,1.0,0.0,1.0,0.0,0.519364,0.0,0.759682


In [None]:
rules.sort_values(by='consequent support', ascending=False).head(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(1-3 years),(App_Denied),0.291956,1.0,0.291956,1.0,1.0,1.0,0.0,inf,0.0,0.291956,0.0,0.645978
80,"(Employed, 3-5 years)",(App_Denied),0.230387,1.0,0.230387,1.0,1.0,1.0,0.0,inf,0.0,0.230387,0.0,0.615194
26,(Dep_0),(App_Denied),0.414101,1.0,0.414101,1.0,1.0,1.0,0.0,inf,0.0,0.414101,0.0,0.707051
28,(Dep_1),(App_Denied),0.28997,1.0,0.28997,1.0,1.0,1.0,0.0,inf,0.0,0.28997,0.0,0.644985
152,"(MS_Married, Employed)",(App_Denied),0.203575,1.0,0.203575,1.0,1.0,1.0,0.0,inf,0.0,0.203575,0.0,0.601787
