In [30]:
# importing required libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from apyori import apriori
from sklearn.preprocessing import LabelEncoder


#### 1. Loading data

In [58]:
df = pd.read_csv('bankdata_csv_all.csv')

In [59]:
df.head()

Unnamed: 0,id,age,sex,region,income,married,children,car,save_act,current_act,mortgage,pep
0,ID12101,48,FEMALE,INNER_CITY,17546.0,NO,1,NO,NO,NO,NO,YES
1,ID12102,40,MALE,TOWN,30085.1,YES,3,YES,NO,YES,YES,NO
2,ID12103,51,FEMALE,INNER_CITY,16575.4,YES,0,YES,YES,YES,NO,NO
3,ID12104,23,FEMALE,TOWN,20375.4,YES,3,NO,NO,YES,NO,NO
4,ID12105,57,FEMALE,RURAL,50576.3,YES,0,NO,YES,NO,NO,NO


In [60]:
df.shape

(600, 12)

#### 2. Data Pre-Processing

In [64]:
df.isnull().sum() # no null values

id             0
age            0
sex            0
region         0
income         0
married        0
children       0
car            0
save_act       0
current_act    0
mortgage       0
pep            0
dtype: int64

In [65]:
df.isna().sum() # no NA values

id             0
age            0
sex            0
region         0
income         0
married        0
children       0
car            0
save_act       0
current_act    0
mortgage       0
pep            0
dtype: int64

In [66]:
# removing the id field
df.drop('id',axis=1, inplace=True)

In [69]:
# summary of the statistical data
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
age,600.0,42.395,14.424947,18.0,30.0,42.0,55.25,67.0
income,600.0,27524.031217,12899.468246,5014.21,17264.5,24925.3,36172.675,63130.1
children,600.0,1.011667,1.056752,0.0,0.0,1.0,2.0,3.0


In [52]:
# Discretizing age column - converting numeric value to categorical value
df['age_group']=pd.cut(df['age'],bins=[0,10,20,30,40,50,60,100], 
                      labels=["child","teens","twenties","thirties","fourties","fifties","old"])

In [56]:
df.head()

Unnamed: 0,age,sex,region,income,married,children,car,save_act,current_act,mortgage,pep,age_group,gender_dummy,married_dummy,car_dummy,savingsaccnt_dummy,currentaccnt_dummy,mortgage_dummy,pep_dummy
0,48,FEMALE,INNER_CITY,17546.0,NO,1,NO,NO,NO,NO,YES,fourties,0,0,0,0,0,0,1
1,40,MALE,TOWN,30085.1,YES,3,YES,NO,YES,YES,NO,thirties,1,1,1,0,1,1,0
2,51,FEMALE,INNER_CITY,16575.4,YES,0,YES,YES,YES,NO,NO,fifties,0,1,1,1,1,0,0
3,23,FEMALE,TOWN,20375.4,YES,3,NO,NO,YES,NO,NO,twenties,0,1,0,0,1,0,0
4,57,FEMALE,RURAL,50576.3,YES,0,NO,YES,NO,NO,NO,fifties,0,1,0,1,0,0,0


In [54]:
# discretizing sex,married,car,save_act,current_act,mortgage,pep columns by label encoding
le = LabelEncoder()
df['gender_dummy'] = le.fit_transform(df['sex']) # female - 0 , male-1
df['married_dummy'] = le.fit_transform(df['married']) # No - 0, Yes - 1
df['car_dummy'] = le.fit_transform(df['car'])
df['savingsaccnt_dummy'] = le.fit_transform(df['save_act'])
df['currentaccnt_dummy'] = le.fit_transform(df['current_act'])
df['mortgage_dummy'] = le.fit_transform(df['mortgage'])
df['pep_dummy'] = le.fit_transform(df['pep'])

In [55]:
df.head()

Unnamed: 0,age,sex,region,income,married,children,car,save_act,current_act,mortgage,pep,age_group,gender_dummy,married_dummy,car_dummy,savingsaccnt_dummy,currentaccnt_dummy,mortgage_dummy,pep_dummy
0,48,FEMALE,INNER_CITY,17546.0,NO,1,NO,NO,NO,NO,YES,fourties,0,0,0,0,0,0,1
1,40,MALE,TOWN,30085.1,YES,3,YES,NO,YES,YES,NO,thirties,1,1,1,0,1,1,0
2,51,FEMALE,INNER_CITY,16575.4,YES,0,YES,YES,YES,NO,NO,fifties,0,1,1,1,1,0,0
3,23,FEMALE,TOWN,20375.4,YES,3,NO,NO,YES,NO,NO,twenties,0,1,0,0,1,0,0
4,57,FEMALE,RURAL,50576.3,YES,0,NO,YES,NO,NO,NO,fifties,0,1,0,1,0,0,0


In [75]:
bankdata_oneHot = pd.get_dummies(df)

In [76]:
bankdata_oneHot

Unnamed: 0,age,income,children,sex_FEMALE,sex_MALE,region_INNER_CITY,region_RURAL,region_SUBURBAN,region_TOWN,married_NO,...,car_NO,car_YES,save_act_NO,save_act_YES,current_act_NO,current_act_YES,mortgage_NO,mortgage_YES,pep_NO,pep_YES
0,48,17546.00,1,1,0,1,0,0,0,1,...,1,0,1,0,1,0,1,0,0,1
1,40,30085.10,3,0,1,0,0,0,1,0,...,0,1,1,0,0,1,0,1,1,0
2,51,16575.40,0,1,0,1,0,0,0,0,...,0,1,0,1,0,1,1,0,1,0
3,23,20375.40,3,1,0,0,0,0,1,0,...,1,0,1,0,0,1,1,0,1,0
4,57,50576.30,0,1,0,0,1,0,0,0,...,1,0,0,1,1,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,61,47025.00,2,1,0,1,0,0,0,1,...,0,1,0,1,0,1,0,1,1,0
596,30,9672.25,0,1,0,1,0,0,0,0,...,0,1,0,1,0,1,1,0,1,0
597,31,15976.30,0,1,0,0,0,0,1,0,...,0,1,0,1,1,0,1,0,0,1
598,29,14711.80,0,0,1,1,0,0,0,0,...,1,0,0,1,1,0,0,1,1,0


#### 3. Association Rule Mining

In [32]:
records = []
for i in range(df.shape[0]):
    records.append([str(df.values[i,j]) for j in range(df.shape[1])])

In [47]:
association_rules = apriori(records, min_support=0.009, min_confidence=0.2, min_lift=3, min_length=2)
association_results = list(association_rules)

KeyboardInterrupt: 

In [45]:
association_results

[]

Reference:
https://stackabuse.com/association-rule-mining-via-apriori-algorithm-in-python/