### Imports Required Libraries

In [12]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules

In [13]:
df = pd.read_csv("Market_Basket_Optimisation.csv")
df.head(2)

Unnamed: 0,item_1,item_2,item_3,item_4,item_5,item_6,item_7,item_8,item_9,item_10,item_11,item_12,item_13,item_14,item_15,item_16,item_17,item_18,item_19,item_20
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,


### Print column names with total empty/NaN value counts

In [14]:
print('Columns \t Total Empty')
print('----------------------------')
print(df.isnull().sum())

Columns 	 Total Empty
----------------------------
item_1        0
item_2     1754
item_3     3112
item_4     4156
item_5     4972
item_6     5637
item_7     6132
item_8     6520
item_9     6847
item_10    7106
item_11    7245
item_12    7347
item_13    7414
item_14    7454
item_15    7476
item_16    7493
item_17    7497
item_18    7497
item_19    7498
item_20    7500
dtype: int64


### Replaces 'NaN' values with empty string

In [15]:
df = df.fillna("")
df.head(2)

Unnamed: 0,item_1,item_2,item_3,item_4,item_5,item_6,item_7,item_8,item_9,item_10,item_11,item_12,item_13,item_14,item_15,item_16,item_17,item_18,item_19,item_20
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,


### Encode the dataset into true/false values using tansaction encoder

In [16]:
te = TransactionEncoder()
encoded_array = te.fit_transform(df.values.tolist())

### Convert the encoded array into pandas dataframe

In [17]:
transaction_df = pd.DataFrame(encoded_array, columns=te.columns_)
transaction_df.head(2)

Unnamed: 0,Unnamed: 1,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,False,True,True,False,True,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


### Extract frequent items by using fp-growth algorithm

In [18]:
frequent_items = fpgrowth(transaction_df, min_support=0.1, use_colnames=True)
frequent_items

Unnamed: 0,support,itemsets
0,0.238368,(mineral water)
1,0.132116,(green tea)
2,0.999867,()
3,0.179709,(eggs)
4,0.129583,(milk)
5,0.170911,(french fries)
6,0.17411,(spaghetti)
7,0.163845,(chocolate)
8,0.238235,"(, mineral water)"
9,0.131982,"(, green tea)"


### Genreate association rules for frequent items

In [19]:
rules = association_rules(frequent_items, min_threshold=0.2)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(),(mineral water),0.999867,0.238368,0.238235,0.238267,0.999574,1.0,-0.000102,0.999867,-0.761733,0.238235,-0.000133,0.618854
1,(mineral water),(),0.238368,0.999867,0.238235,0.999441,0.999574,1.0,-0.000102,0.238368,-0.000559,0.238235,-3.19519,0.618854
2,(green tea),(),0.132116,0.999867,0.131982,0.998991,0.999124,1.0,-0.000116,0.132116,-0.001009,0.131982,-6.569122,0.565495
3,(eggs),(),0.179709,0.999867,0.179709,1.0,1.000133,1.0,2.4e-05,inf,0.000163,0.179733,1.0,0.589867
4,(milk),(),0.129583,0.999867,0.129583,1.0,1.000133,1.0,1.7e-05,inf,0.000153,0.1296,1.0,0.5648
5,(french fries),(),0.170911,0.999867,0.170911,1.0,1.000133,1.0,2.3e-05,inf,0.000161,0.170933,1.0,0.585467
6,(spaghetti),(),0.17411,0.999867,0.17411,1.0,1.000133,1.0,2.3e-05,inf,0.000161,0.174133,1.0,0.587067
7,(chocolate),(),0.163845,0.999867,0.163845,1.0,1.000133,1.0,2.2e-05,inf,0.000159,0.163867,1.0,0.581933


### Print association rules with necessary columns

In [20]:
rules[["antecedents", "consequents", "support", "confidence"]]

Unnamed: 0,antecedents,consequents,support,confidence
0,(),(mineral water),0.238235,0.238267
1,(mineral water),(),0.238235,0.999441
2,(green tea),(),0.131982,0.998991
3,(eggs),(),0.179709,1.0
4,(milk),(),0.129583,1.0
5,(french fries),(),0.170911,1.0
6,(spaghetti),(),0.17411,1.0
7,(chocolate),(),0.163845,1.0
