#### Imports Required Libraries

In [123]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

#### Load dataset and print two rows

In [124]:
df = pd.read_csv("Market_Basket_Optimisation.csv")
df.head(2)

Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
1,chutney,,,,,,,,,,,,,,,,,,,


#### Print column names with total empty/NaN value counts

In [125]:
print("Column Name \t Total Null")
df.isnull().sum()

Column Name 	 Total Null


shrimp                  0
almonds              1754
avocado              3112
vegetables mix       4156
green grapes         4972
whole weat flour     5637
yams                 6132
cottage cheese       6520
energy drink         6847
tomato juice         7106
low fat yogurt       7245
green tea            7347
honey                7414
salad                7454
mineral water        7476
salmon               7493
antioxydant juice    7497
frozen smoothie      7497
spinach              7498
olive oil            7500
dtype: int64

#### Replaces 'NaN' values with empty string

In [126]:
df.fillna("", inplace=True)
df.head(2)

  df.fillna("", inplace=True)


Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
1,chutney,,,,,,,,,,,,,,,,,,,


#### Encode the dataset into true/false values using tansaction encoder

In [127]:
te = TransactionEncoder()
encoded_array  = te.fit_transform(df.values.tolist())
encoded_array

array([[ True, False, False, ..., False, False, False],
       [ True, False, False, ..., False, False, False],
       [ True, False, False, ..., False, False, False],
       ...,
       [ True, False, False, ..., False, False, False],
       [ True, False, False, ..., False, False, False],
       [ True, False, False, ..., False,  True, False]], shape=(7500, 121))

#### print encoded array column names

In [128]:
te.columns_[:5]

['', ' asparagus', 'almonds', 'antioxydant juice', 'asparagus']


#### Convert the encoded array into pandas dataframe

In [129]:
transaction_df = pd.DataFrame(encoded_array, columns= te.columns_)
transaction_df.head(2)

Unnamed: 0,Unnamed: 1,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


#### Extract frequent items by using apriori algorithm

In [130]:
frequent_items = apriori(transaction_df, min_support=0.1, use_colnames=True)
frequent_items

Unnamed: 0,support,itemsets
0,1.0,()
1,0.163867,(chocolate)
2,0.179733,(eggs)
3,0.170933,(french fries)
4,0.132,(green tea)
5,0.1296,(milk)
6,0.238267,(mineral water)
7,0.174133,(spaghetti)
8,0.163867,"(, chocolate)"
9,0.179733,"(, eggs)"


#### Genreate association rules for frequent items

In [132]:
rules = association_rules(frequent_items, min_threshold=0.2)
rules

  cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(chocolate),(),0.163867,1.0,0.163867,1.0,1.0,1.0,0.0,inf,0.0,0.163867,0.0,0.581933
1,(eggs),(),0.179733,1.0,0.179733,1.0,1.0,1.0,0.0,inf,0.0,0.179733,0.0,0.589867
2,(french fries),(),0.170933,1.0,0.170933,1.0,1.0,1.0,0.0,inf,0.0,0.170933,0.0,0.585467
3,(green tea),(),0.132,1.0,0.132,1.0,1.0,1.0,0.0,inf,0.0,0.132,0.0,0.566
4,(milk),(),0.1296,1.0,0.1296,1.0,1.0,1.0,0.0,inf,0.0,0.1296,0.0,0.5648
5,(),(mineral water),1.0,0.238267,0.238267,0.238267,1.0,1.0,0.0,1.0,0.0,0.238267,0.0,0.619133
6,(mineral water),(),0.238267,1.0,0.238267,1.0,1.0,1.0,0.0,inf,0.0,0.238267,0.0,0.619133
7,(spaghetti),(),0.174133,1.0,0.174133,1.0,1.0,1.0,0.0,inf,0.0,0.174133,0.0,0.587067


#### Print association rules with necessary columns

In [133]:
rules[["antecedents", "consequents", "support", "confidence"]]

Unnamed: 0,antecedents,consequents,support,confidence
0,(chocolate),(),0.163867,1.0
1,(eggs),(),0.179733,1.0
2,(french fries),(),0.170933,1.0
3,(green tea),(),0.132,1.0
4,(milk),(),0.1296,1.0
5,(),(mineral water),0.238267,0.238267
6,(mineral water),(),0.238267,1.0
7,(spaghetti),(),0.174133,1.0
