# Market Basket Analysis Using mlxtend for french store data.

* In this example dataset used by french store which contain 7500 transaction of different itemsets.

---

## 1. Import Required Library

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

## 2.Load Data

In [2]:
store_data = pd.read_csv("./Data/store_data.csv", header=None)
display(store_data.head())
print(store_data.shape)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


(7501, 20)


## 3.Preprocess and Convert into list

In [3]:
records = []
for i in range(1, 7501):
    records.append([str(store_data.values[i, j]) for j in range(0, 20)])

In [4]:
print(type(records))

<class 'list'>


## 4.Transform dataset to train

In [5]:
te = TransactionEncoder()
te_try = te.fit(records).transform(records)

In [6]:
df = pd.DataFrame(te_try, columns=te.columns_)

In [7]:
df.head()

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


## 5. Train data and generate rule

In [8]:
apriori(df,min_support=0.020, use_colnames=True).head()

Unnamed: 0,support,itemsets
0,0.020267,(almonds)
1,0.0332,(avocado)
2,0.033733,(brownies)
3,0.0872,(burgers)
4,0.030133,(butter)


## 6.Generate frequent Itemset with support 0.05

In [9]:
frequent_itemsets = apriori(df, min_support=0.05, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets.head()

Unnamed: 0,support,itemsets,length
0,0.0872,(burgers),1
1,0.081067,(cake),1
2,0.06,(chicken),1
3,0.163867,(chocolate),1
4,0.0804,(cookies),1


## 7.Generate frequent Itemset with support and number of iteration record value

In [10]:
apriori(df, min_support=0.06, use_colnames=True, verbose=1).head(10)

Iteration: 1 | Sampling itemset size 2Iteration: 2 | Sampling itemset size 2Iteration: 3 | Sampling itemset size 2Iteration: 4 | Sampling itemset size 2Iteration: 5 | Sampling itemset size 2Iteration: 6 | Sampling itemset size 2Iteration: 7 | Sampling itemset size 2Iteration: 8 | Sampling itemset size 2Iteration: 9 | Sampling itemset size 2Iteration: 10 | Sampling itemset size 2Iteration: 11 | Sampling itemset size 2Iteration: 12 | Sampling itemset size 2Iteration: 13 | Sampling itemset size 2Iteration: 14 | Sampling itemset size 2Iteration: 15 | Sampling itemset size 2Iteration: 16 | Sampling itemset size 2Iteration: 17 | Sampling itemset size 2Iteration: 18 | Sampling itemset size 2Iteration: 19 | Sampling itemset size 2Iteration: 20 | Sampling itemset size 2Iteration: 21 | Sampling itemset size 2Iteration: 22 | Sampling itemset size 2Iteration: 23 | Sampling itemset size 2Iteration: 24 | Sampling itemset size 2Iteration: 25 | Sampling itemset size 2Iteratio

Unnamed: 0,support,itemsets
0,0.0872,(burgers)
1,0.081067,(cake)
2,0.06,(chicken)
3,0.163867,(chocolate)
4,0.0804,(cookies)
5,0.179733,(eggs)
6,0.079333,(escalope)
7,0.170933,(french fries)
8,0.0632,(frozen smoothie)
9,0.095333,(frozen vegetables)


In [11]:
frequent_itemsets[ (frequent_itemsets['length'] == 2) & (frequent_itemsets['support'] >= 0.02) ].head(10)

Unnamed: 0,support,itemsets,length
26,0.0872,"(burgers, nan)",2
27,0.081067,"(nan, cake)",2
28,0.06,"(chicken, nan)",2
29,0.052667,"(mineral water, chocolate)",2
30,0.163867,"(chocolate, nan)",2
31,0.0804,"(nan, cookies)",2
32,0.051067,"(nan, cooking oil)",2
33,0.050933,"(mineral water, eggs)",2
34,0.179733,"(eggs, nan)",2
35,0.079333,"(escalope, nan)",2


In [12]:
frequent_itemsets[ frequent_itemsets['itemsets'] == {'spaghetti', 'mineral water'}]

Unnamed: 0,support,itemsets,length
45,0.059733,"(mineral water, spaghetti)",2
