In [61]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from csv import reader
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [62]:
groceries = []
with open('basket_analysis.csv', 'r') as read_obj:
    csv_reader = reader(read_obj)
    for row in csv_reader:
        groceries.append(row)
groceries[0:5]

[['',
  'Apple',
  'Bread',
  'Butter',
  'Cheese',
  'Corn',
  'Dill',
  'Eggs',
  'Ice cream',
  'Kidney Beans',
  'Milk',
  'Nutmeg',
  'Onion',
  'Sugar',
  'Unicorn',
  'Yogurt',
  'chocolate'],
 ['0',
  'False',
  'True',
  'False',
  'False',
  'True',
  'True',
  'False',
  'True',
  'False',
  'False',
  'False',
  'False',
  'True',
  'False',
  'True',
  'True'],
 ['1',
  'False',
  'False',
  'False',
  'False',
  'False',
  'False',
  'False',
  'False',
  'False',
  'True',
  'False',
  'False',
  'False',
  'False',
  'False',
  'False'],
 ['2',
  'True',
  'False',
  'True',
  'False',
  'False',
  'True',
  'False',
  'True',
  'False',
  'True',
  'False',
  'False',
  'False',
  'False',
  'True',
  'True'],
 ['3',
  'False',
  'False',
  'True',
  'True',
  'False',
  'True',
  'False',
  'False',
  'False',
  'True',
  'True',
  'True',
  'False',
  'False',
  'False',
  'False']]

In [63]:
encoder = TransactionEncoder()
transactions = encoder.fit(groceries).transform(groceries)
transactions

array([[ True, False, False, ...,  True,  True,  True],
       [False,  True, False, ..., False, False, False],
       [False, False,  True, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [64]:
transactions = transactions.astype('int')
transactions

array([[1, 0, 0, ..., 1, 1, 1],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [65]:
df = pd.DataFrame(transactions, columns=encoder.columns_)

In [66]:
df.head()

Unnamed: 0,Unnamed: 1,0,1,10,100,101,102,103,104,105,...,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Sugar,True,Unicorn,Yogurt,chocolate
0,1,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,0,1,1,1
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [67]:
df.shape

(1000, 1018)

In [68]:
frequent_itemsets = apriori(df, min_support=0.02, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.999,(False),1
1,0.999,(True),1
2,0.999,"(True, False)",2


In [69]:
frequent_itemsets[(frequent_itemsets['length'] > 1) & 
                  (frequent_itemsets['support'] >= 0.05)]

Unnamed: 0,support,itemsets,length
2,0.999,"(True, False)",2


In [70]:
frequent_itemsets[(frequent_itemsets['length'] == 2) & 
                  (frequent_itemsets['support'] >= 0.02)]

Unnamed: 0,support,itemsets,length
2,0.999,"(True, False)",2


In [71]:
rules = association_rules(frequent_itemsets, metric='support', min_threshold=0.02)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(True),(False),0.999,0.999,0.999,1.0,1.001001,0.000999,inf
1,(False),(True),0.999,0.999,0.999,1.0,1.001001,0.000999,inf


In [72]:
rules.sort_values(by='confidence', ascending=False)[0:10]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(True),(False),0.999,0.999,0.999,1.0,1.001001,0.000999,inf
1,(False),(True),0.999,0.999,0.999,1.0,1.001001,0.000999,inf


In [73]:
rules[(rules['support'] >= 0.02) &
      (rules['lift'] > 1.0)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(True),(False),0.999,0.999,0.999,1.0,1.001001,0.000999,inf
1,(False),(True),0.999,0.999,0.999,1.0,1.001001,0.000999,inf
