### Importing libraries

In [3]:
import numpy as np
import pandas as pd
import apyori as ap

### Importing dataset

In [4]:
df = pd.read_csv('./datasets/groceries - groceries.csv', header = None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,23,24,25,26,27,28,29,30,31,32
0,Item(s),Item 1,Item 2,Item 3,Item 4,Item 5,Item 6,Item 7,Item 8,Item 9,...,Item 23,Item 24,Item 25,Item 26,Item 27,Item 28,Item 29,Item 30,Item 31,Item 32
1,4,citrus fruit,semi-finished bread,margarine,ready soups,,,,,,...,,,,,,,,,,
2,3,tropical fruit,yogurt,coffee,,,,,,,...,,,,,,,,,,
3,1,whole milk,,,,,,,,,...,,,,,,,,,,
4,4,pip fruit,yogurt,cream cheese,meat spreads,,,,,,...,,,,,,,,,,


### Dataset shape

In [5]:
df.shape

(9836, 33)

### Preprocessing

In [6]:
transactions = []
for row in range(1,9836):
    transactions.append([str(df.values[row,col]) for col in range(1,33)])

In [30]:
print(transactions[0])

['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan']


### Applying apriori algorithm

In [7]:
rules = ap.apriori(transactions = transactions, min_support = 0.01, min_confidence = 0.4, min_lift = 2, min_length = 2, max_length = 2)

In [8]:
results = list(rules)
results

[RelationRecord(items=frozenset({'other vegetables', 'chicken'}), support=0.017895271987798677, ordered_statistics=[OrderedStatistic(items_base=frozenset({'chicken'}), items_add=frozenset({'other vegetables'}), confidence=0.4170616113744075, lift=2.1554392789633723)]),
 RelationRecord(items=frozenset({'other vegetables', 'hamburger meat'}), support=0.013828164717844434, ordered_statistics=[OrderedStatistic(items_base=frozenset({'hamburger meat'}), items_add=frozenset({'other vegetables'}), confidence=0.41590214067278286, lift=2.149446954028807)]),
 RelationRecord(items=frozenset({'other vegetables', 'onions'}), support=0.014234875444839857, ordered_statistics=[OrderedStatistic(items_base=frozenset({'onions'}), items_add=frozenset({'other vegetables'}), confidence=0.45901639344262296, lift=2.372268118501417)]),
 RelationRecord(items=frozenset({'other vegetables', 'root vegetables'}), support=0.047381799694966954, ordered_statistics=[OrderedStatistic(items_base=frozenset({'root vegetable

In [9]:
# function for visualizing the results in a better way

def inspect(results):
    lhs         = [','.join(list(result[2][0][0])) for result in results]
    rhs         = [list(result[2][0][1]) for result in results]
    supports    = [result[1] for result in results]
    confidences = [result[2][0][2] for result in results]
    lifts       = [result[2][0][3] for result in results]
    return list(zip(lhs, rhs, supports, confidences, lifts))


In [10]:
resultsinDataFrame = pd.DataFrame(inspect(results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])
resultsinDataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,chicken,[other vegetables],0.017895,0.417062,2.155439
1,hamburger meat,[other vegetables],0.013828,0.415902,2.149447
2,onions,[other vegetables],0.014235,0.459016,2.372268
3,root vegetables,[other vegetables],0.047382,0.434701,2.246605
4,whipped/sour cream,[other vegetables],0.028876,0.402837,2.081924


In [11]:
rules = ap.apriori(transactions = transactions, min_support = 0.015, min_confidence = 0.4, min_lift = 2, min_length = 3, max_length = 3)

In [12]:
results = list(rules)
results

[RelationRecord(items=frozenset({'other vegetables', 'chicken'}), support=0.017895271987798677, ordered_statistics=[OrderedStatistic(items_base=frozenset({'chicken'}), items_add=frozenset({'other vegetables'}), confidence=0.4170616113744075, lift=2.1554392789633723)]),
 RelationRecord(items=frozenset({'other vegetables', 'root vegetables'}), support=0.047381799694966954, ordered_statistics=[OrderedStatistic(items_base=frozenset({'root vegetables'}), items_add=frozenset({'other vegetables'}), confidence=0.43470149253731344, lift=2.2466049285887952)]),
 RelationRecord(items=frozenset({'other vegetables', 'whipped/sour cream'}), support=0.02887646161667514, ordered_statistics=[OrderedStatistic(items_base=frozenset({'whipped/sour cream'}), items_add=frozenset({'other vegetables'}), confidence=0.40283687943262414, lift=2.081923651718265)]),
 RelationRecord(items=frozenset({'other vegetables', 'nan', 'chicken'}), support=0.017793594306049824, ordered_statistics=[OrderedStatistic(items_base=f

In [13]:
resultsinDataFrame = pd.DataFrame(inspect(results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [14]:
resultsinDataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,chicken,[other vegetables],0.017895,0.417062,2.155439
1,root vegetables,[other vegetables],0.047382,0.434701,2.246605
2,whipped/sour cream,[other vegetables],0.028876,0.402837,2.081924
3,chicken,"[other vegetables, nan]",0.017794,0.414692,2.144319
4,root vegetables,"[other vegetables, nan]",0.04728,0.433769,2.242963
5,whipped/sour cream,"[other vegetables, nan]",0.028775,0.401418,2.075684
6,"whole milk,root vegetables",[other vegetables],0.023183,0.474012,2.44977
7,"tropical fruit,whole milk",[other vegetables],0.017082,0.403846,2.08714
8,"other vegetables,yogurt",[whole milk],0.022267,0.512881,2.007235
9,"tropical fruit,yogurt",[whole milk],0.01515,0.517361,2.02477


In [15]:
rhs = list(resultsinDataFrame['Right Hand Side'])

In [16]:
indices = []
for i, item in enumerate(rhs):
    if('nan' in item):
        indices.append(i)
print(indices)

[3, 4, 5]


In [17]:
resultsinDataFrame.drop(indices, inplace=True)
resultsinDataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,chicken,[other vegetables],0.017895,0.417062,2.155439
1,root vegetables,[other vegetables],0.047382,0.434701,2.246605
2,whipped/sour cream,[other vegetables],0.028876,0.402837,2.081924
6,"whole milk,root vegetables",[other vegetables],0.023183,0.474012,2.44977
7,"tropical fruit,whole milk",[other vegetables],0.017082,0.403846,2.08714
8,"other vegetables,yogurt",[whole milk],0.022267,0.512881,2.007235
9,"tropical fruit,yogurt",[whole milk],0.01515,0.517361,2.02477


In [None]:
resultsinDataFrame.nlargest(n = 10, columns = 'Lift')

In [1]:
import matplotlib.pyplot as plt


ModuleNotFoundError: No module named 'matplotlib'