### Installing the in-built package for Apriori Algorithm

In [1]:
!pip3 install apyori

Collecting apyori
  Downloading https://files.pythonhosted.org/packages/25/fd/0561e2dd29aeed544bad2d1991636e38700cdaef9530490b863741f35295/apyori-1.1.1.tar.gz
Building wheels for collected packages: apyori
  Running setup.py bdist_wheel for apyori ... [?25ldone
[?25h  Stored in directory: /Users/umasubbiah/Library/Caches/pip/wheels/7b/2a/35/c0c3749c1a36d4f454ea22d8396e1b854b86340d63cbbb7949
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.1


In [2]:
import numpy as np
import pandas as pd
from apyori import apriori

### The dataset is a handmade dataset with 22 rows and 6 columns, 
source : https://intellipaat.com/blog/data-science-apriori-algorithm/#Dataset

In [35]:
store_data = pd.read_csv("data1.csv",header=None)
store_data.columns = (['Item1','Item2','Item3','Item4','Item5','Item6'])
print(store_data.shape)
store_data.head()

(22, 6)


Unnamed: 0,Item1,Item2,Item3,Item4,Item5,Item6
0,Wine,Chips,Bread,Butter,Milk,Apple
1,Wine,,Bread,Butter,Milk,
2,,,Bread,Butter,Milk,
3,,Chips,,,,Apple
4,Wine,Chips,Bread,Butter,Milk,Apple


### Making a recordset of all the records in the dataset

In [41]:
records = []
for i in range(0, 22):
    records.append([str(store_data.values[i,j]) for j in range(0, 6)])

###  Support is the fractions of the transactions that contain both X and Y. 
### Confidence measures how often items in Y appears in transactions that contain X.
### Lift says how likely item Y is purchased when item X is purchased, while controlling for how popular item Y is

### For a moderate suppot of 0.5 and a confidence of 0.75

In [None]:
association_rules = apriori(records, 
                            min_support=0.50, 
                            min_confidence=0.75, 
                            min_lift=1.2, 
                            min_length=2)
association_results = list(association_rules)

In [47]:
print(len(association_results))

1


In [48]:
# pretty printing the rules, code reference : 
# https://stackabuse.com/association-rule-mining-via-apriori-algorithm-in-python/


for item in association_results:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    print("Rule: " + items[0] + " -> " + items[1])

    #second index of the inner list
    print("Support: " + str(item[1]))

    #third index of the list located at 0th
    #of the third index of the inner list

    print("Confidence: " + str(item[2][0][2]))
    print("=====================================")

Rule: Milk -> Bread
Support: 0.5
Confidence: 0.8461538461538461


## For higher confidence (0.8) and minimal support (0.2)

In [None]:
association_rules = apriori(records, 
                            min_support=0.20, 
                            min_confidence=0.8, 
                            min_lift=1.2, 
                            min_length=2)
association_results = list(association_rules)


# pretty printing the rules, code reference : 
# https://stackabuse.com/association-rule-mining-via-apriori-algorithm-in-python/


for item in association_results:

    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    print("Rule: " + items[0] + " -> " + items[1])

    #second index of the inner list
    print("Support: " + str(item[1]))

    #third index of the list located at 0th
    #of the third index of the inner list

    print("Confidence: " + str(item[2][0][2]))
    print("=====================================")

## A second implementation using mlxtend package instead

source : https://www.geeksforgeeks.org/implementing-apriori-algorithm-in-python/

In [56]:
import numpy as np 
import pandas as pd 
from mlxtend.frequent_patterns import apriori, association_rules 

In [60]:
# Loading the Data 
data = pd.read_excel('Online Retail.xlsx') 
data.head() 

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [61]:
# Exploring the columns of the data 
data.columns 

Index(['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')

In [62]:
# Stripping extra spaces in the description 
data['Description'] = data['Description'].str.strip() 

# Dropping the rows without any invoice number 
data.dropna(axis = 0, subset =['InvoiceNo'], inplace = True) 
data['InvoiceNo'] = data['InvoiceNo'].astype('str') 

# Dropping all transactions which were done on credit 
data = data[~data['InvoiceNo'].str.contains('C')] 



In [63]:
# Transactions done in France 
basket_France = (data[data['Country'] =="France"] 
		.groupby(['InvoiceNo', 'Description'])['Quantity'] 
		.sum().unstack().reset_index().fillna(0) 
		.set_index('InvoiceNo')) 

# Transactions done in the United Kingdom 
basket_UK = (data[data['Country'] =="United Kingdom"] 
		.groupby(['InvoiceNo', 'Description'])['Quantity'] 
		.sum().unstack().reset_index().fillna(0) 
		.set_index('InvoiceNo')) 

# Transactions done in Portugal 
basket_Por = (data[data['Country'] =="Portugal"] 
		.groupby(['InvoiceNo', 'Description'])['Quantity'] 
		.sum().unstack().reset_index().fillna(0) 
		.set_index('InvoiceNo')) 

basket_Sweden = (data[data['Country'] =="Sweden"] 
		.groupby(['InvoiceNo', 'Description'])['Quantity'] 
		.sum().unstack().reset_index().fillna(0) 
		.set_index('InvoiceNo')) 



In [64]:
# Defining the hot encoding function to make the data suitable 
# for the concerned libraries 
def hot_encode(x): 
	if(x<= 0): 
		return 0
	if(x>= 1): 
		return 1

# Encoding the datasets 
basket_encoded = basket_France.applymap(hot_encode) 
basket_France = basket_encoded 


## Building rules based on Confidence measure

In [70]:
# Building the model 
frq_items = apriori(basket_France, min_support = 0.05, use_colnames = True) 

# Collecting the inferred rules in a dataframe 
rules = association_rules(frq_items, metric ="confidence", min_threshold = 1) 
rules = rules.sort_values(['confidence', 'lift'], ascending =[False, False]) 
print(rules.head()) 



                                         antecedents consequents  \
0                       (JUMBO BAG WOODLAND ANIMALS)   (POSTAGE)   
1  (PLASTERS IN TIN CIRCUS PARADE, RED TOADSTOOL ...   (POSTAGE)   
2  (PLASTERS IN TIN WOODLAND ANIMALS, RED TOADSTO...   (POSTAGE)   

   antecedent support  consequent support   support  confidence      lift  \
0            0.076531            0.765306  0.076531         1.0  1.306667   
1            0.051020            0.765306  0.051020         1.0  1.306667   
2            0.053571            0.765306  0.053571         1.0  1.306667   

   leverage  conviction  
0  0.017961         inf  
1  0.011974         inf  
2  0.012573         inf  


In [80]:
for i in range(len(rules.antecedents)):
    print(rules.antecedents[i]," ===> ",rules.consequents[i])

frozenset({'JUMBO BAG WOODLAND ANIMALS'})  ===>  frozenset({'POSTAGE'})
frozenset({'PLASTERS IN TIN CIRCUS PARADE', 'RED TOADSTOOL LED NIGHT LIGHT'})  ===>  frozenset({'POSTAGE'})
frozenset({'PLASTERS IN TIN WOODLAND ANIMALS', 'RED TOADSTOOL LED NIGHT LIGHT'})  ===>  frozenset({'POSTAGE'})
