In [2]:
import pandas as pd
from datetime import datetime
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import association_rules

In [3]:
df = pd.read_csv('OnlineRetail.csv', delimiter=';')

In [4]:
df = df[df.Country=='Japan']

In [5]:
df = df.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)

In [6]:
df['UnitPrice'] = df['UnitPrice'].str.replace(',', '.').astype(float)


In [7]:
df = df.dropna()
df=df[df['Quantity'] > 0]
df=df[df['UnitPrice'] > 0]

In [8]:
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
9783,537218,85016,SET OF 6 VINTAGE NOTELETS KIT,6,05/12/2010 15:46,2.55,12763.0,Japan
9784,537218,21506,"FANCY FONT BIRTHDAY CARD,",12,05/12/2010 15:46,0.42,12763.0,Japan
9785,537218,22716,CARD CIRCUS PARADE,12,05/12/2010 15:46,0.42,12763.0,Japan
9786,537218,22983,CARD BILLBOARD FONT,12,05/12/2010 15:46,0.42,12763.0,Japan
9787,537218,22030,SWALLOWS GREETING CARD,12,05/12/2010 15:46,0.42,12763.0,Japan


In [11]:
def return_one(x):
    return 1

In [10]:
transaction_filtered = df[['InvoiceNo','Description','Quantity']].copy()
transaction_filtered

Unnamed: 0,InvoiceNo,Description,Quantity
9783,537218,SET OF 6 VINTAGE NOTELETS KIT,6
9784,537218,"FANCY FONT BIRTHDAY CARD,",12
9785,537218,CARD CIRCUS PARADE,12
9786,537218,CARD BILLBOARD FONT,12
9787,537218,SWALLOWS GREETING CARD,12
...,...,...,...
475874,576923,RED RETROSPOT ROUND CAKE TINS,12
475875,576923,VINTAGE DOILY TRAVEL SEWING KIT,100
475876,576923,VINTAGE DOILY DELUXE SEWING KIT,40
475877,576923,PACK OF 12 VINTAGE DOILY TISSUES,144


In [12]:
table = pd.pivot_table(transaction_filtered, values='Quantity', index=['InvoiceNo'],
                    columns=['Description'], aggfunc=return_one, fill_value=0)
table.head()

Description,I LOVE LONDON MINI BACKPACK,12 PENCILS TALL TUBE RED RETROSPOT,20 DOLLY PEGS RETROSPOT,3 HOOK HANGER MAGIC GARDEN,36 PENCILS TUBE RED RETROSPOT,4 TRADITIONAL SPINNING TOPS,5 HOOK HANGER MAGIC TOADSTOOL,5 HOOK HANGER RED MAGIC TOADSTOOL,60 CAKE CASES DOLLY GIRL DESIGN,ABC TREASURE BOOK BOX,...,VINTAGE DONKEY TAIL GAME,VINTAGE KEEPSAKE BOX PARIS DAYS,VINTAGE LEAF MAGNETIC NOTEPAD,WALL TIDY RETROSPOT,WHITE HEART CONFETTI IN TUBE,WHITE SOAP RACK WITH 2 BOTTLES,WHITE WIRE EGG HOLDER,WOODEN HAPPY BIRTHDAY GARLAND,WOODEN SCHOOL COLOURING SET,WORLD WAR 2 GLIDERS ASSTD DESIGNS
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
537218,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
537899,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
538397,0,0,1,1,0,0,1,1,0,0,...,0,0,0,0,0,0,1,1,0,0
543179,0,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
543518,0,0,0,1,0,0,0,1,0,0,...,0,1,0,1,0,0,0,0,0,0


In [13]:
frequent_itemsets_aus_apriori = apriori(table, min_support=0.2, use_colnames=True)
frequent_itemsets_aus_apriori



Unnamed: 0,support,itemsets
0,0.210526,(BASKET OF TOADSTOOLS)
1,0.210526,(CHARLOTTE BAG DOLLY GIRL DESIGN)
2,0.210526,(LUNCH BAG DOLLY GIRL DESIGN)
3,0.210526,(MINI WOODEN HAPPY BIRTHDAY GARLAND)
4,0.263158,(PACK OF 12 TRADITIONAL CRAYONS)
5,0.210526,(RABBIT NIGHT LIGHT)
6,0.263158,(RED SPOTTY BISCUIT TIN)
7,0.210526,"(SET 3 RETROSPOT TEA,COFFEE,SUGAR)"
8,0.210526,(SET OF 72 PINK HEART PAPER DOILIES)
9,0.210526,(SET OF 72 RETROSPOT PAPER DOILIES)


In [14]:
rules = association_rules(frequent_itemsets_aus_apriori, metric="lift", min_threshold=0.2)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(BASKET OF TOADSTOOLS),(RED SPOTTY BISCUIT TIN),0.210526,0.263158,0.210526,1.0,3.8,0.155125,inf
1,(RED SPOTTY BISCUIT TIN),(BASKET OF TOADSTOOLS),0.263158,0.210526,0.210526,0.8,3.8,0.155125,3.947368
2,"(SET 3 RETROSPOT TEA,COFFEE,SUGAR)",(BASKET OF TOADSTOOLS),0.210526,0.210526,0.210526,1.0,4.75,0.166205,inf
3,(BASKET OF TOADSTOOLS),"(SET 3 RETROSPOT TEA,COFFEE,SUGAR)",0.210526,0.210526,0.210526,1.0,4.75,0.166205,inf
4,(LUNCH BAG DOLLY GIRL DESIGN),(CHARLOTTE BAG DOLLY GIRL DESIGN),0.210526,0.210526,0.210526,1.0,4.75,0.166205,inf
5,(CHARLOTTE BAG DOLLY GIRL DESIGN),(LUNCH BAG DOLLY GIRL DESIGN),0.210526,0.210526,0.210526,1.0,4.75,0.166205,inf
6,(RED SPOTTY BISCUIT TIN),(CHARLOTTE BAG DOLLY GIRL DESIGN),0.263158,0.210526,0.210526,0.8,3.8,0.155125,3.947368
7,(CHARLOTTE BAG DOLLY GIRL DESIGN),(RED SPOTTY BISCUIT TIN),0.210526,0.263158,0.210526,1.0,3.8,0.155125,inf
8,(LUNCH BAG DOLLY GIRL DESIGN),(RED SPOTTY BISCUIT TIN),0.210526,0.263158,0.210526,1.0,3.8,0.155125,inf
9,(RED SPOTTY BISCUIT TIN),(LUNCH BAG DOLLY GIRL DESIGN),0.263158,0.210526,0.210526,0.8,3.8,0.155125,3.947368


In [124]:
from apyori import apriori
association_rules = apriori(table, min_support=0.1, min_confidence=0.6, min_lift=4, min_length=2)
association_results = list(association_rules)

#Lihat jumlah aturan asosiasi yg dihasilkan
print(len(association_results))
print()
#Lihat aturan asosiasi pertama (ke-0)
print(association_results[0])

KeyboardInterrupt: 