## Dataset

In [2]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [3]:
# Import Dataset
data = pd.read_excel('Online Retail II.xlsx')
data.head()

  and should_run_async(code)


Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085.0,United Kingdom
1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.1,13085.0,United Kingdom
4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085.0,United Kingdom


In [4]:
# Menampilkan Kolom dari Dataset
data.columns

  and should_run_async(code)


Index(['Invoice', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'Price', 'Customer ID', 'Country'],
      dtype='object')

In [5]:
# Data
data.shape

  and should_run_async(code)


(1048575, 8)

In [6]:
# Mengecek missing value
data.isnull().values.any()

  and should_run_async(code)


True

In [7]:
# Menampilkan missing value
data.isnull().sum()

  and should_run_async(code)


Invoice             0
StockCode           0
Description      4372
Quantity            0
InvoiceDate         0
Price               0
Customer ID    236682
Country             0
dtype: int64

## Preprocessing

In [8]:
# Menghapus row yang tidak memiliki invoice number
data.dropna(axis = 0, subset =['Invoice'], inplace = True)
data['Invoice'] = data['Invoice'].astype('str')

#Menghapus transaksi yang menggunakan kredit
data = data[~data['Invoice'].str.contains('C')]

  and should_run_async(code)


In [9]:
# Menampilkan negara yang ada dalam dataset
data.Country.unique()

  and should_run_async(code)


array(['United Kingdom', 'France', 'USA', 'Belgium', 'Australia', 'EIRE',
       'Germany', 'Portugal', 'Denmark', 'Netherlands', 'Poland',
       'Channel Islands', 'Spain', 'Cyprus', 'Greece', 'Norway',
       'Austria', 'Sweden', 'United Arab Emirates', 'Finland', 'Italy',
       'Switzerland', 'Japan', 'Unspecified', 'Nigeria', 'Malta',
       'Bahrain', 'RSA', 'Bermuda', 'Hong Kong', 'Singapore', 'Thailand',
       'Israel', 'Lithuania', 'West Indies', 'Lebanon', 'Korea', 'Brazil',
       'Canada', 'Iceland', 'Saudi Arabia', 'Czech Republic',
       'European Community'], dtype=object)

In [10]:
# Membagi data berdasarkan wilayah transaksi
# Transaksi yang dilakukan di Switzerland
basket_Switzerland = (data[data['Country'] =="Switzerland"]
          .groupby(['Invoice','Description'])['Quantity']
          .sum().unstack().reset_index().fillna(0)
          .set_index('Invoice'))

  and should_run_async(code)


In [11]:
# Mendeskripsikan fungsi hot encode untuk membuat data yang sesuai
def hot_encode(x):
    if(x<= 0):
        return 0
    if(x>= 1):
        return 1

  and should_run_async(code)


In [12]:
# Menerapkan hot encoding
basket_encoded = basket_Switzerland.applymap(hot_encode)
basket_Switzerland = basket_encoded

  and should_run_async(code)


In [13]:
basket_Switzerland.head()

  and should_run_async(code)


Description,4 PURPLE FLOCK DINNER CANDLES,CHERRY BLOSSOM DECORATIVE FLASK,RED SPOT GIFT BAG LARGE,RED/WHITE DOT MINI CASES,SPACEBOY BABY GIFT SET,10 COLOUR SPACEBOY PEN,12 COLOURED PARTY BALLOONS,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE RED SPOTTY,12 PENCILS SMALL TUBE SKULL,...,WRAP PAISLEY PARK,WRAP PINK FAIRY CAKES,WRAP RED APPLES,WRAP RED VINTAGE DOILY,WRAP SUKI AND FRIENDS,"WRAP,SUKI AND FRIENDS",YELLOW METAL CHICKEN HEART,YOU'RE CONFUSING ME METAL SIGN,ZINC METAL HEART DECORATION,ZINC WILLIE WINKIE CANDLE STICK
Invoice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
492446,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
497594,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
500157,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
500158,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
501813,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Membangun Model

In [14]:
# Membangun model
frq_items = apriori(basket_Switzerland, min_support = 0.07, use_colnames = True)

# Mengumpulkan aturan yang disimpulkan dalam kerangka data
rules = association_rules(frq_items, metric ="lift", min_threshold = 1)
rules = rules.sort_values(['confidence', 'lift'], ascending =[False, False])
print(rules.head())

                                           antecedents  \
97                           (WATERING CAN PINK BUNNY)   
3                                 (BLUE POLKADOT BOWL)   
105                       (JUMBO BAG WOODLAND ANIMALS)   
246  (ROUND SNACK BOXES SET OF4 WOODLAND , WATERING...   
4                                 (CARD CIRCUS PARADE)   

                                           consequents  antecedent support  \
97                        (WATERING CAN BLUE ELEPHANT)            0.075269   
3                                 (RED RETROSPOT BOWL)            0.075269   
105  (WOODLAND CHARLOTTE BAG, ROUND SNACK BOXES SET...            0.086022   
246                    (RED TOADSTOOL LED NIGHT LIGHT)            0.075269   
4                             (SPACEBOY BIRTHDAY CARD)            0.075269   

     consequent support   support  confidence       lift  leverage  \
97             0.096774  0.075269         1.0  10.333333  0.067985   
3              0.107527  0.075269         

  and should_run_async(code)
