In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from apyori import apriori
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import datetime

## 1. Data Selection
> Prepare for dataset that would be used

In [4]:
# Import dataset
data = pd.read_excel("data_retail.xlsx")

In [5]:
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,BRANCH_SPLR,BRANCHNAME_SPLR,warehouseProductsID,BARCODEID,StockCode,PRODUCT,PRODUCT_CATEGORY,Quantity,...,CustomerID,CUSTNAME,ADDRESS,KOTA,PROVINSI,NEGARA,CHANNELID_SPLR,CHANNELNAME_SPLR,SUBDISTID,SUBDIST_NAME
0,536365,2020-12-01 08:26:00,13,SAMARINDA,10001,8992753282401,85123A,123 BENDERA COKLAT 300G,SUSU,6,...,17850.0,DUA PUTRI SLAMET RIYADI,JL. SLAMET RIYADI,SAMARINDA,KALIMANTAN TIMUR,INDONESIA,32,Toko Kelontong,130113,CV. EKA PUTRA
1,536365,2020-12-01 08:26:00,13,SAMARINDA,20020,8999909192034,71053,2.3.4 FILTER,ROKOK,6,...,17850.0,SANURI,JL. M. SAID,SAMARINDA,KALIMANTAN TIMUR,INDONESIA,32,Toko Kelontong,130113,CV. EKA PUTRA
2,536365,2020-12-01 08:26:00,19,YOGYAKARTA,20021,8999909007147,84406B,234 KERETEK,ROKOK,8,...,17850.0,EMI MBAK,PS. TALUN NO.63,MAGELANG,JAWA TENGAH,INDONESIA,32,Toko Kelontong,190105,PT. KTRI DISTRIBUSI
3,536365,2020-12-01 08:26:00,13,SAMARINDA,20018,8999909172234,84029G,234 KERETEK 12,ROKOK,6,...,17850.0,RISKA CELL ADAM MALIK,JL. ADAM MALIK,SAMARINDA,KALIMANTAN TIMUR,INDONESIA,32,Toko Kelontong,130113,CV. EKA PUTRA
4,536365,2020-12-01 08:26:00,19,YOGYAKARTA,20019,8999909005860,84029E,234 KERETEK 16,ROKOK,6,...,17850.0,ABADI MART,JL.GROWONG - PUCUNG REJO MUNTILAN.,MAGELANG,JAWA TENGAH,INDONESIA,42,Mini Market,190105,PT. KTRI DISTRIBUSI


In [6]:
data.isnull().sum()

InvoiceNo                   0
InvoiceDate                 0
BRANCH_SPLR                 0
BRANCHNAME_SPLR             0
warehouseProductsID         0
BARCODEID                   0
StockCode                   0
PRODUCT                     0
PRODUCT_CATEGORY            0
Quantity                    0
UnitPrice                   0
UnitPriceRupiah             0
oldCUSTID                   0
CustomerID             135080
CUSTNAME                    0
ADDRESS                   172
KOTA                    16672
PROVINSI                14840
NEGARA                      0
CHANNELID_SPLR              0
CHANNELNAME_SPLR            0
SUBDISTID                   0
SUBDIST_NAME                0
dtype: int64

In [7]:
data.dtypes

InvoiceNo                      object
InvoiceDate            datetime64[ns]
BRANCH_SPLR                     int64
BRANCHNAME_SPLR                object
warehouseProductsID            object
BARCODEID                       int64
StockCode                      object
PRODUCT                        object
PRODUCT_CATEGORY               object
Quantity                        int64
UnitPrice                     float64
UnitPriceRupiah               float64
oldCUSTID                      object
CustomerID                    float64
CUSTNAME                       object
ADDRESS                        object
KOTA                           object
PROVINSI                       object
NEGARA                         object
CHANNELID_SPLR                  int64
CHANNELNAME_SPLR               object
SUBDISTID                       int64
SUBDIST_NAME                   object
dtype: object

In [8]:
data.head(3)

Unnamed: 0,InvoiceNo,InvoiceDate,BRANCH_SPLR,BRANCHNAME_SPLR,warehouseProductsID,BARCODEID,StockCode,PRODUCT,PRODUCT_CATEGORY,Quantity,...,CustomerID,CUSTNAME,ADDRESS,KOTA,PROVINSI,NEGARA,CHANNELID_SPLR,CHANNELNAME_SPLR,SUBDISTID,SUBDIST_NAME
0,536365,2020-12-01 08:26:00,13,SAMARINDA,10001,8992753282401,85123A,123 BENDERA COKLAT 300G,SUSU,6,...,17850.0,DUA PUTRI SLAMET RIYADI,JL. SLAMET RIYADI,SAMARINDA,KALIMANTAN TIMUR,INDONESIA,32,Toko Kelontong,130113,CV. EKA PUTRA
1,536365,2020-12-01 08:26:00,13,SAMARINDA,20020,8999909192034,71053,2.3.4 FILTER,ROKOK,6,...,17850.0,SANURI,JL. M. SAID,SAMARINDA,KALIMANTAN TIMUR,INDONESIA,32,Toko Kelontong,130113,CV. EKA PUTRA
2,536365,2020-12-01 08:26:00,19,YOGYAKARTA,20021,8999909007147,84406B,234 KERETEK,ROKOK,8,...,17850.0,EMI MBAK,PS. TALUN NO.63,MAGELANG,JAWA TENGAH,INDONESIA,32,Toko Kelontong,190105,PT. KTRI DISTRIBUSI


In [9]:
# Mengganti kolom PERIODE menjadi tipe data datetime
data['InvoiceDate'] = pd.to_datetime(data['InvoiceDate'])

In [10]:
# Menghapus column yang tidak digunakan / duplicate columns
# df = df.drop(columns=['SALES_HNA_RUPIAH','SALES_HNA_UNIT','NET_SALES','NET_SALES_BU'])

In [11]:
# Clean up spaces in product description, product category and remove any rows that don't have a valid invoice
data['PRODUCT'] = data['PRODUCT'].str.strip()
data['PRODUCT_CATEGORY'] = data['PRODUCT_CATEGORY'].str.strip()

data.dropna(axis=0, subset=['InvoiceNo'], inplace=True)

In [12]:
# menghapus variabel inoviceNO yang diawali dengan huruf C pada invoice numbernya
data['InvoiceNo'] = data['InvoiceNo'].astype('str')
data = data[~(data['InvoiceNo'].str[0] == 'C')]

### 2.2 Data Transformation

In [13]:
basket = (data[data['PROVINSI'] =="JAWA TENGAH"].groupby(['InvoiceNo', 'PRODUCT_CATEGORY'])['Quantity'].count()\
                                      .unstack().reset_index().fillna(0)\
                                      .set_index('InvoiceNo'))
basket.head()

PRODUCT_CATEGORY,ALAT LISTRIK,ALAT RUMAH TANGGA,BEER,BERAS,BISKUIT,BUMBU,COKELAT,DETERGEN,DIET FOOD,ELEKTRONIK,...,SEMIR SEPATU,SLAI/JAM,SNACK,STATIONERY,SUSU,SYRUP,TANDAS,TEH & KOPI,TEPUNG,TISSUE
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536367,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536368,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536370,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536371,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536373,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [14]:
# Show a subset of columns
basket.iloc[:,[0,1,2,3,4,5,6,7]].head()

PRODUCT_CATEGORY,ALAT LISTRIK,ALAT RUMAH TANGGA,BEER,BERAS,BISKUIT,BUMBU,COKELAT,DETERGEN
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
536367,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536368,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536370,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536371,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536373,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [15]:
# Melakukan proses encoding -> Mengubah data kebentuk angka, agar sistem atau komputer dapat memahami informasi dari dataset
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

basket_sets = basket.applymap(encode_units)
basket_sets.head(5)

  basket_sets = basket.applymap(encode_units)


PRODUCT_CATEGORY,ALAT LISTRIK,ALAT RUMAH TANGGA,BEER,BERAS,BISKUIT,BUMBU,COKELAT,DETERGEN,DIET FOOD,ELEKTRONIK,...,SEMIR SEPATU,SLAI/JAM,SNACK,STATIONERY,SUSU,SYRUP,TANDAS,TEH & KOPI,TEPUNG,TISSUE
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536367,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536368,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536370,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536371,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536373,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


## 3. Data Mining
### Data mining dapat dikenal juga dengan istilah data exploration

In [16]:
# Build up the frequent items, the rules, and model

frequent_itemsets = apriori(basket_sets, min_support=0.1, use_colnames=True)




In [17]:
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.326438,(ALAT RUMAH TANGGA)
1,0.348726,(BISKUIT)
2,0.172516,(BUMBU)
3,0.166359,(COKELAT)
4,0.273488,(DETERGEN)
...,...,...
250,0.102204,"(MINUMAN, KOSMETIK, SABUN & SAMPHOO, SNACK)"
251,0.117473,"(MINUMAN, KOSMETIK, SABUN & SAMPHOO, SUSU)"
252,0.102081,"(PARFUM, KOSMETIK, SABUN & SAMPHOO, OBATAN)"
253,0.106760,"(SUSU, PARFUM, KOSMETIK, SABUN & SAMPHOO)"


In [18]:
rules1 = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

In [19]:
rules1.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(BISKUIT),(ALAT RUMAH TANGGA),0.348726,0.326438,0.177811,0.509887,1.561974,0.063973,1.3743,0.552431
1,(ALAT RUMAH TANGGA),(BISKUIT),0.326438,0.348726,0.177811,0.5447,1.561974,0.063973,1.43043,0.534152
2,(DETERGEN),(ALAT RUMAH TANGGA),0.273488,0.326438,0.140377,0.513282,1.572375,0.0511,1.383887,0.501051
3,(ALAT RUMAH TANGGA),(DETERGEN),0.326438,0.273488,0.140377,0.430026,1.572375,0.0511,1.274641,0.540439
4,(KOSMETIK),(ALAT RUMAH TANGGA),0.412757,0.326438,0.181997,0.440931,1.350735,0.047258,1.204792,0.442172


In [20]:
result1 = rules1[(rules1['lift'] >= 1) &
               (rules1['confidence'] >= 0.8)]

apr_result = result1.sort_values(by='confidence', ascending=False)
apr_result.head(20) # 64 rows × 9 columns

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
1240,"(PARFUM, SABUN & SAMPHOO, OBATAN)",(KOSMETIK),0.115503,0.412757,0.102081,0.883795,2.1412,0.054406,5.053522,0.602571
1253,"(SUSU, PARFUM, SABUN & SAMPHOO)",(KOSMETIK),0.12129,0.412757,0.10676,0.880203,2.132497,0.056697,4.901986,0.60437
1087,"(SABUN & SAMPHOO, BISKUIT, OBATAN)",(MINUMAN),0.117596,0.383327,0.102697,0.873298,2.278206,0.057619,4.867128,0.635829
1183,"(MINUMAN, PARFUM, SABUN & SAMPHOO)",(KOSMETIK),0.1405,0.412757,0.122522,0.872042,2.112725,0.06453,4.589344,0.612772
1058,"(PARFUM, SABUN & SAMPHOO, BISKUIT)",(KOSMETIK),0.127694,0.412757,0.111316,0.871745,2.112006,0.05861,4.578729,0.603591
1168,"(SUSU, KOSMETIK, OBATAN)",(MINUMAN),0.120552,0.383327,0.104913,0.870276,2.270321,0.058702,4.753722,0.636233
1044,"(SABUN & SAMPHOO, BISKUIT, OBATAN)",(KOSMETIK),0.117596,0.412757,0.102327,0.870157,2.108158,0.053789,4.522718,0.595705
1072,"(SUSU, SABUN & SAMPHOO, BISKUIT)",(KOSMETIK),0.119074,0.412757,0.103559,0.8697,2.107051,0.05441,4.506857,0.596421
1212,"(MINUMAN, SABUN & SAMPHOO, SNACK)",(KOSMETIK),0.118212,0.412757,0.102204,0.864583,2.094654,0.053411,4.336563,0.592653
1155,"(MINUMAN, SABUN & SAMPHOO, OBATAN)",(KOSMETIK),0.133481,0.412757,0.115257,0.863469,2.091954,0.060161,4.301158,0.602385


In [21]:
apr_result.iloc[5]

antecedents           (SUSU, KOSMETIK, OBATAN)
consequents                          (MINUMAN)
antecedent support                    0.120552
consequent support                    0.383327
support                               0.104913
confidence                            0.870276
lift                                  2.270321
leverage                              0.058702
conviction                            4.753722
zhangs_metric                         0.636233
Name: 1168, dtype: object

## 4. Interpretation

In [22]:
# check barang kedua untuk kombinasi barang pertama
# pairing kombinasi dari pembelian produk pertama yang paling banyak untuk barang kedua adalah

apr_result['consequents'].value_counts()

consequents
(KOSMETIK)           31
(MINUMAN)            22
(SABUN & SAMPHOO)    11
Name: count, dtype: int64

<br>

**Kelas B**

## *CREATE NEW RULE/MODEL* - **JAWA BARAT**

---



min support 0.1,
confidence 0.8

- KOSMETIK
- MINUMAN
- SABUN DAN SAMPO
- BISKUIT

## *CREATE NEW RULE/MODEL* - **BALI**

min support 0.2,
confidence 0.8


## *CREATE NEW RULE/MODEL* - **KALTIM**

min support 0.2,
confidence 0.8

- BISKUIT
- ALAT RUMAHTANGGA
- DETERGEN
- KOSMETIK

## *CREATE NEW RULE/MODEL* - **SULAWESI SELATAN**

min support 0.3,
confidence 0.9

- ALAT RUMAHTANGGA
- SABUN DAN SAMPOO
- KOSMETIK
- BISKUIT

**Kelas A**

## *CREATE NEW RULE/MODEL* - **BANTEN**

---

min support 0.1,
confidence 0.7

- KOSMETIK
- MINUMAN
- SABUN DAN SAMPO
- SUSU
- BISKUIT
- PARFUM


## *CREATE NEW RULE/MODEL* - **JAWA TIMUR**

min support 0.3 --> 0.1
confidence 0.7

- KOSMETIK
- MINUMAN
- SABUN DAN SAMPOO
- BISKUIT



## *CREATE NEW RULE/MODEL* - **KALTIM**

min support 0.2 --> 0.1
confidence 0.8

- SABUN DAN SAMPOO
- DETERJEN
- SUSU
- MINUMAN

## *CREATE NEW RULE/MODEL* - **KALIMANTAN TENGAH**

min support 0.1,
confidence 0.8

- KOSMETIK
- MINUMAN
- SABUN DAN SAMPOO

KELAS C

## *CREATE NEW RULE/MODEL* - **BANTEN**

---

min support 0.2,
confidence 0.8




## *CREATE NEW RULE/MODEL* - **JAWA TIMUR**

min support 0.3
confidence 0.9




## *CREATE NEW RULE/MODEL* - **KALTIM**

min support 0.1
confidence 0.7



## *CREATE NEW RULE/MODEL* - **SUMATRA BARAT**

min support 0.3,
confidence 0.7

In [24]:
# KELAS D

## *CREATE NEW RULE/MODEL* - **BALI**

---

min support 0.1,
confidence 0.8

- kosmetik
- minuman
- sabun dan sampo




## *CREATE NEW RULE/MODEL* - **DI YOGYAKARTA**

min support 0.1
confidence 0.7

- kosmetik
- minuman




In [25]:
jogja = (data[data['KOTA'] =="YOGYAKARTA"].groupby(['InvoiceNo', 'PRODUCT_CATEGORY'])['Quantity'].count()\
                                     .unstack().reset_index().fillna(0)\
                                     .set_index('InvoiceNo'))
jogja.head()

PRODUCT_CATEGORY,ALAT LISTRIK,ALAT RUMAH TANGGA,BISKUIT,BUMBU,COKELAT,DETERGEN,DIET FOOD,ELEKTRONIK,ES,GULAPUTIH,...,ROKOK,SABUN & SAMPHOO,SEMIR SEPATU,SNACK,STATIONERY,SUSU,TANDAS,TEH & KOPI,TEPUNG,TISSUE
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536405,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536406,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
536437,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536464,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
536520,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
# Show a subset of columns
# Melakukan proses encoding -> Mengubah data kebentuk angka, agar sistem atau komputer dapat memahami informasi dari dataset
jogja.iloc[:,[0,1,2,3,4,5,6,7]].head()


PRODUCT_CATEGORY,ALAT LISTRIK,ALAT RUMAH TANGGA,BISKUIT,BUMBU,COKELAT,DETERGEN,DIET FOOD,ELEKTRONIK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
536405,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536406,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536437,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536464,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536520,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [27]:
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

jogja_sets = jogja.applymap(encode_units)
jogja_sets

  jogja_sets = jogja.applymap(encode_units)


PRODUCT_CATEGORY,ALAT LISTRIK,ALAT RUMAH TANGGA,BISKUIT,BUMBU,COKELAT,DETERGEN,DIET FOOD,ELEKTRONIK,ES,GULAPUTIH,...,ROKOK,SABUN & SAMPHOO,SEMIR SEPATU,SNACK,STATIONERY,SUSU,TANDAS,TEH & KOPI,TEPUNG,TISSUE
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536405,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536406,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
536437,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536464,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
536520,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
560989,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
560991,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
560992,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
560993,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
# Build up the frequent items, the rules, and model
#diberikan min suppport = 10% 0.1

frequent_itemsets = apriori(jogja_sets, min_support=0.1, use_colnames=True)
frequent_itemsets



Unnamed: 0,support,itemsets
0,0.113139,(ALAT RUMAH TANGGA)
1,0.142336,(BISKUIT)
2,0.131387,(DETERGEN)
3,0.213504,(KOSMETIK)
4,0.140511,(MINUMAN)
5,0.122263,(PARFUM)
6,0.173358,(SABUN & SAMPHOO)
7,0.101277,(SNACK)
8,0.107664,(STATIONERY)


In [29]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric


In [31]:
result = rules[(rules['lift'] >= 1) &
               (rules['confidence'] >= 0.8)]

apr_result = result.sort_values(by='confidence', ascending=False)
apr_result.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric


In [35]:
apr_result = data.head(10)


In [36]:
if apr_result.shape[0] > 5:
    print(apr_result.iloc[5])
else:
    print("DataFrame memiliki kurang dari 6 baris.")

InvoiceNo                                              536365
InvoiceDate                               2020-12-01 08:26:00
BRANCH_SPLR                                                19
BRANCHNAME_SPLR                                    YOGYAKARTA
warehouseProductsID                                     20022
BARCODEID                                       8999269471015
StockCode                                               22752
PRODUCT                                   26-PROMIL GOLD 400G
PRODUCT_CATEGORY                                       TEPUNG
Quantity                                                    2
UnitPrice                                                7.65
UnitPriceRupiah                                      109395.0
oldCUSTID                                             1915415
CustomerID                                            17850.0
CUSTNAME                                        CV PENI PUTRI
ADDRESS                NOLOGATEN, CATURTUNGGAL, DEPOK, SLEMAN
KOTA    

## *CREATE NEW RULE/MODEL* - **JAWA BARAT**

min support 0.1
confidence 0.8

- kosmetik
- minuman
- sabun dan sampo



## *CREATE NEW RULE/MODEL* - **JAMBI**

min support 0.2,
confidence 0.8

- kosmetik
- minuman
- sabun dan sampo
- biskuit
- parfum