In [1]:
import pandas as pd
import numpy as np

# Import Dataset

In [2]:
# Read the four yearly sales workbooks (2020 through 2023) into separate frames.
sales_workbook_paths = (
    './Dataset/sales-(2020).xlsx',
    './Dataset/sales-(2021).xlsx',
    './Dataset/sales-(2022).xlsx',
    './Dataset/sales-(2023).xlsx',
)
dataset_xls_1, dataset_xls_2, dataset_xls_3, dataset_xls_4 = map(
    pd.read_excel, sales_workbook_paths)

In [3]:
# Stack the yearly frames on top of each other; ignore_index=True renumbers
# the rows so the combined frame gets one continuous 0..n-1 index.
yearly_frames = [dataset_xls_1, dataset_xls_2, dataset_xls_3, dataset_xls_4]
dataset = pd.concat(yearly_frames, ignore_index=True)

In [4]:
# Inspect the column labels of the combined frame.
dataset.columns

Index(['username', 'no-pesanan', 'tanggal-pesanan', 'nama-barang', 'jumlah',
       'dibayar', 'payment-method'],
      dtype='object')

In [5]:
# Display the combined frame (the rendered output elides the middle rows).
dataset

Unnamed: 0,username,no-pesanan,tanggal-pesanan,nama-barang,jumlah,dibayar,payment-method
0,surya251196,201104NURT3,2020-05-03,"NIBRAS SARIMBIT LAIKA BLACK, NIBRAS SARIMBIT E...",2,230000,COD
1,captain.marline,201104DSB34,2020-05-03,"NIBRAS SARIMBIT LAIKA BLACK, NIBRAS NBC 20 GAM...",2,230000,COD
2,ridhayashinta,201104JHVJ45,2020-05-03,"NIBRAS SARIMBIT LAIKA BLACK, NIBRAS KOKO NK 101",2,230000,COD
3,nurulismawati103,201104VGRX4,2020-05-03,"NIBRAS KOKO NSK 89 DAN GAMIS NB A89, NIBRAS KO...",2,230000,COD
4,nabilahauraaa,201104XSAEF,2020-05-04,"NIBRAS KOKO NSK 89 DAN GAMIS NB A90, NIBRAS CO...",2,230000,Pay Later
...,...,...,...,...,...,...,...
395,gusnajib,231003AQW34,2023-04-06,"NIBRAS SARIMBIT 70 COKLAT, NIBRAS SARIMBIT CAR...",2,230000,COD
396,shella_naura,231003FVG77,2023-04-06,"NIBRAS SARIMBIT 70 COKLAT, NIBRAS SARIMBIT CHE...",2,230000,COD
397,aldokristanto,231003NBV59,2023-04-06,"NIBRAS SARIMBIT 70 COKLAT, NIBRAS KOKO POLOS N...",2,230000,Pay Later
398,nurulrhmynti,231003MNB10,2023-04-07,"NIBRAS SARIMBIT 70 COKLAT, NIBRAS GAMIS NB B108",2,230000,COD


## Tokenize nama-barang

In [6]:
def _split_items(raw_items):
    """Turn one 'nama-barang' cell into a list of trimmed item names.

    Accepts a comma-separated string, an already-split list/tuple (so this
    cell can be re-run without breaking), or a missing value (returns []).
    Blank entries, e.g. from a trailing comma, are dropped.
    """
    if isinstance(raw_items, (list, tuple)):
        # Already tokenized by a previous run of this cell.
        parts = raw_items
    elif pd.isna(raw_items):
        # Missing cell: treat it as an order with no recorded items.
        return []
    else:
        # Split on the bare comma and trim afterwards, so "A,B" and "A, B"
        # both yield the same two items.
        parts = str(raw_items).split(',')
    stripped = (str(part).strip() for part in parts)
    return [name for name in stripped if name]


dataset['nama-barang'] = dataset['nama-barang'].apply(_split_items)

In [7]:
# Preview the first rows to check that 'nama-barang' now holds lists of item names.
dataset.head()

Unnamed: 0,username,no-pesanan,tanggal-pesanan,nama-barang,jumlah,dibayar,payment-method
0,surya251196,201104NURT3,2020-05-03,"[NIBRAS SARIMBIT LAIKA BLACK, NIBRAS SARIMBIT ...",2,230000,COD
1,captain.marline,201104DSB34,2020-05-03,"[NIBRAS SARIMBIT LAIKA BLACK, NIBRAS NBC 20 GA...",2,230000,COD
2,ridhayashinta,201104JHVJ45,2020-05-03,"[NIBRAS SARIMBIT LAIKA BLACK, NIBRAS KOKO NK 101]",2,230000,COD
3,nurulismawati103,201104VGRX4,2020-05-03,"[NIBRAS KOKO NSK 89 DAN GAMIS NB A89, NIBRAS K...",2,230000,COD
4,nabilahauraaa,201104XSAEF,2020-05-04,"[NIBRAS KOKO NSK 89 DAN GAMIS NB A90, NIBRAS C...",2,230000,Pay Later


# Implement Apriori Algorithm

In [8]:
from apyori import apriori

In [9]:
# Run apyori's Apriori over the per-order item lists using the library's
# default thresholds (no keyword arguments are passed), then collect the
# yielded records into a list so they can be displayed.
apriori_records = apriori(dataset['nama-barang'])
result = list(apriori_records)
result

[RelationRecord(items=frozenset({'NIBRAS ATASAN PRIA NK 120'}), support=0.1275, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'NIBRAS ATASAN PRIA NK 120'}), confidence=0.1275, lift=1.0)]),
 RelationRecord(items=frozenset({'NIBRAS KOKO NK 52'}), support=0.1, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'NIBRAS KOKO NK 52'}), confidence=0.1, lift=1.0)]),
 RelationRecord(items=frozenset({'NIBRAS SARIMBIT 70 COKLAT'}), support=0.215, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'NIBRAS SARIMBIT 70 COKLAT'}), confidence=0.215, lift=1.0)])]

# Implement FP Growth Algorithm

In [10]:
from itertools import groupby
from collections import Counter


In [11]:
# Flatten the per-order item lists into one long list holding an entry for
# every item occurrence (repeats across orders are kept).
transactions = dataset['nama-barang'].tolist()
transaction_items = [
    item
    for transaction in transactions
    for item in transaction
]


### Frekuensi Kemunculan Tiap Item

In [12]:
# Count how often each item occurs: tag every occurrence with 1, sum the
# tags per item name, and list the most frequent items first.
items_df = pd.DataFrame(transaction_items, columns=["items"]).assign(
    incident_count=1)

occurrences_per_item = items_df.groupby("items").sum()
items_df_table = occurrences_per_item.sort_values(
    by="incident_count", ascending=False).reset_index()

items_df_table


Unnamed: 0,items,incident_count
0,NIBRAS SARIMBIT 70 COKLAT,86
1,NIBRAS ATASAN PRIA NK 120,51
2,NIBRAS KOKO NK 52,40
3,NIBRAS SARIMBIT FAIDA PLUM,30
4,NIBRAS SARIMBIT CARYNA ARMY COUPLE,29
5,NIBRAS SARIMBIT LAIKA BLACK,29
6,NIBRAS SARIMBIT SYARI,25
7,NIBRAS SARIMBIT CHESA GREY COUPLE,23
8,NIBRAS KOKO POLOS NK 83,23
9,NIBRAS SARIMBIT FAIDAA GREY LILAC,22


### Plotting with Treemap

In [13]:
import plotly.express as px


In [14]:
# Constant parent label so every item hangs under a single treemap root.
items_df_table["all"] = "Top 50 items"

# Tile size and colour both encode each item's incident_count; only the
# first 50 rows of the frequency table are plotted.
top_items = items_df_table.head(50)
fig_items = px.treemap(
    top_items,
    path=['all', "items"],
    values='incident_count',
    color=top_items["incident_count"],
    hover_data=['items'],
    color_continuous_scale='Blues',
)


In [15]:
# Render the treemap of the most frequent items.
fig_items.show()

### Pre-Processing Dataset

In [16]:
# Create an empty DataFrame with one column per unique item, meant to hold
# one row per transaction.
item_columns = items_df_table["items"].tolist()
items_transaction_df = pd.DataFrame(columns=item_columns)

In [17]:

# One-hot encode the transactions: one row per order, one boolean column per
# item, True where the order contains that item.
#
# The matrix is built with a single DataFrame constructor call rather than by
# concatenating a one-row frame per order: repeated pd.concat re-copies all
# earlier rows on every iteration (quadratic in the number of orders), and
# concatenating onto an empty object-dtype frame makes the resulting column
# dtype depend on the pandas version. dtype=bool pins the boolean encoding.
item_columns = items_df_table["items"].tolist()
encoded_rows = []
for transaction in dataset['nama-barang']:
    ordered_items = set(transaction)  # constant-time membership checks per item
    encoded_rows.append([item in ordered_items for item in item_columns])

items_transaction_df = pd.DataFrame(
    encoded_rows, columns=item_columns, dtype=bool)

items_transaction_df


Unnamed: 0,NIBRAS SARIMBIT 70 COKLAT,NIBRAS ATASAN PRIA NK 120,NIBRAS KOKO NK 52,NIBRAS SARIMBIT FAIDA PLUM,NIBRAS SARIMBIT CARYNA ARMY COUPLE,NIBRAS SARIMBIT LAIKA BLACK,NIBRAS SARIMBIT SYARI,NIBRAS SARIMBIT CHESA GREY COUPLE,NIBRAS KOKO POLOS NK 83,NIBRAS SARIMBIT FAIDAA GREY LILAC,...,NIBRAS KOKO NSK 89 DAN GAMIS NB A91,NIBRAS KOKO NSK 89 DAN GAMIS NB A90,NIBRAS KOKO NK 109,NIBRAS KOKO NK 101 NIBRAS,NIBRAS SARIMBIT FAIDAA PLUM,NIBRAS GAMIS NB B108 NIBRAS,NIBRAS COUPLE GAMIS NB B94 DAN KOKO NSK 96,NIBRAS COUPLE GAMIS NB B94 DAN KOKO NSK 95,SARIMBIT DAANIA BURGUNDY COUPLE,SARIMBIT KHAWLA VIOLET
0,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,True,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
396,True,False,False,False,False,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False
397,True,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
398,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


### Implementing FP-Growth Algorithm

In [18]:
from mlxtend.frequent_patterns import fpgrowth, association_rules


In [None]:
# Mine frequent itemsets from the one-hot matrix with FP-Growth.
# use_colnames=True reports itemsets by item name instead of column position.
fpgrowth_res = fpgrowth(
    items_transaction_df,
    min_support=0.001,
    use_colnames=True,
)
fpgrowth_res

NameError: name 'fpgrowth' is not defined

In [None]:
# Derive association rules from the frequent itemsets, filtering on the
# confidence metric.
# NOTE(review): min_threshold=1 on confidence presumably keeps only rules
# with a confidence of 1.0 — confirm this strictness is intended.
association_rules_res = association_rules(
    fpgrowth_res,
    metric="confidence",
    min_threshold=1,
)
association_rules_res


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric


In [None]:
# Show the rules ordered from highest to lowest confidence.
association_rules_res.sort_values(by="confidence", ascending=False)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
