# 📊 Tugas Praktikum: Association Rule

Link dataset:\
🔗 https://kaggle.com/datasets/848632beef653350630ff93062c80ed2d23a4948a4c4f24593376954ae144dd4

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
from mlxtend.preprocessing import TransactionEncoder
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample

## 1. Unduh Dataset

In [2]:
# Load the raw retail transaction export into a DataFrame. The path is relative,
# so the CSV must sit in the notebook's working directory.
DATASET_PATH = 'transaksi_ritel_jateng.csv'
df = pd.read_csv(DATASET_PATH)

## 2. Eksplorasi Data

In [3]:
# Preview the first five rows (head() defaults to n=5) to check the columns and values.
df.head()

Unnamed: 0,InvoiceNo,InvoiceDate,BRANCH_SPLR,BRANCHNAME_SPLR,warehouseProductsID,BARCODEID,StockCode,PRODUCT,PRODUCT_CATEGORY,Quantity,...,UnitPriceRupiah,oldCUSTID,CustomerID,CUSTNAME,ADDRESS,KOTA,CHANNELID_SPLR,CHANNELNAME_SPLR,SUBDISTID,SUBDIST_NAME
0,536367,2020-12-01 08:34:00,19,YOGYAKARTA,A2375,9555021502350,84969,A LICAFE 100G,MINUMAN,6,...,60775.0,1922038,13047.0,IMAM,NEPAK BULUREJO (DPN SMKN 1),MAGELANG,32,Toko Kelontong,190105,PT. KTRI DISTRIBUSI
1,536367,2020-12-01 08:34:00,19,YOGYAKARTA,A2568,8992761111212,22622,A&W SARSAPARILA 330ML,MINUMAN,2,...,142285.0,1922807,13047.0,A&W MART,JL RAYA KANDANGAN JLN.,TEMANGGUNG,32,Toko Kelontong,190105,PT. KTRI DISTRIBUSI
2,536368,2020-12-01 08:34:00,6,SEMARANG,A2416,8850305310340,22960,ABAKUS PERMEN 30G KTK BLT,MAKANAN KALENG,6,...,60775.0,1630992,13047.0,ANDALAN (MOTORIS),JL. RAYA TUNTANG - MBERAN,SALATIGA,32,Toko Kelontong,60315,PT. KELUARGA SEJAHTRA
3,536368,2020-12-01 08:34:00,6,SEMARANG,A2432,8850305310258,22913,ABAKUS PERMEN BONEKA,MINUMAN,3,...,70785.0,1631972,13047.0,AMBARWATI (MOTORIS),AMBARAWA,SEMARANG,32,Toko Kelontong,60315,PT. KELUARGA SEJAHTRA
4,536370,2020-12-01 08:45:00,36,PURWOKERTO,A2678,8992388121243,21724,ABC CUP SELERA PEDAS 65G TOMAT,MINUMAN,12,...,12155.0,3217503,12583.0,SALMA SLM0401,TANJUNG,BANYUMAS,32,Toko Kelontong,320302,CV. CITRA BERUSAHA


In [4]:
# Print the column list with non-null counts and dtypes, to spot columns with
# missing values before cleaning.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 138644 entries, 0 to 138643
Data columns (total 21 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   InvoiceNo            138644 non-null  object 
 1   InvoiceDate          138644 non-null  object 
 2   BRANCH_SPLR          138644 non-null  int64  
 3   BRANCHNAME_SPLR      138644 non-null  object 
 4   warehouseProductsID  138644 non-null  object 
 5   BARCODEID            138644 non-null  int64  
 6   StockCode            138644 non-null  object 
 7   PRODUCT              138644 non-null  object 
 8   PRODUCT_CATEGORY     138644 non-null  object 
 9   Quantity             138644 non-null  int64  
 10  UnitPrice            138644 non-null  float64
 11  UnitPriceRupiah      138644 non-null  float64
 12  oldCUSTID            138644 non-null  int64  
 13  CustomerID           108336 non-null  float64
 14  CUSTNAME             138644 non-null  object 
 15  ADDRESS          

## 3. Persiapan Data untuk Association Rule

In [None]:
# Prepare the transaction lines for association-rule mining.
# (The mlxtend / pandas / numpy imports live in the notebook's import cell.)

# Minimum number of rows a product must appear in to be kept. Very rare products
# cannot reach a useful support level and only widen the one-hot matrix;
# adjust this threshold to the dataset.
MIN_PRODUCT_OCCURRENCES = 10

# 1. Keep only valid sales lines: a positive quantity (non-positive quantities
#    are returns/refunds) and a positive unit price (non-positive is invalid).
# 2. Drop rows missing the two columns the analysis relies on.
# 3. Drop rows that are exact duplicates (a line recorded twice).
df_valid = (
    df[(df['Quantity'] > 0) & (df['UnitPrice'] > 0)]
    .dropna(subset=['InvoiceNo', 'PRODUCT'])
    .drop_duplicates()
)

# Keep only products that occur often enough.
product_counts = df_valid['PRODUCT'].value_counts()
valid_products = product_counts[product_counts >= MIN_PRODUCT_OCCURRENCES].index
df_frequent = df_valid[df_valid['PRODUCT'].isin(valid_products)]

# Drop invoices that contain a single distinct product: they cannot contribute
# to any rule. transform('nunique') broadcasts the per-invoice count back onto
# every row, which selects the same rows as a per-group filter callback without
# calling Python code once per invoice.
products_per_invoice = df_frequent.groupby('InvoiceNo')['PRODUCT'].transform('nunique')
df_clean = df_frequent[products_per_invoice > 1].copy()

print(f"Data awal: {len(df)} rows")
print(f"Data setelah cleaning: {len(df_clean)} rows")
print(f"Jumlah transaksi unik: {df_clean['InvoiceNo'].nunique()}")
print(f"Jumlah produk unik: {df_clean['PRODUCT'].nunique()}")

Data awal: 138644 rows
Data setelah cleaning: 78420 rows
Jumlah transaksi unik: 4989
Jumlah produk unik: 4870


In [6]:
# Build the basket format: one list of products per invoice.
# A product that appears on several lines of the same invoice is kept once,
# because a basket is a set of items for rule mining; counting repeated lines
# would inflate the items-per-transaction statistics below.
transactions = (
    df_clean
    .drop_duplicates(subset=['InvoiceNo', 'PRODUCT'])
    .groupby('InvoiceNo')['PRODUCT']
    .apply(list)
)

# Show a few example baskets. head(5) is explicitly positional, unlike integer
# slicing on a Series indexed by invoice number.
print(f"\nTotal transaksi: {len(transactions)}")
print("\nContoh 5 transaksi pertama:")
for i, trans in enumerate(transactions.head(5), 1):
    print(f"Transaksi {i}: {trans}")

# Distribution of the number of distinct items per transaction.
trans_lengths = transactions.map(len).tolist()
print("\nStatistik item per transaksi:")
print(f"Min: {min(trans_lengths)}, Max: {max(trans_lengths)}, Mean: {np.mean(trans_lengths):.2f}")


Total transaksi: 4989

Contoh 5 transaksi pertama:
Transaksi 1: ['ABC KICAP ASIN 620ML', 'ABC KOPI MOCCA 5SX32G']
Transaksi 2: ['ABC SAMBAL EXTRA REDAS 24SX9G', 'ABC SAOS TOMAT SASET 9G24', 'ABC SARDINES  TOMAT 425ML', 'ABC SARDINES CHILI 425G', 'ABC SARDINES MACKEREL EXTRA PDS 425']
Transaksi 3: ['ABC SAUS TIRAM 195ML', 'ABC SAUS TOMOT 5.7KG', 'ABC SPW 9 VOLT NEW', 'ABC TERASI 20 SASET']
Transaksi 4: ['ADIDAS DEO SPRAY WOMEN 150ML FRUITY', 'ADIDAS DEO SPRAY WOMEN 150ML PASSIO', 'ADIDAS DYNAMIC PULSE SPRAY 96G']
Transaksi 5: ['AGAR SWALL SUN PUTIH', 'AGAR-AGAR HOKA-HOKA DOLPIN 480G']

Statistik item per transaksi:
Min: 2, Max: 437, Mean: 15.72


In [7]:
# Display the per-invoice basket Series as the cell output (pandas abbreviates
# long Series and long list values in this view).
transactions

InvoiceNo
536373        [ABC KICAP ASIN 620ML, ABC KOPI MOCCA 5SX32G]
536378    [ABC SAMBAL EXTRA REDAS 24SX9G, ABC SAOS TOMAT...
536381    [ABC SAUS TIRAM 195ML, ABC SAUS TOMOT 5.7KG, A...
536384    [ADIDAS DEO SPRAY WOMEN 150ML FRUITY, ADIDAS D...
536390    [AGAR SWALL SUN PUTIH, AGAR-AGAR HOKA-HOKA DOL...
                                ...                        
575897    [PANTENE SHP 5MLX24S A.KETOMBE, PIXY MATTE LIP...
575898    [P/P SYRUP 1LT TERONG BLD SUPER, PIXY LIQ FOND...
575899    [PIGEON REF+PUFF 25G BEIGE WAVE, PEPSODENT COM...
575900    [KOEPOE PASTA 60ML DURIAN, KOEPOE PASTA 60ML D...
575902    [KOEPOE PASTA 60MLCOCOPDN, KOEPOE PASTA 60ML R...
Name: PRODUCT, Length: 4989, dtype: object

In [8]:
# One-hot encode the baskets into a boolean matrix: one row per invoice and
# one column per product, True where the invoice contains that product.
te = TransactionEncoder()
te.fit(transactions)                 # learn the set of distinct products
te_ary = te.transform(transactions)  # encode every basket against that set

df_encoded = pd.DataFrame(
    data=te_ary,
    index=transactions.index,
    columns=te.columns_,
)

print(f"\nShape data encoded: {df_encoded.shape}")
print("\nContoh data encoded:")
df_encoded.head()


Shape data encoded: (4989, 4870)

Contoh data encoded:


Unnamed: 0_level_0,ABC JUICE GUAVA 1LTR,ABC JUICE SOY BEAN 1LTR,ABC KICAP ASIN 620ML,ABC KOPI MOCCA 5SX32G,ABC MACKEREL 155G TOMAT,ABC MIE EAT&GO BASO AYAM 60G,ABC SAMBAL ASLI 275ML,ABC SAMBAL ASLI 600ML BTL,ABC SAMBAL EXTRA REDAS 24SX9G,ABC SAOS TOMAT SASET 9G24,...,ZWITSAL KIDS APPEL MELON 90ML,ZWITSAL KIDS STRAW 90ML,ZWITSAL PWD 100G FRESH,ZWITSAL PWD 100G SOFT,ZWITSAL PWD 100G W.ZINC,ZWITSAL PWD 300G SOFT,ZWITSAL SHAMPOO 50ML,ZWITSAL SHP REF CLEAN&R 250ML,penghapus faber,pudding aneka rasa 18x15g
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536373,False,False,True,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
536378,False,False,False,False,False,False,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
536381,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
536384,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
536390,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


## 4. Penerapan Algoritma

In [9]:
# Mine the frequent itemsets: product combinations whose support (share of
# invoices containing them) is at least `min_support`.
min_support = 0.005
frequent_itemsets = apriori(
    df_encoded,
    min_support=min_support,
    use_colnames=True,  # report product names instead of column positions
)
# Alternative kept from the original notebook: fpgrowth(df_encoded,
# min_support=min_support, use_colnames=True) takes the same arguments.

print(f"\nJumlah frequent itemsets: {len(frequent_itemsets)}\n")
frequent_itemsets.sort_values(by='support', ascending=False)


Jumlah frequent itemsets: 229



Unnamed: 0,support,itemsets
140,0.009220,(SIKAT WC)
153,0.008619,(SOY MASTER STROBERI 320ML)
73,0.008218,(PIGEON HOT & SOUR MUSTARD 140GR)
186,0.008018,(V3 JAM 450GR STRAWBERRY)
62,0.008018,(PC DEODORANT REF 250ML)
...,...,...
198,0.005011,(VIVELLE B.MIST 75ML AMOUR)
156,0.005011,(STEFIT SHAKE CHOCO 180GR)
175,0.005011,(TEKO AIR 8851(8B264)MULTI)
187,0.005011,(VAPE LIQUID SET)


In [10]:
# Derive association rules from the frequent itemsets, keeping only the rules
# whose confidence reaches the minimum threshold.
min_confidence = 0.5

rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=min_confidence,
)

# Round the reported metrics for readability. Nothing is recomputed here: the
# lift, support and confidence columns already exist in the rules table.
for column, decimals in {'lift': 3, 'support': 4, 'confidence': 3}.items():
    rules[column] = rules[column].round(decimals)

print(f"\nJumlah rules yang ditemukan: {len(rules)}")
print("\nContoh rules:")
print(rules.head(10))


Jumlah rules yang ditemukan: 40

Contoh rules:
                          antecedents                         consequents  \
0             (ALPENLIEBE 117G PEACH)    (AIR WICK FRESH&UP 375ML CITRUS)   
1    (AIR WICK FRESH&UP 375ML CITRUS)             (ALPENLIEBE 117G PEACH)   
2          (BENDERA 123 300GR VANILA)  (BELLAGIO SPRAY COL VENTURA 100ML)   
3  (BELLAGIO SPRAY COL VENTURA 100ML)          (BENDERA 123 300GR VANILA)   
4         (IC COFFEEMIX JAHE 25GRX5S)             (IDF KECAP 625ML MANIS)   
5             (IDF KECAP 625ML MANIS)         (IC COFFEEMIX JAHE 25GRX5S)   
6    (JEPITAN BAJU 30PCS 844 (JHEDY))         (JEPITAN BAJU 024 NGT (SA))   
7         (JEPITAN BAJU 024 NGT (SA))    (JEPITAN BAJU 30PCS 844 (JHEDY))   
8  (JULIES ELITE 228G LIGH VEGETABLE)    (JEPITAN BAJU 30PCS 844 (JHEDY))   
9    (JEPITAN BAJU 30PCS 844 (JHEDY))  (JULIES ELITE 228G LIGH VEGETABLE)   

   antecedent support  consequent support  support  confidence     lift  \
0            0.006013        

## 5. Evaluasi Aturan Asosiasi

In [11]:
# Rank the rules by lift (strongest association first) and keep the top 3.
top_rules = rules.sort_values('lift', ascending=False).head(3)

print("\n" + "="*80)
print("TOP 3 ASSOCIATION RULES BERDASARKAN LIFT")
print("="*80)

# Number the rules by their rank (1, 2, 3). The DataFrame index is the rule's
# row label in the unsorted `rules` table, so labelling with it printed e.g.
# "Rule 14" for the top-ranked rule.
for rank, (_, rule) in enumerate(top_rules.iterrows(), start=1):
    # Join the items in sorted order so multi-item itemsets always read the same.
    antecedents = ', '.join(sorted(rule['antecedents']))
    consequents = ', '.join(sorted(rule['consequents']))

    print(f"\nRule {rank}:")
    print(f"  IF pelanggan membeli: {antecedents}")
    print(f"  THEN kemungkinan besar juga membeli: {consequents}")
    print(f"  Support: {rule['support']:.4f} ({rule['support']*100:.2f}%)")
    print(f"  Confidence: {rule['confidence']:.3f} ({rule['confidence']*100:.1f}%)")
    print(f"  Lift: {rule['lift']:.3f}")
    print(f"\n  Interpretasi:")
    print(f"  - Support {rule['support']*100:.2f}% = kombinasi produk ini muncul di {rule['support']*100:.2f}% dari semua transaksi")
    print(f"  - Confidence {rule['confidence']*100:.1f}% = jika membeli {antecedents}, ada {rule['confidence']*100:.1f}% kemungkinan juga membeli {consequents}")
    print(f"  - Lift {rule['lift']:.3f} = produk ini {rule['lift']:.3f}x lebih mungkin dibeli bersamaan dibanding secara acak")
    print("-"*80)


TOP 3 ASSOCIATION RULES BERDASARKAN LIFT

Rule 14:
  IF pelanggan membeli: KYE GIFT PACK 8S30G
  THEN kemungkinan besar juga membeli: KURANG ASEM 30G ORANGE
  Support: 0.0052 (0.52%)
  Confidence: 1.000 (100.0%)
  Lift: 172.034

  Interpretasi:
  - Support 0.52% = kombinasi produk ini muncul di 0.52% dari semua transaksi
  - Confidence 100.0% = jika membeli KYE GIFT PACK 8S30G, ada 100.0% kemungkinan juga membeli KURANG ASEM 30G ORANGE
  - Lift 172.034 = produk ini 172.034x lebih mungkin dibeli bersamaan dibanding secara acak
--------------------------------------------------------------------------------

Rule 16:
  IF pelanggan membeli: OBH COMBI+ 30ML MENTHOL
  THEN kemungkinan besar juga membeli: OBH COMBI+ ANAK 30ML ORANGE (JPS)
  Support: 0.0058 (0.58%)
  Confidence: 1.000 (100.0%)
  Lift: 172.034

  Interpretasi:
  - Support 0.58% = kombinasi produk ini muncul di 0.58% dari semua transaksi
  - Confidence 100.0% = jika membeli OBH COMBI+ 30ML MENTHOL, ada 100.0% kemungkinan juga