In [2]:
# =================  Connecting To Google Drive  ================= #

# Make the user's Google Drive available inside the Colab VM so the
# dataset stored there can be read by the later cells.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

# END

Mounted at /content/drive


In [3]:
# =================  Library  ================= #

import pandas as pd
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules

# END

In [None]:
# =================  Import Database  ================= #

# Load the transaction dataset from the mounted Google Drive into a DataFrame
db = pd.read_excel(
    io='/content/drive/MyDrive/Data/DATASET_TRANSAKSI.xlsx',
)

# Show the raw table as loaded
print(db)

# END

In [None]:
# =================  KDD Step: Data Selection  ================= #

# Keep only the two attributes the analysis needs (Data Selection):
# 'NAMA' and 'BUKU'; all rows are retained.
selected_attributes = db.loc[:, ['NAMA', 'BUKU']]

print(selected_attributes)

# END

In [None]:
# ================= KDD Step: Preprocessing and Transformation ================= #

# Clean the 'BUKU' values first, then drop duplicate rows (Preprocessing).
# - .assign() builds a new frame instead of writing a column into a frame that was
#   derived from `selected_attributes`; the in-place column write is what triggers
#   pandas' SettingWithCopyWarning.
# - regex=False makes the newline removal a literal replacement regardless of the
#   pandas version's default for `regex`.
# - Duplicates are dropped AFTER cleaning so that "Title\n" and "Title" recorded for
#   the same 'NAMA' collapse into one row. Note that whole ('NAMA', 'BUKU') rows are
#   de-duplicated, not 'NAMA' alone.
selected_attributes_no_duplicates = (
    selected_attributes
    .assign(BUKU=lambda frame: frame['BUKU'].str.replace('\n', '', regex=False))
    .drop_duplicates()
)

# Collect every 'BUKU' value that shares a 'NAMA' into one list per 'NAMA'
# (Transformation), then drop the 'NAMA' column so only the item lists remain.
normalized_data = (
    selected_attributes_no_duplicates
    .groupby('NAMA')['BUKU']
    .apply(list)
    .reset_index()
    .drop(columns=['NAMA'])
)

print(normalized_data)

# END


In [7]:
# =================  KDD Step: Transformation  ================= #

from sklearn.preprocessing import MultiLabelBinarizer

# One-hot encode the per-row item lists: one 0/1 column per distinct 'BUKU' value
# (Transformation)
mlb = MultiLabelBinarizer()
encoded_books = mlb.fit_transform(normalized_data['BUKU'])

# Wrap the encoded matrix in a DataFrame that shares normalized_data's index and
# uses the learned class labels as column names (Transformation)
encoded_books_df = pd.DataFrame(
    encoded_books,
    index=normalized_data.index,
    columns=mlb.classes_,
)

# Replace the raw 'BUKU' list column with the encoded columns; any other columns
# still present in normalized_data are kept alongside them (Transformation)
final_data = normalized_data.drop('BUKU', axis=1).join(encoded_books_df)

print("Hasil One-Hot Encoding:", final_data, sep="\n")

# Export the encoded table to an Excel file on the Colab VM
final_data.to_excel('/content/data.xlsx', index=False)

# Trigger a browser download of that file from Colab to the local machine
from google.colab import files
files.download('/content/data.xlsx')

# ==========================================

# Column-wise sum of the 0/1 matrix, i.e. how many rows contain each item
total_items = final_data.sum(axis=0)

# Show the per-column totals
print("\nTotal Item yang Bernilai 1 untuk Setiap Kolom:", total_items, sep="\n")

# END

  and should_run_async(code)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Total Item yang Bernilai 1 untuk Setiap Kolom:
Aroma Karsa                  7
Interaktif PPKn             11
Interaktif: Informatika     10
Jujutsu Kaisen 4             9
Laskar Pelangi              10
Negeri 5 Menara              8
Pintu Harmonika             12
Pulang                      11
Si Juki: Cari Kerja         10
Top Rank Buku Pintar IPA    17
dtype: int64


In [None]:
# =================  KDD Step: Data Mining (FP-Growth) ================= #

# Mine frequent itemsets with FP-Growth (Data Mining).
# The one-hot matrix is cast to bool because mlxtend emits a DeprecationWarning for
# non-bool input; for a 0/1 matrix the cast does not change which cells are set,
# so the mined itemsets and supports are the same.
# min_support=0.2 keeps itemsets that appear in at least 20% of the rows.
frequent_itemsets = fpgrowth(
    final_data.astype(bool),
    min_support=0.2,
    use_colnames=True,
)

# Order the itemsets from highest to lowest support for display
frequent_itemsets_sorted = frequent_itemsets.sort_values(by='support', ascending=False)

print("\nFrequent Itemsets:")
print(frequent_itemsets_sorted)

# END

In [None]:
# =================  KDD Step: Data Mining (Association Rules) ================= #

# Derive association rules from the frequent itemsets, keeping only rules whose
# confidence is at least 0.6 (Data Mining)
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.6,
    metric="confidence",
)

print("\nAssociation Rules:", rules, sep="\n")



Association Rules:
  antecedents                 consequents  antecedent support  \
0    (Pulang)  (Top Rank Buku Pintar IPA)            0.354839   

   consequent support   support  confidence      lift  leverage  conviction  \
0            0.548387  0.225806    0.636364  1.160428  0.031217    1.241935   

   zhangs_metric  
0       0.214286  


  and should_run_async(code)


In [None]:
# =================  KDD Step: Knowledge / Knowledge Representation  ================= #

# Turn every association rule into a readable sentence.
for index, row in rules.iterrows():
    # 'antecedents' / 'consequents' hold frozensets whose iteration order is
    # arbitrary; sorting keeps the printed sentence stable between runs.
    antecedent = ", ".join(sorted(row['antecedents']))
    consequent = ", ".join(sorted(row['consequents']))
    # Express the confidence as a percentage
    confidence = row['confidence'] * 100

    # The f prefix is required: without it the placeholders are printed literally
    # instead of being replaced by the rule's values.
    print(f'Jika buku "{antecedent}" dipinjam maka punya kemungkinan {confidence:.2f}% buku "{consequent}" akan dipinjam juga')
    print()

# END

In [None]:
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

# Small hand-written sample: one (NO, NAMA, borrowed books) record per borrower
borrowings = [
    (1, "Aditya Pradita Wijaya",
     ["Jujutsu Kaisen 4", "Interaktif PPKn", "Top Rank Buku Pintar IPA"]),
    (2, "Aprisilla Nadien Islami",
     ["Pintu Harmonika", "Jujutsu Kaisen 4", "Interaktif PPKn"]),
    (3, "Azura Naysila Putri",
     ["Top Rank Buku Pintar IPA", "Si Juki: Cari Kerja"]),
    (4, "Muhammad Syahril Mubaroq",
     ["Interaktif PPKn", "Pintu Harmonika"]),
    (5, "Siti Rohimah",
     ["Jujutsu Kaisen 4", "Top Rank Buku Pintar IPA"]),
    (6, "Nida Sarifa",
     ["Interaktif PPKn", "Pintu Harmonika"]),
    (7, "Dea Arlitha Enggelina",
     ["Interaktif PPKn", "Jujutsu Kaisen 4", "Si Juki: Cari Kerja"]),
]

# Flatten the records into long format: one row per borrowed book
data = {
    'NO': [no for no, name, books in borrowings for book in books],
    'NAMA': [name for no, name, books in borrowings for book in books],
    'BUKU': [book for no, name, books in borrowings for book in books],
}

# Long-format table of the sample
df = pd.DataFrame(data)

# One list of books per 'NAMA' group
books_per_name = df.groupby('NAMA')['BUKU'].apply(list)
transactions = books_per_name.tolist()

# Encode the transactions as a table with one column per book
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
transaction_df = pd.DataFrame(te_ary, columns=te.columns_)

# Mine itemsets that occur in at least 40% of the transactions
frequent_itemsets = fpgrowth(
    transaction_df,
    use_colnames=True,
    min_support=0.4,
)

# Display the frequent itemsets as the cell's output
frequent_itemsets


  and should_run_async(code)


Unnamed: 0,support,itemsets
0,0.714286,(Interaktif PPKn)
1,0.571429,(Jujutsu Kaisen 4)
2,0.428571,(Top Rank Buku Pintar IPA)
3,0.428571,(Pintu Harmonika)
4,0.428571,"(Interaktif PPKn, Jujutsu Kaisen 4)"
5,0.428571,"(Interaktif PPKn, Pintu Harmonika)"
