In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# Read the raw transaction file: one transaction per line, items separated by
# whitespace. str.split() with no arguments already ignores leading/trailing
# whitespace (including the newline), so no explicit strip is required.
with open("foodmartFIM.txt", "r") as file:
    transactions = [raw_line.split() for raw_line in file]

In [3]:
# Turn every item token into an int so items can be compared against the
# integer IDs in the sensitive itemsets. int() raises ValueError on a
# non-numeric token.
transactions = [list(map(int, tokens)) for tokens in transactions]

In [4]:
# Item-ID combinations treated as sensitive. Each tuple is one itemset; a
# transaction matches an itemset only when it contains every item in the tuple.
sensitive_itemsets = [
    (214, 763), (778, 195, 385), (12, 934), (359, 282),
    (16, 898, 1368), (1299, 557), (1336, 128, 224, 667), (1440, 1350),
    (1222, 354, 1502), (110, 1419, 610, 1065), (890, 1399, 284), (1195, 1478, 629),
    (615, 459), (1187, 977, 1193), (907, 1202, 1046), (487, 570),
    (551, 1206, 384), (1432, 251), (69, 1444), (11, 1213, 1136),
]

In [5]:
def is_sensitive(transaction, sensitive_itemsets):
    """Return 1 if the transaction fully contains any sensitive itemset, else 0.

    Args:
        transaction: Collection of items supporting the ``in`` operator.
        sensitive_itemsets: Iterable of itemsets (each an iterable of items).

    Returns:
        int: 1 when every item of at least one itemset is present in
        ``transaction``; 0 otherwise (including when no itemsets are given).
    """
    contains_any = any(
        all(member in transaction for member in candidate)
        for candidate in sensitive_itemsets
    )
    # Callers use the result as a class label, so return a plain int.
    return int(contains_any)

In [6]:
def remove_sensitive_items(transaction, sensitive_itemsets):
    """Strip the items of every fully-contained sensitive itemset.

    Itemsets are checked in order against the *current* state of the
    transaction, so an itemset that shares items with an earlier match may no
    longer be fully present and is then left alone.

    Args:
        transaction: List of items; it is never modified in place.
        sensitive_itemsets: Iterable of itemsets (each an iterable of items).

    Returns:
        The input object itself when nothing matched, otherwise a new list
        with the matched itemsets' items removed.
    """
    remaining = transaction
    for itemset in sensitive_itemsets:
        fully_present = all(member in remaining for member in itemset)
        if not fully_present:
            continue
        # Drop every occurrence of every item belonging to the matched itemset.
        remaining = [kept for kept in remaining if kept not in itemset]
    return remaining

In [7]:
# Label each original transaction (1 = contains a sensitive itemset, 0 = not).
labels = [
    is_sensitive(txn, sensitive_itemsets)
    for txn in transactions
]
# Build the sanitized copy of the dataset; `transactions` itself is left untouched.
cleaned_transactions = [
    remove_sensitive_items(txn, sensitive_itemsets)
    for txn in transactions
]

In [8]:
# Render each cleaned transaction as a space-separated string of item IDs so it
# can be fed to a text vectorizer.
# NOTE(review): `labels` were computed from the uncleaned `transactions`, while
# these strings come from `cleaned_transactions`, i.e. with the items of any
# matched sensitive itemset already removed. The recorded run shows 0.00 recall
# for class 1 - confirm that training on the cleaned text is intended.
transactions_str = [
    " ".join(str(item) for item in cleaned)
    for cleaned in cleaned_transactions
]

In [9]:
# Two-column frame: the transaction text and its 0/1 sensitivity label.
df = pd.DataFrame(dict(transaction=transactions_str, label=labels))

In [10]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
# NOTE(review): the positive class is very rare (4 of 1243 test rows in the
# recorded output); consider stratify=df['label'] to keep class ratios stable.
X_train, X_test, y_train, y_test = train_test_split(
    df['transaction'],
    df['label'],
    test_size=0.3,
    random_state=42,
)

In [11]:
# CountVectorizer's default token_pattern (r"(?u)\b\w\w+\b") keeps only tokens
# of two or more characters, which silently drops any single-digit item IDs
# from the feature matrix. Accept one-character tokens so every item ID in a
# transaction string becomes a feature.
vectorizer = CountVectorizer(token_pattern=r"(?u)\b\w+\b")
# Learn the vocabulary from the training rows only, then reuse it for the test
# rows so both matrices share the same columns.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [12]:
# Multinomial Naive Bayes on the item-count matrix, default hyperparameters.
model = MultinomialNB()
model.fit(X=X_train_vec, y=y_train)

In [13]:
# Predicted 0/1 labels for the held-out rows.
y_pred = model.predict(X=X_test_vec)

In [14]:
print("Accuracy:", accuracy_score(y_test, y_pred))
# When a class is never predicted its precision is undefined; by default
# scikit-learn reports 0.0 and emits an UndefinedMetricWarning for each
# affected average. zero_division=0 keeps the same 0.0 values but states the
# choice explicitly and suppresses the warning noise.
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, zero_division=0),
)

Accuracy: 0.996781979082864

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1239
           1       0.00      0.00      0.00         4

    accuracy                           1.00      1243
   macro avg       0.50      0.50      0.50      1243
weighted avg       0.99      1.00      1.00      1243



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [18]:
# Persist the sanitized dataset in the same layout as the input file: one
# transaction per line, items separated by single spaces.
output_file = "Naive_foodmartFIM.txt"
with open(output_file, "w") as file:
    for transaction in cleaned_transactions:
        # print() joins the items with " " and appends "\n" by default.
        print(*transaction, file=file)

Itemset sensitif telah dihapus dan transaksi disimpan di 'Naive_foodmartFIM.txt'.
