In [None]:
# -*- coding: utf-8 -*-
"""Pertemuan11_Advanced_Pattern_Analysis_Real_Data.ipynb

Automatically generated by Colab.

**Pertemuan 11: Advanced Pattern Analysis dengan Data Real**
Analisis Asosiasi Lanjut & Deteksi Anomali Terapan
"""

# Install required libraries
!pip install mlxtend
!pip install pyod
!pip install plotly

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# ML libraries
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, confusion_matrix

# Association Rules
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Anomaly Detection
from pyod.models.knn import KNN
from pyod.models.iforest import IForest
from pyod.models.ocsvm import OCSVM

# Visualization
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

print("Semua library berhasil diimport!")

In [None]:
# =============================================
# PART 1: DATA UPLOAD AND PREPROCESSING
# =============================================

# Section title followed by a 60-character rule, emitted in one call.
print("BAGIAN 1: UPLOAD DAN PREPROCESSING DATA", "=" * 60, sep="\n")

In [None]:
# Step 1: Upload the CSV dataset (requires the Google Colab runtime)
from google.colab import files
import io

# Columns that the rest of the notebook reads from the uploaded file.
REQUIRED_COLUMNS = ['Member_number', 'Date', 'itemDescription']

print("SILAHKAN UPLOAD FILE CSV DATASET:")
print("Format yang diharapkan: Member_number, Date, itemDescription")
print("Contoh format:")
print("Member_number,Date,itemDescription")
print("1808,21-07-2015,herb & pepper")
print("2552,05-01-2015,whole milk")
print("...")

# Opens the browser upload dialog; the result maps file name -> raw bytes.
uploaded = files.upload()

# Nothing to index into if the dialog was dismissed without choosing a file.
if not uploaded:
    raise ValueError("Tidak ada file yang diupload. Jalankan ulang cell ini dan pilih file CSV.")

# Only the first uploaded file is used.
file_name = list(uploaded.keys())[0]
print(f" File '{file_name}' berhasil diupload!")
print(f" Size file: {len(uploaded[file_name])} bytes")

# Load the dataset from the in-memory bytes
df = pd.read_csv(io.BytesIO(uploaded[file_name]))

# Fail early with a clear message instead of a KeyError several cells later.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise ValueError(f"Kolom wajib tidak ditemukan: {missing_columns}. Kolom yang tersedia: {list(df.columns)}")

# Dataset overview
print("\n INFORMASI DATASET:")
print(f"Shape: {df.shape}")
print(f"\n5 Data Teratas:")
print(df.head())
print(f"\n Info Dataset:")
# DataFrame.info() writes its report itself and returns None, so it must not
# be wrapped in print() (that would append a stray "None" line).
df.info()
print(f"\n Statistik Dataset:")
print(f"Jumlah Member Unik: {df['Member_number'].nunique()}")
print(f"Jumlah Item Unik: {df['itemDescription'].nunique()}")

# 'Date' is still raw text here; min()/max() on dd-mm-yyyy strings would compare
# them alphabetically. Parse a throw-away copy so the reported range is
# chronological, and fall back to the raw values if nothing parses.
date_preview = pd.to_datetime(df['Date'], format='%d-%m-%Y', errors='coerce')
if date_preview.notna().any():
    print(f"Rentang Tanggal: {date_preview.min():%d-%m-%Y} hingga {date_preview.max():%d-%m-%Y}")
else:
    print(f"Rentang Tanggal: {df['Date'].min()} hingga {df['Date'].max()}")

In [None]:
# Step 2: Data cleaning and preprocessing

print("\n DATA CLEANING DAN PREPROCESSING")

# Count missing values per column (computed once, reused for the check below)
print("CEK MISSING VALUES:")
missing_per_column = df.isnull().sum()
print(missing_per_column)

# Drop every row that has at least one missing value
if missing_per_column.sum() > 0:
    df = df.dropna()
    print("Missing values telah dihapus")

# Convert the date column to datetime
print("\n KONVERSI FORMAT TANGGAL:")
try:
    # Expected layout is day-month-year, e.g. 21-07-2015
    df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y')
    print("Format tanggal berhasil dikonversi")
except (ValueError, TypeError):
    try:
        # Fall back to pandas' own format inference
        df['Date'] = pd.to_datetime(df['Date'])
        print("Format tanggal berhasil dikonversi (auto-detect)")
    except (ValueError, TypeError) as exc:
        print("Gagal mengkonversi format tanggal")
        # Everything below (and every later cell) needs real datetimes, so stop
        # here with the actual cause instead of failing later on string arithmetic.
        raise ValueError("Kolom 'Date' tidak dapat dikonversi ke datetime") from exc

print(f"Rentang tanggal setelah konversi: {df['Date'].min()} hingga {df['Date'].max()}")

# Basic dataset summary
print("\n ANALISIS DASAR DATASET:")
print(f"Total transaksi: {len(df)}")
print(f"Jumlah member unik: {df['Member_number'].nunique()}")
print(f"Jumlah item unik: {df['itemDescription'].nunique()}")
print(f"Periode data: {(df['Date'].max() - df['Date'].min()).days} hari")

# Ten most frequently purchased items
print(f"\n 10 ITEM PALING POPULER:")
top_items = df['itemDescription'].value_counts().head(10)
print(top_items)

In [None]:
# Step 3: Exploratory data analysis

print("\n EXPLORATORY DATA ANALYSIS")

# Calendar orderings used to sort the bar charts chronologically.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']

# --- Figure 1: the 20 most frequently purchased items ---
top_20_items = df['itemDescription'].value_counts().head(20)

fig_items, ax_items = plt.subplots(figsize=(12, 8))
sns.barplot(y=top_20_items.index, x=top_20_items.values, palette='viridis', ax=ax_items)
ax_items.set_title('Top 20 Items Paling Populer')
ax_items.set_xlabel('Jumlah Kemunculan')
plt.tight_layout()
plt.show()

# --- Figure 2: rows per member, full range and zoomed to <= 50 ---
transactions_per_member = df.groupby('Member_number').size()

fig_members, (ax_full, ax_zoom) = plt.subplots(1, 2, figsize=(15, 5))

sns.histplot(transactions_per_member, bins=50, kde=True, ax=ax_full)
ax_full.set_title('Distribusi Jumlah Transaksi per Member')
ax_full.set_xlabel('Jumlah Transaksi')
ax_full.set_ylabel('Frekuensi')

# Zoomed view of the same distribution for finer detail
sns.histplot(transactions_per_member[transactions_per_member <= 50], bins=30, kde=True, ax=ax_zoom)
ax_zoom.set_title('Distribusi Jumlah Transaksi per Member (≤ 50 transaksi)')
ax_zoom.set_xlabel('Jumlah Transaksi')
ax_zoom.set_ylabel('Frekuensi')

plt.tight_layout()
plt.show()

# --- Figure 3: temporal patterns ---
# Derived calendar columns are stored on df (in this order) before plotting.
df['year_month'] = df['Date'].dt.to_period('M')
df['day_of_week'] = df['Date'].dt.day_name()
df['month'] = df['Date'].dt.month_name()

transactions_per_month = df.groupby('year_month').size()
transactions_per_day = df['day_of_week'].value_counts()
transactions_per_day = transactions_per_day.reindex(weekday_order)
transactions_per_month_name = df['month'].value_counts()
transactions_per_month_name = transactions_per_month_name.reindex(month_order)

fig_time, (ax_trend, ax_weekday, ax_month) = plt.subplots(1, 3, figsize=(15, 5))

transactions_per_month.plot(kind='line', marker='o', ax=ax_trend)
ax_trend.set_title('Trend Transaksi per Bulan')
ax_trend.set_xlabel('Bulan')
ax_trend.set_ylabel('Jumlah Transaksi')
plt.setp(ax_trend.get_xticklabels(), rotation=45)

transactions_per_day.plot(kind='bar', color='skyblue', ax=ax_weekday)
ax_weekday.set_title('Transaksi per Hari dalam Seminggu')
ax_weekday.set_xlabel('Hari')
ax_weekday.set_ylabel('Jumlah Transaksi')
plt.setp(ax_weekday.get_xticklabels(), rotation=45)

transactions_per_month_name.plot(kind='bar', color='lightcoral', ax=ax_month)
ax_month.set_title('Transaksi per Bulan')
ax_month.set_xlabel('Bulan')
ax_month.set_ylabel('Jumlah Transaksi')
plt.setp(ax_month.get_xticklabels(), rotation=45)

plt.tight_layout()
plt.show()

# --- Summary statistics of rows per member ---
most_active_member = transactions_per_member.idxmax()
print("STATISTIK TRANSAKSI:")
print(f"Rata-rata transaksi per member: {transactions_per_member.mean():.2f}")
print(f"Median transaksi per member: {transactions_per_member.median()}")
print(f"Std dev transaksi per member: {transactions_per_member.std():.2f}")
print(f"Member paling aktif: {most_active_member} dengan {transactions_per_member.max()} transaksi")

In [None]:
# =============================================
# PART 2: ADVANCED ASSOCIATION RULES
# =============================================

# Section title followed by a 60-character rule, emitted in one call.
print("\n BAGIAN 2: ADVANCED ASSOCIATION RULES ANALYSIS", "=" * 60, sep="\n")

In [None]:
# Step 4: Prepare the data for association-rule mining

print("PERSIAPAN DATA UNTUK ASSOCIATION RULES")

# One transaction = everything a given member bought on a given date,
# identified by the string "<Member_number>_<Date>".
member_part = df['Member_number'].astype(str)
date_part = df['Date'].astype(str)
df['Transaction_ID'] = member_part + '_' + date_part

print(f"Jumlah transaksi unik: {df['Transaction_ID'].nunique()}")

# Collect the item list of every transaction
items_by_transaction = df.groupby('Transaction_ID')['itemDescription']
transactions = items_by_transaction.apply(list).tolist()

print(f"Contoh 3 transaksi pertama:")
for number, basket in enumerate(transactions[:3], start=1):
    print(f"Transaksi {number}: {basket}")

# One-hot encode the baskets: one row per transaction, one column per item
te = TransactionEncoder()
te.fit(transactions)
te_array = te.transform(transactions)
df_encoded = pd.DataFrame(te_array, columns=te.columns_)

print(f"\n DATA BERHASIL DIENCODE")
print(f"Shape data encoded: {df_encoded.shape}")
print(f"Jumlah item unik: {len(df_encoded.columns)}")

# Support of a single item = share of transactions containing it
# (the column mean of the boolean matrix), listed from highest to lowest.
item_support = df_encoded.mean()
item_support = item_support.sort_values(ascending=False)
print(f"\n SUPPORT TOP 15 ITEMS:")
print(item_support.head(15))

In [None]:
# Step 5: Advanced association-rule analysis

print("\n ADVANCED ASSOCIATION RULES ANALYSIS")

# An itemset must occur in at least this many transactions to count as frequent.
MIN_ITEMSET_COUNT = 50


def _print_rule_table(ranked_rules, detailed):
    """Print rules in the order given, numbered by rank (1 = first row).

    ranked_rules: rules DataFrame already sorted best-first.
    detailed: when True, also print conviction / composite score and a
        separator line after each rule.
    """
    # Number by position: the DataFrame index is the row's position in the
    # unsorted rule table and says nothing about its rank.
    for rank, (_, row) in enumerate(ranked_rules.iterrows(), start=1):
        antecedents = list(row['antecedents'])
        consequents = list(row['consequents'])
        if detailed:
            print(f"Rule {rank:2d}: {antecedents} → {consequents}")
        else:
            print(f"Rule {rank}: {antecedents} → {consequents}")
        print(f"        Support: {row['support']:.4f} | Confidence: {row['confidence']:.3f} | Lift: {row['lift']:.3f}")
        if detailed:
            print(f"        Conviction: {row['conviction']:.3f} | Composite: {row['composite_score']:.3f}")
            print("-" * 70)


# Start from empty results so that re-running this cell (or running later
# cells after a run that found nothing) never picks up stale rules.
frequent_itemsets = pd.DataFrame()
rules = pd.DataFrame()
meaningful_rules = pd.DataFrame()

total_transactions = len(transactions)
print(f"Total transaksi: {total_transactions}")

if total_transactions > 0:
    # Support threshold equivalent to MIN_ITEMSET_COUNT occurrences, capped at
    # 1.0 because apriori only accepts a support within (0, 1].
    min_support = min(1.0, MIN_ITEMSET_COUNT / total_transactions)
    print(f"Minimum support yang digunakan: {min_support:.4f}")

    # Mine frequent itemsets of up to four items
    frequent_itemsets = apriori(df_encoded,
                               min_support=min_support,
                               use_colnames=True,
                               max_len=4)

print(f" FREQUENT ITEMSETS DITEMUKAN: {len(frequent_itemsets)}")

if len(frequent_itemsets) > 0:
    # Generate candidate rules with a permissive confidence floor
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.1)

    print(f" TOTAL RULES AWAL: {len(rules)}")

    # Additional interestingness metrics
    rules['conviction'] = (1 - rules['consequent support']) / (1 - rules['confidence'])
    rules['leverage'] = rules['support'] - (rules['antecedent support'] * rules['consequent support'])
    rules['jaccard'] = rules['support'] / (rules['antecedent support'] + rules['consequent support'] - rules['support'])

    # Conviction is infinite when confidence == 1. Left as-is it would make the
    # composite score infinite too and tie every such rule, so for scoring only
    # it is capped at the largest finite conviction (1.0 if there is none).
    infinite_conviction = np.isinf(rules['conviction'])
    conviction_cap = rules.loc[~infinite_conviction, 'conviction'].max()
    if pd.isna(conviction_cap):
        conviction_cap = 1.0
    conviction_for_score = rules['conviction'].mask(infinite_conviction, conviction_cap)

    # Weighted blend of lift, confidence, log-scaled support and conviction
    rules['composite_score'] = (
        rules['lift'] * 0.4 +
        rules['confidence'] * 0.3 +
        np.log1p(rules['support'] * 100) * 0.2 +
        conviction_for_score * 0.1
    )

    # Keep only rules that clear every quality threshold
    meaningful_rules = rules[
        (rules['lift'] > 1.2) &
        (rules['confidence'] > 0.3) &
        (rules['conviction'] > 1) &
        (rules['support'] > min_support * 2)  # at least twice the minimum support
    ].copy()

    print(f"MEANINGFUL RULES: {len(meaningful_rules)}")

    if len(meaningful_rules) > 0:
        # Show the best rules by composite score
        top_rules = meaningful_rules.nlargest(20, 'composite_score')

        print(f"\n TOP 20 ASSOCIATION RULES:")
        print("=" * 90)
        _print_rule_table(top_rules, detailed=True)
    else:
        print("Tidak ada meaningful rules yang memenuhi kriteria")
        # Fall back to the best-scoring rules even though they miss a threshold
        if len(rules) > 0:
            best_rules = rules.nlargest(10, 'composite_score')
            print(f"\n BEST AVAILABLE RULES:")
            _print_rule_table(best_rules, detailed=False)
else:
    print("Tidak ada frequent itemsets yang ditemukan")
    print("Coba turunkan minimum support")

In [None]:
# Step 6: Visualise the association rules

print("\n VISUALISASI ASSOCIATION RULES")


def _short_side(itemset):
    """Join at most two item names; append '...' when more were present."""
    names = list(itemset)
    ellipsis = '...' if len(names) > 2 else ''
    return ', '.join(names[:2]) + ellipsis


has_rules_to_plot = 'meaningful_rules' in locals() and len(meaningful_rules) > 0

if not has_rules_to_plot:
    print("Tidak ada meaningful rules untuk divisualisasikan")
else:
    # The 15 best rules by composite score, with a short text label each
    viz_rules = meaningful_rules.nlargest(15, 'composite_score')
    rule_labels = [
        f"{_short_side(lhs)} → {_short_side(rhs)}"
        for lhs, rhs in zip(viz_rules['antecedents'], viz_rules['consequents'])
    ]

    # --- Chart 1: bubble chart (size = lift, colour = conviction) ---
    plt.figure(figsize=(14, 8))
    bubble_sizes = viz_rules['lift'] * 50
    scatter = plt.scatter(
        viz_rules['support'],
        viz_rules['confidence'],
        s=bubble_sizes,
        c=viz_rules['conviction'],
        cmap='RdYlGn',
        alpha=0.7,
        edgecolors='black',
        linewidth=0.5
    )

    plt.colorbar(scatter, label='Conviction')
    plt.xlabel('Support')
    plt.ylabel('Confidence')
    plt.title('Association Rules: Support vs Confidence\n(Size = Lift, Color = Conviction)')
    plt.grid(True, alpha=0.3)

    # Label only the first five rules to keep the chart readable
    label_box = dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.7)
    labelled_points = list(zip(viz_rules['support'], viz_rules['confidence'], rule_labels))[:5]
    for x_val, y_val, text in labelled_points:
        plt.annotate(text, (x_val, y_val), xytext=(5, 5), textcoords='offset points',
                     fontsize=9, bbox=label_box)

    plt.tight_layout()
    plt.show()

    # --- Chart 2: horizontal bars of the composite score ---
    plt.figure(figsize=(12, 8))
    y_pos = range(len(viz_rules))
    plt.barh(y_pos, viz_rules['composite_score'])
    plt.yticks(y_pos, rule_labels)
    plt.xlabel('Composite Score')
    plt.title('Top Association Rules by Composite Score')
    plt.grid(True, axis='x', alpha=0.3)

    # Write each score just to the right of its bar
    for bar_index, score in enumerate(viz_rules['composite_score']):
        plt.text(score + 0.01, bar_index, f'{score:.2f}', va='center', fontsize=9)

    plt.tight_layout()
    plt.show()

    # --- Item frequency across all meaningful rules (text only) ---
    print("\n NETWORK ANALYSIS:")
    # Every item occurrence, antecedents first and then consequents, per rule
    all_items_in_rules = [
        item
        for _, rule in meaningful_rules.iterrows()
        for side in ('antecedents', 'consequents')
        for item in rule[side]
    ]

    item_freq_in_rules = pd.Series(all_items_in_rules).value_counts()
    print("Items paling sering muncul dalam rules:")
    print(item_freq_in_rules.head(10))

In [None]:
# =============================================
# PART 3: ANOMALY DETECTION ON TRANSACTION DATA
# =============================================

# Section title followed by a 60-character rule, emitted in one call.
print("\n BAGIAN 3: ANOMALY DETECTION PADA DATA TRANSAKSI", "=" * 60, sep="\n")

In [None]:
# Step 7: Feature engineering for anomaly detection

print("FEATURE ENGINEERING UNTUK ANOMALY DETECTION")


def _member_feature_row(member_id, member_data):
    """Summarise one member's rows of df as a flat dict of behaviour features.

    member_id: the member's value of 'Member_number'.
    member_data: the rows of df belonging to that member; reads the 'Date'
        (datetime) and 'itemDescription' columns.
    """
    n_rows = len(member_data)
    dates = member_data['Date']
    n_days = dates.nunique()
    # Purchases per item, most frequent first; computed once and reused.
    item_counts = member_data['itemDescription'].value_counts()
    item_share = item_counts / n_rows

    return {
        'member_id': member_id,
        'total_transactions': n_rows,
        'unique_items': len(item_counts),
        'transaction_days': n_days,
        'avg_items_per_transaction': n_rows / n_days if n_days > 0 else 0,
        # Days spanned by the member's history divided by the number of rows
        'transaction_frequency': (dates.max() - dates.min()).days / n_rows if n_rows > 1 else 0,
        # Share of rows dated Saturday (5) or Sunday (6). Derived from 'Date'
        # directly so this cell does not rely on the 'day_of_week' column that
        # the EDA plotting cell adds as a side effect.
        'prefers_weekend': dates.dt.dayofweek.isin([5, 6]).sum() / n_rows,
        'most_common_item_count': item_counts.iloc[0] if len(item_counts) > 0 else 0,
        # Shannon entropy (natural log) of the member's item distribution
        'entropy': float(-(item_share * np.log(item_share)).sum()) if len(item_counts) > 0 else 0,
    }


# One pass over the data via groupby instead of re-filtering the whole frame
# for every member; sort=False keeps members in order of first appearance.
member_features = [
    _member_feature_row(member_id, member_data)
    for member_id, member_data in df.groupby('Member_number', sort=False)
]

# Convert to a DataFrame (one row per member)
df_member_features = pd.DataFrame(member_features)

print(f" FEATURES BERHASIL DIBUAT UNTUK {len(df_member_features)} MEMBER")
print(f"Features yang tersedia: {list(df_member_features.columns)}")
print(f"\n STATISTIK FEATURES:")
print(df_member_features.describe())

# Histogram of every feature on a 3x3 grid
fig, axes = plt.subplots(3, 3, figsize=(15, 12))
features_to_plot = ['total_transactions', 'unique_items', 'transaction_days',
                   'avg_items_per_transaction', 'transaction_frequency',
                   'prefers_weekend', 'most_common_item_count', 'entropy']

grid_axes = axes.ravel()
for ax, feature in zip(grid_axes, features_to_plot):
    ax.hist(df_member_features[feature], bins=30, alpha=0.7, color='skyblue')
    ax.set_title(f'Distribution of {feature}')
    ax.set_xlabel(feature)
    ax.set_ylabel('Frequency')
    ax.grid(True, alpha=0.3)

# Eight features on nine panels: hide whatever is left over instead of
# showing an empty set of axes.
for unused_ax in grid_axes[len(features_to_plot):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Step 8: Ensemble anomaly detection on member behaviour

print("ENSEMBLE ANOMALY DETECTION")

# Expected share of anomalous members, passed to every detector.
CONTAMINATION = 0.05
# Quantile of the averaged score above which a member is flagged.
SCORE_QUANTILE = 0.95
# Columns this cell writes back to df_member_features. They must never be used
# as model inputs: on a re-run they would otherwise leak the previous run's
# labels and scores into the features.
RESULT_COLUMNS = ['anomaly_vote', 'anomaly_weighted', 'anomaly_score']

# Model inputs: every engineered feature except the id and the result columns
feature_columns = [
    col for col in df_member_features.columns
    if col != 'member_id' and col not in RESULT_COLUMNS
]
X = df_member_features[feature_columns]

# Standardise the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print(f" FEATURES BERHASIL DI-SCALING")
print(f"Shape data: {X_scaled.shape}")

# The three detectors that make up the ensemble
models = {
    'Isolation Forest': IForest(contamination=CONTAMINATION, random_state=42),
    'KNN': KNN(contamination=CONTAMINATION),
    'One-Class SVM': OCSVM(contamination=CONTAMINATION)
}

# Fit each model and collect its labels and raw scores
ensemble_predictions = {}
ensemble_scores = {}

print("TRAINING ENSEMBLE MODELS...")
for name, model in models.items():
    model.fit(X_scaled)

    # Binary labels (summed below, so 1 is taken to mean "anomaly") and raw scores
    y_pred = model.predict(X_scaled)
    y_scores = model.decision_function(X_scaled)

    ensemble_predictions[name] = y_pred
    ensemble_scores[name] = y_scores

    # Number of members this model flagged
    n_anomalies = int(y_pred.sum())
    print(f"   {name}: {n_anomalies} anomali terdeteksi ({n_anomalies/len(y_pred)*100:.1f}%)")

# Majority vote: flagged when at least 2 of the 3 models agree
ensemble_df = pd.DataFrame(ensemble_predictions)
ensemble_vote = (ensemble_df.sum(axis=1) >= 2).astype(int)

# Score-based ensemble: min-max scale each model's scores to [0, 1] and average
scores_df = pd.DataFrame(ensemble_scores)
# A model whose scores are all identical has zero range; divide by 1 instead so
# its column becomes all zeros rather than NaN.
score_range = (scores_df.max() - scores_df.min()).replace(0, 1)
normalized_scores = (scores_df - scores_df.min()) / score_range
weighted_scores = normalized_scores.mean(axis=1)
# Flag members whose averaged score exceeds the chosen quantile
ensemble_weighted = (weighted_scores > weighted_scores.quantile(SCORE_QUANTILE)).astype(int)

print(f"\n ENSEMBLE RESULTS:")
print(f"Majority Voting: {int(ensemble_vote.sum())} anomali terdeteksi")
print(f"Weighted Score: {int(ensemble_weighted.sum())} anomali terdeteksi")

# Store the results next to the features. Assign by position: the result
# Series carry a fresh 0..n-1 index that need not match df_member_features.
df_member_features['anomaly_vote'] = ensemble_vote.to_numpy()
df_member_features['anomaly_weighted'] = ensemble_weighted.to_numpy()
df_member_features['anomaly_score'] = weighted_scores.to_numpy()

# Members flagged by each ensemble strategy
anomalous_members_vote = df_member_features[df_member_features['anomaly_vote'] == 1]
anomalous_members_weighted = df_member_features[df_member_features['anomaly_weighted'] == 1]

print(f"\n MEMBER YANG TERDETEKSI SEBAGAI ANOMALI:")
print("Majority Voting Ensemble:")
print(anomalous_members_vote[['member_id', 'total_transactions', 'unique_items', 'anomaly_score']].head(10))

print("\nWeighted Score Ensemble:")
print(anomalous_members_weighted[['member_id', 'total_transactions', 'unique_items', 'anomaly_score']].head(10))

In [None]:
# Step 9: Analyse and interpret the detected anomalies

print("\n ANALISIS DAN INTERPRETASI ANOMALI")

# Split members by the majority-vote label
normal_members = df_member_features[df_member_features['anomaly_vote'] == 0]
anomalous_members = df_member_features[df_member_features['anomaly_vote'] == 1]

# Features compared in both the text summary and the boxplots
comparison_features = ['total_transactions', 'unique_items', 'transaction_days', 'avg_items_per_transaction']

print("PERBANDINGAN KARAKTERISTIK: NORMAL vs ANOMALI")
print("=" * 50)

for feature in comparison_features:
    normal_mean = normal_members[feature].mean()
    anomaly_mean = anomalous_members[feature].mean()

    print(f"{feature}:")
    print(f"  • Normal: {normal_mean:.2f}")
    print(f"  • Anomali: {anomaly_mean:.2f}")
    print(f"  • Rasio: {anomaly_mean/normal_mean:.2f}x")
    print()

# Side-by-side boxplots, one panel per feature
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

for ax, feature in zip(axes.ravel(), comparison_features):
    data_to_plot = [normal_members[feature], anomalous_members[feature]]
    ax.boxplot(data_to_plot)
    # Label the two boxes through the axis rather than boxplot(labels=...):
    # that keyword is deprecated in recent Matplotlib in favour of tick_labels,
    # which older releases do not accept. This form works on both.
    ax.set_xticklabels(['Normal', 'Anomali'])
    ax.set_title(f'Perbandingan {feature}')
    ax.set_ylabel(feature)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Per-member detail for the first five flagged members
print("\n ANALISIS DETAIL MEMBER ANOMALI:")
if len(anomalous_members) > 0:
    # itertuples keeps each column's own dtype; iterrows would upcast the whole
    # row to float and print ids and counts as "1234.0".
    for member in anomalous_members.head(5).itertuples(index=False):
        member_data = df[df['Member_number'] == member.member_id]

        print(f"\n MEMBER {member.member_id} (Anomaly Score: {member.anomaly_score:.3f}):")
        print(f"   • Total Transaksi: {member.total_transactions}")
        print(f"   • Unique Items: {member.unique_items}")
        print(f"   • Transaction Days: {member.transaction_days}")
        print(f"   • Avg Items/Transaction: {member.avg_items_per_transaction:.2f}")

        # Purchase pattern. Uses its own name so the dataset-wide `top_items`
        # from the cleaning cell is not overwritten.
        member_top_items = member_data['itemDescription'].value_counts().head(5)
        print(f"   • Top 5 Items: {list(member_top_items.index)}")
        print(f"   • Rentang Tanggal: {member_data['Date'].min()} hingga {member_data['Date'].max()}")
else:
    print("   Tidak ada member yang terdeteksi sebagai anomali")

In [None]:
# Step 10: Business recommendations and export of results

print("\n BUSINESS RECOMMENDATIONS DAN EXPORT RESULTS")
print("=" * 60)

# Evaluated once and reused for the recommendation, export and summary parts.
has_meaningful_rules = 'meaningful_rules' in locals() and len(meaningful_rules) > 0

# Derived here from the stored vote column so this cell does not depend on a
# variable left behind by the interpretation cell.
anomalous_members = df_member_features[df_member_features['anomaly_vote'] == 1]

# Recommendations based on the analysis
print("BUSINESS RECOMMENDATIONS:")
print("\n1.  ASSOCIATION RULES OPTIMIZATION:")
if has_meaningful_rules:
    best_rule = meaningful_rules.nlargest(1, 'composite_score').iloc[0]
    antecedents = list(best_rule['antecedents'])
    consequents = list(best_rule['consequents'])

    print(f"   • BUNDLING STRATEGY: Gabungkan {antecedents} dengan {consequents}")
    print(f"     (Confidence: {best_rule['confidence']:.1%}, Lift: {best_rule['lift']:.1f}x)")

    # Rules in the top quartile of support
    high_support_rules = meaningful_rules[meaningful_rules['support'] > meaningful_rules['support'].quantile(0.75)]
    if len(high_support_rules) > 0:
        print(f"   • STORE LAYOUT: Prioritaskan {len(high_support_rules)} rules dengan support tinggi")

    # Rules with confidence above 50%
    high_confidence_rules = meaningful_rules[meaningful_rules['confidence'] > 0.5]
    if len(high_confidence_rules) > 0:
        print(f"   • PROMOTION: Gunakan {len(high_confidence_rules)} rules dengan confidence > 50%")
else:
    print("   • Tidak ada rules kuat yang ditemukan untuk rekomendasi spesifik")

print("\n2.  ANOMALY DETECTION INSIGHTS:")
print(f"   • {len(anomalous_members)} member terdeteksi sebagai anomali ({len(anomalous_members)/len(df_member_features)*100:.1f}%)")

if len(anomalous_members) > 0:
    # Only meaningful when at least one member was flagged; on an empty
    # selection the quantile is NaN and the line would read "> nan".
    print(f"   • Fokus investigasi pada member dengan anomaly score > {anomalous_members['anomaly_score'].quantile(0.75):.3f}")

    # Split the flagged members by how their transaction volume compares with
    # the other flagged members (top / bottom quartile)
    transaction_counts = anomalous_members['total_transactions']
    high_transaction_anomalies = anomalous_members[transaction_counts > transaction_counts.quantile(0.75)]
    low_transaction_anomalies = anomalous_members[transaction_counts < transaction_counts.quantile(0.25)]

    print(f"   • {len(high_transaction_anomalies)} member dengan transaksi sangat tinggi")
    print(f"   • {len(low_transaction_anomalies)} member dengan transaksi sangat rendah")

print("\n3.  MONITORING & ACTION PLAN:")
print("   • Implementasi real-time association rules analysis")
print("   • Dashboard monitoring untuk member behavior anomalies")
print("   • Regular review dan update model (monthly)")
print("   • Integration dengan CRM untuk personalized marketing")

# Export results
print("\n EXPORTING RESULTS...")

# Association rules are exported (and downloaded) only when there are any
if has_meaningful_rules:
    rules_export = meaningful_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift', 'conviction', 'composite_score']].copy()
    # Itemsets are written as comma-separated strings
    rules_export['antecedents'] = rules_export['antecedents'].apply(lambda x: ', '.join(list(x)))
    rules_export['consequents'] = rules_export['consequents'].apply(lambda x: ', '.join(list(x)))
    rules_export.to_csv('association_rules_results.csv', index=False)
    print("Association rules exported ke 'association_rules_results.csv'")
    files.download('association_rules_results.csv')

# Anomaly detection results are always exported
anomaly_export = df_member_features[['member_id', 'total_transactions', 'unique_items', 'transaction_days',
                                    'avg_items_per_transaction', 'anomaly_vote', 'anomaly_weighted', 'anomaly_score']]
anomaly_export.to_csv('anomaly_detection_results.csv', index=False)
print("Anomaly detection results exported ke 'anomaly_detection_results.csv'")

# Trigger the browser download (`files` comes from google.colab)
files.download('anomaly_detection_results.csv')

print("\n ANALISIS SELESAI!")
print("File results telah didownload:")
if has_meaningful_rules:
    print("   - association_rules_results.csv")
print("   - anomaly_detection_results.csv")