# Time-based Association Rule Mining - Example
Author: David C. Sisk, 2024/11/22


In [7]:
# Optional environment setup (commented out): uncomment one line to install,
# upgrade, or check the installed version of mlxtend.
# Note: `findstr` is a Windows command; on Linux/macOS use `grep` instead.
#! pip install mlxtend
#! pip install --upgrade mlxtend
#! pip list | findstr mlxtend

mlxtend                                  0.23.3



[notice] A new release of pip is available: 23.3.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Sample event log data: one record per user action, with the time it occurred.
data = [
    {'timestamp': '2024-11-21 10:00:00', 'event': 'login'},
    {'timestamp': '2024-11-21 10:05:00', 'event': 'view_page'},
    {'timestamp': '2024-11-21 10:10:00', 'event': 'add_to_cart'},
    {'timestamp': '2024-11-21 10:15:00', 'event': 'logout'},
    {'timestamp': '2024-11-21 11:00:00', 'event': 'login'},
    {'timestamp': '2024-11-21 11:05:00', 'event': 'view_page'},
    {'timestamp': '2024-11-21 11:15:00', 'event': 'logout'},
    {'timestamp': '2024-11-21 12:00:00', 'event': 'login'},
    {'timestamp': '2024-11-21 12:10:00', 'event': 'add_to_cart'},
    {'timestamp': '2024-11-21 12:15:00', 'event': 'purchase'},
    {'timestamp': '2024-11-21 13:00:00', 'event': 'login'},
    {'timestamp': '2024-11-21 13:05:00', 'event': 'view_page'},
    {'timestamp': '2024-11-21 13:10:00', 'event': 'add_to_cart'},
    {'timestamp': '2024-11-21 13:15:00', 'event': 'purchase'},
    {'timestamp': '2024-11-21 13:20:00', 'event': 'logout'},
]

# Convert to DataFrame
df = pd.DataFrame(data)

# Step 1: Group by transaction window (e.g., hourly)
# Every event is assigned to the start of the hour it falls in; all events that
# share a window are later treated as one "transaction".
df['timestamp'] = pd.to_datetime(df['timestamp'])
# Use the lowercase 'h' alias: the uppercase 'H' spelling is deprecated in
# recent pandas releases and raises a FutureWarning.
df['transaction_window'] = df['timestamp'].dt.floor('h')  # Group by hour

# Step 2: Aggregate events into transactions
# Result is a list of lists: one inner list of event names per hourly window.
transactions = df.groupby('transaction_window')['event'].apply(list).tolist()

# Step 3: Transform data for mlxtend
# One-hot encode: one row per transaction, one boolean column per event type.
te = TransactionEncoder()
te_data = te.fit_transform(transactions)
df_encoded = pd.DataFrame(te_data, columns=te.columns_)

# Step 4: Perform Apriori algorithm to find frequent itemsets
# min_support=0.5 keeps itemsets that occur in at least half of the transactions.
frequent_itemsets = apriori(df_encoded, min_support=0.5, use_colnames=True)

# Step 5: Generate association rules
# mlxtend 0.23.3 requires the `num_itemsets` argument. It is documented as the
# number of transactions in the original input data, i.e. the number of rows of
# the encoded frame -- not the number of frequent itemsets that apriori found.
num_itemsets = len(df_encoded)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0, num_itemsets=num_itemsets)


# Display results
print("Frequent Itemsets:")
print(frequent_itemsets)
print("\nAssociation Rules:")
print(rules)


Frequent Itemsets:
    support                                 itemsets
0      0.75                            (add_to_cart)
1      1.00                                  (login)
2      0.75                                 (logout)
3      0.50                               (purchase)
4      0.75                              (view_page)
5      0.75                     (add_to_cart, login)
6      0.50                    (add_to_cart, logout)
7      0.50                  (add_to_cart, purchase)
8      0.50                 (add_to_cart, view_page)
9      0.75                          (logout, login)
10     0.50                        (purchase, login)
11     0.75                       (login, view_page)
12     0.75                      (logout, view_page)
13     0.50             (add_to_cart, logout, login)
14     0.50           (add_to_cart, purchase, login)
15     0.50          (add_to_cart, login, view_page)
16     0.50         (add_to_cart, logout, view_page)
17     0.75               (

  df['transaction_window'] = df['timestamp'].dt.floor('1H')  # Group by hour
  cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom)
