In [1]:
# Question 4: Combine hierarchical clustering with Apriori to analyze clustered data and find frequent patterns within each cluster of a given dataset.

import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import linkage, fcluster
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

# --- Part 1: hierarchical clustering of a numeric sample -------------------

# Sample numeric dataset: six 2-D points
data = np.array([[1, 2], [2, 3], [3, 3], [6, 5], [7, 8], [8, 8]])

# Perform hierarchical clustering using ward linkage
Z = linkage(data, method='ward')

# Create flat clusters by cutting the linkage at a distance threshold of 5
clusters = fcluster(Z, t=5, criterion='distance')

# Add the cluster labels to the data
df = pd.DataFrame(data, columns=['Feature1', 'Feature2'])
df['Cluster'] = clusters

# --- Part 2: Apriori on the full transactional dataset ---------------------

# Sample transactional dataset
transactions = [
    ['Milk', 'Bread'],
    ['Milk', 'Diaper', 'Beer', 'Bread'],
    ['Milk', 'Diaper', 'Beer', 'Coke'],
    ['Bread', 'Egg', 'Milk'],
    ['Bread', 'Egg', 'Diaper', 'Milk', 'Beer']
]

# Minimum support shared by the global run and every per-cluster run
MIN_SUPPORT = 0.5

# Transform the transactions into a binary (one-hot) matrix:
# one row per transaction, one boolean column per distinct item
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df_trans = pd.DataFrame(te_ary, columns=te.columns_)

# Apply Apriori on the entire dataset
frequent_itemsets = apriori(df_trans, min_support=MIN_SUPPORT, use_colnames=True)

# Print the frequent itemsets
print(frequent_itemsets)

# --- Part 3: Apriori within each cluster of transactions -------------------
# The numeric sample above (6 points) has no row-wise correspondence with the
# transactions (5 baskets), so its labels cannot be used to split them.
# Instead, the transactions are clustered on their own one-hot item vectors,
# and Apriori is run separately on the rows of each resulting cluster.

# linkage expects numeric observations, so the boolean matrix is cast to float
trans_Z = linkage(te_ary.astype(float), method='ward')

# Ask for at most two flat clusters of transactions
trans_clusters = fcluster(trans_Z, t=2, criterion='maxclust')

# Frequent itemsets per cluster, keyed by cluster label
cluster_itemsets = {}
for cluster_id in np.unique(trans_clusters):
    # Rows of the one-hot matrix that belong to this cluster
    cluster_trans = df_trans.loc[trans_clusters == cluster_id]

    # Support is computed relative to the cluster's own transaction count
    cluster_itemsets[int(cluster_id)] = apriori(
        cluster_trans, min_support=MIN_SUPPORT, use_colnames=True
    )

    print(f"\nCluster {cluster_id} ({len(cluster_trans)} transactions):")
    print(cluster_itemsets[int(cluster_id)])


   support              itemsets
0      0.6                (Beer)
1      0.8               (Bread)
2      0.6              (Diaper)
3      1.0                (Milk)
4      0.6        (Beer, Diaper)
5      0.6          (Beer, Milk)
6      0.8         (Milk, Bread)
7      0.6        (Milk, Diaper)
8      0.6  (Beer, Milk, Diaper)
