In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; the dataset
# is read from /content/drive/MyDrive further down in this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install mlxtend --quiet

In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# Location of the prepared mushroom dataset on the mounted Drive.
file_path = '/content/drive/MyDrive/mushroom_cleaned.csv'

# Read the CSV into the frame used by every later cell.
df = pd.read_csv(file_path)

# Quick sanity check of what was loaded: dimensions, column names, and the
# first two rows as the cell's rich output.
column_names = list(df.columns)
print("Original dataset shape:", df.shape)
print("Columns:", column_names)
df.iloc[:2]

Original dataset shape: (8124, 93)
Columns: ['class', 'cap-shape_c', 'cap-shape_f', 'cap-shape_k', 'cap-shape_s', 'cap-shape_x', 'cap-surface_g', 'cap-surface_s', 'cap-surface_y', 'cap-color_c', 'cap-color_e', 'cap-color_g', 'cap-color_n', 'cap-color_p', 'cap-color_r', 'cap-color_u', 'cap-color_w', 'cap-color_y', 'bruises_t', 'odor_c', 'odor_f', 'odor_l', 'odor_m', 'odor_n', 'odor_p', 'odor_s', 'odor_y', 'gill-attachment_f', 'gill-spacing_w', 'gill-size_n', 'gill-color_e', 'gill-color_g', 'gill-color_h', 'gill-color_k', 'gill-color_n', 'gill-color_o', 'gill-color_p', 'gill-color_r', 'gill-color_u', 'gill-color_w', 'gill-color_y', 'stalk-shape_t', 'stalk-root_b', 'stalk-root_c', 'stalk-root_e', 'stalk-root_r', 'stalk-surface-above-ring_k', 'stalk-surface-above-ring_s', 'stalk-surface-above-ring_y', 'stalk-surface-below-ring_k', 'stalk-surface-below-ring_s', 'stalk-surface-below-ring_y', 'stalk-color-above-ring_c', 'stalk-color-above-ring_e', 'stalk-color-above-ring_g', 'stalk-color-abov

Unnamed: 0,class,cap-shape_c,cap-shape_f,cap-shape_k,cap-shape_s,cap-shape_x,cap-surface_g,cap-surface_s,cap-surface_y,cap-color_c,...,population_n,population_s,population_v,population_y,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w
0,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Tunable knobs for the feature pre-selection, kept in one place so the
# cut-off rank and the number of selected features cannot drift apart.
TOP_N_FEATURES = 20    # how many of the most frequent features to keep for mining
SUPPORT_MARGIN = 0.9   # min_support = margin * frequency of the last kept feature

# Calculate frequency of each feature (column mean; for 0/1 indicator columns
# this is the share of rows where the feature is set). The 'class' column is
# excluded from mining.
feature_freq = df.drop(columns='class').mean()

# Rank once and reuse the result: nlargest returns the features ordered by
# descending frequency, so its last entry is the cut-off feature. Unlike a
# hard-coded .iloc[19], this also works when fewer than TOP_N_FEATURES
# feature columns exist.
top_freq = feature_freq.nlargest(TOP_N_FEATURES)
if top_freq.empty:
    raise ValueError("No feature columns available for frequent-itemset mining.")

# Get the minimum support value from the least common selected feature,
# lowered slightly so that feature itself still clears the threshold.
cutoff_freq = top_freq.iloc[-1]
min_support_value = cutoff_freq * SUPPORT_MARGIN

# NOTE: the ordinal suffix is naive ("th"); it reads correctly for the default of 20.
print(f"{len(top_freq)}th most common feature frequency: {cutoff_freq:.3f}")
print(f"Using min_support: {min_support_value:.3f}")

# Select top features; apriori is fed a boolean frame.
top_features = top_freq.index.tolist()
df_mining = df[top_features].astype(bool)

print("\nSelected features:")
print(top_features)
print("Reduced dataset shape:", df_mining.shape)

20th most common feature frequency: 0.482
Using min_support: 0.434

Selected features:
['cap-shape_c', 'cap-shape_f', 'cap-shape_k', 'cap-shape_s', 'cap-shape_x', 'cap-surface_g', 'cap-surface_s', 'cap-surface_y', 'cap-color_c', 'cap-color_e', 'cap-color_g', 'cap-color_n', 'cap-color_p', 'cap-color_r', 'cap-color_u', 'cap-color_w', 'cap-color_y', 'bruises_t', 'odor_c', 'odor_f']
Reduced dataset shape: (8124, 20)


In [None]:
# Mine frequent itemsets, first at the computed support threshold and, only if
# that yields nothing, once more at 70% of it.
support_candidates = (
    min_support_value,
    min_support_value * 0.7,  # Lower threshold
)

for attempt, support in enumerate(support_candidates):
    if attempt > 0:
        # Reached only when the previous attempt produced an empty result.
        print("\n⚠️ No frequent itemsets found. Trying lower support threshold...")

    frequent_itemsets = apriori(
        df_mining,
        min_support=support,
        use_colnames=True,
        max_len=3,
        low_memory=True
    )

    if not frequent_itemsets.empty:
        break

print(f"\nFound {len(frequent_itemsets)} frequent itemsets")

# Show the ten best-supported itemsets when there is anything to show.
if len(frequent_itemsets) > 0:
    ranked_itemsets = frequent_itemsets.sort_values('support', ascending=False)
    display(ranked_itemsets.head(10))


Found 1350 frequent itemsets


Unnamed: 0,support,itemsets
1349,0.482029,"(odor_c, bruises_t, odor_f)"
0,0.482029,(cap-shape_c)
1,0.482029,(cap-shape_f)
2,0.482029,(cap-shape_k)
3,0.482029,(cap-shape_s)
4,0.482029,(cap-shape_x)
1333,0.482029,"(cap-color_w, odor_f, cap-color_u)"
1332,0.482029,"(cap-color_w, odor_c, cap-color_u)"
1331,0.482029,"(cap-color_w, bruises_t, cap-color_u)"
1330,0.482029,"(cap-color_y, cap-color_w, cap-color_u)"


In [None]:
# Generate association rules from the mined itemsets and show the strongest ones.
if not frequent_itemsets.empty:
    rules = association_rules(
        frequent_itemsets,
        metric='confidence',
        min_threshold=0.5
    )

    if not rules.empty:
        print(f"Generated {len(rules)} association rules")

        # Process and show top 10 rules, ranked by confidence and then lift.
        top_rules = rules.sort_values(
            by=['confidence', 'lift'],
            ascending=False
        ).head(10)

        # Render the item sets as readable strings. The items are sorted
        # because set iteration order is not stable between kernel sessions,
        # which would make multi-item rules display differently on each run.
        # .assign builds a new frame instead of writing into the .head() slice.
        top_rules = top_rules.assign(
            antecedents=top_rules['antecedents'].apply(lambda items: ', '.join(sorted(items))),
            consequents=top_rules['consequents'].apply(lambda items: ', '.join(sorted(items))),
        )

        # Keep only the columns of interest and renumber the rows from 0
        # (no inplace mutation, so re-running the cell is idempotent).
        result = (
            top_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]
            .reset_index(drop=True)
        )

        print("\nTop 10 Association Rules:")
        display(result)
    else:
        print("No association rules generated. Try lowering min_threshold.")
else:
    print("Cannot generate rules - no frequent itemsets found.")

Generated 7220 association rules

Top 10 Association Rules:


Unnamed: 0,antecedents,consequents,support,confidence,lift
0,cap-shape_c,cap-shape_f,0.482029,1.0,2.074566
1,cap-shape_f,cap-shape_c,0.482029,1.0,2.074566
2,cap-shape_c,cap-shape_k,0.482029,1.0,2.074566
3,cap-shape_k,cap-shape_c,0.482029,1.0,2.074566
4,cap-shape_c,cap-shape_s,0.482029,1.0,2.074566
5,cap-shape_s,cap-shape_c,0.482029,1.0,2.074566
6,cap-shape_c,cap-shape_x,0.482029,1.0,2.074566
7,cap-shape_x,cap-shape_c,0.482029,1.0,2.074566
8,cap-shape_c,cap-surface_g,0.482029,1.0,2.074566
9,cap-surface_g,cap-shape_c,0.482029,1.0,2.074566
