🔹 Step 1: Verify the Uploaded File

In [5]:
import os

# Confirm the uploaded file is present by listing the working directory
cwd_entries = os.listdir()
print(cwd_entries)


['.config', 'TBD3.json', 'sample_data']


 Step 2: Read the JSON File

In [12]:
import json

file_name = "TBD3.json"  # Ensure the file extension is correct!

# Open and load the JSON file. The path is taken from `file_name` so that
# editing the variable above actually changes which file is read.
with open(file_name, 'r', encoding='utf-8') as f:
    data = json.load(f)

# The preview below slices `data`, and later cells build one row per record,
# so fail early with a clear message if the top-level JSON value is not a list.
if not isinstance(data, list):
    raise TypeError(
        f"Expected {file_name} to contain a JSON array of records, "
        f"got {type(data).__name__}"
    )

print("✅ File loaded successfully!")
print("📂 Sample data:", data[:2])  # Display first 2 records for verification


✅ File loaded successfully!
📂 Sample data: [{'items': ['milk', 'bread', 'eggs', 'butter', 'cheese', 'apples']}, {'items': ['bread', 'diapers', 'beer', 'eggs', 'yogurt', 'bananas']}]


📊 Step 3: Convert JSON to a Pandas DataFrame


The Apriori algorithm requires structured data. Let’s convert the JSON file into a Pandas DataFrame.

In [14]:
import pandas as pd

# Build a DataFrame from the loaded JSON data
df = pd.DataFrame(data=data)

# Preview the first few rows to confirm the conversion
preview = df.head()
print("✅ Data converted to DataFrame!")
print(preview)


✅ Data converted to DataFrame!
                                           items
0    [milk, bread, eggs, butter, cheese, apples]
1  [bread, diapers, beer, eggs, yogurt, bananas]
2     [milk, diapers, beer, cola, apples, chips]
3    [bread, milk, diapers, beer, bananas, soda]
4              [bread, cola, chips, soda, juice]


 Step 4: Preprocess the Data

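Apriori works on transactions (lists of items bought together), so we make sure every value in the `items` column is a list and then collect the column into a list of transactions.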

In [15]:
# Normalise the 'items' column: list values are kept as they are, and any
# value that is not a list is replaced with an empty list.
# NOTE: 'items' is the column name in this JSON file; adjust it for other data.
def _as_item_list(value):
    """Return `value` unchanged if it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


df['items'] = df['items'].apply(_as_item_list)

# Collect one list of items per row and preview the first 3 transactions
transactions = df['items'].tolist()
print("✅ Transactions ready for Apriori!")
print(transactions[:3])


✅ Transactions ready for Apriori!
[['milk', 'bread', 'eggs', 'butter', 'cheese', 'apples'], ['bread', 'diapers', 'beer', 'eggs', 'yogurt', 'bananas'], ['milk', 'diapers', 'beer', 'cola', 'apples', 'chips']]


📈 Step 5: Apply the Apriori Algorithm

We use the MLxtend library to encode the transactions and apply the Apriori algorithm.

In [16]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Encode the transactions into the tabular form that `apriori` takes as input;
# the columns are named after the encoder's `columns_` attribute.
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Mine frequent itemsets using the minimum support threshold
min_support = 0.01
frequent_itemsets = apriori(
    df_encoded,
    min_support=min_support,
    use_colnames=True,
)

print("✅ Frequent itemsets found!")
print(frequent_itemsets)


✅ Frequent itemsets found!
      support                           itemsets
0    0.013158                    (aluminum foil)
1    0.013158                       (antiseptic)
2    0.026316                           (apples)
3    0.013158                           (bagels)
4    0.013158                         (balloons)
..        ...                                ...
563  0.013158  (diapers, usb drives, beer, milk)
564  0.013158     (diapers, vinegar, beer, milk)
565  0.013158         (soda, cola, juice, bread)
566  0.111842       (diapers, bread, eggs, milk)
567  0.013158       (diapers, soda, bread, milk)

[568 rows x 2 columns]


📌 Step 6: Generate Association Rules

Next, we generate association rules from the frequent itemsets, keeping only the rules that meet a minimum confidence threshold.

In [21]:
# Derive association rules, using confidence as the filtering metric
min_confidence = 0.8  # Adjust confidence threshold as needed
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=min_confidence,
)

# Show only the columns of interest
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
print("✅ Association Rules Generated!")
print(rules[rule_columns])


✅ Association Rules Generated!
                antecedents       consequents   support  confidence      lift
0           (aluminum foil)           (bread)  0.013158         1.0  1.235772
1           (aluminum foil)            (milk)  0.013158         1.0  1.600000
2              (antiseptic)           (bread)  0.013158         1.0  1.235772
3                  (bagels)           (bread)  0.013158         1.0  1.235772
4                  (bagels)            (eggs)  0.013158         1.0  2.763636
..                      ...               ...       ...         ...       ...
666  (diapers, soda, bread)            (milk)  0.013158         1.0  1.600000
667   (diapers, soda, milk)           (bread)  0.013158         1.0  1.235772
668     (soda, milk, bread)         (diapers)  0.013158         1.0  1.747126
669         (diapers, soda)     (milk, bread)  0.013158         1.0  2.171429
670            (soda, milk)  (diapers, bread)  0.013158         1.0  2.171429

[671 rows x 5 columns]


In [22]:
# Keep only the rules whose lift is strictly greater than 1.5
lift_mask = rules['lift'] > 1.5
filtered_rules = rules[lift_mask]

print("✅ Filtered Strong Rules!")
summary_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
print(filtered_rules[summary_columns])


✅ Filtered Strong Rules!
                antecedents       consequents   support  confidence      lift
1           (aluminum foil)            (milk)  0.013158         1.0  1.600000
4                  (bagels)            (eggs)  0.013158         1.0  2.763636
5                (balloons)            (beer)  0.013158         1.0  2.171429
6                (balloons)         (diapers)  0.013158         1.0  1.747126
7                (balloons)            (milk)  0.013158         1.0  1.600000
..                      ...               ...       ...         ...       ...
664           (soda, juice)     (cola, bread)  0.013158         1.0  4.470588
666  (diapers, soda, bread)            (milk)  0.013158         1.0  1.600000
668     (soda, milk, bread)         (diapers)  0.013158         1.0  1.747126
669         (diapers, soda)     (milk, bread)  0.013158         1.0  2.171429
670            (soda, milk)  (diapers, bread)  0.013158         1.0  2.171429

[526 rows x 5 columns]


Save the Results to CSV

In [23]:
# The antecedents/consequents columns hold frozenset objects, which to_csv
# would write as their Python repr (e.g. "frozenset({'milk'})"). Flatten them
# to sorted, comma-separated item names so the CSV is readable by other tools.
# NOTE: an item name that itself contains ", " would be ambiguous in the output.
def _itemset_to_text(itemset):
    """Return the items of `itemset` as a sorted, comma-separated string."""
    return ", ".join(sorted(str(item) for item in itemset))


# Work on a copy produced by assign() so `filtered_rules` keeps its frozensets
# for any further analysis in the notebook.
rules_for_export = filtered_rules.assign(
    antecedents=filtered_rules['antecedents'].map(_itemset_to_text),
    consequents=filtered_rules['consequents'].map(_itemset_to_text),
)
rules_for_export.to_csv("strong_association_rules.csv", index=False)
print("✅ Rules saved to strong_association_rules.csv!")


✅ Rules saved to strong_association_rules.csv!


In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)