In [16]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# Load the dataset. header=None means the first row is read as data and the
# columns get integer labels; column 0 holds the comma-separated items.
url = '/content/Online retail.xlsx'  # Replace with your file path or URL
df = pd.read_excel(url, header=None, engine='openpyxl')

# Handle missing values by dropping them.
# dropna() returns a new frame, so the result has to be assigned back, and it
# must run before the split below because a NaN cell cannot be split.
# .copy() gives an independent frame so the column assignment that follows
# does not trigger SettingWithCopyWarning.
df = df.dropna(subset=[0]).copy()

# Preprocess the data
# Split the items in each transaction into a list.
# astype(str) guards against a cell that was parsed as a number.
df[0] = df[0].astype(str).str.split(',')

# Convert the list of transactions into the format needed for association rule mining
transactions = df[0].tolist()

# Apply Transaction Encoder: one boolean column per distinct item.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Remove duplicates from the cleaned frame.
# Lists are unhashable, so duplicated() cannot compare them directly; compare
# on a copy whose item lists are converted to tuples. This runs after the
# encoding, so df_encoded still reflects every non-null transaction.
hashable_view = df.copy()
hashable_view[0] = hashable_view[0].map(tuple)
df = df[~hashable_view.duplicated()]

# NOTE: the former 'InvoiceNo' string cast and cancellation filter were
# removed. With header=None the frame only has integer column labels, so
# df['InvoiceNo'] always raised KeyError.

# Display the cleaned data
df.head()


In [17]:
from mlxtend.frequent_patterns import apriori, association_rules

# Mine frequent itemsets from the one-hot encoded transactions, keeping the
# item names (rather than column indices) in the result.
frequent_itemsets = apriori(
    df_encoded,
    use_colnames=True,
    min_support=0.01,
)

# Derive association rules from those itemsets, filtered on lift with a
# minimum threshold of 1.
rules = association_rules(
    frequent_itemsets,
    min_threshold=1,
    metric="lift",
)

# Order the rules from highest to lowest lift and show the first ten.
rules_sorted = rules.sort_values('lift', ascending=False)
print(rules_sorted.head(10))  # Display top 10 rules


                    antecedents                 consequents  \
214             (herb & pepper)               (ground beef)   
215               (ground beef)             (herb & pepper)   
383  (spaghetti, mineral water)               (ground beef)   
386               (ground beef)  (spaghetti, mineral water)   
398                 (olive oil)  (spaghetti, mineral water)   
395  (spaghetti, mineral water)                 (olive oil)   
193                  (tomatoes)         (frozen vegetables)   
192         (frozen vegetables)                  (tomatoes)   
189                    (shrimp)         (frozen vegetables)   
188         (frozen vegetables)                    (shrimp)   

     antecedent support  consequent support   support  confidence      lift  \
214            0.049460            0.098254  0.015998    0.323450  3.291994   
215            0.098254            0.049460  0.015998    0.162822  3.291994   
383            0.059725            0.098254  0.017064    0.285714  2.

In [18]:
# Analyze the generated rules
print("Top 10 Association Rules:")
print(rules_sorted.head(10))

# Interpretation: print one readable summary per rule.
# The antecedents/consequents cells are set-like collections of item names
# (frozensets per the mlxtend docs), so their iteration order can differ
# between kernel restarts. Sorting the items keeps the printed rule text
# reproducible.
for idx, row in rules_sorted.head(10).iterrows():
    antecedent_items = ', '.join(sorted(row['antecedents']))
    consequent_items = ', '.join(sorted(row['consequents']))
    print(f"Rule: {antecedent_items} -> {consequent_items}")
    print(f"Support: {row['support']:.2f}, Confidence: {row['confidence']:.2f}, Lift: {row['lift']:.2f}")
    print("-" * 50)


Top 10 Association Rules:
                    antecedents                 consequents  \
214             (herb & pepper)               (ground beef)   
215               (ground beef)             (herb & pepper)   
383  (spaghetti, mineral water)               (ground beef)   
386               (ground beef)  (spaghetti, mineral water)   
398                 (olive oil)  (spaghetti, mineral water)   
395  (spaghetti, mineral water)                 (olive oil)   
193                  (tomatoes)         (frozen vegetables)   
192         (frozen vegetables)                  (tomatoes)   
189                    (shrimp)         (frozen vegetables)   
188         (frozen vegetables)                    (shrimp)   

     antecedent support  consequent support   support  confidence      lift  \
214            0.049460            0.098254  0.015998    0.323450  3.291994   
215            0.098254            0.049460  0.015998    0.162822  3.291994   
383            0.059725            0.098254

  and should_run_async(code)


In [19]:
# Analyze the generated rules
print("Top 10 Association Rules:")
print(rules_sorted.head(10))

# Interpretation: print one readable summary per rule.
# The antecedents/consequents cells are set-like collections of item names
# (frozensets per the mlxtend docs), so their iteration order can differ
# between kernel restarts. Sorting the items keeps the printed rule text
# reproducible.
for idx, row in rules_sorted.head(10).iterrows():
    antecedent_items = ', '.join(sorted(row['antecedents']))
    consequent_items = ', '.join(sorted(row['consequents']))
    print(f"Rule: {antecedent_items} -> {consequent_items}")
    print(f"Support: {row['support']:.2f}, Confidence: {row['confidence']:.2f}, Lift: {row['lift']:.2f}")
    print("-" * 50)


Top 10 Association Rules:
                    antecedents                 consequents  \
214             (herb & pepper)               (ground beef)   
215               (ground beef)             (herb & pepper)   
383  (spaghetti, mineral water)               (ground beef)   
386               (ground beef)  (spaghetti, mineral water)   
398                 (olive oil)  (spaghetti, mineral water)   
395  (spaghetti, mineral water)                 (olive oil)   
193                  (tomatoes)         (frozen vegetables)   
192         (frozen vegetables)                  (tomatoes)   
189                    (shrimp)         (frozen vegetables)   
188         (frozen vegetables)                    (shrimp)   

     antecedent support  consequent support   support  confidence      lift  \
214            0.049460            0.098254  0.015998    0.323450  3.291994   
215            0.098254            0.049460  0.015998    0.162822  3.291994   
383            0.059725            0.098254

  and should_run_async(code)
