In [2]:
# Step 1: Import necessary libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Step 2: Upload the file
from google.colab import files

# Prompt user to upload the dataset
print("Please upload the Online_Retail.xlsx file.")
uploaded = files.upload()

# Step 3: Load the dataset
file_name = list(uploaded.keys())[0]  # Get the uploaded file name
df = pd.read_excel(file_name)

# Display the first few rows of the dataset
print("First 5 rows of the dataset:")
print(df.head())

# Step 4: Data cleanup
df['Description'] = df['Description'].str.strip()  # Remove extra spaces in the 'Description' column
df.dropna(axis=0, subset=['InvoiceNo'], inplace=True)  # Drop rows without InvoiceNo
df['InvoiceNo'] = df['InvoiceNo'].astype('str')  # Ensure InvoiceNo is a string
df = df[~df['InvoiceNo'].str.contains('C')]  # Remove credit transactions

# Step 5: Consolidate items into a single transaction per row
basket = (df[df['Country'] == "France"]
          .groupby(['InvoiceNo', 'Description'])['Quantity']
          .sum()
          .unstack()
          .reset_index()
          .fillna(0)
          .set_index('InvoiceNo'))

print("\nPreview of the consolidated basket dataset:")
print(basket.head())

# Step 6: Convert quantities to binary (1 for presence, 0 for absence)
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

basket_sets = basket.applymap(encode_units)

# Remove 'POSTAGE' column if it exists (not meaningful for analysis)
if 'POSTAGE' in basket_sets.columns:
    basket_sets.drop('POSTAGE', inplace=True, axis=1)

print("\nBasket set after encoding (binary representation):")
print(basket_sets.head())

# Step 7: Apply Apriori algorithm
frequent_itemsets = apriori(basket_sets, min_support=0.07, use_colnames=True)

print("\nFrequent Itemsets:")
print(frequent_itemsets)

# Step 8: Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)

print("\nGenerated Association Rules:")
print(rules.head())

# Step 9: Filter rules with lift >= 6 and confidence >= 0.8
filtered_rules = rules[(rules['lift'] >= 6) & (rules['confidence'] >= 0.8)]

print("\nFiltered Rules (lift >= 6 and confidence >= 0.8):")
print(filtered_rules)

# Observations
print("\nObservations:")
print("1. A few rules with high lift values indicate items bought together more frequently than expected.")
print("2. Most rules also have high confidence values.")


Please upload the Online_Retail.xlsx file.


  and should_run_async(code)


Saving Online_Retail.xlsx to Online_Retail (1).xlsx
First 5 rows of the dataset:
  InvoiceNo StockCode                          Description  Quantity  \
0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   
1    536365     71053                  WHITE METAL LANTERN         6   
2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   
4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   

          InvoiceDate  UnitPrice  CustomerID         Country  
0 2010-12-01 08:26:00       2.55     17850.0  United Kingdom  
1 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  
2 2010-12-01 08:26:00       2.75     17850.0  United Kingdom  
3 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  
4 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  

Preview of the consolidated basket dataset:
Description  10 COLOUR SPACEBOY PEN  12 COLOURED PARTY BALLOONS

  basket_sets = basket.applymap(encode_units)



Basket set after encoding (binary representation):
Description  10 COLOUR SPACEBOY PEN  12 COLOURED PARTY BALLOONS  \
InvoiceNo                                                         
536370                            0                           0   
536852                            0                           0   
536974                            0                           0   
537065                            0                           0   
537463                            0                           0   

Description  12 EGG HOUSE PAINTED WOOD  12 MESSAGE CARDS WITH ENVELOPES  \
InvoiceNo                                                                 
536370                               0                                0   
536852                               0                                0   
536974                               0                                0   
537065                               0                                0   
537463                      



TypeError: association_rules() missing 1 required positional argument: 'num_itemsets'

In [4]:
# Step 1: Import necessary libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Step 2: Upload the file
from google.colab import files

# Prompt user to upload the dataset
print("Please upload the Online_Retail.xlsx file.")
uploaded = files.upload()

# Step 3: Load the dataset
file_name = list(uploaded.keys())[0]  # Get the uploaded file name
df = pd.read_excel(file_name)

# Display the first few rows of the dataset
print("First 5 rows of the dataset:")
print(df.head())

# Step 4: Data cleanup
df['Description'] = df['Description'].str.strip()  # Remove extra spaces in the 'Description' column
df.dropna(axis=0, subset=['InvoiceNo'], inplace=True)  # Drop rows without InvoiceNo
df['InvoiceNo'] = df['InvoiceNo'].astype('str')  # Ensure InvoiceNo is a string
df = df[~df['InvoiceNo'].str.contains('C')]  # Remove credit transactions

# Step 5: Consolidate items into a single transaction per row
basket = (df[df['Country'] == "France"]
          .groupby(['InvoiceNo', 'Description'])['Quantity']
          .sum()
          .unstack()
          .reset_index()
          .fillna(0)
          .set_index('InvoiceNo'))

print("\nPreview of the consolidated basket dataset:")
print(basket.head())

# Step 6: Convert quantities to binary (1 for presence, 0 for absence)
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

basket_sets = basket.applymap(encode_units)

# Remove 'POSTAGE' column if it exists (not meaningful for analysis)
if 'POSTAGE' in basket_sets.columns:
    basket_sets.drop('POSTAGE', inplace=True, axis=1)

print("\nBasket set after encoding (binary representation):")
print(basket_sets.head())

# Step 7: Apply Apriori algorithm with 7% support
frequent_itemsets_7 = apriori(basket_sets, min_support=0.07, use_colnames=True)

print("\nFrequent Itemsets (Support ≥ 7%):")
print(frequent_itemsets_7)

# Step 8: Generate association rules (7% support)
rules_7 = association_rules(frequent_itemsets_7, metric="lift", min_threshold=1.0)

print("\nGenerated Association Rules (Support ≥ 7%):")
print(rules_7.head())

# Step 9: Apply Apriori algorithm with 5% support
frequent_itemsets_5 = apriori(basket_sets, min_support=0.05, use_colnames=True)

print("\nFrequent Itemsets (Support ≥ 5%):")
print(frequent_itemsets_5)

# Step 10: Generate association rules (5% support)
rules_5 = association_rules(frequent_itemsets_5, metric="lift", min_threshold=1.0)

print("\nGenerated Association Rules (Support ≥ 5%):")
print(rules_5.head())

# Step 11: Filter rules with lift ≥ 6 and confidence ≥ 0.8 (for 5% support)
filtered_rules_5 = rules_5[(rules_5['lift'] >= 6) & (rules_5['confidence'] >= 0.8)]

print("\nFiltered Rules (Support ≥ 5%, lift ≥ 6, confidence ≥ 0.8):")
print(filtered_rules_5)

# Observations
print("\nObservations:")
print("1. Frequent itemsets with a 5% support threshold include more combinations.")
print("2. Association rules derived from these itemsets reveal patterns with lower support but significant lift and confidence.")
print("3. High lift values suggest items are bought together more often than expected, especially in strong filtered rules.")


Please upload the Online_Retail.xlsx file.


  and should_run_async(code)


Saving Online_Retail.xlsx to Online_Retail (3).xlsx
First 5 rows of the dataset:
  InvoiceNo StockCode                          Description  Quantity  \
0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   
1    536365     71053                  WHITE METAL LANTERN         6   
2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   
4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   

          InvoiceDate  UnitPrice  CustomerID         Country  
0 2010-12-01 08:26:00       2.55     17850.0  United Kingdom  
1 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  
2 2010-12-01 08:26:00       2.75     17850.0  United Kingdom  
3 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  
4 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  

Preview of the consolidated basket dataset:
Description  10 COLOUR SPACEBOY PEN  12 COLOURED PARTY BALLOONS

  basket_sets = basket.applymap(encode_units)



Basket set after encoding (binary representation):
Description  10 COLOUR SPACEBOY PEN  12 COLOURED PARTY BALLOONS  \
InvoiceNo                                                         
536370                            0                           0   
536852                            0                           0   
536974                            0                           0   
537065                            0                           0   
537463                            0                           0   

Description  12 EGG HOUSE PAINTED WOOD  12 MESSAGE CARDS WITH ENVELOPES  \
InvoiceNo                                                                 
536370                               0                                0   
536852                               0                                0   
536974                               0                                0   
537065                               0                                0   
537463                      



TypeError: association_rules() missing 1 required positional argument: 'num_itemsets'

In [None]:
home task