In [228]:
import pandas as pd
from itertools import combinations
from collections import defaultdict


### Take user inputs


In [None]:

# Path of the transactions CSV. Whitespace typed around the name is dropped so
# that a stray space does not turn into a "file not found" error later.
file_name = input("Enter the CSV file name (with .csv extension): ").strip()

# Minimum confidence a rule must reach to be reported (example value: 0.2).
# Confidence is a ratio of two support counts, so only values in [0, 1] make sense.
confidence_threshold = float(input("Enter the confidence threshold (e.g., 0.20): "))
if not 0 <= confidence_threshold <= 1:
    raise ValueError(
        f"Confidence threshold must be between 0 and 1, got {confidence_threshold}"
    )

# Minimum number of transactions an itemset must occur in (example value: 10).
# A value below 1 would make every combination of items "frequent".
sup_count = int(input("Enter the minimum support count: "))
if sup_count < 1:
    raise ValueError(f"Minimum support count must be at least 1, got {sup_count}")



### Load the dataset

In [230]:
# Read the CSV, then discard rows containing NaN values and exact duplicate rows
df = pd.read_csv(file_name).dropna().drop_duplicates()

# Report how many rows are left after cleaning
print(df.shape[0])



38006


### Build the vertical data format (candidate 1-itemsets)

In [231]:


# Parse the 'Date' column into datetime values
df['Date'] = pd.to_datetime(df['Date'])

# Identify every transaction as "<Member_number>_<Date>"
member_part = df['Member_number'].astype(str)
date_part = df['Date'].astype(str)
df['Transaction_ID'] = member_part + "_" + date_part

# Show the number of distinct transactions
num_transactions = df['Transaction_ID'].nunique()
print("Number of unique transactions:", num_transactions)

# Vertical data format: item -> set of IDs of the transactions that contain it
vertical_data = {}
for txn, item in zip(df['Transaction_ID'], df['itemDescription']):
    vertical_data.setdefault(item, set()).add(txn)
print(len(vertical_data))

# Keep an unfiltered copy of the single-item data for the confidence calculation
one_item_vertical_data = dict(vertical_data)
print(len(one_item_vertical_data))


Number of unique transactions: 14963
167
167


### Generate frequent 1-itemsets

In [232]:

# Keep only the items whose number of transactions reaches the minimum support count
filtered_vertical_data = {
    item: transactions
    for item, transactions in vertical_data.items()
    if len(transactions) >= sup_count
}


### Generate the frequent itemsets

In [233]:

# Size of the itemsets generated in the current pass (the first pass builds pairs)
max_k = 2

# Most recent non-empty level of frequent itemsets. It starts as a copy of the
# frequent single items so that this state is what gets restored when not even
# one frequent pair exists.
filtered_vertical_data_copy = filtered_vertical_data.copy()

# The frequent single items never change inside the loop, so list them once.
all_items = list(filtered_vertical_data.keys())

# Items still allowed to take part in candidates, kept in their original order
candidate_items = all_items

isCompleted = True

# Loop until a pass produces no frequent itemset
while isCompleted:

    # Frequent itemsets of size max_k -> set of transactions containing all of them
    new_filtered_vertical_data = {}

    for itemset in combinations(candidate_items, max_k):

        # Transactions shared by every item of the candidate
        common_transactions = set.intersection(
            *(filtered_vertical_data[item] for item in itemset)
        )

        # Keep the candidate only if it reaches the minimum support count
        if len(common_transactions) >= sup_count:
            new_filtered_vertical_data[itemset] = common_transactions

    # Nothing frequent at this size: expose the previous level as the result
    if not new_filtered_vertical_data:
        filtered_vertical_data = filtered_vertical_data_copy
        isCompleted = False
        break

    filtered_vertical_data_copy = new_filtered_vertical_data

    # Downward closure: every item of a frequent (k+1)-itemset also occurs in
    # some frequent k-itemset. Items absent from this level can therefore be
    # dropped from the pool without losing any result, which shrinks the number
    # of combinations enumerated in the next pass. Filtering the existing list
    # keeps the item order, so itemsets are still produced in the same order.
    surviving_items = set().union(*new_filtered_vertical_data.keys())
    candidate_items = [item for item in candidate_items if item in surviving_items]

    # Increase itemset size for the next iteration
    max_k += 1


### Print the frequent itemsets

In [234]:
# Show each itemset kept by the mining loop, one per line
for itemset in filtered_vertical_data:
    print(itemset)


('tropical fruit', 'whole milk', 'other vegetables')
('tropical fruit', 'whole milk', 'rolls/buns')
('tropical fruit', 'whole milk', 'yogurt')
('tropical fruit', 'whole milk', 'soda')
('whole milk', 'pip fruit', 'rolls/buns')
('whole milk', 'other vegetables', 'rolls/buns')
('whole milk', 'other vegetables', 'frankfurter')
('whole milk', 'other vegetables', 'yogurt')
('whole milk', 'other vegetables', 'sausage')
('whole milk', 'other vegetables', 'root vegetables')
('whole milk', 'other vegetables', 'pastry')
('whole milk', 'other vegetables', 'soda')
('whole milk', 'rolls/buns', 'citrus fruit')
('whole milk', 'rolls/buns', 'yogurt')
('whole milk', 'rolls/buns', 'sausage')
('whole milk', 'rolls/buns', 'pastry')
('whole milk', 'rolls/buns', 'canned beer')
('whole milk', 'rolls/buns', 'soda')
('whole milk', 'rolls/buns', 'bottled beer')
('whole milk', 'citrus fruit', 'yogurt')
('whole milk', 'bottled water', 'soda')
('whole milk', 'yogurt', 'sausage')
('whole milk', 'yogurt', 'root veget

### Generate and print the strong association rules

In [235]:



# Function to list every non-empty proper subset of an itemset
def get_subsets(itemset):
    """Return all non-empty proper subsets of ``itemset`` as tuples.

    Subsets are listed by increasing size, from 1 up to ``len(itemset) - 1``;
    the full itemset itself is never included. An itemset with fewer than two
    elements yields an empty list.
    """
    return [
        subset
        for size in range(1, len(itemset))
        for subset in combinations(itemset, size)
    ]



# Function to calculate the support count of a given itemset
def calculate_support(itemset, filtered_vertical_data):
    """Count the transactions that contain every item of ``itemset``.

    ``filtered_vertical_data`` maps each item to the set of transactions it
    appears in. The support count is the size of the intersection of those
    sets, which is 0 when the items share no transaction.
    """
    shared_transactions = set.intersection(
        *(filtered_vertical_data[member] for member in itemset)
    )
    return len(shared_transactions)


# Generate strong association rules
def generate_association_rules(filtered_vertical_data, item_transactions=None, min_confidence=None):
    """Build the association rules X => Y that reach the confidence threshold.

    Parameters
    ----------
    filtered_vertical_data : dict
        Frequent itemsets mapped to the set of transactions containing them.
        Keys that are plain strings (single items) or hold fewer than two
        items are skipped, since no rule can be formed from them.
    item_transactions : dict, optional
        Single item -> set of transactions, used to count the support of the
        antecedent X. Defaults to the notebook-level ``one_item_vertical_data``.
    min_confidence : float, optional
        Minimum confidence for a rule to be kept. Defaults to the
        notebook-level ``confidence_threshold``.

    Returns
    -------
    list of dict
        One dict per strong rule with the keys 'Rule', 'SupportAll'
        (support count of X ∪ Y), 'Support' (support count of X) and
        'Confidence' (SupportAll / Support).
    """
    # Fall back to the notebook-level values only when the caller gave none
    if item_transactions is None:
        item_transactions = one_item_vertical_data
    if min_confidence is None:
        min_confidence = confidence_threshold

    rules = []

    # Iterate over all itemsets (frequent itemsets)
    for itemset, transactions in filtered_vertical_data.items():
        # A string key is one single item, not a collection of items; treating
        # it as an itemset would split it into characters and fail the lookup.
        if isinstance(itemset, str) or len(itemset) < 2:
            continue

        # Support for the entire itemset (X ∪ Y); the same for every split
        support_XY = len(transactions)

        for subset in get_subsets(itemset):
            # X is the subset and Y is the complement (rest of the items in the itemset)
            X = set(subset)
            Y = set(itemset) - X

            # Support of X alone, counted over the single-item transaction sets
            support_X = calculate_support(X, item_transactions)

            # Guard against division by zero when X occurs in no transaction
            confidence = support_XY / support_X if support_X > 0 else 0

            # If confidence is greater than or equal to the threshold, we have a strong rule
            if confidence >= min_confidence:
                rules.append({
                    'Rule': f"{X} => {Y}",
                    'SupportAll': support_XY,
                    'Support' : support_X,
                    'Confidence': confidence
                })

    return rules

# Derive the strong association rules from the frequent itemsets found above
strong_rules = generate_association_rules(filtered_vertical_data)

# Report every strong rule together with its support counts and confidence
print("\nStrong Association Rules:")
for entry in strong_rules:
    rule_text = entry['Rule']
    support_all = entry['SupportAll']
    support_lhs = entry['Support']
    rule_confidence = entry['Confidence']
    print(f"Rule: {rule_text}, SupportAll: {support_all}, Support : {support_lhs} ,Confidence: {rule_confidence}")



Strong Association Rules:
Rule: {'rolls/buns', 'sausage'} => {'whole milk'}, SupportAll: 17, Support : 80 ,Confidence: 0.2125
Rule: {'yogurt', 'sausage'} => {'whole milk'}, SupportAll: 22, Support : 86 ,Confidence: 0.2558139534883721
Rule: {'pastry', 'sausage'} => {'whole milk'}, SupportAll: 11, Support : 48 ,Confidence: 0.22916666666666666
Rule: {'pastry', 'soda'} => {'whole milk'}, SupportAll: 14, Support : 61 ,Confidence: 0.22950819672131148
