# Inputs from user

In [1]:
# Greet the user before the interactive prompts that follow.
print("Welcome!")

Welcome!


## Dataset selection input

In [2]:
# Menu of selectable datasets: the key is what the user types at the prompt,
# the value is the dataset name used to build the CSV file names when loading.
dataset_mapping = {
    "1": "amazon",
    "2": "bestbuy",
    "3": "kmart",
    "4": "nike",
    "5": "custom"
}
# Dicts preserve insertion order, so the first and last keys bound the range
# shown in the prompt.
dataset_choices = list(dataset_mapping)
min_dataset_index = dataset_choices[0]
max_dataset_index = dataset_choices[-1]
print("Available datasets: ")
for key, value in dataset_mapping.items():
    print(f"{key}. {value}")

# Keep prompting until the answer is one of the menu keys.
while True:
    # Strip surrounding whitespace so an answer such as " 5 " is accepted; the
    # numeric prompts already tolerate padding because float() ignores it.
    dataset_index = input(
        f"Choose a dataset to run the algorithm on ({min_dataset_index}-{max_dataset_index}): "
    ).strip()
    if dataset_index not in dataset_mapping:
        print("Invalid dataset")
        print("Try again!")
        continue
    print(f"You have selected {dataset_mapping[dataset_index].title()} dataset")
    break

Available datasets: 
1. amazon
2. bestbuy
3. kmart
4. nike
5. custom
You have selected Custom dataset


## Min support input

In [3]:
# Prompt until the user supplies a minimum support percentage within 1-100.
while True:
    try:
        min_support = float(input("Enter minimum support in % (1-100): "))
    except ValueError:
        # Raised by float() when the text is not a number at all.
        print("Invalid value for minimum support")
        print("Try again!")
        continue
    # Positive range test on purpose: float() parses the text "nan", and NaN
    # compares False against every bound, so a "< 1 or > 100" check would let
    # it through as a valid threshold.
    if not 1 <= min_support <= 100:
        print("Invalid value for minimum support")
        print("Try again!")
        continue
    print(f"You have selected minimum support of {min_support}%")
    # Store the threshold as a fraction (e.g. 20% -> 0.2).
    min_support = min_support / 100
    break

You have selected minimum support of 20.0%


## Min confidence input

In [4]:
# Prompt until the user supplies a minimum confidence percentage within 1-100.
while True:
    try:
        min_confidence = float(input("Enter minimum confidence in % (1-100): "))
    except ValueError:
        # Raised by float() when the text is not a number at all.
        print("Invalid value for minimum confidence")
        print("Try again!")
        continue
    # Positive range test on purpose: float() parses the text "nan", and NaN
    # compares False against every bound, so a "< 1 or > 100" check would let
    # it through as a valid threshold.
    if not 1 <= min_confidence <= 100:
        print("Invalid value for minimum confidence")
        print("Try again!")
        continue
    print(f"You have selected minimum confidence of {min_confidence}%")
    # Store the threshold as a fraction (e.g. 40% -> 0.4).
    min_confidence = min_confidence / 100
    break

You have selected minimum confidence of 40.0%


# Load the dataset

In [5]:
import pandas as pd

# Both CSV files are named after the dataset picked at the selection prompt.
dataset_name = dataset_mapping[dataset_index]

# Transactions file first, then the items file.
transactions_file = f"./data/{dataset_name}-transactions.csv"
transactions_df = pd.read_csv(transactions_file)

items_file = f"./data/{dataset_name}-items.csv"
items_df = pd.read_csv(items_file)

In [6]:
# Leave the DataFrame as the cell's last expression so the notebook renders it as a table.
items_df

Unnamed: 0,Items
0,BISCUIT
1,BOURNVITA
2,BREAD
3,COKE
4,COFFEE
5,CORNFLAKES
6,JAM
7,MAGGI
8,MILK
9,SUGER


In [7]:
# Allow up to 100 characters per column when DataFrames are rendered, then
# show the transactions table as the cell's output.
pd.set_option("display.max_colwidth", 100)
transactions_df

Unnamed: 0,Transaction ID,Transaction
0,0,"MILK, BREAD, BISCUIT"
1,1,"BREAD, MILK, BISCUIT, CORNFLAKES"
2,2,"BREAD, TEA, BOURNVITA"
3,3,"JAM, MAGGI, BREAD, MILK"
4,4,"MAGGI, TEA, BISCUIT"
5,5,"BREAD, TEA, BOURNVITA"
6,6,"MAGGI, TEA, CORNFLAKES"
7,7,"MAGGI, BREAD, TEA, BISCUIT"
8,8,"JAM, MAGGI, BREAD, TEA"
9,9,"BREAD, MILK"


# Run the brute force algorithm

In [8]:
from bruteforce import bruteforce_algorithm
from datetime import datetime

brute_start_time = datetime.now()
# Input preparation happens after the start timestamp, so it is included in
# the measured time.
brute_items = set(items_df["Items"].tolist())
# Each transaction string becomes a sorted list of item names.
brute_transactions = [
    sorted(basket.split(", ")) for basket in transactions_df["Transaction"]
]
bruteforce_algorithm(brute_items, brute_transactions, min_support, min_confidence)
brute_end_time = datetime.now()



Final Association Rules:

Rule 1: {'CORNFLAKES'} -> {'COFFEE'}
Confidence: 71.43%
Support: 23.81%

Rule 2: {'COFFEE'} -> {'CORNFLAKES'}
Confidence: 55.56%
Support: 23.81%

Rule 3: {'TEA'} -> {'BREAD'}
Confidence: 62.50%
Support: 23.81%

Rule 4: {'MILK'} -> {'BREAD'}
Confidence: 83.33%
Support: 23.81%



# Run the Apriori algorithm

In [9]:
from library import get_frequent_itemset_lib_algos

apriori_start_time = datetime.now()
# Input preparation happens after the start timestamp, so it is included in
# the measured time. Each transaction string becomes a sorted list of item names.
apriori_transactions = [
    sorted(basket.split(", ")) for basket in transactions_df["Transaction"]
]
lib_apriori_association_rules = get_frequent_itemset_lib_algos(
    apriori_transactions, min_support, min_confidence, algorithm="apriori"
)
apriori_end_time = datetime.now()



Association Rules from library apriori algorithm:

Rule 1: {'MILK'} -> {'BREAD'}
Confidence: 83.33%
Support: 23.81%

Rule 2: {'TEA'} -> {'BREAD'}
Confidence: 62.50%
Support: 23.81%

Rule 3: {'CORNFLAKES'} -> {'COFFEE'}
Confidence: 71.43%
Support: 23.81%

Rule 4: {'COFFEE'} -> {'CORNFLAKES'}
Confidence: 55.56%
Support: 23.81%



# Run the FP-Growth algorithm

In [10]:
fpgrowth_start_time = datetime.now()
# Input preparation happens after the start timestamp, so it is included in
# the measured time. Each transaction string becomes a sorted list of item names.
fpgrowth_transactions = [
    sorted(basket.split(", ")) for basket in transactions_df["Transaction"]
]
lib_fpgrowth_association_rules = get_frequent_itemset_lib_algos(
    fpgrowth_transactions, min_support, min_confidence, algorithm="fpgrowth"
)
fpgrowth_end_time = datetime.now()



Association Rules from library fpgrowth algorithm:

Rule 1: {'MILK'} -> {'BREAD'}
Confidence: 83.33%
Support: 23.81%

Rule 2: {'CORNFLAKES'} -> {'COFFEE'}
Confidence: 71.43%
Support: 23.81%

Rule 3: {'COFFEE'} -> {'CORNFLAKES'}
Confidence: 55.56%
Support: 23.81%

Rule 4: {'TEA'} -> {'BREAD'}
Confidence: 62.50%
Support: 23.81%



# Time comparison analysis

In [11]:
# Report the elapsed time of each run: a heading, then one line per algorithm.
print("Time comparison analysis")
elapsed_by_algorithm = (
    ("Bruteforce algorithm: ", brute_end_time - brute_start_time),
    ("Apriori algorithm: ", apriori_end_time - apriori_start_time),
    ("Fpgrowth algorithm: ", fpgrowth_end_time - fpgrowth_start_time),
)
for label, elapsed in elapsed_by_algorithm:
    print(label, elapsed)

Time comparison analysis
Bruteforce algorithm:  0:00:00.000700
Apriori algorithm:  0:00:00.004211
Fpgrowth algorithm:  0:00:00.002196
