In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# ECS766 Coursework 2 - Elliot Linsey

## Q1:

Computing the support of every subset of an itemset can lead to numbers too large for a computer to compute or store. This is because if we have a large itemset, then we have to compute every combination of items within to calculate every support. The equation to calculate the number of itemsets within a large itemset is $2^n-1$, with n being the number of items within your itemset. If we had an itemset of 100 items, the equation would be $2^{100}-1 \approx 1.27\times10^{30}$, this resulting number is too large to be handled by the computer.  

The apriori algorithm is used to reduce the amount of calculations involved by using apriori knowledge that the subset of a frequent itemset is also frequent. Therefore, if an itemset in $L_k$ is frequent then the corresponding subsets are also frequent. Using this knowledge, it can find the initial candidate itemsets, $C_1$, use this to find the frequent itemsets $L_1$ by comparing with the previously decided support count. Then, generate the next set of candidates $C_2$, or superset, of the original candidates by joining $L_1 \Join L_1$. After joining, you create $L_2$ by pruning the candidates of $C_2$ that lack frequent itemsets. As you are removing infrequent itemsets as you go through the algorithm, you do not need to calculate every single itemset support as you only keep the itemsets that meet your predetermined support level. 

## Q2:

If $L_1$ contains the initial frequent itemsets, then every $k\geq2$ must be a superset of $L_1$ because the apriori rule has the attribute of antimonotonicity. This refers to the fact that if a set cannot pass the support test, then all supersets will also fail. Therefore, for all sets in $k\geq2$, they must be made from an itemset of $L_1$ (that passes the support test), otherwise they too will not pass the support test. 

## Q3:

$C_3$ = {{1,2,4},{2,3,4}}

## Q4:

As $S_2$ is a subset of $S_1$, therefore the support of $S_2$ will be $\geq$ the support of $S_1$

## Q5:

0.8???

## Q6:

6

## Q7:

In [2]:
dataset = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
           ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
           ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]
'Kidney Beans' in dataset[0] and 'Eggs' in dataset[0]

True

In [3]:
def K_measure(dataset,A,B):
    count1 = 0
    count2 = 0
    count3 = 0
    for x in dataset:
        if set(A).issubset(set(x)) and set(B).issubset(set(x)):
            count1 += 1
        if set(A).issubset(set(x)):
            count2 += 1
        if set(B).issubset(set(x)):
            count3 += 1       
    supportAB = count1/len(dataset)
    conA = count1/count2
    conB = count1/count3
    return (conA+conB)/2
print('Kulczynski({Onion}, {Kidney Beans, Eggs}): ' + str(K_measure(dataset, ['Onion'], ['Kidney Beans','Eggs'])))     

Kulczynski({Onion}, {Kidney Beans, Eggs}): 0.875


## Q8:

In [4]:
def imb_ratio(dataset,A,B):
    count1 = 0
    count2 = 0
    count3 = 0
    for x in dataset:
        if set(A).issubset(set(x)) and set(B).issubset(set(x)):
            count1 += 1
        if set(A).issubset(set(x)):
            count2 += 1
        if set(B).issubset(set(x)):
            count3 += 1       
    supportA = count2/len(dataset)
    supportB = count3/len(dataset)
    supportAUB = count1/len(dataset)
    #print(supportA,supportB,supportAUB)
    conA = count1/count2
    conB = count1/count3
    return abs(supportA-supportB)/(supportA+supportB-supportAUB)
print('Imbalance Ratio({Onion}, {Kidney Beans, Eggs}: ' + str(imb_ratio(dataset, ['Onion'], ['Kidney Beans','Eggs'])))
#imb_ratio(dataset2, ['Eggs'], ['Kidney Beans'])  

Imbalance Ratio({Onion}, {Kidney Beans, Eggs}: 0.2500000000000001
