In [1]:
import numpy as np
import pandas as pd
import itertools

## Initializing the transaction baskets

In [2]:
# Transaction baskets: each inner list holds the item ids bought together.
data = [[1,2,5], [2,3,5], [4,5], [1,6,7], [2,3,5,7], [1,2,7]]

# For every basket, enumerate all of its unordered 2-item combinations.
pairs = []
for basket in data:
    pairs.append(list(itertools.combinations(basket, 2)))

# One row per basket, with explicitly named columns.
mydict = {
    'Items': data,
    'Pairs': pairs,
}
df = pd.DataFrame(mydict)

# Render the baskets next to their candidate pairs.
display(df)

Unnamed: 0,Items,Pairs
0,"[1, 2, 5]","[(1, 2), (1, 5), (2, 5)]"
1,"[2, 3, 5]","[(2, 3), (2, 5), (3, 5)]"
2,"[4, 5]","[(4, 5)]"
3,"[1, 6, 7]","[(1, 6), (1, 7), (6, 7)]"
4,"[2, 3, 5, 7]","[(2, 3), (2, 5), (2, 7), (3, 5), (3, 7), (5, 7)]"
5,"[1, 2, 7]","[(1, 2), (1, 7), (2, 7)]"


## Calculating the supports

In [3]:
# Show the raw baskets that the support counts below are computed from.
print(data)

[[1, 2, 5], [2, 3, 5], [4, 5], [1, 6, 7], [2, 3, 5, 7], [1, 2, 7]]


In [4]:
# Give every distinct item a zero-initialised support counter.
# Keys are kept in first-seen order; no sorting is applied to `supports`.
# If an item-sorted table is wanted, use dict(sorted(supports.items())).
supports = {}
for basket in data:
    for item in basket:
        supports.setdefault(item, 0)

print(supports)

{1: 0, 2: 0, 5: 0, 3: 0, 4: 0, 6: 0, 7: 0}


In [5]:
# Count in how many baskets each item occurs (its support).
# Reset the counters first so that re-running this cell does not keep adding
# to the totals left in the kernel by a previous run.
supports = dict.fromkeys(supports, 0)
for basket in data:
    for item in basket:
        supports[item] += 1

print(supports)

{1: 3, 2: 4, 5: 4, 3: 2, 4: 1, 6: 1, 7: 3}


In [17]:
#| label: tbl-support
#| tbl-cap: Support
# Split the support dictionary into parallel columns for the table.
item = list(supports.keys())
supp = list(supports.values())

# Use a name of its own for the column mapping: the pair-count cell further
# down binds `unique_items_count`, and sharing that name lets an out-of-order
# run of this cell overwrite the data the pair table is built from.
item_support_columns = {'Itemset': item, 'Sup': supp}

df_item_sup = pd.DataFrame(item_support_columns)
display(df_item_sup)

Unnamed: 0,Itemset,Sup
0,1,3
1,2,4
2,5,4
3,3,2
4,4,1
5,6,1
6,7,3


### Pass 1

In [7]:
def hash_f_pair(i, j, n_buckets=7):
    """Hash an item pair to a bucket index.

    Parameters
    ----------
    i, j : int
        The two item ids of the pair.
    n_buckets : int, optional
        Number of hash buckets; defaults to 7.

    Returns
    -------
    int
        ``(i * j) % n_buckets``. Any pair containing an item that is a
        multiple of ``n_buckets`` therefore lands in bucket 0.
    """
    return (i * j) % n_buckets

In [8]:
# Distinct candidate pairs across all baskets, in first-seen order.
# dict.fromkeys keeps the first occurrence of each key and drops repeats.
unique_items = list(
    dict.fromkeys(pair for basket_pairs in pairs for pair in basket_pairs)
)
print(unique_items)


[(1, 2), (1, 5), (2, 5), (2, 3), (3, 5), (4, 5), (1, 6), (1, 7), (6, 7), (2, 7), (3, 7), (5, 7)]


In [9]:
# Inspect the second element of the first unique pair (indexing check).
unique_items[0][1]

2

In [10]:
# Bucket index of every distinct pair, in the same order as `unique_items`.
hash_value_pairs = [hash_f_pair(pair[0], pair[1]) for pair in unique_items]
print(hash_value_pairs)

[2, 5, 3, 6, 1, 6, 6, 0, 0, 0, 0, 0]


In [11]:
# Zero-initialised counter per distinct pair, keyed by the pair's string form.
my_dict2 = dict.fromkeys(map(str, unique_items), 0)
print(my_dict2)

{'(1, 2)': 0, '(1, 5)': 0, '(2, 5)': 0, '(2, 3)': 0, '(3, 5)': 0, '(4, 5)': 0, '(1, 6)': 0, '(1, 7)': 0, '(6, 7)': 0, '(2, 7)': 0, '(3, 7)': 0, '(5, 7)': 0}


In [12]:
# Count in how many baskets each pair occurs.
# Reset the counters first so that re-running this cell does not keep adding
# to the totals left in the kernel by a previous run.
my_dict2 = dict.fromkeys(my_dict2, 0)
for basket_pairs in pairs:
    for pair in basket_pairs:
        my_dict2[str(pair)] += 1

print(my_dict2)

{'(1, 2)': 2, '(1, 5)': 1, '(2, 5)': 3, '(2, 3)': 2, '(3, 5)': 2, '(4, 5)': 1, '(1, 6)': 1, '(1, 7)': 2, '(6, 7)': 1, '(2, 7)': 2, '(3, 7)': 1, '(5, 7)': 1}


In [13]:
# Pair counts as a plain list, in the dictionary's insertion order.
counts = list(my_dict2.values())
print(counts)

[2, 1, 3, 2, 2, 1, 1, 2, 1, 2, 1, 1]


In [14]:
# Column mapping for the pair-count table: each pair next to its count.
unique_items_count = {
    'Pairs': unique_items,
    'Count': counts,
}
print(unique_items_count)

{'Pairs': [(1, 2), (1, 5), (2, 5), (2, 3), (3, 5), (4, 5), (1, 6), (1, 7), (6, 7), (2, 7), (3, 7), (5, 7)], 'Count': [2, 1, 3, 2, 2, 1, 1, 2, 1, 2, 1, 1]}


In [19]:
# Tabulate the column mapping held in `unique_items_count` and render it.
df_counts = pd.DataFrame(data=unique_items_count)
display(df_counts)

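Unnamed: 0,Pairs,Count
0,"(1, 2)",2
1,"(1, 5)",1
2,"(2, 5)",3
3,"(2, 3)",2
4,"(3, 5)",2
5,"(4, 5)",1
6,"(1, 6)",1
7,"(1, 7)",2
8,"(6, 7)",1
9,"(2, 7)",2
10,"(3, 7)",1
11,"(5, 7)",1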


The minimum support count is 2, so we eliminate from the pair list the items whose support in @tbl-support is less than 2.

In [20]:
# NOTE(review): placeholder — this only emits a blank line; the minimum-support
# elimination described in the preceding text is not implemented here.
print()


