In [2]:
import numpy as np
import pandas as pd
import math

In [3]:
# Load the tab-separated input table. The file has no header row, so pandas
# assigns default integer column labels, and no column is used as the index.
file_data = pd.read_csv(
    'associationruletestdata.txt',
    sep='\t',
    header=None,
    index_col=None,
)

In [4]:
# DataFrame.shape is (rows, columns): rows are records, columns are attributes.
record_count, attribute_count = file_data.shape

In [5]:
# Relabel the columns 1..attribute_count instead of the default 0-based labels.
file_data.columns = np.arange(1, attribute_count + 1)

# Tag every value with the number of the column it sits in, e.g. 'Up' in
# column 3 becomes 'G3_Up'. The range stops one short of the last column,
# so that final column keeps its original values.
# NOTE: this updates file_data in place; re-running the cell prefixes again.
for gene_no in range(1, file_data.shape[1]):
    gene_tag = 'G' + str(gene_no) + '_'
    file_data[gene_no] = gene_tag + file_data[gene_no]

print(file_data)

        1        2        3        4        5        6        7        8    \
0     G1_Up    G2_Up  G3_Down    G4_Up  G5_Down    G6_Up    G7_Up  G8_Down   
1     G1_Up  G2_Down    G3_Up  G4_Down    G5_Up  G6_Down  G7_Down  G8_Down   
2   G1_Down  G2_Down    G3_Up    G4_Up    G5_Up    G6_Up  G7_Down    G8_Up   
3   G1_Down  G2_Down  G3_Down  G4_Down  G5_Down  G6_Down  G7_Down    G8_Up   
4     G1_Up    G2_Up  G3_Down  G4_Down  G5_Down  G6_Down    G7_Up    G8_Up   
..      ...      ...      ...      ...      ...      ...      ...      ...   
95  G1_Down    G2_Up  G3_Down    G4_Up  G5_Down    G6_Up    G7_Up    G8_Up   
96    G1_Up  G2_Down  G3_Down    G4_Up    G5_Up    G6_Up  G7_Down    G8_Up   
97    G1_Up  G2_Down    G3_Up  G4_Down    G5_Up    G6_Up  G7_Down    G8_Up   
98  G1_Down    G2_Up    G3_Up  G4_Down    G5_Up  G6_Down    G7_Up    G8_Up   
99  G1_Down    G2_Up  G3_Down    G4_Up  G5_Down  G6_Down    G7_Up    G8_Up   

        9         10   ...       92        93        94        

In [6]:
# Build one set of items per record so that itemset matching can rely on
# set operations.

# Take the raw values of every column except the last one (disease).
file_data_arr = file_data.values[:, :attribute_count - 1]

# Each row of the array becomes a set of its values.
sample_sets = [set(record) for record in file_data_arr]

print(sample_sets)

[{'G86_Down', 'G47_Down', 'G100_Down', 'G20_Up', 'G75_Down', 'G83_Up', 'G95_Up', 'G59_Up', 'G79_Down', 'G94_Down', 'G60_Down', 'G65_Up', 'G42_Down', 'G49_Up', 'G30_Up', 'G96_Down', 'G22_Up', 'G7_Up', 'G58_Down', 'G89_Down', 'G24_Down', 'G33_Up', 'G48_Up', 'G98_Down', 'G53_Down', 'G76_Down', 'G80_Up', 'G31_Down', 'G23_Up', 'G39_Up', 'G9_Down', 'G67_Down', 'G92_Up', 'G84_Up', 'G90_Down', 'G35_Down', 'G56_Down', 'G52_Down', 'G27_Down', 'G77_Up', 'G15_Down', 'G54_Down', 'G62_Down', 'G72_Up', 'G88_Up', 'G38_Up', 'G43_Down', 'G69_Down', 'G97_Down', 'G99_Up', 'G71_Down', 'G25_Down', 'G1_Up', 'G55_Up', 'G50_Down', 'G6_Up', 'G64_Down', 'G41_Up', 'G51_Down', 'G13_Down', 'G44_Down', 'G87_Down', 'G34_Down', 'G16_Down', 'G18_Up', 'G78_Down', 'G68_Up', 'G5_Down', 'G17_Up', 'G3_Down', 'G37_Down', 'G10_Up', 'G12_Up', 'G66_Up', 'G32_Up', 'G45_Up', 'G85_Up', 'G57_Down', 'G73_Down', 'G19_Down', 'G93_Up', 'G82_Down', 'G74_Up', 'G40_Up', 'G29_Down', 'G2_Up', 'G28_Down', 'G11_Up', 'G4_Up', 'G26_Down', 'G61_

In [11]:
# Candidate length-1 itemsets: for every gene number below attribute_count,
# one singleton set for 'Up' followed by one for 'Down'.
item_sets = [
    {'G' + str(gene_no) + suffix}
    for gene_no in range(1, attribute_count)
    for suffix in ('_Up', '_Down')
]
print(item_sets)

[{'G1_Up'}, {'G1_Down'}, {'G2_Up'}, {'G2_Down'}, {'G3_Up'}, {'G3_Down'}, {'G4_Up'}, {'G4_Down'}, {'G5_Up'}, {'G5_Down'}, {'G6_Up'}, {'G6_Down'}, {'G7_Up'}, {'G7_Down'}, {'G8_Up'}, {'G8_Down'}, {'G9_Up'}, {'G9_Down'}, {'G10_Up'}, {'G10_Down'}, {'G11_Up'}, {'G11_Down'}, {'G12_Up'}, {'G12_Down'}, {'G13_Up'}, {'G13_Down'}, {'G14_Up'}, {'G14_Down'}, {'G15_Up'}, {'G15_Down'}, {'G16_Up'}, {'G16_Down'}, {'G17_Up'}, {'G17_Down'}, {'G18_Up'}, {'G18_Down'}, {'G19_Up'}, {'G19_Down'}, {'G20_Up'}, {'G20_Down'}, {'G21_Up'}, {'G21_Down'}, {'G22_Up'}, {'G22_Down'}, {'G23_Up'}, {'G23_Down'}, {'G24_Up'}, {'G24_Down'}, {'G25_Up'}, {'G25_Down'}, {'G26_Up'}, {'G26_Down'}, {'G27_Up'}, {'G27_Down'}, {'G28_Up'}, {'G28_Down'}, {'G29_Up'}, {'G29_Down'}, {'G30_Up'}, {'G30_Down'}, {'G31_Up'}, {'G31_Down'}, {'G32_Up'}, {'G32_Down'}, {'G33_Up'}, {'G33_Down'}, {'G34_Up'}, {'G34_Down'}, {'G35_Up'}, {'G35_Down'}, {'G36_Up'}, {'G36_Down'}, {'G37_Up'}, {'G37_Down'}, {'G38_Up'}, {'G38_Down'}, {'G39_Up'}, {'G39_Down'}, {'G

In [13]:

def main():
    """Compute the frequent length-1 itemsets for several support thresholds.

    Reads the notebook-level names ``item_sets`` (candidate itemsets),
    ``sample_sets`` (one set of items per record) and ``record_count``.
    For each minimum support percentage it prints a header line, computes the
    support of every candidate as a rounded percentage of the records that
    contain it, and collects the candidates meeting the threshold in
    ``freq_item_sets``.

    NOTE(review): the level-wise loop that would grow longer itemsets is
    still commented out after this code, so the frequent itemsets are
    computed but not yet stored in ``freq_itemsets`` or reported.

    Raises:
        ZeroDivisionError: if ``record_count`` is 0 and there is at least one
            candidate itemset.
    """
    min_support_values = [30, 40, 50, 60, 70]

    # Frequent itemsets for every support value, keyed by the support value.
    # Not populated yet (see the note in the docstring).
    freq_itemsets = {}
    for min_support in min_support_values:
        print('\nSupport is set to be ' + str(min_support) + '%')
        # The next three names are only used by the commented-out level-wise
        # loop; they are kept so that loop can be re-enabled as written.
        f_itemsets_for_each_support = {}
        flag = True  # Flag used to determine when to stop
        length = 1  # length of frequent item sets

        # Support (in percent, rounded to an integer) of every candidate,
        # in the same order as item_sets.
        item_set_support = []
        for items in item_sets:
            # A record supports the candidate when it contains all its items.
            count = sum(1 for sample in sample_sets if items.issubset(sample))
            item_set_support.append(round(count * 100 / record_count))

        # Keep the candidates meeting the threshold. This is built once per
        # threshold, after all supports are known, so it is defined even
        # when there are no candidates at all.
        freq_item_sets = [
            item_sets[index]
            for index, sup in enumerate(item_set_support)
            if sup >= min_support
        ]
        
#         while flag:
            
#             f_itemsets_for_each_support[length] = generateCandidateKPlusOne(f_itemsets[length-1])

#             for index, sup in enumerate(item_set_support):
#                 if sup >= min_support:
#                     freq_item_sets.append(item_sets[index])

#             sum += len(freq_item_sets)
#             if len(freq_item_sets) != 0:
#                 print('number of length-' + str(length) + ' frequent itemsets: ' + str(len(freq_item_sets)))
#             else:
#                 print('number of all lengths frequent itemsets:' + str(sum))
#                 sum = 0

#             if len(freq_item_sets) == 0:
#                 flag = False;
#             else:
#                 length += 1
#                 item_sets = generate_merge_sets(freq_item_sets, length)
