In [1]:
from numpy import *
from itertools import combinations, permutations

In [2]:
# Example event log: (timestamp, event type) pairs, ordered by timestamp.
sequence = [
    (31, 'E'), (32, 'D'), (33, 'F'), (35, 'A'), (37, 'B'),
    (38, 'C'), (39, 'E'), (40, 'F'), (42, 'C'), (44, 'D'),
    (46, 'B'), (47, 'A'), (48, 'D'), (50, 'C'), (53, 'E'),
    (54, 'F'), (55, 'C'), (57, 'B'), (58, 'E'), (59, 'A'),
    (60, 'E'), (61, 'C'), (62, 'F'), (65, 'A'), (67, 'D'),
]
sequence.sort(key=lambda event: event[0])

In [3]:
# Small example event log: (timestamp, event type) pairs, ordered by timestamp.
sequence2 = [
    (10, 'D'), (20, 'C'), (30, 'A'), (40, 'B'),
    (50, 'D'), (60, 'A'), (70, 'B'), (80, 'C'),
]
sequence2.sort(key=lambda event: event[0])

In [4]:
def slidingWindow(sequence, width, step=1):
    """Cut an event sequence into overlapping time windows.

    Parameters
    ----------
    sequence : list of (time, label) events. It must be ordered by time,
        because the first and last entries are taken as the earliest and
        latest events.
    width : length of each window on the time axis.
    step : distance the window is shifted between consecutive windows.

    Returns
    -------
    list of lists: one list of event labels per window, in sequence order.
    Window i covers the half-open interval [t_start, t_end), where the first
    window ends at ``first_time + step`` and every later window is shifted by
    ``step``. An empty sequence yields an empty list.
    """
    if not sequence:
        return []

    # number of windows = (Te - Ts + width - step)/step = (te_max - ts_min + width)/step
    noWins = int((sequence[-1][0] - sequence[0][0] + width)/step)
    if noWins < 1:
        # Keep the historical guarantee of at least one window.
        noWins = 1

    t_end = sequence[0][0] + step
    t_start = t_end - width
    windows = []
    for _ in range(noWins):
        # Every window, including the first, is filled by the same interval
        # test, so a first window that spans several events (step larger than
        # the gap between events) is not truncated to the first event, and
        # labels are appended whole rather than split into characters.
        windows.append([event[1] for event in sequence
                        if t_start <= event[0] and event[0] < t_end])
        t_start += step
        t_end += step
    return windows

In [5]:
# Cut sequence2 into windows of width 40 that advance by 10 time units each;
# every window becomes one "transaction" (list of event labels) for mining.
dataSet = slidingWindow(sequence2, 40, 10)
dataSet

[['D'],
 ['D', 'C'],
 ['D', 'C', 'A'],
 ['D', 'C', 'A', 'B'],
 ['C', 'A', 'B', 'D'],
 ['A', 'B', 'D', 'A'],
 ['B', 'D', 'A', 'B'],
 ['D', 'A', 'B', 'C'],
 ['A', 'B', 'C'],
 ['B', 'C'],
 ['C']]

In [6]:
def createC1(dataSet):
    """Build the size-1 candidate list from the windows.

    Parameters
    ----------
    dataSet : iterable of windows, each an iterable of hashable, mutually
        comparable event labels.

    Returns
    -------
    A sorted list of single-element lists, one per distinct label found in
    ``dataSet`` (e.g. ``[['A'], ['B']]``). Callers convert candidates to
    tuples themselves when they need dictionary keys.
    """
    # A set removes duplicates in one pass instead of scanning the growing
    # result list for every item.
    distinct_items = {item for transaction in dataSet for item in transaction}
    return [[item] for item in sorted(distinct_items)]

In [7]:
def isSubsetInOrderWithGap(sub, lst):
    """Return True if ``sub`` occurs in ``lst`` as an ordered subsequence.

    The elements of ``sub`` must appear in ``lst`` in the same relative order,
    but other elements may lie between them (gaps are allowed). An empty
    ``sub`` is contained in every ``lst``, including an empty one.
    """
    # A single shared iterator means each match resumes the scan right after
    # the previous match, which enforces the ordering.
    remaining = iter(lst)
    for wanted in sub:
        for elem in remaining:
            if elem == wanted:
                break
        else:
            # lst was exhausted before `wanted` was found.
            return False
    return True

In [8]:
def scanWindows(W, Ck, minSupport):
    """Count candidate episodes over the windows and keep the frequent ones.

    Parameters
    ----------
    W : list of windows (each a list of event labels).
    Ck : iterable of candidate episodes (sequences of labels).
    minSupport : minimum fraction of windows that must contain an episode.

    Returns
    -------
    (retList, supportData): ``retList`` holds the candidates (as tuples) whose
    support is at least ``minSupport``, in reverse order of first occurrence;
    ``supportData`` maps every candidate that occurred in at least one window
    to its support.
    """
    hits = {}
    for window in W:
        for candidate in Ck:
            if isSubsetInOrderWithGap(candidate, window):
                key = tuple(candidate)
                hits[key] = hits.get(key, 0) + 1

    window_count = float(len(W))
    supportData = {key: count / window_count for key, count in hits.items()}

    # Frequent episodes are reported newest-first, i.e. in the reverse of the
    # order in which they were first counted.
    frequent = [key for key in hits if supportData[key] >= minSupport]
    frequent.reverse()
    return frequent, supportData

In [9]:
def checkSubsetFrequency(candidate, Lk, k):
    """Check that every length-``k`` sub-episode of ``candidate`` is in ``Lk``.

    Sub-episodes are the order-preserving ``k``-element selections produced by
    ``itertools.combinations`` (tuples). When ``k`` is not greater than 1 the
    check is skipped and the candidate is accepted.
    """
    if not k > 1:
        return True
    for sub_episode in combinations(candidate, k):
        if sub_episode not in Lk:  # sub_episode is a tuple
            return False
    return True

In [10]:
def aprioriGen(Lk, k): #creates Ck
    """Generate the length-``k`` candidate episodes from the frequent set ``Lk``.

    Every ordered arrangement (permutation) of ``k`` distinct labels drawn
    from the labels that occur in ``Lk`` is a potential candidate; it is kept
    only if ``checkSubsetFrequency`` accepts it against ``Lk`` for sub-episode
    length ``k - 1``.

    Returns a list of tuples.
    """
    # Distinct labels appearing anywhere in Lk, in sorted order.
    alphabet = sorted({label for episode in Lk for label in episode})
    return [
        candidate
        for candidate in permutations(alphabet, k)
        if checkSubsetFrequency(candidate, Lk, k - 1)
    ]

In [14]:
def WinEpi(dataSet, minSupport):
    """Mine frequent episodes level by level from a list of windows.

    Parameters
    ----------
    dataSet : list of windows (each a list of event labels).
    minSupport : minimum fraction of windows an episode must occur in.

    Returns
    -------
    (L, supportData): ``L[i]`` is the list of frequent episodes of length
    ``i + 1`` (as tuples); ``supportData`` maps every counted episode to its
    support.
    """
    singles = createC1(dataSet)
    frequent_singles, supportData = scanWindows(dataSet, singles, minSupport)
    levels = [frequent_singles]
    size = 2
    # Grow episodes one event at a time until a level has no frequent episode.
    while levels[-1]:
        candidates = aprioriGen(levels[-1], size)
        frequent, level_support = scanWindows(dataSet, candidates, minSupport) #scan DB to get Lk
        supportData.update(level_support)
        levels.append(frequent)
        size += 1
    # Drop the empty level that terminated the loop.
    if not levels[-1]:
        levels.pop()
    return levels, supportData

In [15]:
# Mine the windows for episodes that occur in at least 40% of them.
L, suppData = WinEpi(dataSet, 0.4)

In [16]:
# Frequent episodes grouped by length: L[0] holds 1-event episodes, L[1] 2-event ones.
L

[[('B',), ('A',), ('C',), ('D',)], [('A', 'B'), ('D', 'A')]]

In [17]:
def generateRules(L, supportData, width, minConf):  #supportData is a dict coming from scanD
    """Print the episode rules whose confidence reaches ``minConf``.

    For every frequent episode of two or more events, each shorter
    order-preserving sub-episode is tried as the left-hand side. The
    confidence is ``support(episode) / support(lhs)``.

    Parameters
    ----------
    L : list of levels as returned by ``WinEpi``; ``L[i]`` is assumed to hold
        episodes of length ``i + 1``.
    supportData : dict mapping episode tuples to their support.
    width : window width; it is only echoed in the printed rule.
    minConf : minimum confidence for a rule to be printed.

    Returns None; the rules are written to standard output.
    """
    for level in range(1, len(L)):  # level 0 holds single events: no rules there
        for episode in L[level]:
            # left-hand sides range from 1 event up to len(episode) - 1 events
            for lhs_size in range(1, level + 1):
                for lhs in combinations(episode, lhs_size):
                    conf = supportData[episode] / supportData[lhs]
                    if conf >= minConf:
                        print(
                            list(lhs), " ==> ", list(episode),
                            " [", width, "]",
                            " [", supportData[episode], ", ", conf, "]",
                            sep="",
                        )

In [18]:
# Print the rules with confidence >= 0.7.
# NOTE(review): the width argument (50) is only echoed in the printed rule, but
# dataSet was built with a window width of 40 -- confirm which value is intended.
generateRules(L, suppData, 50, 0.7)

['A'] ==> ['A', 'B'] [50] [0.5454545454545454, 0.8571428571428571]
['B'] ==> ['A', 'B'] [50] [0.5454545454545454, 0.8571428571428571]
['A'] ==> ['D', 'A'] [50] [0.45454545454545453, 0.7142857142857143]
