In [1]:
from numpy import *
from itertools import combinations

In [2]:
# Event sequence as (timestamp, event_type) pairs, kept in timestamp order.
sequence = [
    (31, 'E'), (32, 'D'), (33, 'F'), (35, 'A'), (37, 'B'),
    (38, 'C'), (39, 'E'), (40, 'F'), (42, 'C'), (44, 'D'),
    (46, 'B'), (47, 'A'), (48, 'D'), (50, 'C'), (53, 'E'),
    (54, 'F'), (55, 'C'), (57, 'B'), (58, 'E'), (59, 'A'),
    (60, 'E'), (61, 'C'), (62, 'F'), (65, 'A'), (67, 'D'),
]
# Stable sort on the timestamp only, so events sharing a timestamp keep their listed order.
sequence.sort(key=lambda event: event[0])

In [3]:
# Second, smaller event sequence of (timestamp, event_type) pairs in timestamp order.
sequence2 = [
    (10, 'D'), (20, 'C'), (30, 'A'), (40, 'B'),
    (50, 'D'), (60, 'A'), (70, 'B'), (80, 'C'),
]
# Stable sort on the timestamp only.
sequence2.sort(key=lambda event: event[0])

In [4]:
def slidingWindow(sequence, width, step=1):
    """Cut a time-ordered event sequence into sliding windows of event types.

    Window 0 covers the half-open interval ``[t0 + step - width, t0 + step)``,
    where ``t0`` is the timestamp of the first event; each following window is
    shifted right by ``step``.  An event at time ``t`` belongs to a window when
    ``t_start <= t < t_end``.

    Every window, including the first, is filled by the same interval test.
    The event type is stored as-is, so a multi-character type such as ``'AB'``
    stays one item, and all events that fall into the first window are kept
    (for example several events sharing the first timestamp, or ``step > 1``).

    Parameters
    ----------
    sequence : sequence of (timestamp, event_type)
        Must already be sorted by timestamp: the first and last entries are
        used as the earliest and latest events.
    width : number
        Length of each window, in timestamp units.
    step : number, optional
        Shift between consecutive windows (default 1).

    Returns
    -------
    list of list
        One list of event types per window, in sequence order.  Individual
        windows may be empty.  An empty ``sequence`` gives ``[]``.
    """
    if len(sequence) == 0:
        return []

    def eventsBetween(t_start, t_end):
        # Event types whose timestamp lies in the half-open range [t_start, t_end).
        return [event[1] for event in sequence if t_start <= event[0] < t_end]

    t_end = sequence[0][0] + step
    t_start = t_end - width
    # number of windows = (te_max - ts_min + width) / step
    noWins = int((sequence[-1][0] - sequence[0][0] + width) / step)

    windows = [eventsBetween(t_start, t_end)]
    for _ in range(noWins - 1):  # the first window has already been generated
        t_start += step
        t_end += step
        windows.append(eventsBetween(t_start, t_end))
    return windows

In [5]:
# Slice the event sequence into windows of width 5 that advance one time unit at a time,
# then show how many windows were produced.
dataSet = slidingWindow(sequence, width=5, step=1)
len(dataSet)

41

In [6]:
def createC1(dataSet):
    """Build the size-1 candidate list from a collection of windows.

    Parameters
    ----------
    dataSet : iterable of iterables
        The windows (transactions); each one yields its items.

    Returns
    -------
    list of list
        One single-element list per distinct item, sorted ascending.
    """
    singletons = []
    for window in dataSet:
        for eventType in window:
            candidate = [eventType]
            if candidate in singletons:
                continue  # already recorded
            singletons.append(candidate)
    return sorted(singletons)

In [7]:
def scanWindows(W, Ck, minSupport):
    """Count in how many windows each candidate occurs and keep the frequent ones.

    Parameters
    ----------
    W : sized collection of iterables
        The windows; a candidate occurs in a window when all of its items are in it.
    Ck : iterable of iterables
        Candidate itemsets.
    minSupport : number
        Minimum fraction of windows a candidate must occur in to be kept.

    Returns
    -------
    (list of tuple, dict)
        * the candidates (as tuples) whose support is >= ``minSupport``, in
          reverse order of their first occurrence during the scan;
        * a dict mapping every candidate that occurred at least once to its
          support (occurrences / number of windows).  Candidates that never
          occurred are absent.
    """
    hits = {}
    for window in W:
        for candidate in Ck:
            if not set(candidate).issubset(window):
                continue
            key = tuple(candidate)
            hits[key] = hits.get(key, 0) + 1

    windowCount = float(len(W))
    supportData = {key: count / windowCount for key, count in hits.items()}
    # Walking the keys backwards reproduces the "prepend each frequent key" ordering.
    retList = [key for key in reversed(list(supportData))
               if supportData[key] >= minSupport]
    return retList, supportData

In [8]:
def checkSubsetFrequency(candidate, Lk, k):
    """Tell whether every size-``k`` subset of ``candidate`` is listed in ``Lk``.

    Parameters
    ----------
    candidate : iterable
        The itemset whose subsets are checked.
    Lk : container of tuples
        Itemsets to look the subsets up in.
    k : int
        Size of the subsets to check.  For ``k <= 1`` no check is performed
        and the result is always ``True``.

    Returns
    -------
    bool
    """
    if not k > 1:
        return True
    # Explicit loop on purpose: the notebook does `from numpy import *`, so a
    # bare `all(...)` may not be the builtin here.
    for subset in combinations(candidate, k):
        if subset not in Lk:
            return False
    return True

In [9]:
def aprioriGen(Lk, k):
    """Create the size-``k`` candidates (Ck) from the itemsets in ``Lk``.

    All distinct items appearing in ``Lk`` are collected and sorted, every
    ``k``-combination of them is formed, and a combination is kept only if
    ``checkSubsetFrequency`` accepts it against ``Lk`` for subsets of size
    ``k - 1``.

    Parameters
    ----------
    Lk : iterable of tuples
        Itemsets of the previous level.
    k : int
        Size of the candidates to generate.

    Returns
    -------
    list of tuple
        Candidate itemsets, each a sorted tuple, in lexicographic order.
    """
    items = sorted({item for itemset in Lk for item in itemset})
    return [candidate
            for candidate in combinations(items, k)
            if checkSubsetFrequency(candidate, Lk, k - 1)]

In [10]:
def WinEpi(dataSet, minSupport):
    """Find all frequent itemsets in a collection of windows, level by level.

    Starts from the size-1 candidates of ``createC1``, keeps the frequent ones
    via ``scanWindows``, then repeatedly generates the next-size candidates
    with ``aprioriGen`` and scans again until a level yields nothing.

    Parameters
    ----------
    dataSet : list of iterables
        The windows, each an iterable of items.
    minSupport : number
        Minimum fraction of windows an itemset must occur in.

    Returns
    -------
    (list of list of tuple, dict)
        * ``L``: ``L[i]`` holds the frequent itemsets of size ``i + 1``; the
          trailing empty level is removed.
        * the merged support dict of every level, as produced by ``scanWindows``.
    """
    singles = createC1(dataSet)
    windows = [set(window) for window in dataSet]
    frequent, supportData = scanWindows(windows, singles, minSupport)
    L = [frequent]
    size = 2
    while L[-1]:  # the most recent level still has frequent itemsets
        candidates = aprioriGen(L[-1], size)
        frequent, levelSupport = scanWindows(windows, candidates, minSupport)  # scan windows to get Lk
        supportData.update(levelSupport)
        L.append(frequent)
        size += 1
    # drop the empty level that terminated the search
    if not L[-1]:
        L.pop()
    return L, supportData

In [11]:
# Mine the itemsets that occur in at least 10% of the windows.
L, suppData = WinEpi(dataSet, minSupport=0.1)

In [12]:
# Show the frequent itemsets grouped by size: L[0] holds the 1-itemsets, L[1] the 2-itemsets, ...
L

[[('C',), ('B',), ('A',), ('F',), ('D',), ('E',)],
 [('B', 'D'),
  ('C', 'D'),
  ('C', 'F'),
  ('C', 'E'),
  ('B', 'E'),
  ('B', 'C'),
  ('A', 'C'),
  ('B', 'F'),
  ('A', 'B'),
  ('A', 'F'),
  ('A', 'E'),
  ('A', 'D'),
  ('E', 'F'),
  ('D', 'F')],
 [('C', 'E', 'F'), ('B', 'C', 'E'), ('A', 'C', 'E'), ('A', 'B', 'C')]]

In [13]:
def generateRules(L, supportData, width, minConf):
    """Print the rules ``lhs ==> itemset`` that reach a minimum confidence.

    For every itemset in levels ``L[1:]`` and every non-empty subset ``lhs`` of
    up to ``i`` items (``i`` being the level index), the confidence is
    ``supportData[itemset] / supportData[lhs]``.  Rules with confidence
    ``>= minConf`` are printed as
    ``lhs ==> itemset [width] [support, confidence]``.

    Parameters
    ----------
    L : list of list of tuple
        Frequent itemsets per level, as returned by ``WinEpi``.
    supportData : dict
        Maps itemset tuples to their support (comes from ``scanWindows``).
    width : any
        Window width; only echoed in the printed rule.
    minConf : number
        Minimum confidence for a rule to be printed.

    Returns
    -------
    None
    """
    # Level 0 holds single items, which cannot form a rule, so start at level 1.
    for level, itemsets in enumerate(L[1:], start=1):
        for itemset in itemsets:
            # Antecedents: all subsets of size 1..level, smaller sizes first.
            antecedents = (lhs
                           for size in range(1, level + 1)
                           for lhs in combinations(itemset, size))
            for lhs in antecedents:
                conf = supportData[itemset] / supportData[lhs]
                if conf < minConf:
                    continue
                print(list(lhs), " ==> ", list(itemset), " [", width, "]", " [", supportData[itemset], ", ", conf, "]", sep="")

In [14]:
# Print every rule with confidence of at least 0.1; 5 is the window width used to build dataSet.
generateRules(L, suppData, width=5, minConf=0.1)

['B'] ==> ['B', 'D'] [5] [0.12195121951219512, 0.3333333333333333]
['D'] ==> ['B', 'D'] [5] [0.12195121951219512, 0.2631578947368421]
['C'] ==> ['C', 'D'] [5] [0.14634146341463414, 0.25]
['D'] ==> ['C', 'D'] [5] [0.14634146341463414, 0.3157894736842105]
['C'] ==> ['C', 'F'] [5] [0.34146341463414637, 0.5833333333333334]
['F'] ==> ['C', 'F'] [5] [0.34146341463414637, 0.7000000000000001]
['C'] ==> ['C', 'E'] [5] [0.3902439024390244, 0.6666666666666667]
['E'] ==> ['C', 'E'] [5] [0.3902439024390244, 0.7272727272727273]
['B'] ==> ['B', 'E'] [5] [0.1951219512195122, 0.5333333333333333]
['E'] ==> ['B', 'E'] [5] [0.1951219512195122, 0.36363636363636365]
['B'] ==> ['B', 'C'] [5] [0.24390243902439024, 0.6666666666666666]
['C'] ==> ['B', 'C'] [5] [0.24390243902439024, 0.4166666666666667]
['A'] ==> ['A', 'C'] [5] [0.21951219512195122, 0.45]
['C'] ==> ['A', 'C'] [5] [0.21951219512195122, 0.375]
['B'] ==> ['B', 'F'] [5] [0.12195121951219512, 0.3333333333333333]
['F'] ==> ['B', 'F'] [5] [0.12195121951