In [19]:
import pickle
import numpy as np
from sequential.seq2pat import Seq2Pat, Attribute
from sequential.pat2feat import Pat2Feat

In [6]:
# Load the pickled sequences; the context manager closes the file handle
# as soon as unpickling finishes (the bare open() call never closed it).
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open("sequences.p", "rb") as sequences_file:
    sequences = pickle.load(sequences_file)

In [55]:

# Pattern miner over the loaded sequences.
seq2pat = Seq2Pat(sequences=sequences)

# One attribute value per item: the item's 0-based position inside its sequence.
position = Attribute(values=[list(range(len(sequence))) for sequence in sequences])

# Constrain the position attribute: gap of at most 2, span of at least 4.
seq2pat.add_constraint(position.gap() <= 2)
seq2pat.add_constraint(4 <= position.span())

# Each returned trace is split into its leading entries (kept as the pattern)
# and its final entry (kept as that pattern's count).
traces = seq2pat.get_patterns(min_frequency=100)
patterns = [mined[:-1] for mined in traces]
counts = [mined[-1] for mined in traces]

In [70]:
# Total of the per-pattern counts reported by seq2pat
# (`counts` holds the last entry of every mined trace).
np.sum(counts)

18685

In [57]:
# Encode every sequence against the mined patterns; no constraint arguments
# are passed to get_features here.
pat2feat = Pat2Feat()
encodings = pat2feat.get_features(sequences, patterns, drop_pattern_frequency=False)
# Keep every column of the encoding values except the first one.
# NOTE(review): presumably the first column holds the sequences themselves -- confirm.
onehotmatrix = encodings.values[:, 1:]

In [72]:
# Sum of all entries of the pat2feat encoding matrix, i.e. how many
# (sequence, pattern) occurrences pat2feat reports when no constraints are passed.
onehotmatrix.sum()

20078

In [58]:
def _contains_subsequence(pattern, sequence):
    """Return True if the items of `pattern` occur in `sequence` in order.

    Items need not be adjacent (no gap/span constraints). An empty pattern
    is contained in every sequence.
    """
    remaining = iter(sequence)
    # `item in remaining` consumes the shared iterator up to and including the
    # first match, so the next pattern item is only searched for strictly after
    # the previous match -- a single greedy pass over the sequence.
    return all(item in remaining for item in pattern)


# onehotmatrix2[i, j] is 1 when patterns[j] is an ordered (not necessarily
# contiguous) subsequence of sequences[i], and 0 otherwise.
# Matching is done on the raw Python items instead of np.array(sequence):
# that avoids rescanning the whole sequence for every pattern item and avoids
# NumPy's dtype coercion on mixed-type or empty sequences.
onehotmatrix2 = np.zeros((len(sequences), len(patterns)))
for i, sequence in enumerate(sequences):
    for j, pattern in enumerate(patterns):
        if _contains_subsequence(pattern, sequence):
            onehotmatrix2[i, j] = 1

In [73]:
# Sum of all entries of onehotmatrix2, i.e. how many (sequence, pattern)
# occurrences the simple unconstrained matching loop reports.
onehotmatrix2.sum()

21065.0

In [74]:
# The simple algorithm reports more occurrences than pat2feat, so locate the
# cells where the two matrices disagree. `differences` is a pair of index
# arrays: differences[0] are sequence (row) ids, differences[1] pattern (column) ids.
differences = np.where(onehotmatrix != onehotmatrix2)

In [76]:
# Sum pat2feat's entries over the disagreeing cells. A result of 0 means there
# are no patterns that pat2feat finds and the simple algorithm does not.
onehotmatrix[differences].sum()

0

In [80]:
# Spot-check the simple algorithm on one disagreeing cell.
# Change `i` to inspect a different (sequence, pattern) disagreement.
i = 0
sequenceid, patternid = differences[0][i], differences[1][i]

In [81]:
# Show the sequence of the selected disagreement.
sequences[sequenceid]

['Left-Down-2-358.0',
 'Left-Down-2-357.0',
 'Left-Down-1-81.0',
 'Left-Down-2-358.0',
 'CTRL + C-2-48.0',
 'Left-Down-1-81.0',
 'CTRL + V-1-1.0',
 'Left-Down-1-81.0',
 'Left-Down-1-92.0',
 'CTRL + C-1-13.0',
 'Left-Down-1-81.0',
 'CTRL + V-1-1.0',
 'Left-Down-2-358.0',
 'CTRL + C-2-48.0',
 'Left-Down-1-81.0',
 'CTRL + V-1-1.0',
 'Left-Down-1-754.0',
 'Left-Down-2-358.0',
 'CTRL + C-2-48.0',
 'Left-Down-1-81.0']

In [82]:
# Show the pattern of the selected disagreement.
patterns[patternid]

['CTRL + C-2-48.0',
 'Left-Down-1-81.0',
 'CTRL + V-1-1.0',
 'Left-Down-2-358.0',
 'CTRL + C-2-48.0',
 'CTRL + V-1-1.0']