Skip to content

Commit

Permalink
first version
Browse files Browse the repository at this point in the history
  • Loading branch information
harrywang committed Jun 4, 2017
1 parent 4c33e56 commit f52eb29
Show file tree
Hide file tree
Showing 18 changed files with 446 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,5 @@ ENV/

# mypy
.mypy_cache/

.DS_Store
32 changes: 30 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,30 @@
# pyprom
Process Mining (ProM) in Python
# What is PyProM?
PyProM is a Python-based, open-source process mining package.

# About Event Logs

The example logs (in /logs folder) are from the ProM site (http://www.promtools.org). Each line is a case with a sequence of activities.

TODO: Logs should be stored in a csv file with columns, such as Case ID, Activity, Start Time, End Time, Agent, Role, and Data. This format is used in Disco (https://fluxicon.com/disco/)

# Setup Instructions
Install graphviz - we use graphviz to visualize the process in petri net format
```
brew install graphviz
```
Set up a virtual environment and activate it:
```
virtualenv venv
source venv/bin/activate
```
Install packages: `pip install -r requirements.txt`

Run the program with different log files to generate the petri net images and related dot files in the output folder
```
python pyprom.py exercise1.txt
```

# References
- PyPM (https://github.com/tdi/pypm): I referred to this project when starting PyProM - many thanks to the author.
- ProM: http://www.promtools.org
- Disco: https://fluxicon.com/disco/
Empty file added algo/__init__.py
Empty file.
177 changes: 177 additions & 0 deletions algo/alpha.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#!/usr/bin/python
# Module for Alpha Algorithm
# https://en.wikipedia.org/wiki/Alpha_algorithm
import graphviz as gv


def apply(log, input_file, output_file):
    """Run the Alpha algorithm on ``log``, print the results and render the net.

    Args:
        log: re-iterable collection of traces; each trace is an indexable
            sequence of tasks.
        input_file: not used by this function; kept so existing callers
            keep working.
        output_file: passed through to ``build_petrinet`` as the render target.
    """
    tl, df, cs, ncs, par = build_ordering_relations(log)
    xl, yl, ti, to = make_sets(log, tl, df, cs, ncs)

    report = (
        ("all tasks:", tl),
        ("direct followers:", df),
        ("causalities:", cs),
        ("no_causalities:", ncs),
        ("parallels:", par),
        ("x list:", xl),
        ("y list:", yl),
        ("initial tasks:", ti),
        ("terminal tasks:", to),
    )
    for label, value in report:
        # A single pre-formatted string prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print("%s %s" % (label, value))

    build_petrinet(tl, yl, ti, to, output_file)


def build_ordering_relations(log):
    """Compute the task set and the ordering relations for ``log``.

    Returns:
        A 5-tuple ``(tl, df, cs, ncs, par)``: the set of all tasks found in
        the traces, followed by the results of ``get_direct_followers``,
        ``get_causalities``, ``get_no_causalities`` and ``get_parallels``.
    """
    tasks = set()
    for trace in log:
        tasks.update(trace)

    followers = get_direct_followers(log)
    causal = get_causalities(tasks, followers)
    unrelated = get_no_causalities(tasks, followers)
    parallel = get_parallels(tasks, followers)

    return tasks, followers, causal, unrelated, parallel


def make_sets(log, tl, df, cs, ncs):
    """Build the four sets the Petri net is drawn from.

    Returns:
        A 4-tuple ``(xl, yl, ti, to)`` holding the results of
        ``make_xl_set``, ``make_yl_set``, ``make_ti_set`` and
        ``make_to_set`` respectively.
    """
    candidate_pairs = make_xl_set(tl, df, cs, ncs)
    maximal_pairs = make_yl_set(candidate_pairs)
    initial_tasks = make_ti_set(log)
    terminal_tasks = make_to_set(log)
    return candidate_pairs, maximal_pairs, initial_tasks, terminal_tasks


def get_direct_followers(log):
    """Return the distinct ``(a, b)`` pairs where ``b`` comes right after ``a``.

    Args:
        log: iterable of traces; each trace is a sliceable sequence of
            hashable tasks.

    Returns:
        A list of 2-tuples without duplicates, in the order each pair is
        first encountered while scanning the traces.
    """
    df = []
    seen = set()  # mirrors ``df`` for constant-time duplicate checks
    for trace in log:
        # Pair every event with its successor; the last event has none.
        for pair in zip(trace, trace[1:]):
            if pair not in seen:
                seen.add(pair)
                df.append(pair)
    return df


def get_causalities(all_tasks, direct_followers):
    """Return the pairs ``(a, b)`` where ``b`` follows ``a`` but never the reverse.

    Args:
        all_tasks: iterable of hashable tasks; it is iterated more than once.
        direct_followers: collection of ``(a, b)`` pairs.

    Returns:
        A list of distinct pairs, ordered by the nested iteration over
        ``all_tasks``.
    """
    followers = set(direct_followers)  # constant-time membership tests
    cs = []  # causalities
    emitted = set()  # guards against duplicates when all_tasks repeats a task
    for first in all_tasks:
        for second in all_tasks:
            pair = (first, second)
            if pair in emitted:
                continue
            if pair in followers and (second, first) not in followers:
                emitted.add(pair)
                cs.append(pair)
    return cs


def get_no_causalities(all_tasks, direct_followers):
    """Return the pairs ``(a, b)`` that never directly follow each other.

    A pair qualifies when neither ``(a, b)`` nor ``(b, a)`` is in
    ``direct_followers``; this includes ``(a, a)`` when ``a`` never
    directly follows itself.

    Args:
        all_tasks: iterable of hashable tasks; it is iterated more than once.
        direct_followers: collection of ``(a, b)`` pairs.

    Returns:
        A list of distinct pairs, ordered by the nested iteration over
        ``all_tasks``.
    """
    followers = set(direct_followers)  # constant-time membership tests
    ncs = []  # no causalities
    emitted = set()  # guards against duplicates when all_tasks repeats a task
    for first in all_tasks:
        for second in all_tasks:
            pair = (first, second)
            if pair in emitted:
                continue
            if pair not in followers and (second, first) not in followers:
                emitted.add(pair)
                ncs.append(pair)
    return ncs


def get_parallels(all_tasks, direct_followers):
    """Return the pairs ``(a, b)`` that directly follow each other both ways.

    A pair qualifies when both ``(a, b)`` and ``(b, a)`` are in
    ``direct_followers``.

    Args:
        all_tasks: iterable of hashable tasks; it is iterated more than once.
        direct_followers: collection of ``(a, b)`` pairs.

    Returns:
        A list of distinct pairs, ordered by the nested iteration over
        ``all_tasks``.
    """
    followers = set(direct_followers)  # constant-time membership tests
    par = []  # parallel tasks
    emitted = set()  # guards against duplicates when all_tasks repeats a task
    for first in all_tasks:
        for second in all_tasks:
            pair = (first, second)
            if pair in emitted:
                continue
            if pair in followers and (second, first) in followers:
                emitted.add(pair)
                par.append(pair)
    return par


def check_set(A, ncs):
    """Tell whether every ordered pair drawn from ``A`` is contained in ``ncs``.

    Each task is also paired with itself. An empty ``A`` yields True.
    """
    return all((left, right) in ncs for left in A for right in A)


def check_outsets(A, B, cs):
    """Tell whether every pair ``(a, b)`` with ``a`` in ``A`` and ``b`` in ``B`` is in ``cs``.

    Yields True when either ``A`` or ``B`` is empty.
    """
    return all((source, target) in cs for source in A for target in B)


def make_xl_set(all_tasks, direct_followers, causalities, no_causalities):
    """Build the set of ``(A, B)`` pairs of task tuples accepted by the checks.

    ``A`` and ``B`` are drawn from the subsets of ``all_tasks`` with between
    1 and ``len(all_tasks) - 1`` members. A pair is kept when both tuples
    pass ``check_set`` against ``no_causalities`` and the pair passes
    ``check_outsets`` against ``causalities``.

    Args:
        all_tasks: sized collection of tasks.
        direct_followers: not used by this function; kept so existing
            callers keep working.
        causalities: collection of ``(a, b)`` pairs.
        no_causalities: collection of ``(a, b)`` pairs.

    Returns:
        A set of 2-tuples ``(A, B)`` where ``A`` and ``B`` are tuples of tasks.
    """
    import itertools

    # Filter each subset once up front instead of re-checking it for every
    # pairing it takes part in.
    independent = set(
        subset
        for size in range(1, len(all_tasks))
        for subset in itertools.combinations(all_tasks, size)
        if check_set(subset, no_causalities)
    )

    xl = set()
    for a in independent:
        for b in independent:
            if check_outsets(a, b, causalities):
                xl.add((a, b))
    return xl


def make_yl_set(xl):
    """Return the pairs of ``xl`` that no other pair of ``xl`` covers.

    A pair ``(A, B)`` is covered by a different pair ``(A2, B2)`` when every
    member of ``A`` is in ``A2`` and every member of ``B`` is in ``B2``.

    Args:
        xl: collection of hashable ``(A, B)`` pairs, where ``A`` and ``B``
            are iterables of tasks. It is not modified.

    Returns:
        A new set holding the pairs that are not covered.
    """
    def is_covered(pair, other):
        return (pair != other
                and set(pair[0]).issubset(other[0])
                and set(pair[1]).issubset(other[1]))

    return set(
        pair for pair in xl
        if not any(is_covered(pair, other) for other in xl)
    )


# Ti is the set of all tasks which occur trace-initially
def make_ti_set(log):
    """Return the set of first tasks across the traces of ``log``.

    Empty traces have no first task and are skipped.

    Args:
        log: iterable of traces; each trace is an indexable sequence of
            hashable tasks.
    """
    return set(trace[0] for trace in log if trace)


# To is the set of all tasks which occur trace-terminally
def make_to_set(log):
    """Return the set of last tasks across the traces of ``log``.

    Empty traces have no last task and are skipped.

    Args:
        log: iterable of traces; each trace is an indexable sequence of
            hashable tasks.
    """
    return set(trace[-1] for trace in log if trace)


def build_petrinet(tl, yl, ti, to, output_file):
    """Draw the net described by ``yl``, ``ti`` and ``to`` and render it.

    Every pair in ``yl`` becomes a circle node named after the pair's string
    form, with edges from each task of the pair's first member and to each
    task of its second member; tasks are drawn as boxes. ``start`` is linked
    to every task in ``ti`` and every task in ``to`` is linked to ``end``.
    ``tl`` is accepted but not used here.
    """
    net = gv.Digraph(format='png')
    net.attr(rankdir='LR')  # left-to-right layout; the default is top-down
    net.node('start')
    net.node('end')

    for pair in yl:
        inputs, outputs = pair[0], pair[1]
        place = str(pair)
        for task in inputs:
            net.edge(task, place)
            net.node(task, shape='box')
            net.node(place, shape='circle')
        for task in outputs:
            net.edge(place, task)
            net.node(task, shape='box')

    for first_task in ti:
        net.edge('start', first_task)
    for last_task in to:
        net.edge(last_task, 'end')

    net.render(output_file)
3 changes: 3 additions & 0 deletions logs/exercise1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
A B C D
A C B D
A E D
2 changes: 2 additions & 0 deletions logs/exercise2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
1x Case1 A C D
1x Case2 B C E
4 changes: 4 additions & 0 deletions logs/exercise3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
A C E G
A E C G
B D F G
B F D G
6 changes: 6 additions & 0 deletions logs/exercise4.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
a b c d f
a c b d f
a b d c f
a c d b f
a d e f
a e d f
37 changes: 37 additions & 0 deletions output/exercise1
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
digraph {
rankdir=LR
start
end
B -> "(('B', 'E'), ('D',))"
B [shape=box]
"(('B', 'E'), ('D',))" [shape=circle]
E -> "(('B', 'E'), ('D',))"
E [shape=box]
"(('B', 'E'), ('D',))" [shape=circle]
"(('B', 'E'), ('D',))" -> D
D [shape=box]
A -> "(('A',), ('B', 'E'))"
A [shape=box]
"(('A',), ('B', 'E'))" [shape=circle]
"(('A',), ('B', 'E'))" -> B
B [shape=box]
"(('A',), ('B', 'E'))" -> E
E [shape=box]
A -> "(('A',), ('C', 'E'))"
A [shape=box]
"(('A',), ('C', 'E'))" [shape=circle]
"(('A',), ('C', 'E'))" -> C
C [shape=box]
"(('A',), ('C', 'E'))" -> E
E [shape=box]
C -> "(('C', 'E'), ('D',))"
C [shape=box]
"(('C', 'E'), ('D',))" [shape=circle]
E -> "(('C', 'E'), ('D',))"
E [shape=box]
"(('C', 'E'), ('D',))" [shape=circle]
"(('C', 'E'), ('D',))" -> D
D [shape=box]
start -> A
D -> end
}
Binary file added output/exercise1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
40 changes: 40 additions & 0 deletions output/exercise2
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
digraph {
rankdir=LR
start
end
"1x" -> "(('1x',), ('Case1', 'Case2'))"
"1x" [shape=box]
"(('1x',), ('Case1', 'Case2'))" [shape=circle]
"(('1x',), ('Case1', 'Case2'))" -> Case1
Case1 [shape=box]
"(('1x',), ('Case1', 'Case2'))" -> Case2
Case2 [shape=box]
C -> "(('C',), ('E', 'D'))"
C [shape=box]
"(('C',), ('E', 'D'))" [shape=circle]
"(('C',), ('E', 'D'))" -> E
E [shape=box]
"(('C',), ('E', 'D'))" -> D
D [shape=box]
A -> "(('A', 'B'), ('C',))"
A [shape=box]
"(('A', 'B'), ('C',))" [shape=circle]
B -> "(('A', 'B'), ('C',))"
B [shape=box]
"(('A', 'B'), ('C',))" [shape=circle]
"(('A', 'B'), ('C',))" -> C
C [shape=box]
Case2 -> "(('Case2',), ('B',))"
Case2 [shape=box]
"(('Case2',), ('B',))" [shape=circle]
"(('Case2',), ('B',))" -> B
B [shape=box]
Case1 -> "(('Case1',), ('A',))"
Case1 [shape=box]
"(('Case1',), ('A',))" [shape=circle]
"(('Case1',), ('A',))" -> A
A [shape=box]
start -> "1x"
E -> end
D -> end
}
Binary file added output/exercise2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
60 changes: 60 additions & 0 deletions output/exercise3
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
digraph {
rankdir=LR
start
end
A -> "(('A',), ('E',))"
A [shape=box]
"(('A',), ('E',))" [shape=circle]
"(('A',), ('E',))" -> E
E [shape=box]
C -> "(('C', 'D'), ('G',))"
C [shape=box]
"(('C', 'D'), ('G',))" [shape=circle]
D -> "(('C', 'D'), ('G',))"
D [shape=box]
"(('C', 'D'), ('G',))" [shape=circle]
"(('C', 'D'), ('G',))" -> G
G [shape=box]
C -> "(('C', 'F'), ('G',))"
C [shape=box]
"(('C', 'F'), ('G',))" [shape=circle]
F -> "(('C', 'F'), ('G',))"
F [shape=box]
"(('C', 'F'), ('G',))" [shape=circle]
"(('C', 'F'), ('G',))" -> G
G [shape=box]
E -> "(('E', 'F'), ('G',))"
E [shape=box]
"(('E', 'F'), ('G',))" [shape=circle]
F -> "(('E', 'F'), ('G',))"
F [shape=box]
"(('E', 'F'), ('G',))" [shape=circle]
"(('E', 'F'), ('G',))" -> G
G [shape=box]
E -> "(('E', 'D'), ('G',))"
E [shape=box]
"(('E', 'D'), ('G',))" [shape=circle]
D -> "(('E', 'D'), ('G',))"
D [shape=box]
"(('E', 'D'), ('G',))" [shape=circle]
"(('E', 'D'), ('G',))" -> G
G [shape=box]
B -> "(('B',), ('F',))"
B [shape=box]
"(('B',), ('F',))" [shape=circle]
"(('B',), ('F',))" -> F
F [shape=box]
B -> "(('B',), ('D',))"
B [shape=box]
"(('B',), ('D',))" [shape=circle]
"(('B',), ('D',))" -> D
D [shape=box]
A -> "(('A',), ('C',))"
A [shape=box]
"(('A',), ('C',))" [shape=circle]
"(('A',), ('C',))" -> C
C [shape=box]
start -> A
start -> B
G -> end
}
Binary file added output/exercise3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit f52eb29

Please sign in to comment.