## Generating constraint and violation data for building services on top of the project

In [1]:
from pathlib import Path
import os
from semconstmining.parsing.label_parser.nlp_helper import NlpHelper
from semconstmining.config import Config
from semconstmining.main import get_resource_handler, get_or_mine_constraints
from semconstmining.declare.enums import Template

# Name of the model collection handed to Config below.
MODEL_COLLECTION = "semantic_sap_sam_filtered"

# The project root is taken to be two directory levels above the current
# working directory (i.e. above wherever the notebook kernel was started).
config = Config(
    Path.cwd().parents[1].resolve(),
    MODEL_COLLECTION,
)

# Drop the NOT_CO_EXISTENCE template string from the ignore list
# (presumably so that constraints of this type are no longer skipped --
# confirm against how CONSTRAINT_TYPES_TO_IGNORE is consumed).
config.CONSTRAINT_TYPES_TO_IGNORE.remove(
    Template.NOT_CO_EXISTENCE.templ_str
)

  from tqdm.autonotebook import tqdm, trange


In [None]:
# Build the NLP helper and the resource handler from the shared config; both
# objects are reused by later cells.
nlp_helper = NlpHelper(config)
resource_handler = get_resource_handler(config, nlp_helper)
# Obtain the constraints with min_support=1.
# NOTE(review): judging by its name, get_or_mine_constraints presumably reuses
# previously mined constraints when available and mines them otherwise --
# confirm in semconstmining.main.
all_constraints = get_or_mine_constraints(config, resource_handler, min_support=1)

[2024-07-10 11:33:31,796] p84679 {keyedvectors.py:2047} INFO - loading projection weights from /Users/adrianrebmann/gensim-data/glove-wiki-gigaword-50/glove-wiki-gigaword-50.gz
[2024-07-10 11:33:46,643] p84679 {utils.py:448} INFO - KeyedVectors lifecycle event {'msg': 'loaded (400000, 50) matrix of type float32 from /Users/adrianrebmann/gensim-data/glove-wiki-gigaword-50/glove-wiki-gigaword-50.gz', 'binary': False, 'encoding': 'utf8', 'datetime': '2024-07-10T11:33:46.643594', 'gensim': '4.3.2', 'python': '3.10.13 (main, Aug 24 2023, 12:59:26) [Clang 15.0.0 (clang-1500.1.0.2.5)]', 'platform': 'macOS-14.5-x86_64-i386-64bit', 'event': 'load_word2vec_format'}
[2024-07-10 11:33:46,745] p84679 {nlp_helper.py:95} INFO - Loaded 0 known embeddings and 238914 known similarities
[2024-07-10 11:33:46,746] p84679 {resource_handler.py:113} INFO - Loading elements from /Users/adrianrebmann/Develop/Best-Practice Conf./content/dev/data/interim/semantic_sap_sam_filtered_bpmn_elements.pkl.
[2024-07-10 11

In [None]:
# Display the constraints obtained in the previous cell (notebook rich output).
all_constraints

In [None]:
# Export the constraints to "example_constraints.csv"; the path is relative,
# so the file is written to the current working directory.
all_constraints.to_csv("example_constraints.csv")

### Creating violation data

In [None]:
from semconstmining.log.loghandler import LogHandler
from semconstmining.log.loginfo import LogInfo
from semconstmining.log.logstats import LogStats
from semconstmining.selection.instantiation.filter_config import FilterConfig
from semconstmining.selection.instantiation.recommendation_config import RecommendationConfig
from semconstmining.main import get_parts_of_constraints, compute_relevance_for_log, get_log_and_info, recommend_constraints_for_log, check_constraints, get_violation_to_cases
from semconstmining.selection.instantiation.constraintfilter import ConstraintFilter
from semconstmining.selection.instantiation.constraintrecommender import ConstraintRecommender
from os.path import exists
import pandas as pd


# File name of the event log that is checked against the constraints.
CURRENT_LOG_FILE = "BPI_Challenge_2019-3-w-after.xes"

# The filter config is built from the shared config only; the recommendation
# config additionally gets top_k=250.
filter_config = FilterConfig(config)
recommender_config = RecommendationConfig(config, top_k=250)

# Pre-compute embeddings for the constraint parts before filtering.
nlp_helper.pre_compute_embeddings(
    sentences=get_parts_of_constraints(config, all_constraints)
)

const_filter = ConstraintFilter(config, filter_config, resource_handler)
filtered_constraints = const_filter.filter_constraints(all_constraints)
event_log, log_info = get_log_and_info(config, nlp_helper, CURRENT_LOG_FILE)

# Log-specific constraint recommendation.
# The relevance-annotated constraints are cached per log file in DATA_INTERIM.
relevance_cache_path = config.DATA_INTERIM / (CURRENT_LOG_FILE + "-constraints_with_relevance.pkl")
if exists(relevance_cache_path):
    # Reuse the cached result. NOTE: unpickling executes arbitrary code, so
    # this file must only ever come from a trusted, locally produced run.
    filtered_constraints = pd.read_pickle(relevance_cache_path)
else:
    # No cache yet: compute relevance for this log and store it for next time.
    filtered_constraints = compute_relevance_for_log(
        config,
        filtered_constraints,
        nlp_helper,
        CURRENT_LOG_FILE,
        pd_log=event_log,
        precompute=True,
    )
    filtered_constraints.to_pickle(relevance_cache_path)

recommended_constraints = recommend_constraints_for_log(
    config,
    recommender_config,
    filtered_constraints,
    nlp_helper,
    CURRENT_LOG_FILE,
    pd_log=event_log,
)

# The recommended constraints are used as-is; this name is a plain alias.
consistent_recommended_constraints = recommended_constraints

violations = check_constraints(
    config,
    CURRENT_LOG_FILE,
    consistent_recommended_constraints,
    nlp_helper,
    pd_log=event_log,
)
violations_to_cases = get_violation_to_cases(config, violations)

# One row per violation: the violation itself, the number of associated
# cases, and the cases.
violation_df = pd.DataFrame.from_records(
    [
        {"violation": violation, "num_violations": len(cases), "cases": cases}
        for violation, cases in violations_to_cases.items()
    ]
)

In [None]:
# Display the violation table built in the previous cell.
violation_df

In [None]:
# Export the violation table as a pickle file; the path is relative, so the
# file is written to the current working directory.
violation_df.to_pickle("example_violations.pkl")