In [None]:
# needed: Python 3.10-3.11

# requirements:
## pm4py>=2.7.11.13
## duckdb
## openai
## crewai
## crewai[tools]
## langchain
## langchain-openai
## requests

# you can install them with the command (quotes keep the shell from treating ">=" as a redirection and "[...]" as a glob): pip install -U "pm4py>=2.7.11.13" duckdb openai crewai "crewai[tools]" langchain langchain-openai requests

# in Colab, you can uncomment the following line:
# !pip install "pm4py>=2.7.11.13" duckdb openai crewai "crewai[tools]" langchain langchain-openai requests

In [None]:
import pm4py
import duckdb
import pickle

In [None]:
# FIRST IMPLEMENTATION PARADIGM - DIRECT PROVISION OF INSIGHTS
# Idea: hand the LLM a textual description of the event log and let it answer
# the inquiry from that description alone.

# Step 1: load the event log from the XES file.
log = pm4py.read_xes(file_path="renting_log_high.xes.gz")

# Step 2: turn the log's directly-follows graph (DFG) into its textual abstraction.
dfg_abstraction = pm4py.llm.abstract_dfg(log)

# Step 3: show the text that will later be embedded in the prompt.
print(dfg_abstraction)

In [None]:
# Build a prompt from the DFG abstraction, send it to an OpenAI-compatible
# chat-completions endpoint and print the model's answer.
import requests

# create a prompt
prompt = dfg_abstraction + "\n\nWhat are the main points for potential unfairness?"

# sends the prompt to an LLM
# Root URL of the OpenAI-compatible API; swap the two lines below to change provider.
#base_url = "https://api.deepinfra.com/v1/openai"
base_url = "https://api.openai.com/v1"

# The key is kept in a local file rather than in the notebook itself; the
# context manager guarantees the file handle is closed after reading.
with open("api_key.txt", "r") as key_file:
    api_key = key_file.read().strip()

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}"
}

payload = {
    "model": "gpt-4o-mini",
    "messages": [
        {"role": "user", "content": prompt}
    ]
}

# The endpoint is derived from base_url so that changing the provider above
# actually takes effect. The timeout keeps the cell from hanging forever if
# the server never answers.
r = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, timeout=300)

# Surface HTTP errors (bad key, rate limit, ...) as such, instead of letting
# them show up as a KeyError on "choices" below.
r.raise_for_status()
content = r.json()["choices"][0]["message"]["content"]

print(content)

In [None]:
# SECOND IMPLEMENTATION PARADIGM - CODE GENERATION
# The LLM is asked to write a DuckDB SQL statement for the event log; the reply
# is kept in `content` so that it can be executed afterwards.
import requests

log = pm4py.read_xes("renting_log_high.xes.gz")

prompt = "Could you generate a DuckDB SQL statement to compute the number of cases of the event log stored in the 'log' variable? The case ID column is 'case:concept:name', the activity column is 'concept:name', the timestamp column is 'time:timestamp'."

# sends the prompt to an LLM
# Root URL of the OpenAI-compatible API; swap the two lines below to change provider.
#base_url = "https://api.deepinfra.com/v1/openai"
base_url = "https://api.openai.com/v1"

# The key is kept in a local file rather than in the notebook itself; the
# context manager guarantees the file handle is closed after reading.
with open("api_key.txt", "r") as key_file:
    api_key = key_file.read().strip()

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}"
}

payload = {
    "model": "gpt-4o-mini",
    "messages": [
        {"role": "user", "content": prompt}
    ]
}

# The endpoint is derived from base_url so that changing the provider above
# actually takes effect. The timeout keeps the cell from hanging forever if
# the server never answers.
r = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, timeout=300)

# Surface HTTP errors (bad key, rate limit, ...) as such, instead of letting
# them show up as a KeyError on "choices" below.
r.raise_for_status()
content = r.json()["choices"][0]["message"]["content"]

print(content)

In [None]:
# use DuckDB to execute the LLM-generated SQL query on the Pandas dataframe
import re

import duckdb

# Pull the query out of the Markdown reply. A fence tagged "sql" (any letter
# case) is preferred; otherwise the first fenced block of any kind is used.
# A missing closing fence is tolerated: the block then runs to the end of the reply.
fence_match = re.search(r"```sql(.*?)(?:```|\Z)", content, flags=re.DOTALL | re.IGNORECASE)
if fence_match is None:
    fence_match = re.search(r"```[^\n]*\n(.*?)(?:```|\Z)", content, flags=re.DOTALL)

sql_query = fence_match.group(1).strip() if fence_match is not None else ""
if not sql_query:
    # Fail with an explanation instead of an opaque IndexError from str.split.
    raise ValueError("The LLM response does not contain a fenced SQL code block:\n" + content)

# The prompt tells the model that the event log lives in the 'log' variable, so
# the query is expected to reference that name; `log` must therefore exist in
# the notebook namespace for DuckDB to resolve it.
# NOTE(review): this runs SQL written by an LLM without inspection -- read the
# printed reply before executing this cell on data you care about.
res = duckdb.sql(sql_query).to_df()
print(res)