In [None]:
import os
from dotenv import load_dotenv
import json
import subprocess
from graphviz import Digraph

In [None]:
load_dotenv()

True

In [12]:
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [17]:
text = """

The process begins when the student logs in to the university's website. He then takes an online exam. After that, the
system grades it. If the student scores below 60%, he takes the exam again. If the student scores 60% or higher , the professor enters the grade

"""

In [18]:
from pydantic import BaseModel
import openai
import json

class Task(BaseModel):
    description: str
    actor_name: str

class TaskResponse(BaseModel):
    tasks: list[Task]

def tasks(text: str) -> TaskResponse:
    prompt = f"""
    You're a GenAI expert in the field of banking, working on creating a BPMN workflow from text data.

    Your task is to:
    1. Read the standard operating procedure (SOP) provided to you.
    2. Identify each task and its associated actor from the SOP.
    3. Exclude any tasks that are conditional, involve decision points, or depend on a specific outcome (like 'if', 'then', 'otherwise', 'unless').

    Extract a structured JSON list of tasks from the following text. Include only straightforward tasks that describe an action without any conditional or branching logic. Ignore any tasks or instructions that involve conditions, branching, or decisions. The output should be a plain JSON array of objects, where each object includes:
    {{
        "description": "<Task Description>",
        "actor_name": "<Actor>"
    }}

    ### Instructions for Output
    - Only include actions that are unconditional and must happen every time, with no exceptions.
    - Exclude any tasks that depend on specific criteria or outcomes to occur.
    - In "description", include only the action performed, without any reference to the actor.

    Here is the SOP for you to analyze:

    ***Begin SOP***
      {text}
    ***End SOP***
    """
    response_text = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=500,
        temperature=0,
    )['choices'][0]['message']['content'].strip()

    parsed_json = json.loads(response_text)
    if isinstance(parsed_json, list) and all("description" in task and "actor_name" in task for task in parsed_json):
        return TaskResponse(tasks=[Task(**task) for task in parsed_json])

    return TaskResponse(tasks=[])

json_output = tasks(text)
print(json.dumps(json_output.dict(),indent=2))

{
  "tasks": [
    {
      "description": "logs in to the university's website",
      "actor_name": "student"
    },
    {
      "description": "takes an online exam",
      "actor_name": "student"
    },
    {
      "description": "grades it",
      "actor_name": "system"
    }
  ]
}


C:\Users\hp\AppData\Local\Temp\ipykernel_17820\1957402152.py:52: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  print(json.dumps(json_output.dict(),indent=2))


In [19]:
class Task(BaseModel):
    description: str
    condition: str

class TaskResponse(BaseModel):
    tasks: list[Task]

def gateway(text: str) -> TaskResponse:
    # Define the prompt for conditional tasks
    prompt = f"""
    You're a GenAI expert in the field of banking, working on creating a BPMN workflow from text data.

    Your task is to:
    1. Read the standard operating procedure (SOP) provided to you.
    2. Identify each task from the SOP.
    3. Include only tasks that are parallel, exclusive, conditional, involve decision points, or depend on a specific outcome (like 'if', 'then', 'otherwise', 'unless', 'if-else').

    Extract a structured JSON list of tasks from the following text. Only include tasks that describe conditional actions, branching logic, or decisions. Ignore any tasks or instructions that happen unconditionally or must occur every time. Format the output as follows:
    [
        {{
            "description": "Takes the exam again",
            "condition": "Score below 60%"
        }}
    ]

    ### Instructions for Output
    - Only include actions that are conditional, require decisions, or are based on meeting specific criteria.
    - Do not include any straightforward tasks that occur unconditionally. Ignore any tasks that do not involve a decision or condition.

    Here is the SOP for you to analyze:

    ***Begin SOP***

    {text}

    ***End SOP***
    """

    # Send the prompt to OpenAI's GPT model
    response_text = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=500,
        temperature=0,
    )['choices'][0]['message']['content'].strip()

    # Parse the JSON response directly
    parsed_json = json.loads(response_text)
    if isinstance(parsed_json, list) and all("description" in task and "condition" in task for task in parsed_json):
        return TaskResponse(tasks=[Task(**task) for task in parsed_json])

    # Return an empty TaskResponse if the parsed data doesn't match the expected structure
    return TaskResponse(tasks=[])


gateway_output = gateway(text)
print(json.dumps(gateway_output.dict(), indent=2))


{
  "tasks": [
    {
      "description": "Takes the exam again",
      "condition": "Score below 60%"
    },
    {
      "description": "Professor enters the grade",
      "condition": "Score 60% or higher"
    }
  ]
}


C:\Users\hp\AppData\Local\Temp\ipykernel_17820\2686492303.py:57: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  print(json.dumps(gateway_output.dict(), indent=2))


In [20]:
# Integrate into BPMN workflow generation function

def generate_bpmn_workflow(json_output, gateway_output):
  gateway_output_dict = gateway_output.dict()
  prompt = f"""
  Given two sets of tasks in json format, 'json_output' containing straightforward tasks and 'gateway_output' containing conditional tasks, generate a structured BPMN workflow.

  Your task is to:
  - Merge both sets of tasks into a single BPMN structure.
  - Execute the straightforward tasks in sequential order.
  - Use an exclusive gateway to handle the conditional tasks from 'gateway_output', branching based on specified conditions.
  - Ensure BPMN workflow includes a start event, sequential straightforward tasks, an exclusive gateway for conditional tasks, and an end event.
  - Provide the output **only** as JSON, without any explanation or additional text, so it can be directly parsed.
  - Exclude "sequenceFlow" and "definitions".

  json_output: {json.dumps(json_output.dict())}
  gateway_output: {json.dumps(gateway_output.dict())}
  """

  response = openai.ChatCompletion.create(
      model="gpt-4",
      messages=[
            {"role": "system", "content": "You are a BPMN expert."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=1000,
        temperature=0
    )

  return response.choices[0].message.content.strip()

# Generate the BPMN structure using the generated gateway output
bpmn_structure = generate_bpmn_workflow(json_output, gateway_output)
print("Generated BPMN JSON Structure:", bpmn_structure)

C:\Users\hp\AppData\Local\Temp\ipykernel_17820\751442845.py:4: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  gateway_output_dict = gateway_output.dict()
C:\Users\hp\AppData\Local\Temp\ipykernel_17820\751442845.py:16: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  json_output: {json.dumps(json_output.dict())}
C:\Users\hp\AppData\Local\Temp\ipykernel_17820\751442845.py:17: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  gateway_output: {json.dumps(gateway_output.dict())}


Generated BPMN JSON Structure: {
  "BPMN": {
    "startEvent": {
      "id": "start",
      "outgoing": "task1"
    },
    "tasks": [
      {
        "id": "task1",
        "description": "logs in to the university's website",
        "actor_name": "student",
        "incoming": "start",
        "outgoing": "task2"
      },
      {
        "id": "task2",
        "description": "takes an online exam",
        "actor_name": "student",
        "incoming": "task1",
        "outgoing": "task3"
      },
      {
        "id": "task3",
        "description": "grades it",
        "actor_name": "system",
        "incoming": "task2",
        "outgoing": "gateway1"
      }
    ],
    "exclusiveGateway": {
      "id": "gateway1",
      "incoming": "task3",
      "outgoing": ["task4", "task5"]
    },
    "conditionalTasks": [
      {
        "id": "task4",
        "description": "Takes the exam again",
        "condition": "Score below 60%",
        "incoming": "gateway1",
        "outgoing": "end"


In [None]:
# Function to generate BPMN Diagram using Graphviz:

def generate_bpmn_diagram(bpmn_workflow, output_filename="BPMN_Workflow_Diagram"):
    dot = Digraph('BPMN_Workflow', format='png')
    dot.attr(rankdir='TB') 

    # Start Event
    start_event = bpmn_workflow["workflow"]["start_event"]
    dot.node(start_event["id"], "Start", shape="ellipse", style="filled", color="lightpink")

    # Sequential Tasks
    previous_task_id = start_event["id"]
    for task in bpmn_workflow["workflow"]["tasks"]:
        task_id = task["id"]
        task_label = f"{task['actor']}: {task['name']}"
        dot.node(task_id, task_label, shape="box", style="filled", color="moccasin")
        dot.edge(previous_task_id, task_id)
        previous_task_id = task_id

    # Exclusive Gateway
    exclusive_gateway = bpmn_workflow["workflow"]["exclusive_gateway"]
    gateway_id = exclusive_gateway["id"]
    dot.node(gateway_id, "X", shape="diamond", style="filled", color="lightblue")
    dot.edge(previous_task_id, gateway_id)

    # Conditional Branches
    for branch in exclusive_gateway["outgoing"]:
        branch_id = branch["id"]
        branch_label = f"{branch['actor']}: {branch['name']}\nIf {branch['condition']}"
        dot.node(branch_id, branch_label, shape="box", style="filled", color="moccasin")
        dot.edge(gateway_id, branch_id, label=branch["condition"])

    # End Event
    end_event = bpmn_workflow["workflow"]["end_event"]
    end_event_id = end_event["id"]
    dot.node(end_event_id, "End", shape="ellipse", style="filled", color="lightpink")

    # Connect each branch to the End Event
    for branch in exclusive_gateway["outgoing"]:
        branch_id = branch["id"]
        dot.edge(branch_id, end_event_id)

    # Save the diagram
    output_path = f"{output_filename}"
    dot.render(output_path)

    # Open the image in VS Code
    image_path = f"{output_filename}.png"
    if os.name == "nt":
        os.startfile(image_path) 
    else:
        subprocess.run(["code", image_path]) 

    print(f"BPMN diagram saved at: {image_path}")


BPMN diagram saved at: BPMN_Workflow_Diagram.png
