read json file

In [None]:
import json
def read_json_file(file_path):
    """
    Read a JSON file and return its decoded content.

    Args:
        file_path: Path of the JSON file to load.

    Returns:
        The decoded JSON document (a dictionary when the file holds a JSON
        object, as the workflow export used in this notebook does).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Be explicit about UTF-8 instead of relying on the platform default
    # encoding: the sample workflow contains non-ASCII text (curly quotes in
    # node names) that fails to decode under e.g. cp1252.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [None]:
# Preview the raw workflow export as a Python dict.
read_json_file('content-generation-sample.json')

{'id': 'p5bfwpcRy6LK33Io',
 'meta': {'instanceId': 'a4bfc93e975ca233ac45ed7c9227d84cf5a2329310525917adaf3312e10d5462',
  'templateCredsSetupCompleted': True},
 'name': 'Automate Content Generator for WordPress with DeepSeek R1',
 'tags': [],
 'nodes': [{'id': 'c4a6995f-7769-4b77-80ca-1e6bccef77c1',
   'name': 'When clicking ‘Test workflow’',
   'type': 'n8n-nodes-base.manualTrigger',
   'position': [-20, 200],
   'parameters': {},
   'typeVersion': 1},
  {'id': 'c76b1458-5130-41e7-b2f2-1cfe22eab536',
   'name': 'Get Ideas',
   'type': 'n8n-nodes-base.googleSheets',
   'position': [200, 200],
   'parameters': {'options': {},
    'sheetName': {'__rl': True, 'mode': 'id', 'value': '=Sheet1'},
    'documentId': {'__rl': True, 'mode': 'id', 'value': 'YOURDOCUMENT'}},
   'credentials': {'googleSheetsOAuth2Api': {'id': 'JYR6a64Qecd6t8Hb',
     'name': 'Google Sheets account'}},
   'typeVersion': 4.5},
  {'id': '8d17a640-3e15-42e9-9481-e3291d395ccd',
   'name': 'Set your prompt',
   'type': 'n

In [None]:
# Keep the loaded workflow in `data` so the cells below can reuse it.
data = read_json_file('content-generation-sample.json')

parse and analyse json structure

Break the workflow down into agents, tasks, dependencies, inputs, and outputs

In [None]:
def parse_workflow_data(workflow_data):
    """
    Break a workflow export down into agents, dependencies and sticky notes.

    Args:
        workflow_data: Dictionary loaded from the workflow JSON file.

    Returns:
        A dictionary with the keys:
          - 'workflow_id', 'workflow_name', 'meta_data': copied from the
            top-level 'id', 'name' and 'meta' entries ('' / {} when absent).
          - 'agents': one dict (id, name, type, parameters, credentials) for
            every node that is not a sticky note. Always present, possibly
            empty.
          - 'dependencies': maps each source node name to the list of node
            names found in its 'main' connections.
          - 'sticky_notes': the 'content' text of every sticky-note node.
    """
    sticky_note_type = "n8n-nodes-base.stickyNote"

    parsed_data = {
        'workflow_id': workflow_data.get('id', ''),
        'workflow_name': workflow_data.get('name', ''),
        'meta_data': workflow_data.get('meta', {}),
    }

    # Single pass over the nodes: sticky notes only contribute their text,
    # every other node becomes an agent.
    agents = []
    sticky_notes = []
    for node in workflow_data.get('nodes', []):
        if node.get('type') == sticky_note_type:
            # .get() so a note without 'parameters' yields '' instead of
            # raising KeyError.
            sticky_notes.append(node.get('parameters', {}).get('content', ''))
            continue
        agents.append({
            'id': node.get('id', ''),
            'name': node.get('name', ''),
            'type': node.get('type', ''),
            'parameters': node.get('parameters', {}),
            'credentials': node.get('credentials', {}),
        })
    # Assigned after the loop so the key exists even when the workflow has no
    # (non-sticky) nodes.
    parsed_data['agents'] = agents

    # 'connections' maps a source node name to {'main': [[{'node': ...}, ...], ...]},
    # i.e. a list of connection lists.
    dependencies = {}
    for source_node, connection_data in workflow_data.get('connections', {}).items():
        dependent_list = []
        for connection_array in connection_data.get('main', []):
            # Tolerate an empty/None slot in the list of connection lists.
            for connected_node in connection_array or []:
                dependent_list.append(connected_node.get('node', ''))
        dependencies[source_node] = dependent_list
    parsed_data['dependencies'] = dependencies

    parsed_data['sticky_notes'] = sticky_notes

    return parsed_data

In [None]:
# Preview the parsed structure (agents, dependencies, sticky notes).
parse_workflow_data(data)

{'workflow_id': 'p5bfwpcRy6LK33Io',
 'workflow_name': 'Automate Content Generator for WordPress with DeepSeek R1',
 'meta_data': {'instanceId': 'a4bfc93e975ca233ac45ed7c9227d84cf5a2329310525917adaf3312e10d5462',
  'templateCredsSetupCompleted': True},
 'agents': [{'id': 'c4a6995f-7769-4b77-80ca-1e6bccef77c1',
   'name': 'When clicking ‘Test workflow’',
   'type': 'n8n-nodes-base.manualTrigger',
   'parameters': {},
   'credentials': {}},
  {'id': 'c76b1458-5130-41e7-b2f2-1cfe22eab536',
   'name': 'Get Ideas',
   'type': 'n8n-nodes-base.googleSheets',
   'parameters': {'options': {},
    'sheetName': {'__rl': True, 'mode': 'id', 'value': '=Sheet1'},
    'documentId': {'__rl': True, 'mode': 'id', 'value': 'YOURDOCUMENT'}},
   'credentials': {'googleSheetsOAuth2Api': {'id': 'JYR6a64Qecd6t8Hb',
     'name': 'Google Sheets account'}}},
  {'id': '8d17a640-3e15-42e9-9481-e3291d395ccd',
   'name': 'Set your prompt',
   'type': 'n8n-nodes-base.set',
   'parameters': {'options': {},
    'assignm

Generate questions based on these key features:
1. The workflow’s purpose (why it exists).
2. The agents’ roles (who does what).
3. The dependencies between tasks or agents (the sequence or required order).
4. The main inputs and outputs (the data or artifacts processed).
5. Potential optimization points or improvement areas.


In [None]:
import os

# Prefer a key that is already exported in the environment so a real secret
# never has to be pasted into (and committed with) the notebook. 'sk-XXXX' is
# only a placeholder fallback and will not authenticate.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or 'sk-XXXX'

In [None]:
import openai
import os
import re
from openai import OpenAI

# Export the key before building the client: OpenAI() is created without
# arguments, so it presumably picks the key up from this environment
# variable (verify against the openai package documentation).
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
# Shared client used by the LLM helper functions defined below.
client = OpenAI()

def generate_questions_llm(parsed_data, llm_model):
    """
    Ask a chat model for creative questions about the workflow (high temperature).

    Uses the module-level ``client`` to send a single chat-completion request.

    Args:
        parsed_data: Dictionary describing the workflow. The keys read here
            are 'workflow_id', 'workflow_name', 'meta_data' (falling back to
            'meta'), 'agents', 'dependencies' and 'sticky_notes'.
        llm_model: Model name passed to the chat-completion call.

    Returns:
        The response lines that start with "<number>. ", in the order the
        model produced them. The list is empty when the model returns no text
        or does not number its questions that way.
    """
    # parse_workflow_data() stores the metadata under 'meta_data'; looking up
    # only 'meta' always put an empty {} into the prompt. 'meta' is kept as a
    # fallback for callers that pass that key.
    meta = parsed_data.get("meta_data", parsed_data.get("meta", {}))

    prompt = f"""
    You are an expert workflow analyst. The workflow includes:
        ID: {parsed_data.get("workflow_id", "")}
        Name: {parsed_data.get("workflow_name", "")}
        Meta: {meta}
        Agents: {parsed_data.get("agents", [])}
        Dependencies: {parsed_data.get("dependencies", {})}
        Notes: {parsed_data.get("sticky_notes", "")}
    Please generate 5 insightful questions that deeply explore this workflow setup,
    potential challenges, or interesting aspects. Be creative and detailed.
    """

    response = client.chat.completions.create(
        model=llm_model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=1.2,
        top_p=0.95,
        max_tokens=2048
    )

    # The message content can be None; treat that as an empty response
    # instead of failing on .strip().
    response_text = (response.choices[0].message.content or "").strip()

    # Keep only the numbered lines ("1. ...", "2. ..."); any introduction or
    # closing remark from the model is dropped.
    questions = []
    for line in response_text.split("\n"):
        line = line.strip()
        if line and re.match(r"^\d+\.\s", line):
            questions.append(line)

    return questions

In [None]:
# Preview only: the result is not stored, and every call sends a new request.
generate_questions_llm(parse_workflow_data(data), 'gpt-3.5-turbo')

['1. How does the automated content generation process ensure that the content created is aligned with SEO best practices and meets the needs of the target audience, considering the instructions provided for structuring the article and generating the title?',
 '2. What safeguards are in place within the workflow to prevent duplication or plagiarism of content, especially when using AI-generated text from DeepSeek and DALL-E to create articles and cover images for WordPress posts?',
 '3. Can you elaborate on how the workflow handles potential discrepancies or errors that may arise during the content generation process, such as inaccuracies in the generated content, failed image uploads, or incorrect data being updated in the Google Sheet or WordPress post?',
 '4. How does the workflow manage the scalability of content creation and publishing, particularly when dealing with a large volume of prompts in the Google Sheet, multiple concurrent article generation requests, and frequent update

In [None]:
# Keep this set of questions for the answer-generation step below.
questions = generate_questions_llm(parse_workflow_data(data), 'gpt-3.5-turbo')

In [None]:
# Inspect the stored questions.
print(questions)

["1. How does the workflow ensure that the generated content is SEO-friendly and aligns with the input prompt provided in the Google Sheet? Are there any mechanisms in place to measure the content's SEO effectiveness?", '2. What measures are taken within the workflow to maintain a coherent narrative flow in the generated article produced by DeepSeek R1? How does the system ensure that the content is engaging and keeps readers interested throughout?', '3. Considering the automated nature of the workflow, what safeguards or quality control checks are implemented to prevent potential errors or inaccuracies in the generated content, especially when it comes to publishing directly on WordPress without human intervention?', '4. How does the workflow handle potential issues such as formatting discrepancies or content structure variations between the generated article from DeepSeek R1 and the final post created on WordPress? Are there any steps taken to streamline this process and ensure consi

In [None]:
def generate_answers_llm(questions, parsed_data, llm_model):
    """
    Ask a chat model to answer each question about the workflow (low temperature).

    Uses the module-level ``client`` and sends one chat-completion request per
    question, each with the full workflow context in the prompt.

    Args:
        questions: Iterable of question strings.
        parsed_data: Dictionary describing the workflow. The keys read here
            are 'workflow_id', 'workflow_name', 'meta_data' (falling back to
            'meta'), 'agents', 'dependencies' and 'sticky_notes'.
        llm_model: Model name passed to the chat-completion call.

    Returns:
        A list of answer strings, one per question and in the same order. An
        answer is '' when the model returns no text.
    """
    # parse_workflow_data() stores the metadata under 'meta_data'; looking up
    # only 'meta' always put an empty {} into the prompt. 'meta' is kept as a
    # fallback for callers that pass that key.
    meta = parsed_data.get("meta_data", parsed_data.get("meta", {}))

    answers = []

    for question in questions:
        prompt = f"""
        You are an expert workflow analyst.
        Context:
        ID: {parsed_data.get("workflow_id", "")}
        Name: {parsed_data.get("workflow_name", "")}
        Meta: {meta}
        Agents: {parsed_data.get("agents", [])}
        Dependencies: {parsed_data.get("dependencies", {})}
        Notes: {parsed_data.get("sticky_notes", "")}


        The question is: "{question}"

        Provide a concise, factual, and well-structured answer.
        """

        response = client.chat.completions.create(
            model=llm_model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}],
            temperature=0.25,
            top_p=0.95,
            max_tokens=2048)
        # The message content can be None; keep the answers aligned with the
        # questions by storing '' rather than failing on .strip().
        answer = (response.choices[0].message.content or "").strip()
        answers.append(answer)

    return answers

In [None]:
# Preview answers for the stored questions; the result is not kept.
generate_answers_llm(questions, parse_workflow_data(data), 'gpt-3.5-turbo')

["To ensure that the generated content is SEO-friendly and aligns with the input prompt provided in the Google Sheet, the workflow incorporates several key mechanisms:\n\n1. **Input Prompt Alignment**: The workflow starts by setting the prompt based on the input provided in the Google Sheet. This prompt guides the content generation process to ensure that the article addresses the specific topic or idea provided.\n\n2. **Content Generation with DeepSeek**: The content generation process utilizes DeepSeek R1 to create the article based on the input prompt. DeepSeek generates the content following specific instructions, such as introduction length, chapter structure, logical flow, and depth of information. This structured approach helps in creating SEO-friendly content that aligns with the input prompt.\n\n3. **Title Generation with DeepSeek**: A title for the article is generated using DeepSeek, ensuring that it is relevant to the content and contains keywords from the article itself. T

generate deterministic questions

In [None]:
def deterministic_questions(parsed_data):
    """
    Build the five fixed question/answer pairs for a parsed workflow.

    The answers come straight from ``parsed_data`` (no LLM involved):
    workflow name, purpose, agent list, dependencies, and credential/API notes.

    Returns a list of five ``{"question": ..., "answer": ...}`` dictionaries.
    """
    notes = parsed_data.get("sticky_notes", "")

    # 1. Workflow name, exactly as stored.
    name_qa = {
        "question": "What is the exact name of the workflow as recorded in the parsed data?",
        "answer": parsed_data.get("workflow_name", ""),
    }

    # 2. Purpose: the first sticky note that carries a "## Target" heading.
    purpose = "No description found."
    for note in notes:
        if "## Target" in note:
            purpose = note
            break
    purpose_qa = {
        "question": "Based on the provided descriptions (sticky notes), what is the primary objective or intended function of this workflow?",
        "answer": purpose,
    }

    # 3. Agents: "<name> (type: <last dotted segment of the node type>)".
    agent_descriptions = []
    for agent in parsed_data.get("agents", []):
        short_type = agent.get('type', 'Unknown').split('.')[-1]
        agent_descriptions.append(f"{agent.get('name', 'Unknown')} (type: {short_type})")
    agents_qa = {
        "question": "List all agents (nodes) present in the workflow along with their names and types.",
        "answer": agent_descriptions or "No agents available.",
    }

    # 4. Dependencies: one "source → target, target" line per source node.
    dependency_text = "\n".join(
        f"{source} → {', '.join(targets)}"
        for source, targets in parsed_data.get("dependencies", {}).items()
    )
    dependencies_qa = {
        "question": "Describe the dependencies between the agents, specifying which agents trigger or depend on others.",
        "answer": dependency_text or "No dependencies found.",
    }

    # 5. Extra context: sticky notes that mention credentials or an API.
    context_notes = []
    for note in notes:
        if "API" in note or "credential" in note.lower():
            context_notes.append(note)
    context_text = "\n".join(context_notes)
    context_qa = {
        "question": "Summarize any additional contextual information from sticky notes that explain the workflow's purpose, best practices, or specific considerations.",
        "answer": context_text or "No additional context provided.",
    }

    return [name_qa, purpose_qa, agents_qa, dependencies_qa, context_qa]



In [None]:
# Preview the five fixed QA pairs for the sample workflow.
deterministic_questions(parse_workflow_data(data))

[{'question': 'What is the exact name of the workflow as recorded in the parsed data?',
  'answer': 'Automate Content Generator for WordPress with DeepSeek R1'},
 {'question': 'Based on the provided descriptions (sticky notes), what is the primary objective or intended function of this workflow?',
  'answer': '## Target\nThis workflow is designed to automatically generate seo-friendly content for wordpress through DeepSeek R1 by giving input ideas on how to structure the article. A cover image is also generated and uploaded with OpenAI DALL-E 3. This flow is designed to be executed automatically (ad "On a schedule" node) and thus have a complete editorial plan.\n\nThis process is useful for blog managers who want to automate content creation and publishing.\n\n## Preliminary step\nCreate a google sheet with the following columns:\n- Date\n- Prompt\n- Title\n- Post ID\n\nFill in only the "Prompt" column with basic ideas that DeepSeek will work on to generate the content.'},
 {'question'

format QA pairs

In [None]:
def build_qa_pairs(json_filepath, llm_model):
    """
    Run the whole pipeline for one workflow file and return its QA pairs.

    The result lists the deterministic pairs first, followed by one pair per
    LLM-generated question together with its LLM-generated answer.
    """
    parsed = parse_workflow_data(read_json_file(json_filepath))

    # Fixed questions answered directly from the parsed data.
    fixed_pairs = deterministic_questions(parsed)

    # Creative questions from the LLM, then an answer for each of them.
    llm_questions = generate_questions_llm(parsed, llm_model)
    llm_answers = generate_answers_llm(llm_questions, parsed, llm_model)
    llm_pairs = [
        {"question": asked, "answer": reply}
        for asked, reply in zip(llm_questions, llm_answers)
    ]

    return fixed_pairs + llm_pairs

In [None]:
# Run the full pipeline: load, parse, fixed QA pairs plus LLM-generated ones.
qa_data = build_qa_pairs('content-generation-sample.json', 'gpt-3.5-turbo')

In [None]:
# Inspect the combined QA pairs.
print(qa_data)

[{'question': 'What is the exact name of the workflow as recorded in the parsed data?', 'answer': 'Automate Content Generator for WordPress with DeepSeek R1'}, {'question': 'Based on the provided descriptions (sticky notes), what is the primary objective or intended function of this workflow?', 'answer': '## Target\nThis workflow is designed to automatically generate seo-friendly content for wordpress through DeepSeek R1 by giving input ideas on how to structure the article. A cover image is also generated and uploaded with OpenAI DALL-E 3. This flow is designed to be executed automatically (ad "On a schedule" node) and thus have a complete editorial plan.\n\nThis process is useful for blog managers who want to automate content creation and publishing.\n\n## Preliminary step\nCreate a google sheet with the following columns:\n- Date\n- Prompt\n- Title\n- Post ID\n\nFill in only the "Prompt" column with basic ideas that DeepSeek will work on to generate the content.'}, {'question': 'Lis

In [None]:
# Count the combined QA pairs (fixed pairs plus LLM-generated pairs).
print(len(qa_data))

10
