From 64bbddd7265f8f374ab4cfd0063fceb8fba8f69e Mon Sep 17 00:00:00 2001 From: AmishaBisht Date: Tue, 5 May 2026 17:31:43 +0530 Subject: [PATCH 1/5] chore: add the dify workflow --- .github/scripts/sync_to_dify.py | 96 ++++++++++++++++++++++++++++++ .github/workflows/sync_to_dify.yml | 32 ++++++++++ 2 files changed, 128 insertions(+) create mode 100644 .github/scripts/sync_to_dify.py create mode 100644 .github/workflows/sync_to_dify.yml diff --git a/.github/scripts/sync_to_dify.py b/.github/scripts/sync_to_dify.py new file mode 100644 index 000000000..c3cc3ae04 --- /dev/null +++ b/.github/scripts/sync_to_dify.py @@ -0,0 +1,96 @@ +import os +import subprocess +import requests + +API_KEY = os.environ["DIFY_API_KEY"] +DATASET_ID = os.environ["DIFY_DATASET_ID"] +BASE_URL = os.environ.get("DIFY_BASE_URL", "https://api.dify.ai/v1").rstrip("/") + +HEADERS = { + "Authorization": f"Bearer {API_KEY}", +} + + +def get_changed_md_files(): + result = subprocess.run( + ["git", "diff", "--name-only", "HEAD~1", "HEAD"], + capture_output=True, text=True + ) + files = result.stdout.strip().splitlines() + return [f for f in files if f.endswith(".md") and os.path.exists(f)] + + +def get_existing_documents(): + docs = {} + page = 1 + while True: + resp = requests.get( + f"{BASE_URL}/datasets/{DATASET_ID}/documents", + headers=HEADERS, + params={"page": page, "limit": 100} + ) + resp.raise_for_status() + data = resp.json() + for doc in data.get("data", []): + docs[doc["name"]] = doc["id"] + if not data.get("has_more"): + break + page += 1 + return docs + + +def create_document(file_path, content): + filename = os.path.basename(file_path) + resp = requests.post( + f"{BASE_URL}/datasets/{DATASET_ID}/document/create-by-file", + headers=HEADERS, + files={"file": (filename, content.encode("utf-8"), "text/markdown")}, + data={ + "data": '{"indexing_technique":"high_quality","process_rule":{"mode":"automatic"}}' + } + ) + resp.raise_for_status() + print(f" āœ… Created: {filename}") + + +def update_document(doc_id, file_path, content): + filename = os.path.basename(file_path) + resp = requests.post( + f"{BASE_URL}/datasets/{DATASET_ID}/documents/{doc_id}/update-by-file", + headers=HEADERS, + files={"file": (filename, content.encode("utf-8"), "text/markdown")}, + data={ + "data": '{"indexing_technique":"high_quality","process_rule":{"mode":"automatic"}}' + } + ) + resp.raise_for_status() + print(f" šŸ”„ Updated: {filename}") + + +def main(): + changed_files = get_changed_md_files() + + if not changed_files: + print("No markdown files changed. Nothing to sync.") + return + + print(f"Found {len(changed_files)} changed file(s). Fetching existing Dify docs...") + existing_docs = get_existing_documents() + + for file_path in changed_files: + filename = os.path.basename(file_path) + print(f"\nProcessing: {file_path}") + + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + if filename in existing_docs: + update_document(existing_docs[filename], file_path, content) + else: + create_document(file_path, content) + + print("\nāœ… Sync complete!") + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/sync_to_dify.yml b/.github/workflows/sync_to_dify.yml new file mode 100644 index 000000000..1159c4d99 --- /dev/null +++ b/.github/workflows/sync_to_dify.yml @@ -0,0 +1,32 @@ +name: Sync Docs to Dify Knowledge Base + +on: + push: + branches: + - main + paths: + - "docs/**" + +jobs: + sync: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + with: + fetch-depth: 2 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: pip install requests + + - name: Sync changed docs to Dify + env: + DIFY_API_KEY: ${{ secrets.DIFY_API_KEY }} + DIFY_DATASET_ID: ${{ secrets.DIFY_DATASET_ID }} + DIFY_BASE_URL: ${{ secrets.DIFY_BASE_URL }} + run: python .github/scripts/sync_to_dify.py From c4132cc7d3bfdde3dcaafded6d3d8eeea973f11e Mon Sep 17 00:00:00 2001 From: AmishaBisht Date: Tue, 5 May 2026 18:09:16 +0530 Subject: [PATCH 2/5] test: test the work flow --- .github/scripts/sync_to_dify.py | 48 ++++++++++++++++++++---------- .github/workflows/sync_to_dify.yml | 1 + 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/.github/scripts/sync_to_dify.py b/.github/scripts/sync_to_dify.py index c3cc3ae04..c588ef16e 100644 --- a/.github/scripts/sync_to_dify.py +++ b/.github/scripts/sync_to_dify.py @@ -17,7 +17,16 @@ def get_changed_md_files(): capture_output=True, text=True ) files = result.stdout.strip().splitlines() - return [f for f in files if f.endswith(".md") and os.path.exists(f)] + return [ + f for f in files + if f.endswith(".md") and f.startswith("docs/") and os.path.exists(f) + ] + + +def to_doc_name(file_path): + # Use the full repo-relative path so files with the same basename + # in different folders don't overwrite each other in Dify. + return file_path.replace("/", "__") def get_existing_documents(): @@ -40,31 +49,31 @@ def get_existing_documents(): def create_document(file_path, content): - filename = os.path.basename(file_path) + doc_name = to_doc_name(file_path) resp = requests.post( f"{BASE_URL}/datasets/{DATASET_ID}/document/create-by-file", headers=HEADERS, - files={"file": (filename, content.encode("utf-8"), "text/markdown")}, + files={"file": (doc_name, content.encode("utf-8"), "text/markdown")}, data={ "data": '{"indexing_technique":"high_quality","process_rule":{"mode":"automatic"}}' } ) resp.raise_for_status() - print(f" āœ… Created: {filename}") + print(f" āœ… Created: {doc_name}") def update_document(doc_id, file_path, content): - filename = os.path.basename(file_path) + doc_name = to_doc_name(file_path) resp = requests.post( f"{BASE_URL}/datasets/{DATASET_ID}/documents/{doc_id}/update-by-file", headers=HEADERS, - files={"file": (filename, content.encode("utf-8"), "text/markdown")}, + files={"file": (doc_name, content.encode("utf-8"), "text/markdown")}, data={ "data": '{"indexing_technique":"high_quality","process_rule":{"mode":"automatic"}}' } ) resp.raise_for_status() - print(f" šŸ”„ Updated: {filename}") + print(f" šŸ”„ Updated: {doc_name}") def main(): @@ -77,17 +86,26 @@ def main(): print(f"Found {len(changed_files)} changed file(s). Fetching existing Dify docs...") existing_docs = get_existing_documents() + failures = [] for file_path in changed_files: - filename = os.path.basename(file_path) + doc_name = to_doc_name(file_path) print(f"\nProcessing: {file_path}") - with open(file_path, "r", encoding="utf-8") as f: - content = f.read() - - if filename in existing_docs: - update_document(existing_docs[filename], file_path, content) - else: - create_document(file_path, content) + try: + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + if doc_name in existing_docs: + update_document(existing_docs[doc_name], file_path, content) + else: + create_document(file_path, content) + except Exception as e: + print(f" āŒ Failed: {file_path} — {e}") + failures.append(file_path) + + if failures: + print(f"\nāš ļø Sync finished with {len(failures)} failure(s): {failures}") + raise SystemExit(1) print("\nāœ… Sync complete!") diff --git a/.github/workflows/sync_to_dify.yml b/.github/workflows/sync_to_dify.yml index 1159c4d99..a456b717e 100644 --- a/.github/workflows/sync_to_dify.yml +++ b/.github/workflows/sync_to_dify.yml @@ -4,6 +4,7 @@ on: push: branches: - main + - dify-workflow paths: - "docs/**" From 972b71e78456ced96e709f9ac9434788cc1d4fe0 Mon Sep 17 00:00:00 2001 From: AmishaBisht Date: Tue, 5 May 2026 22:12:51 +0530 Subject: [PATCH 3/5] test: trigger dify sync --- .github/scripts/sync_to_dify.py | 18 ++++++++++++++++-- .github/workflows/sync_to_dify.yml | 1 + docs/01. Glific Overview.md | 1 + 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/.github/scripts/sync_to_dify.py b/.github/scripts/sync_to_dify.py index c588ef16e..60fe534fd 100644 --- a/.github/scripts/sync_to_dify.py +++ b/.github/scripts/sync_to_dify.py @@ -23,6 +23,15 @@ def get_changed_md_files(): ] +def get_all_md_files(): + files = [] + for root, _, names in os.walk("docs"): + for n in names: + if n.endswith(".md"): + files.append(os.path.join(root, n)) + return sorted(files) + + def to_doc_name(file_path): # Use the full repo-relative path so files with the same basename # in different folders don't overwrite each other in Dify. @@ -77,13 +86,18 @@ def update_document(doc_id, file_path, content): def main(): - changed_files = get_changed_md_files() + event = os.environ.get("GITHUB_EVENT_NAME", "") + if event == "workflow_dispatch": + print("Manual run: syncing ALL markdown files under docs/") + changed_files = get_all_md_files() + else: + changed_files = get_changed_md_files() if not changed_files: print("No markdown files changed. Nothing to sync.") return - print(f"Found {len(changed_files)} changed file(s). Fetching existing Dify docs...") + print(f"Found {len(changed_files)} file(s) to sync. Fetching existing Dify docs...") existing_docs = get_existing_documents() failures = [] diff --git a/.github/workflows/sync_to_dify.yml b/.github/workflows/sync_to_dify.yml index a456b717e..8e2c6b936 100644 --- a/.github/workflows/sync_to_dify.yml +++ b/.github/workflows/sync_to_dify.yml @@ -7,6 +7,7 @@ on: - dify-workflow paths: - "docs/**" + workflow_dispatch: jobs: sync: diff --git a/docs/01. Glific Overview.md b/docs/01. Glific Overview.md index b9f37b429..7a26f3767 100644 --- a/docs/01. Glific Overview.md +++ b/docs/01. Glific Overview.md @@ -189,3 +189,4 @@ If your organization is undergoing a strategizing phase, pausing operations, or A subscription fee of INR 1500 per month + taxes will apply for the account maintenance. --- + From 828850e448761cfcfbe8b7fd1274075f9cc5a1a7 Mon Sep 17 00:00:00 2001 From: AmishaBisht Date: Tue, 5 May 2026 22:44:06 +0530 Subject: [PATCH 4/5] chore: remove test-only changes before merge - drop dify-workflow from workflow branches filter - revert trailing newline in docs/01. Glific Overview.md Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/sync_to_dify.yml | 1 - docs/01. Glific Overview.md | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/workflows/sync_to_dify.yml b/.github/workflows/sync_to_dify.yml index 8e2c6b936..556a64969 100644 --- a/.github/workflows/sync_to_dify.yml +++ b/.github/workflows/sync_to_dify.yml @@ -4,7 +4,6 @@ on: push: branches: - main - - dify-workflow paths: - "docs/**" workflow_dispatch: diff --git a/docs/01. Glific Overview.md b/docs/01. Glific Overview.md index 7a26f3767..b9f37b429 100644 --- a/docs/01. Glific Overview.md +++ b/docs/01. Glific Overview.md @@ -189,4 +189,3 @@ If your organization is undergoing a strategizing phase, pausing operations, or A subscription fee of INR 1500 per month + taxes will apply for the account maintenance. --- - From e57e5cbbc84bfacd666496d9f4f3d9200df628ec Mon Sep 17 00:00:00 2001 From: AmishaBisht Date: Tue, 5 May 2026 22:50:13 +0530 Subject: [PATCH 5/5] chore: scope workflow permissions to contents: read --- .github/workflows/sync_to_dify.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/sync_to_dify.yml b/.github/workflows/sync_to_dify.yml index 556a64969..e00fcae52 100644 --- a/.github/workflows/sync_to_dify.yml +++ b/.github/workflows/sync_to_dify.yml @@ -1,5 +1,8 @@ name: Sync Docs to Dify Knowledge Base +permissions: + contents: read + on: push: branches: