In [2]:
import re
from dotenv import load_dotenv
from langchain_community.utilities.jira import JiraAPIWrapper
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Load environment variables from .env file
load_dotenv()


True

In [3]:
import os
from langchain_community.document_loaders import ConfluenceLoader

# Initialize ConfluenceLoader
loader = ConfluenceLoader(
    url=os.getenv('CONFLUENCE_CLOUD_URL'),
    username=os.getenv('CONFLUENCE_CLOUD_USER'),
    api_key=os.getenv('CONFLUENCE_CLOUD_TOKEN'),
    cloud=True
)

def get_confluence_page_content(page_id):
    """
    Get the content of a Confluence page.
    
    :param page_id: ID of the Confluence page
    :return: Content of the Confluence page
    """
    page = loader.confluence.get_page_by_id(page_id, expand="body.storage")
    content = page["body"]["storage"]["value"]
    return content

def get_confluence_page_comments(page_id):
    """
    Get all comments of a specific Confluence page.
    
    :param page_id: ID of the Confluence page
    :return: List of comments with timestamps
    """
    comments = loader.confluence.get_page_comments(page_id, expand="body.view.value", depth="all")["results"]
    comment_list = []
    for comment in comments:
        body = comment["body"]["view"]["value"]
        created = comment.get("created", "No timestamp")
        comment_list.append({"body": body, "created": created})
    return comment_list

# Example usage
page_id = '3557066091'

# Get page content
page_content = get_confluence_page_content(page_id)
print("Page Content:", page_content)

# Get page comments
page_comments = get_confluence_page_comments(page_id)
print("Page Comments:", page_comments)


Page Content: <h2><ac:emoticon ac:name="blue-star" ac:emoji-shortname=":calendar_spiral:" ac:emoji-id="1f5d3" ac:emoji-fallback="\uD83D\uDDD3" />&nbsp;Date</h2><p><time datetime="2024-09-26" /></p><h2><ac:emoticon ac:name="blue-star" ac:emoji-shortname=":busts_in_silhouette:" ac:emoji-id="1f465" ac:emoji-fallback="\uD83D\uDC65" />&nbsp;Participants</h2><ul><li><p><ac:link><ri:user ri:account-id="60d83aad0717c60069a63fa1" /></ac:link></p></li><li><p><ac:link><ri:user ri:account-id="603d9146c668f4006af6065b" /></ac:link> </p></li><li><p><ac:link><ri:user ri:account-id="603d914f20122b00686adc47" /></ac:link> </p></li><li><p><ac:link><ri:user ri:account-id="712020:b1bd0746-e1c8-4f3d-9c96-8980264c416d" /></ac:link> </p></li><li><p><ac:link><ri:user ri:account-id="603d914dc58c72007121411f" /></ac:link> </p></li><li><p><ac:link><ri:user ri:account-id="5c7c8f00039b847f6a382ab7" /></ac:link>  </p></li><li><p><ac:link><ri:user ri:account-id="5f59cc1b0cef2d007dd12757" /></ac:link></p></li><li><p>

In [6]:
def get_all_child_pages(page_id, max_depth):
    """
    Recursively get child pages under a specific Confluence page up to a certain depth.
    
    :param page_id: ID of the parent Confluence page
    :param max_depth: Maximum depth to recurse
    :return: List of child pages with their content
    """
    def _get_child_pages(page_id, current_depth):
        if current_depth >= max_depth:
            return []
        child_pages = []
        direct_children = loader.confluence.get_page_child_by_type(
            page_id=page_id,
            type='page',
            start=None,
            limit=None,
            expand='body.storage'
        )
        for child in direct_children:
            child_id = child['id']
            child_title = child['title']
            child_content = child['body']['storage']['value']
            child_pages.append({
                'id': child_id,
                'title': child_title,
                'content': child_content
            })
            # Recursively get child pages of the current child
            child_pages.extend(_get_child_pages(child_id, current_depth + 1))
        return child_pages
    return _get_child_pages(page_id, 0)

# Example usage
page_id = '3006138328'  # Parent page ID
max_depth = 2  # Maximum depth

# Retrieve child pages up to the specified depth
limited_child_pages = get_all_child_pages(page_id, max_depth)

# Display the retrieved pages
for page in limited_child_pages:
    print(f"Page ID: {page['id']}, Title: {page['title']}")

Page ID: 3006563417, Title: Work Rules (ENG)
Page ID: 3006432351, Title: Organization & Job (ENG)
Page ID: 3006629295, Title: Work & Leave (ENG)
Page ID: 3008200706, Title: Leave of Absence/Return to Work/Termination (ENG)
Page ID: 3402072375, Title: Job classification table
Page ID: 3008069650, Title: Salary/Compensation (ENG)
Page ID: 3007676493, Title: Evaluation (ENG)
Page ID: 3219196761, Title: Level (ENG)
Page ID: 3110207795, Title: OKR (Objectives & Key Results) (ENG)
Page ID: 3071312765, Title: [Seoul] Business trip
Page ID: 3008102421, Title: Others (ENG)
Page ID: 3078357142, Title: [Global] Work & Leave
Page ID: 3021411267, Title: [Global] [Leave] Annual Leave
Page ID: 3021411287, Title: [Global] [Leave] Sick Leave
Page ID: 3021411373, Title: [Global] Termination Process
Page ID: 3021411342, Title: [Global] Probationary Period
Page ID: 3021411227, Title: [Global] HR Policy
Page ID: 3125870662, Title: [Global] Business Trip Guide
Page ID: 3021409229, Title: [Lunit USA] Busines