In [40]:
import requests
import csv
import sys
sys.path.append("..")  # Add the parent directory to the sys.path
from config import CR_EMAIL, API_KEY, BASE_URL

In [18]:
def save_spaces_to_csv(spaces_data):
    with open('Space and Page details/outputs/spaces_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['Space Name', 'Space Key', 'Type', 'Status', 'Created By', 'Created Date', 'Total Unique Pages']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for space in spaces_data:
            writer.writerow(space)

In [21]:
def get_total_unique_spaces_and_pages_in_confluence():
    email = CR_EMAIL
    api_token = API_KEY
    auth = (email, api_token)
    base_url = "https://cybereason.atlassian.net/wiki/rest/api/"
    unique_spaces = set()
    total_unique_pages = 0
    spaces_data = []

    try:
        # Make request to Confluence API to get total number of spaces
        limit_spaces = 10000  # Adjust the limit as needed
        start_spaces = 0

        while True:
            spaces_response = requests.get(base_url + f"space?start={start_spaces}&limit={limit_spaces}&expand=history", auth=auth)
            spaces_data_raw = spaces_response.json()
            current_spaces = spaces_data_raw.get('results', [])

            if not current_spaces:
                break

            # Loop through each space
            for space in current_spaces:
                space_key = space['key']
                space_name = space['name']
                space_type = space['type']
                space_status = space['status']

                # Get space history
                history = space.get('history', {})

                # Reset total_unique_pages counter for each space
                total_unique_pages_space = 0

                # Make request to Confluence API to get total number of pages for current space
                pages_response = requests.get(base_url + f"content?spaceKey={space_key}&limit=1", auth=auth)
                total_pages = pages_response.json().get('size', 0)

                # Initialize variables for pagination
                limit_pages = 10000  # Adjust the limit as needed
                start_pages = 0

                while start_pages < total_pages:
                    # Make request to Confluence API to fetch pages for current space with pagination
                    pages_response = requests.get(base_url + f"content?spaceKey={space_key}&limit={limit_pages}&start={start_pages}", auth=auth)
                    pages_data = pages_response.json().get('results', [])

                    # Count unique pages for current space
                    unique_pages = set(page['id'] for page in pages_data)
                    total_unique_pages_space += len(unique_pages)

                    # Increment start_pages for next page of results
                    start_pages += limit_pages

                # Append space data to spaces_data
                spaces_data.append({
                    'Space Name': space_name,
                    'Space Key': space_key,
                    'Type': space_type,
                    'Status': space_status,
                    'Created By': history.get('createdBy', {}).get('displayName', 'Unknown'),
                    'Created Date': history.get('createdDate', 'Unknown'),
                    'Total Unique Pages': total_unique_pages_space
                })

                # Update total_unique_pages with unique pages for current space
                total_unique_pages += total_unique_pages_space

                # Display space details
                print(f"Space: {space_name}, Key: {space_key}, Type: {space_type}, Status: {space_status}, Total Unique Pages: {total_unique_pages_space}")

            # Increment start_spaces for next batch of spaces
            start_spaces += limit_spaces

            # If there are more spaces to fetch, continue from the last space
            if 'next' in spaces_data_raw.get('_links', {}):
                next_url = spaces_data_raw['_links']['next']
                start_spaces = int(next_url.split('start=')[-1])

    except KeyboardInterrupt:
        print("Keyboard interrupt detected. Saving data to CSV...")
        save_spaces_to_csv(spaces_data)
        print("Data saved successfully.")

    total_unique_spaces = len(unique_spaces)
    print("\nTotal unique spaces in Confluence:", total_unique_spaces)
    print("Total unique pages in Confluence:", total_unique_pages)

    # Save data to CSV
    save_spaces_to_csv(spaces_data)

    return {"total_unique_spaces": total_unique_spaces, "total_unique_pages": total_unique_pages}

In [22]:
get_total_unique_spaces_and_pages_in_confluence()

Space: 17.5 CRKK testing, Key: CT175, Type: global, Status: current, Total Unique Pages: 15
Space: Aaron Pieper, Key: ~836652237, Type: personal, Status: current, Total Unique Pages: 5
Space: Abbigail Aitken, Key: ~615abf7407ac3c0068fda4cb, Type: personal, Status: current, Total Unique Pages: 5
Space: Abdelkader Cornelius, Key: ~6156af4ba70710006962b1de, Type: personal, Status: current, Total Unique Pages: 5
Space: Abigail Barbara Maines, Key: ~682047288, Type: personal, Status: current, Total Unique Pages: 5
Space: abigail.kluger, Key: ~617914adbcb57400688a60c5, Type: personal, Status: current, Total Unique Pages: 5
Space: ActiveProbe Chapter, Key: ActiveProbe, Type: global, Status: current, Total Unique Pages: 433
Space: Adam Gates, Key: ~615b2576198b4f006863ba35, Type: personal, Status: current, Total Unique Pages: 5
Space: adam.ginton, Key: ~60a0cc1047ba02006ff0d216, Type: personal, Status: current, Total Unique Pages: 5
Space: Adam Hollman, Key: ~5daa04c6f406020c3b181c06, Type: pe

{'total_unique_spaces': 0, 'total_unique_pages': 24928}