# save config.py and import the API_KEY and CR_EMAIL

In [1]:
from config import API_KEY, CR_EMAIL

# Get all unique pages up to space 1000. Confluence has a limit of 1000 spaces per request, so you must then run the next cell, which starts from space 1001.

In [None]:
# spaces 1-1000
import csv
import requests
import sys

def save_page_details_to_csv(page_details, filename):
    """Write the collected page records to ``filename`` as CSV.

    The file is created (or overwritten) with a header row followed by one
    row per record, in the order the records appear in ``page_details``.

    Args:
        page_details: Iterable of dicts, each holding the keys
            ``space_name``, ``page_name``, ``created_by``,
            ``last_modified_by``, ``last_modified_date``, ``views`` and
            ``web_url``.
        filename: Path of the CSV file to write.

    Raises:
        KeyError: If a record lacks one of the expected keys.
    """
    header = ['Space Name', 'Page Name', 'Created By', 'Last Modified By', 'Last Modified Date', 'Views', 'Web URL']
    # Record keys, listed in the same order as the header columns.
    columns = (
        'space_name',
        'page_name',
        'created_by',
        'last_modified_by',
        'last_modified_date',
        'views',
        'web_url',
    )
    with open(filename, mode='w', newline='', encoding='utf-8') as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        # The generator keeps rows streaming out one at a time.
        out.writerows([record[column] for column in columns] for record in page_details)

def get_total_unique_spaces_and_pages_in_confluence():
    """Export details of every page in the first batch of Confluence spaces.

    Lists spaces starting at offset 0 and, for each space, walks all of its
    content pages. For every page it records the space name, title, creator,
    last modifier, last-modified text, view count and web URL. On success the
    rows are written to ``confluence_pages.csv`` via
    ``save_page_details_to_csv``.

    Credentials come from the module-level ``CR_EMAIL`` and ``API_KEY``.

    Returns:
        dict: ``{"total_unique_spaces": int, "total_unique_pages": int}``.

    Raises:
        SystemExit: Code 0 after a keyboard interrupt, code 1 after any other
            error. In both cases the rows gathered so far are first saved to
            ``confluence_pages_interrupted.csv``.
    """
    auth = (CR_EMAIL, API_KEY)
    base_url = "https://cybereason.atlassian.net/wiki/"
    request_timeout = 60  # seconds; without one a stalled connection blocks the export forever
    unique_spaces = set()
    page_details = []

    limit_spaces = 10000  # Requested page size; the server may return fewer
    start_spaces = 0

    def fetch_json(path, params=None):
        """GET ``path`` below ``base_url``; return the JSON object, or None on failure."""
        response = requests.get(base_url + path, params=params, auth=auth, timeout=request_timeout)
        if not response.ok:
            return None
        try:
            payload = response.json()
        except ValueError:  # body was not valid JSON
            return None
        return payload if isinstance(payload, dict) else None

    try:
        # Fetch spaces
        while True:
            spaces_data = fetch_json("rest/api/space", {"start": start_spaces, "limit": limit_spaces})
            if spaces_data is None:
                raise RuntimeError(f"Could not list spaces starting at index {start_spaces}")
            current_spaces = spaces_data.get('results', [])

            # Loop through each space
            for space in current_spaces:
                space_key = space.get('key')
                space_name = space.get('name')
                unique_spaces.add(space_key)

                # Display space name
                print("Space Name:", space_name)

                # Page through the space's content: one request only returns
                # as many results as the server allows, so keep going while
                # the response advertises a `next` link.
                pages_in_space = 0
                page_start = 0
                while True:
                    pages_payload = fetch_json(
                        "rest/api/content",
                        {"spaceKey": space_key, "start": page_start, "limit": 1000},
                    )
                    if pages_payload is None:
                        # Keep the export going; one unreadable space should
                        # not discard everything else.
                        print("Warning: could not list pages for space", space_key)
                        break
                    pages_data = pages_payload.get('results', [])

                    # Loop through each page in the current batch
                    for page in pages_data:
                        page_id = page.get('id')
                        page_name = page.get('title', 'Unknown')

                        # Fetch detailed information about the page
                        page_details_data = fetch_json(f"rest/api/content/{page_id}") or {}

                        creator = page_details_data.get('history', {}).get('createdBy', {}).get('displayName', 'Unknown')

                        # Last-modified information lives on the version object
                        version_info = page_details_data.get('version', {})
                        last_modifier = version_info.get('by', {}).get('displayName', 'Unknown')
                        last_modified_date = version_info.get('friendlyWhen', 'Unknown')

                        # View counts are best-effort: fall back to 'Unknown'
                        # when the analytics call fails for this page.
                        views_payload = fetch_json(f"rest/api/analytics/content/{page_id}/views")
                        views_data = views_payload.get('count', 'Unknown') if views_payload else 'Unknown'

                        # Join without doubling the slash, and tolerate a
                        # missing link instead of raising TypeError.
                        webui = page_details_data.get('_links', {}).get('webui')
                        web_url = base_url.rstrip('/') + '/' + webui.lstrip('/') if webui else 'Unknown'

                        # Add page details to the list
                        page_details.append({
                            'space_name': space_name,
                            'page_name': page_name,
                            'created_by': creator,
                            'last_modified_by': last_modifier,
                            'last_modified_date': last_modified_date,
                            'views': views_data,
                            'web_url': web_url,
                        })

                    pages_in_space += len(pages_data)
                    page_start += len(pages_data)
                    if not pages_data or 'next' not in pages_payload.get('_links', {}):
                        break

                # Display total unique pages for current space
                print("Total Unique Pages in this space:", pages_in_space)
                print()  # Add a blank line for separation

            # Check if there are more spaces to fetch.
            # NOTE(review): the offset advances by the *requested* limit, not
            # by the number of spaces returned, so if the server caps the page
            # size this function only covers the first capped batch. The
            # notebook relies on that: a later cell restarts from a higher
            # offset. Changing the step here alone would make that later pass
            # emit duplicate rows - fix both together.
            start_spaces += limit_spaces
            if 'next' not in spaces_data.get('_links', {}):
                break

        total_unique_spaces = len(unique_spaces)
        print("\nTotal unique spaces in Confluence:", total_unique_spaces)
        print("Total unique pages in Confluence:", len(page_details))

        # Save page details to CSV
        save_page_details_to_csv(page_details, 'confluence_pages.csv')

        return {"total_unique_spaces": total_unique_spaces, "total_unique_pages": len(page_details)}

    except KeyboardInterrupt:
        print("\nKeyboard interrupt detected. Saving current progress to CSV...")
        save_page_details_to_csv(page_details, 'confluence_pages_interrupted.csv')
        print("Current progress saved.")
        sys.exit(0)

    except Exception as e:
        # Top-level boundary: report, keep what was gathered, and stop.
        print("An error occurred:", str(e))
        print("Saving current progress to CSV...")
        save_page_details_to_csv(page_details, 'confluence_pages_interrupted.csv')
        print("Current progress saved.")
        sys.exit(1)

# Run the first pass (space offset 0); on success this writes confluence_pages.csv.
get_total_unique_spaces_and_pages_in_confluence()

# Get all unique pages from space 1001

In [None]:
# spaces 1001 and above
import csv
import requests
import sys

def save_page_details_to_csv(page_details, filename):
    """Append page records to ``filename``, adding the header if the file is empty.

    The file is opened in append mode, so existing rows are kept. When the
    file is new or empty the column header is written first; this keeps the
    file self-describing and lets a consumer that skips a header row do so
    without losing a data row.

    Args:
        page_details: Iterable of dicts, each holding the keys
            ``space_name``, ``page_name``, ``created_by``,
            ``last_modified_by``, ``last_modified_date``, ``views`` and
            ``web_url``.
        filename: Path of the CSV file to append to.

    Raises:
        KeyError: If a record lacks one of the expected keys.
    """
    with open(filename, mode='a', newline='', encoding='utf-8') as file:
        # Move to the end explicitly so tell() reports 0 only for an empty file.
        file.seek(0, 2)
        needs_header = file.tell() == 0

        writer = csv.writer(file)
        if needs_header:
            writer.writerow(['Space Name', 'Page Name', 'Created By', 'Last Modified By', 'Last Modified Date', 'Views', 'Web URL'])
        for page in page_details:
            writer.writerow([
                page['space_name'],
                page['page_name'],
                page['created_by'],
                page['last_modified_by'],
                page['last_modified_date'],
                page['views'],
                page['web_url']
            ])

def get_total_unique_spaces_and_pages_in_confluence(start_index=0):
    """Export details of every page in the Confluence spaces from ``start_index`` on.

    Lists spaces beginning at the zero-based offset ``start_index`` and keeps
    requesting further batches until the server reports no more. For each
    space it walks all content pages and records the space name, title,
    creator, last modifier, last-modified text, view count and web URL. On
    success the rows are passed to ``save_page_details_to_csv`` with the
    target ``confluence_pages(1000+).csv``.

    Credentials come from the module-level ``CR_EMAIL`` and ``API_KEY``.

    Args:
        start_index: Zero-based offset of the first space to process.

    Returns:
        dict: ``{"total_unique_spaces": int, "total_unique_pages": int}``.

    Raises:
        SystemExit: Code 0 after a keyboard interrupt, code 1 after any other
            error. In both cases the rows gathered so far are first saved to
            ``confluence_pages_interrupted.csv``.
    """
    auth = (CR_EMAIL, API_KEY)
    base_url = "https://cybereason.atlassian.net/wiki/"
    request_timeout = 60  # seconds; without one a stalled connection blocks the export forever
    unique_spaces = set()
    page_details = []

    limit_spaces = 10000  # Requested page size; the server may return fewer
    start_spaces = start_index

    def fetch_json(path, params=None):
        """GET ``path`` below ``base_url``; return the JSON object, or None on failure."""
        response = requests.get(base_url + path, params=params, auth=auth, timeout=request_timeout)
        if not response.ok:
            return None
        try:
            payload = response.json()
        except ValueError:  # body was not valid JSON
            return None
        return payload if isinstance(payload, dict) else None

    try:
        # Fetch spaces
        while True:
            spaces_data = fetch_json("rest/api/space", {"start": start_spaces, "limit": limit_spaces})
            if spaces_data is None:
                raise RuntimeError(f"Could not list spaces starting at index {start_spaces}")
            current_spaces = spaces_data.get('results', [])

            # Loop through each space
            for space in current_spaces:
                space_key = space.get('key')
                space_name = space.get('name')
                unique_spaces.add(space_key)

                # Display space name
                print("Space Name:", space_name)

                # Page through the space's content: one request only returns
                # as many results as the server allows, so keep going while
                # the response advertises a `next` link.
                pages_in_space = 0
                page_start = 0
                while True:
                    pages_payload = fetch_json(
                        "rest/api/content",
                        {"spaceKey": space_key, "start": page_start, "limit": 1000},
                    )
                    if pages_payload is None:
                        # Keep the export going; one unreadable space should
                        # not discard everything else.
                        print("Warning: could not list pages for space", space_key)
                        break
                    pages_data = pages_payload.get('results', [])

                    # Loop through each page in the current batch
                    for page in pages_data:
                        page_id = page.get('id')
                        page_name = page.get('title', 'Unknown')

                        # Fetch detailed information about the page
                        page_details_data = fetch_json(f"rest/api/content/{page_id}") or {}

                        creator = page_details_data.get('history', {}).get('createdBy', {}).get('displayName', 'Unknown')

                        # Last-modified information lives on the version object
                        version_info = page_details_data.get('version', {})
                        last_modifier = version_info.get('by', {}).get('displayName', 'Unknown')
                        last_modified_date = version_info.get('friendlyWhen', 'Unknown')

                        # View counts are best-effort: fall back to 'Unknown'
                        # when the analytics call fails for this page.
                        views_payload = fetch_json(f"rest/api/analytics/content/{page_id}/views")
                        views_data = views_payload.get('count', 'Unknown') if views_payload else 'Unknown'

                        # Join without doubling the slash, and tolerate a
                        # missing link instead of raising TypeError.
                        webui = page_details_data.get('_links', {}).get('webui')
                        web_url = base_url.rstrip('/') + '/' + webui.lstrip('/') if webui else 'Unknown'

                        # Add page details to the list
                        page_details.append({
                            'space_name': space_name,
                            'page_name': page_name,
                            'created_by': creator,
                            'last_modified_by': last_modifier,
                            'last_modified_date': last_modified_date,
                            'views': views_data,
                            'web_url': web_url,
                        })

                    pages_in_space += len(pages_data)
                    page_start += len(pages_data)
                    if not pages_data or 'next' not in pages_payload.get('_links', {}):
                        break

                # Display total unique pages for current space
                print("Total Unique Pages in this space:", pages_in_space)
                print()  # Add a blank line for separation

            # Advance by the number of spaces actually returned: the server
            # may cap the batch below `limit_spaces`, and stepping by the
            # requested limit would jump over every space in between.
            start_spaces += len(current_spaces)
            if not current_spaces or 'next' not in spaces_data.get('_links', {}):
                break

        total_unique_spaces = len(unique_spaces)
        print("\nTotal unique spaces in Confluence:", total_unique_spaces)
        print("Total unique pages in Confluence:", len(page_details))

        # Save page details to CSV
        save_page_details_to_csv(page_details, 'confluence_pages(1000+).csv')

        return {"total_unique_spaces": total_unique_spaces, "total_unique_pages": len(page_details)}

    except KeyboardInterrupt:
        print("\nKeyboardInterrupt detected.")
        print("Saving current progress to CSV...")
        save_page_details_to_csv(page_details, 'confluence_pages_interrupted.csv')
        print("Current progress saved.")
        sys.exit(0)

    except Exception as e:
        # Top-level boundary: report, keep what was gathered, and stop.
        print("An error occurred:", str(e))
        print("Saving current progress to CSV...")
        save_page_details_to_csv(page_details, 'confluence_pages_interrupted.csv')
        print("Current progress saved.")
        sys.exit(1)

# `start_index` is a zero-based offset: the first 1000 spaces are offsets
# 0-999, so the 1001st space is offset 1000 (1001 would skip one space).
get_total_unique_spaces_and_pages_in_confluence(start_index=1000)

# Combine both csv files

In [None]:
import csv

def combine_csv_files(file1, file2, output_file):
    """Concatenate two CSV files into ``output_file`` under a single header.

    The first row of ``file1`` is taken as the header and written once,
    followed by the remaining rows of ``file1`` and then the rows of
    ``file2``. The first row of ``file2`` is dropped only when it repeats
    that header, so a headerless second file loses no data. Empty input
    files are tolerated.

    Args:
        file1: Path of the first CSV file (expected to start with a header).
        file2: Path of the second CSV file (with or without the same header).
        output_file: Path of the combined CSV file; created or overwritten.
    """
    with open(file1, 'r', newline='', encoding='utf-8') as f1, \
            open(file2, 'r', newline='', encoding='utf-8') as f2, \
            open(output_file, 'w', newline='', encoding='utf-8') as out_file:
        reader1 = csv.reader(f1)
        reader2 = csv.reader(f2)
        writer = csv.writer(out_file)

        # Write headers from the first file (None when the file is empty)
        headers = next(reader1, None)
        if headers is not None:
            writer.writerow(headers)

        # Write rows from the first file
        writer.writerows(reader1)

        # Skip the second file's first row only if it really is the header;
        # otherwise it is data and must be kept.
        first_row = next(reader2, None)
        if first_row is not None and first_row != headers:
            writer.writerow(first_row)

        # Write the remaining rows from the second file
        writer.writerows(reader2)

# Combine CSV files: merge the two per-pass exports into combined_confluence_pages.csv
combine_csv_files('confluence_pages.csv', 'confluence_pages(1000+).csv', 'combined_confluence_pages.csv')