In [1]:
from selenium import webdriver
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd

In [2]:
# Entry page of the API viewer that the rest of the notebook scrapes.
base_url='https://pve.proxmox.com/pve-docs/api-viewer/index.html'

# Configure the Edge options
edge_options = webdriver.EdgeOptions()
edge_options.add_argument('--no-sandbox')  # meant to improve stability
edge_options.add_argument('--disable-dev-shm-usage')  # meant to avoid memory problems
edge_options.add_argument('--disable-gpu')  # meant to reduce GPU usage

# Launch the Edge browser; `driver` is the session used by the cells and functions below.
driver = webdriver.Edge(options=edge_options)

In [3]:
# Navigate the browser session to the API viewer page.
driver.get(base_url)

In [4]:
# Column layout shared by the accumulated table and by the frames get_api_info returns.
columns = ['Path','Method','Description','Parameters','Parameter Details']
# Empty accumulator; scraped batches are concatenated onto it in a later cell.
api_table = pd.DataFrame(columns=columns)

In [5]:
# Wait for the page to finish loading: block (up to 3 seconds) until a div whose class
# contains "x-tool-expand" is present in the DOM. `wait` is reused later by get_api_info.
wait = WebDriverWait(driver, 3)
wait.until(EC.presence_of_element_located((By.XPATH, '//div[contains(@class,"x-tool-expand")]')))
# NOTE(review): the click targets a hard-coded element id ("tool-1017-toolEl"), presumably the
# expand tool waited for above; an id of this shape looks auto-generated, so confirm it is
# stable across versions of the page.
driver.find_element(By.XPATH,'//div[@id="tool-1017-toolEl"]').click()

In [6]:
# Collect every span whose class attribute is exactly "x-tree-node-text " (trailing space included).
nodes = driver.find_elements(By.XPATH,'//span[@class="x-tree-node-text "]')
# Remember the last node of this batch so a later cell can resume after it.
# Raises IndexError when no node was found.
last_node = nodes[-1]

54
access
{name}


['access',
 'domains',
 '{realm}',
 'sync',
 'groups',
 '{groupid}',
 'openid',
 'auth-url',
 'login',
 'roles',
 '{roleid}',
 'tfa',
 '{userid}',
 '{id}',
 'users',
 '{userid}',
 'token',
 '{tokenid}',
 'tfa',
 'unlock-tfa',
 'acl',
 'password',
 'permissions',
 'ticket',
 'cluster',
 'acme',
 'account',
 '{name}',
 'plugins',
 '{id}',
 'challenge-schema',
 'directories',
 'meta',
 'tos',
 'backup',
 '{id}',
 'included_volumes',
 'backup-info',
 'not-backed-up',
 'ceph',
 'flags',
 '{flag}',
 'metadata',
 'status',
 'config',
 'nodes',
 '{node}',
 'apiversion',
 'join',
 'qdevice',
 'totem',
 'firewall',
 'aliases',
 '{name}']

In [7]:
def get_parameter():
    """Read the parameter rows of the API entry currently shown in the viewer.

    Uses the module-level ``driver`` to locate every ``tr[@role="row"]`` under the
    panel(s) titled "Parameters" and reads three cells from each row.

    Returns:
        tuple[dict, dict]: ``(parameter, parameter_descriptions)``. The first maps
        each parameter name (first cell) to its type (second cell); the second maps
        the same name to its description (last cell). Rows whose name or type text
        is empty are left out, as are rows that lack the expected cells.

    Raises:
        selenium.common.exceptions.WebDriverException: for any browser failure
            other than a missing cell or a stale row (e.g. a closed session), so
            that a broken session is not mistaken for "no parameters".
    """
    # Imported here so the function stays self-contained with respect to the import cell.
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
    )

    parameter = {}
    parameter_descriptions = {}
    parameters_rows = driver.find_elements(By.XPATH, '//div[text()="Parameters"]/ancestor::*[5]//tr[@role="row"]')
    for row in parameters_rows:
        try:
            name = row.find_element(By.XPATH, './/td[1]//div').text.strip()
            type_ = row.find_element(By.XPATH, './/td[2]//div').text.strip()
            description = row.find_element(By.XPATH, './/td[last()]//div').text.strip()  # parameter description
        except (NoSuchElementException, StaleElementReferenceException):
            # The row has no such cells or was re-rendered meanwhile: not a usable parameter row.
            continue
        if name and type_:
            parameter[name] = type_
            parameter_descriptions[name] = description
    return parameter, parameter_descriptions

In [8]:
def _node_label(node):
    """Return the node's text for log output, or "Unknown" when it cannot be read."""
    try:
        return node.text if node.is_enabled() else "Unknown"
    except Exception:
        # Reading a stale or detached element raises; building a log message
        # must never abort the scrape.
        return "Unknown"


def _report_node_failure(node, exc, error_catch):
    """Print the failure for ``node`` and append it to ``error_catch`` when one is given."""
    print(f'Error occurred for node: {_node_label(node)}')
    print(f'Error details: {str(exc)}')
    if error_catch is not None:
        error_catch.append(node)


def _scrape_open_tabs():
    """Return one record dict per tab of the API entry currently displayed."""
    records = []
    tabs = driver.find_elements(By.XPATH,'//a[@role="tab"]')
    for i, tab in enumerate(tabs):
        tab.click()
        usages = driver.find_elements(By.XPATH,'//td[contains(text(),"HTTP")]/../td[2]')
        descriptions = driver.find_elements(By.XPATH,'//div[text()="Description"]/ancestor::*[5]//div[@class="x-autocontainer-innerCt"]')
        # The usage cell reads "<METHOD> <path>"; split once and reuse both parts.
        usage_parts = usages[i].text.split(' ')
        parameters, parameter_descriptions = get_parameter()
        records.append({
            'Path':usage_parts[1],
            'Method':usage_parts[0],
            'Description':descriptions[i].text,
            'Parameters':parameters,
            'Parameter Details':parameter_descriptions
        })
    return records


def get_api_info(nodes:list[WebElement],error_catch:list=None):
    """Click each tree node and scrape the API entries it exposes.

    Relies on the module-level ``driver``, ``wait``, ``columns`` and
    ``get_parameter``. For every node the function scrolls it into view, waits
    until it is clickable, clicks it, then visits each ``a[@role="tab"]`` and
    records path, method, description and parameters for that tab.

    A node that cannot be clicked, or whose tabs cannot be read (for example when
    fewer usage/description elements than tabs are found), is reported on stdout,
    appended to ``error_catch`` when that list is given, and contributes no rows.
    The remaining nodes are still processed, so one bad node no longer discards
    everything collected so far.

    Args:
        nodes: Tree-node elements to visit, in order.
        error_catch: Optional list that receives every node that failed.

    Returns:
        pandas.DataFrame: one row per scraped tab, with the columns listed in the
        module-level ``columns`` and a fresh 0..n-1 index.
    """
    records = []
    total = len(nodes)
    count = 0
    for node in nodes:
        try:
            driver.execute_script("arguments[0].scrollIntoView(true);", node)
            wait.until(EC.element_to_be_clickable(node))
            node.click()
        except Exception as exc:
            _report_node_failure(node, exc, error_catch)
            continue
        try:
            node_records = _scrape_open_tabs()
        except Exception as exc:
            # Drop the node's partial rows so that retrying it later cannot duplicate them.
            _report_node_failure(node, exc, error_catch)
            continue
        records.extend(node_records)
        # Progress counts successfully scraped nodes only.
        count += 1
        print(f'{count}/{total}',flush=True,end='\r')
    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(records, columns=columns)

In [50]:
# Nodes that get_api_info could not process are appended to this list.
error_list=[]
# Scrape the current batch of tree nodes.
insert_table = get_api_info(nodes,error_catch=error_list)
# Display only the last rows of the batch as a quick check.
insert_table.tail()

20/20

Unnamed: 0,Path,Method,Description,Parameters,Parameter Details
28,/api2/json/storage,POST,Create a new storage.,"{'storage': 'string', 'type': 'enum', 'authsup...","{'storage': 'The storage identifier.', 'type':..."
29,/api2/json/storage/{storage},GET,Read storage configuration.,{'storage': 'string'},{'storage': 'The storage identifier.'}
30,/api2/json/storage/{storage},PUT,Update storage configuration.,"{'storage': 'string', 'blocksize': 'string', '...","{'storage': 'The storage identifier.', 'blocks..."
31,/api2/json/storage/{storage},DELETE,Delete storage configuration.,{'storage': 'string'},{'storage': 'The storage identifier.'}
32,/api2/json/version,GET,"API version details, including some parts of t...",{},{}


In [51]:
# Show the text of every node recorded as failed during the last get_api_info call.
[error.text for error in error_list]

[]

In [52]:
# Append the freshly scraped batch to the accumulated table, renumbering the index.
# NOTE(review): api_table is rebuilt from itself, so running this cell twice appends
# the same batch twice.
api_table = pd.concat([api_table,insert_table],ignore_index=True)
# Display the accumulated table.
api_table

Unnamed: 0,Path,Method,Description,Parameters,Parameter Details
0,/api2/json/access,GET,Directory index.,{},{}
1,/api2/json/access/domains,GET,Authentication domain index.,{},{}
2,/api2/json/access/domains,POST,Add an authentication server.,"{'realm': 'string', 'type': 'enum', 'acr-value...","{'realm': 'Authentication domain ID', 'type': ..."
3,/api2/json/access/domains/{realm},GET,Get auth server configuration.,{'realm': 'string'},{'realm': 'Authentication domain ID'}
4,/api2/json/access/domains/{realm},PUT,Update authentication server settings.,"{'realm': 'string', 'acr-values': 'string', 'a...","{'realm': 'Authentication domain ID', 'acr-val..."
...,...,...,...,...,...
602,/api2/json/storage,POST,Create a new storage.,"{'storage': 'string', 'type': 'enum', 'authsup...","{'storage': 'The storage identifier.', 'type':..."
603,/api2/json/storage/{storage},GET,Read storage configuration.,{'storage': 'string'},{'storage': 'The storage identifier.'}
604,/api2/json/storage/{storage},PUT,Update storage configuration.,"{'storage': 'string', 'blocksize': 'string', '...","{'storage': 'The storage identifier.', 'blocks..."
605,/api2/json/storage/{storage},DELETE,Delete storage configuration.,{'storage': 'string'},{'storage': 'The storage identifier.'}


In [53]:
# Re-read the tree nodes and keep only those that come after the previously recorded last node.
# NOTE(review): when last_node is not found in the fresh list, the full list is kept,
# so every node would be scraped again — confirm that this fallback is intended.
nodes = driver.find_elements(By.XPATH,'//span[@class="x-tree-node-text "]')
if last_node in nodes:
    nodes = nodes[nodes.index(last_node)+1:]
# Advance the marker only when there are new nodes; indexing an empty list would raise IndexError.
# NOTE(review): the saved output of this cell shows an IndexError that this guard prevents —
# presumably it comes from an earlier version of the cell; re-run to refresh it.
if len(nodes)>0:
    last_node = nodes[-1]
# Display the text of the nodes in the new batch.
[node.text for node in nodes]

True
False
0


IndexError: list index out of range

In [56]:
# Write the accumulated table to proxmox_api.csv, omitting the index column.
api_table.to_csv('proxmox_api.csv',index=False)

In [57]:
# Shut down the browser and end the WebDriver session.
driver.quit()