In [1]:
import json
import os
import sys
import pandas as pd

In [2]:
# HTTP verbs that can appear as operation keys inside an OpenAPI/Swagger path item.
# Any other key ("parameters", "summary", "$ref", "x-..." extensions) is not an operation.
_HTTP_METHODS = frozenset(
    {"get", "post", "put", "delete", "patch", "options", "head", "trace"}
)


def _postman_url_to_str(url):
    """Return a printable URL for a Postman ``url`` field (string, dict or missing)."""
    if isinstance(url, str):
        return url
    if not isinstance(url, dict):
        return ""

    raw = url.get("raw") or url.get("url")
    if isinstance(raw, str) and raw:
        return raw

    # No raw URL stored: rebuild it from the structured parts.
    host = url.get("host") or ""
    if isinstance(host, (list, tuple)):
        # Host segments are domain labels, so they are joined with dots.
        host = ".".join(str(label) for label in host)
    host = str(host)

    path = url.get("path") or ""
    if isinstance(path, (list, tuple)):
        # A path segment is either a plain string or an object carrying a "value".
        path = "/".join(
            str(segment.get("value", "")) if isinstance(segment, dict) else str(segment)
            for segment in path
        )
    path = str(path).lstrip("/")

    if not host and not path:
        return ""

    protocol = url.get("protocol")
    prefix = f"{protocol}://" if host and isinstance(protocol, str) and protocol else ""
    port = url.get("port")
    if host and port:
        host = f"{host}:{port}"
    return f"{prefix}{host}/{path}" if path else f"{prefix}{host}"


def _collect_postman_items(items, bank_name, results):
    """Append one record per request found in a (possibly nested) Postman ``item`` list."""
    if not isinstance(items, list):
        return
    for item in items:
        if not isinstance(item, dict):
            continue
        if "item" in item:
            # Folder: descend into its children.
            _collect_postman_items(item["item"], bank_name, results)
            continue
        if "request" not in item:
            continue

        request = item["request"]
        if isinstance(request, str):
            # Shorthand form: the string is the URL and the method is GET.
            method, url = "GET", request
        elif isinstance(request, dict):
            method, url = request.get("method") or "", request.get("url")
        else:
            continue

        url_str = _postman_url_to_str(url)
        results.append({
            "bank": bank_name,
            "method": str(method).upper(),
            "endpoint": url_str,
            "url": url_str,
            "description": item.get("name", ""),
            "responses": ""
        })


def _openapi_base_url(spec):
    """Return the base URL of an OpenAPI 3 (``servers``) or Swagger 2 (``host``/``basePath``) document."""
    servers = spec.get("servers")
    if isinstance(servers, list) and servers and isinstance(servers[0], dict):
        server_url = servers[0].get("url", "")
        if isinstance(server_url, str) and server_url:
            return server_url.rstrip("/")

    # Swagger 2.0 has no "servers"; the base is schemes[0]://host + basePath.
    host = spec.get("host")
    host = host.strip() if isinstance(host, str) else ""
    base_path = spec.get("basePath")
    base_path = base_path if isinstance(base_path, str) else ""
    if base_path and not base_path.startswith("/"):
        base_path = "/" + base_path

    prefix = ""
    if host:
        schemes = spec.get("schemes")
        has_scheme = isinstance(schemes, list) and schemes and isinstance(schemes[0], str)
        # Assumption: when no scheme is declared, fall back to https.
        prefix = f"{schemes[0] if has_scheme else 'https'}://{host}"
    return f"{prefix}{base_path}".rstrip("/")


def _collect_openapi_operations(spec, bank_name, results):
    """Append one record per HTTP operation declared under ``paths``."""
    paths = spec.get("paths")
    if not isinstance(paths, dict):
        return
    base_url = _openapi_base_url(spec)
    for endpoint, path_item in paths.items():
        if not isinstance(path_item, dict):
            # A null/malformed path item must not abort the rest of the file.
            continue
        for method, operation in path_item.items():
            if not isinstance(method, str) or method.lower() not in _HTTP_METHODS:
                continue
            if not isinstance(operation, dict):
                continue
            responses = operation.get("responses", {})
            if isinstance(responses, dict):
                formatted_responses = json.dumps(responses, indent=2)
            else:
                formatted_responses = str(responses)
            results.append({
                "bank": bank_name,
                "method": method.upper(),
                "endpoint": endpoint,
                "url": f"{base_url}{endpoint}",
                "description": operation.get("summary") or operation.get("description") or "",
                "responses": formatted_responses
            })


def extract_api_info_from_json_dir(base_dir, output_excel_path="api_extracted_1.xlsx"):
    """
    Recursively scan ``base_dir`` for ``.json`` files and extract API endpoint information.

    Two document shapes are recognised:

    * OpenAPI / Swagger: a top-level ``paths`` object. The base URL comes from
      ``servers[0].url`` or, for Swagger 2.0, from ``schemes``/``host``/``basePath``.
    * Postman collection: a top-level ``item`` list, walked recursively through folders.

    Parameters:
        base_dir (str): Directory to scan. The first path component below it is used
            as the "bank" of every record found in that subtree; files lying directly
            in ``base_dir`` are reported with bank ``"Unknown"``.
        output_excel_path (str | None): Where to write the records as an Excel sheet.
            Pass ``None`` (or an empty string) to skip writing and only return the records.

    Returns:
        list[dict]: One dict per endpoint with the keys ``bank``, ``method``,
        ``endpoint``, ``url``, ``description`` and ``responses``.

    Files that cannot be read or parsed are reported with ``print`` and skipped.
    Directories and files are visited in sorted order so the result is reproducible.
    """
    results = []

    for root, dirs, files in os.walk(base_dir):
        dirs.sort()  # in-place sort steers os.walk's traversal order
        for file_name in sorted(files):
            if not file_name.lower().endswith('.json'):
                continue
            json_path = os.path.join(root, file_name)
            try:
                # utf-8-sig transparently drops a leading BOM if present.
                with open(json_path, 'r', encoding='utf-8-sig') as f:
                    data = json.load(f)

                if not isinstance(data, dict):
                    continue

                rel_parts = os.path.relpath(json_path, base_dir).split(os.sep)
                # With a single component the file sits directly in base_dir,
                # so there is no enclosing bank directory to name it after.
                bank_name = rel_parts[0] if len(rel_parts) > 1 else "Unknown"

                if "paths" in data:
                    _collect_openapi_operations(data, bank_name, results)
                elif "item" in data:
                    _collect_postman_items(data["item"], bank_name, results)
            except json.JSONDecodeError as e:
                print(f"JSON decode error in {json_path}: {e}")
            except Exception as e:
                # Deliberate best-effort: one bad file must not stop the whole scan.
                print(f"Error processing {json_path}: {e}")

    if not results:
        print("No API information found.")
    elif output_excel_path:
        pd.DataFrame(results).to_excel(output_excel_path, index=False)

    return results

In [3]:
# def extract_api_info_from_json_dir(base_dir, output_excel_path="api_extracted.xlsx"):
#     """
#     Recursively scans JSON files in the given base directory to extract API endpoint information,
#     including full URLs, and saves the results to an Excel file.

#     For each API, the bank name is the top-level directory under base_dir that contains the file.

#     Parameters:
#         base_dir (str): The directory to scan.
#         output_excel_path (str): Path to save the resulting Excel file.

#     Returns:
#         list: A list of dictionaries with API information.
#     """
#     results = []

#     for root, dirs, files in os.walk(base_dir):
#         for file in files:
#             if file.lower().endswith('.json'):
#                 json_path = os.path.join(root, file)
#                 try:
#                     with open(json_path, 'r', encoding='utf-8-sig') as f:
#                         data = json.load(f)

#                     # Safely extract base_url
#                     base_url = ""
#                     servers = data.get("servers", [])
#                     if isinstance(servers, list) and servers:
#                         first_server = servers[0]
#                         if isinstance(first_server, dict):
#                             base_url = first_server.get("url", "")
#                             if isinstance(base_url, str):
#                                 base_url = base_url.rstrip("/")
#                             else:
#                                 base_url = ""

#                     # Always use the top-level directory under base_dir as the bank name
#                     rel_path = os.path.relpath(json_path, base_dir)
#                     path_parts = rel_path.split(os.sep)
#                     bank_name = path_parts[0] if path_parts else "Unknown"

#                     paths = data.get('paths', {})
#                     for endpoint, methods in paths.items():
#                         for method, details in methods.items():
#                             if method.lower() not in ["get", "post", "put", "delete", "patch", "options", "head"]:
#                                 continue
#                             summary = details.get("summary", "")
#                             description = details.get("description", "")
#                             full_url = f"{base_url}{endpoint}" if base_url else endpoint

#                             # Get responses as JSON string
#                             responses = details.get("responses", {})
#                             if isinstance(responses, dict):
#                                 formatted_responses = json.dumps(responses, indent=2)
#                             else:
#                                 formatted_responses = str(responses)

#                             results.append({
#                                 "bank": bank_name,
#                                 "method": method.upper(),
#                                 "endpoint": endpoint,
#                                 "url": full_url,
#                                 "description": summary or description,
#                                 "responses": formatted_responses
#                             })
#                 except json.JSONDecodeError as e:
#                     print(f"JSON decode error in {json_path}: {e}")
#                 except Exception as e:
#                     print(f"Error processing {json_path}: {e}")

#     # Print results
#     for item in results:
#         print(f"{item['bank']} | {item['method']:6} {item['endpoint']:40} {item['url']:50} {item['description']}")


#     # Save to Excel
#     if results:
#         df = pd.DataFrame(results)
#         df.to_excel(output_excel_path, index=False)
#         print(f"Results saved to {output_excel_path}")
#     else:
#         print("No API information found.")

#     return results

In [4]:
# Scan the bank collections folder; the Excel file goes to the function's default
# output path and the returned list of records is echoed as this cell's output.
extract_api_info_from_json_dir(base_dir="Banks API's json files")


[{'bank': '10x banking',
  'method': 'GET',
  'endpoint': 'https://api.sandbox.10xbanking.com/v2/packages/public',
  'url': 'https://api.sandbox.10xbanking.com/v2/packages/public',
  'description': 'Retrieve packages',
  'responses': ''},
 {'bank': '10x banking',
  'method': 'GET',
  'endpoint': 'https://api.sandbox.10xbanking.com/v2/products//versions//summary',
  'url': 'https://api.sandbox.10xbanking.com/v2/products//versions//summary',
  'description': 'Retrieve transaction account product summary',
  'responses': ''},
 {'bank': '10x banking',
  'method': 'GET',
  'endpoint': 'https://api.sandbox.10xbanking.com/v2/products//versions//summary',
  'url': 'https://api.sandbox.10xbanking.com/v2/products//versions//summary',
  'description': 'Retrieve deposit account product summary',
  'responses': ''},
 {'bank': '10x banking',
  'method': 'GET',
  'endpoint': 'https://api.sandbox.10xbanking.com/v2/products//versions//summary',
  'url': 'https://api.sandbox.10xbanking.com/v2/products//