# Neo4j Json Import Test

## Basic Data

In [1]:
title = "Training Large Language Models to Reason in a Continuous Latent Space"

In [2]:
import sys
import os

# 获取当前脚本所在目录的父目录 (即 my_project)
parent_dir = os.path.dirname(os.getcwd())

# 将父目录添加到 sys.path
sys.path.append(parent_dir)

In [3]:
from apis.arxiv_tool import ArxivKit
from apis.semanticscholar_tool import SemanticScholarKit

Arxiv Metadata

In [4]:
arxiv = ArxivKit()
arxiv_metadata = arxiv.retrieve_metadata_by_paper(query_term=title, max_cnt=3)

2025-02-19 09:23:26,386 - INFO - Requesting page (first: True, try: 0): https://export.arxiv.org/api/query?search_query=Training+Large+Language+Models+to+Reason+in+a+Continuous+Latent+Space&id_list=&sortBy=relevance&sortOrder=descending&start=0&max_results=100
2025-02-19 09:23:31,574 - INFO - Got first page: 100 of 2660947 total results


SemanticScholar Metadata

In [5]:
ss = SemanticScholarKit()
ss_metadata = ss.search_paper_by_keywords(query=title, limit=3)

2025-02-19 09:23:33,113 - INFO - HTTP Request: GET https://api.semanticscholar.org/graph/v1/paper/search?query=Training+Large+Language+Models+to+Reason+in+a+Continuous+Latent+Space&fields=abstract%2Cauthors%2CcitationCount%2CcitationStyles%2CcorpusId%2CexternalIds%2CfieldsOfStudy%2CinfluentialCitationCount%2CisOpenAccess%2Cjournal%2CopenAccessPdf%2CpaperId%2CpublicationDate%2CpublicationTypes%2CpublicationVenue%2CreferenceCount%2Cs2FieldsOfStudy%2Ctitle%2Curl%2Cvenue%2Cyear&offset=0&limit=3 "HTTP/1.1 200 OK"


In [6]:
# paper_ss_id = ss_metadata[0][0].get('paperId')
paper_ss_id = ss_metadata[0].get('paperId')
print(paper_ss_id)

673fbdd957cada770d10dffca5e45b53da43a3c6


Reference and Citedby data

In [7]:
reference_metadata = ss.get_semanticscholar_references(paper_id=paper_ss_id, limit=100)
len(reference_metadata)

2025-02-19 09:23:34,692 - INFO - HTTP Request: GET https://api.semanticscholar.org/graph/v1/paper/673fbdd957cada770d10dffca5e45b53da43a3c6/references?fields=contexts%2Cintents%2CcontextsWithIntent%2CisInfluential%2Cabstract%2Cauthors%2CcitationCount%2CcitationStyles%2CcorpusId%2CexternalIds%2CfieldsOfStudy%2CinfluentialCitationCount%2CisOpenAccess%2Cjournal%2CopenAccessPdf%2CpaperId%2CpublicationDate%2CpublicationTypes%2CpublicationVenue%2CreferenceCount%2Cs2FieldsOfStudy%2Ctitle%2Curl%2Cvenue%2Cyear&offset=0&limit=100 "HTTP/1.1 200 OK"


49

In [8]:
citedby_metadata = ss.get_semanticscholar_citedby(paper_id=paper_ss_id, limit=100)
len(citedby_metadata)

2025-02-19 09:23:36,722 - INFO - HTTP Request: GET https://api.semanticscholar.org/graph/v1/paper/673fbdd957cada770d10dffca5e45b53da43a3c6/citations?fields=contexts%2Cintents%2CcontextsWithIntent%2CisInfluential%2Cabstract%2Cauthors%2CcitationCount%2CcitationStyles%2CcorpusId%2CexternalIds%2CfieldsOfStudy%2CinfluentialCitationCount%2CisOpenAccess%2Cjournal%2CopenAccessPdf%2CpaperId%2CpublicationDate%2CpublicationTypes%2CpublicationVenue%2CreferenceCount%2Cs2FieldsOfStudy%2Ctitle%2Curl%2Cvenue%2Cyear&offset=0&limit=100 "HTTP/1.1 200 OK"


11

## Meatadata Process

for semantic scholar paper metadata

In [9]:
def move_key_to_first(input_dict, key_to_move):
    """将字典的某个键移动到第一位。"""
    if key_to_move not in input_dict:
        return input_dict  # 如果键不存在，则直接返回原字典

    value = input_dict[key_to_move]
    new_dict = {key_to_move: value} # 创建新字典，首先插入要移动的键
    for k, v in input_dict.items():
        if k != key_to_move:
            new_dict[k] = v
    return new_dict

In [10]:
def filter_and_reorder_dict_comprehension(input_dict, keys_to_keep):
    """使用字典推导式过滤并按指定键顺序调整字典。"""
    return {key: input_dict[key] for key in keys_to_keep if key in input_dict}

In [11]:
import copy 

def delete_keys_del(input_dict, keys_to_delete):
    """使用 del 关键字删除字典中特定的键。"""
    opt_dct = copy.deepcopy(input_dict)
    for key in keys_to_delete:
        if key in opt_dct:  # 检查键是否存在，避免 KeyError
            del opt_dct[key]
    return opt_dct # 为了方便链式调用，返回修改后的字典

In [12]:
def remove_kth_element(original_list, k):
    """删除list中第k个元素 (不改变原list的值，仅返回新list)"""
    if k <= 0 or k > len(original_list):
        return list(original_list)  # 返回原list的副本，不改变原list
    else:
        new_list = list(original_list) # 创建原list的副本
        new_list.pop(k - 1) # 删除索引为 k-1 的元素 (因为list索引是 0-based)
        return new_list

In [27]:
import json

def convert_dict_values_to_json(dict_data):
    """检查字典的值，如果值是字典类型，则将其转换为 JSON 字符串。
    Args:
        dict_data (dict): 输入字典。
    Returns:
        dict: 值被转换后的字典。
    """
    modified_dict = {}
    for key, value in dict_data.items():
        if isinstance(value, dict):
            modified_dict[key] = json.dumps(value, ensure_ascii=False)
        else:
            modified_dict[key] = value
    return modified_dict

For better alignment of data types:
- separate by node and relationship types
- first mapping data types between neo4j, then import data
- import nodes and relationships separately

In [33]:
ss_papermeta_json = []
# for node json
ss_papermeta_paper_json, ss_papermeta_author_json, ss_papermeta_journal_json, ss_papermeta_venue_json = [], [], [], []
# for relationship json
ss_papermeta_author_paper_json, ss_papermeta_paper_journal_json, ss_papermeta_paper_venue_json = [], [], []

for item in ss_metadata:
    paper_id = item.get('paperId')
    if paper_id is not None:
        # process paper node
        arxiv_id = item.get('externalIds',{}).get('ArXiv')
        if arxiv_id is not None:
            arxiv_id = arxiv_id.replace('10.48550/arXiv.', '') 
        item['arxivId'] = arxiv_id

        doi = item.get('externalIds',{}).get('DOI')
        if doi is None:
            if arxiv_id is not None:
                doi = f"10.48550/arXiv.{arxiv_id}"  # assign 10.48550/arXiv. for arxiv id https://info.arxiv.org/help/doi.html
            else:
                doi = paper_id
        item['DOI'] = doi

        authors = item.get('authors', [])[:10] if item.get('authors', []) is not None else []
        journal = item.get('journal', {}) if item.get('journal', {}) is not None else {}
        venue = item.get('publicationVenue', {}) if item.get('publicationVenue', {}) is not None else {}
      
        paper_node = {
            "type": "node",
            "id": doi,
            "labels": ["Paper"],
            "properties": item
            }
        ss_papermeta_paper_json.append(paper_node)
        ss_papermeta_json.append(paper_node)

        for idx, author in enumerate(authors[:10]):
            # process author node
            author_id = author.get('authorId')
            if author_id is not None:
                author_node = {
                    "type": "node",
                    "id": author.get('authorId'),
                    "labels": ["Author"],
                    "properties": author}
                ss_papermeta_author_json.append(author_node)
                ss_papermeta_json.append(author_node)
            
                # process author -> WRITES -> paper
                author_order = idx + 1
                coauthors = remove_kth_element(authors, idx)
                author_paper_relationship = {
                    "type": "relationship",
                    "relationshipType": "WRITES",
                    "startNodeId": author_id,
                    "endNodeId": doi,
                    "properties": {'authorOrder': author_order, 'coauthors': coauthors}
                    }
                ss_papermeta_author_paper_json.append(author_paper_relationship)
                ss_papermeta_json.append(author_paper_relationship)

        journal_name = journal.get('name')
        if journal_name is not None:
            # process journal node
            journal_node = {
                  "type": "node",
                  "id": journal_name,
                  "labels": ["Journal"],
                  "properties": {"name": journal_name}}
            ss_papermeta_journal_json.append(journal_node)
            ss_papermeta_json.append(journal_node)
            
            if 'arxiv' not in journal_name.lower():  # journal可能会有大量热点，预先进行排除
                # process paper -> PRINTS_ON -> journal
                paper_journal_relationship = {
                  "type": "relationship",
                  "relationshipType": "PRINTS_ON",
                  "startNodeId": doi,
                  "endNodeId": journal_name,
                  "properties": journal}
                ss_papermeta_paper_journal_json.append(paper_journal_relationship)
                ss_papermeta_json.append(paper_journal_relationship)

        venue_id = venue.get('id')
        if venue_id is not None:
            # process venue node
            venue_node = {
                  "type": "node",
                  "id": venue_id,
                  "labels": ["Venue"],
                  "properties": venue
                  }
            ss_papermeta_venue_json.append(venue_node)
            ss_papermeta_json.append(venue_node)
            
            # process paper -> RELEASES_IN -> venue
            if 'arxiv' not in venue.get('name').lower():  # venue可能会有大量热点，预先进行排除
                paper_venue_relationship = {
                  "type": "relationship",
                  "relationshipType": "RELEASES_IN",
                  "startNodeId": doi,
                  "endNodeId": venue.get('id'),
                  "properties": {}}
                ss_papermeta_paper_venue_json.append(paper_venue_relationship)
                ss_papermeta_json.append(paper_venue_relationship)

In [42]:
len(ss_papermeta_json)

60

## For Paper Entity

In [30]:
import json
from neo4j import GraphDatabase  # pip install neo4j https://github.com/neo4j/neo4j-python-driver
# import jsonschema  # pip install jsonschema https://github.com/python-jsonschema/jsonschema
# from jsonschema import Draft7Validator

neo4j_uri = "bolt://localhost:7687"  # 替换为你的 Neo4j Bolt URI
neo4j_user = "neo4j"           # 替换为你的 Neo4j 用户名
neo4j_password = "25216590"      # 替换为你的 Neo4j 密码
database = "paper-graph-v0-1"

In [None]:
import json
from neo4j import GraphDatabase

def is_neo4j_compatible(value):
    """检查值是否可被Neo4j存储为属性（基本类型或其数组）"""
    if isinstance(value, (str, int, float, bool, type(None))):
        return True
    elif isinstance(value, list):
        return all(is_neo4j_compatible(item) for item in value)
    else:
        return False

def import_json_to_neo4j(processed_data, uri, username, password, database):
    driver = GraphDatabase.driver(uri, auth=(username, password))

    with driver.session(database=database) as session:
        for item in processed_data:
            if item['type'] == 'node':
                labels = ":".join(item['labels'])
                parameters = {"id": item['id']}
                set_clauses = []

                if item.get('properties') and isinstance(item['properties'], dict):
                    for key, value in item['properties'].items():
                        if is_neo4j_compatible(value):
                            parameters[key] = value
                        else:
                            # 序列化非兼容类型为JSON字符串
                            parameters[key] = json.dumps(value, ensure_ascii=False)
                        set_clauses.append(f"n.{key} = ${key}")

                merge_query = f"MERGE (n:{labels} {{id: $id}})"
                if set_clauses:
                    set_query = "SET " + ", ".join(set_clauses)
                    cypher_query = f"""
                        {merge_query}
                        ON CREATE {set_query}
                        ON MATCH {set_query}
                    """
                else:
                    cypher_query = merge_query
                cypher_query += " RETURN n"
                session.run(cypher_query, parameters)

            elif item['type'] == 'relationship':
                rel_type = item['relationshipType']
                parameters = {"startId": item['startNodeId'], "endId": item['endNodeId']}
                set_clauses = []

                if item.get('properties') and isinstance(item['properties'], dict):
                    for key, value in item['properties'].items():
                        if is_neo4j_compatible(value):
                            parameters[key] = value
                        else:
                            parameters[key] = json.dumps(value, ensure_ascii=False)
                        set_clauses.append(f"r.{key} = ${key}")

                cypher_query = f"""
                    MATCH (a {{id: $startId}}), (b {{id: $endId}})
                    MERGE (a)-[r:{rel_type}]->(b)
                """
                if set_clauses:
                    set_query = "SET " + ", ".join(set_clauses)
                    cypher_query += f"""
                        ON CREATE {set_query}
                        ON MATCH {set_query}
                    """
                cypher_query += " RETURN r"
                session.run(cypher_query, parameters)

    driver.close()

In [None]:
import_json_to_neo4j(processed_data=ss_papermeta_json, uri=neo4j_uri, username=neo4j_user, password=neo4j_password, database=database)

2025-02-19 11:29:50,853 - INFO - Received notification from DBMS server: {severity: INFORMATION} {code: Neo.ClientNotification.Statement.CartesianProduct} {category: PERFORMANCE} {title: This query builds a cartesian product between disconnected patterns.} {description: If a part of a query contains multiple disconnected patterns, this will build a cartesian product between all those parts. This may produce a large amount of data and slow down query processing. While occasionally intended, it may often be possible to reformulate the query that avoids the use of this cross product, perhaps by adding a relationship between the different parts or by using OPTIONAL MATCH (identifier is: (b))} {position: line: 2, column: 21, offset: 21} for query: '\n                    MATCH (a {id: $startId}), (b {id: $endId})\n                    MERGE (a)-[r:WRITES]->(b)\n                \n                        ON CREATE SET r.authorOrder = $authorOrder, r.coauthors = $coauthors\n                     

## Another Approach

First mapping json schema to neo4j data type, then insert

In [None]:
def load_json(raw_data):
    """validate and load json data"""
    if isinstance(raw_data, str):
        try:
            json_data = json.loads(raw_data)
            if isinstance(json_data, dict):
                json_data = [json_data]
            return json_data
        except json.JSONDecodeError:
            print("Error: Invalid JSON string provided.")
            return []
    elif isinstance(raw_data, list):
        return raw_data
    elif isinstance(raw_data, dict):
        return [raw_data] # 统一处理为列表
    else:
        print("Error: Invalid JSON data type. Please provide a JSON string, list or dict.")
        return []


In [None]:
def infer_node_mapping_with_schema(json_data, top_n=None):
    """从JSON数据中推断节点相关的 mapping 字典部分，并使用 JSON Schema 进行数据类型检查，
    **仅基于前 N 个 JSON 元素进行推断。**

    Args:
        json_data: JSON数据 (list of dict).
        num_elements (int, optional):  用于推断 mapping 的 JSON 元素数量上限。默认为 None，表示使用所有元素.

    Returns:
        dict: 推断出的节点 mapping 字典.
    """
    node_mapping = {}
    node_types_discovered = {} # 记录已发现的节点类型, 避免重复处理
    elements_to_process = json_data[:top_n] if top_n is not None else json_data # **限制处理的元素数量**

    for record in elements_to_process: # **遍历限制数量的 JSON 元素**
        if not isinstance(record, dict):
            print("Warning: Skipping non-dictionary record:", record)
            continue

        # 简单地使用 JSON 对象的 key 作为节点类型名
        node_type_name = record.keys().__iter__().__next__()
        if not node_type_name:
            print("Warning: Skipping record without keys:", record)
            continue

        if node_type_name not in node_types_discovered and isinstance(record[node_type_name], str):
            node_types_discovered[node_type_name] = True
            node_def = {
                "node_label": node_type_name.capitalize(),
                "properties": {},
                "relationships": []
            }

            # 使用 JSON Schema 进行属性类型推断和检查
            schema = Draft7Validator.check_schema({}) # 初始化一个空的schema validator
            for key, value in record.items():
                if key != node_type_name:
                    property_type = "string" # 默认类型
                    if isinstance(value, int):
                        property_type = "integer"
                    elif isinstance(value, float):
                        property_type = "number"
                    elif isinstance(value, bool):
                        property_type = "boolean"
                    elif isinstance(value, list):
                        property_type = "array"
                    elif isinstance(value, dict):
                        property_type = "object"
                    else:
                        property_type = "string" # 默认字符串类型

                    node_def["properties"][key] = {"neo4j_property": key, "type": property_type} # 存储属性类型信息

            node_mapping[node_def["node_label"]] = node_def

    return node_mapping

In [None]:
node_mapping = infer_node_mapping_with_schema(ss_metadata, top_n=None)

In [None]:
node_mapping = {
    'Paper': {
        'node_label': 'Paperid',
        'properties': 
            {
                'externalIds': {'neo4j_property': 'externalIds',
                'type': 'object'},
                'corpusId': {'neo4j_property': 'corpusId', 'type': 'integer'},
                'publicationVenue': {'neo4j_property': 'publicationVenue',
                    'type': 'object'},
                'url': {'neo4j_property': 'url', 'type': 'string'},
                'title': {'neo4j_property': 'title', 'type': 'string'},
                'abstract': {'neo4j_property': 'abstract', 'type': 'string'},
                'venue': {'neo4j_property': 'venue', 'type': 'string'},
                'year': {'neo4j_property': 'year', 'type': 'integer'},
                'referenceCount': {'neo4j_property': 'referenceCount', 'type': 'integer'},
                'citationCount': {'neo4j_property': 'citationCount', 'type': 'integer'},
                'influentialCitationCount': {'neo4j_property': 'influentialCitationCount',
                    'type': 'integer'},
                'isOpenAccess': {'neo4j_property': 'isOpenAccess', 'type': 'integer'},
                'openAccessPdf': {'neo4j_property': 'openAccessPdf', 'type': 'string'},
                'fieldsOfStudy': {'neo4j_property': 'fieldsOfStudy', 'type': 'array'},
                's2FieldsOfStudy': {'neo4j_property': 's2FieldsOfStudy', 'type': 'array'},
                'publicationTypes': {'neo4j_property': 'publicationTypes', 'type': 'array'},
                'publicationDate': {'neo4j_property': 'publicationDate', 'type': 'string'},
                'journal': {'neo4j_property': 'journal', 'type': 'object'},
                'citationStyles': {'neo4j_property': 'citationStyles', 'type': 'object'},
                'authors': {'neo4j_property': 'authors', 'type': 'array'}
    },
    'relationships': []}}

In [None]:
class Json2Neo4j:
    def __init__(self, input_data, neo4j_uri, neo4j_user, database):
        """
        Args: 
            json_data (str or list or dict): JSON数据 (可以是JSON字符串, JSON对象列表, 或JSON对象).
        """
        self.json_data = self.load_json(input_data)
        self.neo4j_uri = neo4j_uri
        self.neo4j_user = neo4j_user
        self.database = database
        self.driver = GraphDatabase.driver(uri=neo4j_uri, auth=(neo4j_user, database))
        if self.driver is None:
            print("Failed to create Neo4j driver.")
            return


    def load_json(self, raw_data):
        """validate and load json data"""
        if isinstance(raw_data, str):
            try:
                json_data = json.loads(raw_data)
                if isinstance(json_data, dict):
                    json_data = [json_data]
                return json_data
            except json.JSONDecodeError:
                print("Error: Invalid JSON string provided.")
                return []
        elif isinstance(raw_data, list):
            return raw_data
        elif isinstance(raw_data, dict):
            return [raw_data] # 统一处理为列表
        else:
            print("Error: Invalid JSON data type. Please provide a JSON string, list or dict.")
            return []

    def infer_node_mapping_with_schema(self, top_n=None):
        """从JSON数据中推断节点相关的 mapping 字典部分，并使用 JSON Schema 进行数据类型检查，
        **仅基于前 N 个 JSON 元素进行推断。**

        Args:
            json_data (str or list or dict): JSON数据 (可以是JSON字符串, JSON对象列表, 或JSON对象).
            num_elements (int, optional):  用于推断 mapping 的 JSON 元素数量上限。默认为 None，表示使用所有元素.

        Returns:
            dict: 推断出的节点 mapping 字典.
        """
        node_mapping = {}
        node_types_discovered = {} # 记录已发现的节点类型, 避免重复处理
        elements_to_process = self.json_data[:top_n] if top_n is not None else self.json_data # **限制处理的元素数量**

        for record in elements_to_process: # **遍历限制数量的 JSON 元素**
            if not isinstance(record, dict):
                print("Warning: Skipping non-dictionary record:", record)
                continue

            # 简单地使用 JSON 对象的 key 作为节点类型名
            node_type_name = record.keys().__iter__().__next__()
            if not node_type_name:
                print("Warning: Skipping record without keys:", record)
                continue

            if node_type_name not in node_types_discovered and isinstance(record[node_type_name], str):
                node_types_discovered[node_type_name] = True
                node_def = {
                    "node_label": node_type_name.capitalize(),
                    "properties": {},
                    "relationships": []
                }

                # 使用 JSON Schema 进行属性类型推断和检查
                schema = Draft7Validator.check_schema({}) # 初始化一个空的schema validator
                for key, value in record.items():
                    if key != node_type_name:
                        property_type = "string" # 默认类型
                        if isinstance(value, int):
                            property_type = "integer"
                        elif isinstance(value, float):
                            property_type = "number"
                        elif isinstance(value, bool):
                            property_type = "boolean"
                        elif isinstance(value, list):
                            property_type = "array"
                        elif isinstance(value, dict):
                            property_type = "object"
                        else:
                            property_type = "string" # 默认字符串类型

                        node_def["properties"][key] = {"neo4j_property": key, "type": property_type} # 存储属性类型信息

                node_mapping[node_def["node_label"]] = node_def

        return node_mapping


def infer_relationship_mapping_with_schema(json_data, node_mapping, num_elements=None):
    """
    从JSON数据中推断关联关系的 mapping 字典部分，并添加到已有的节点 mapping 中，
    **仅基于前 N 个 JSON 元素进行推断。**

    Args:
        json_data (str or list or dict): JSON数据 (可以是JSON字符串, JSON对象列表, 或JSON对象).
        node_mapping (dict):  已经推断出的节点 mapping 字典 (由 infer_node_mapping_with_schema 函数生成).
        num_elements (int, optional):  用于推断 mapping 的 JSON 元素数量上限。默认为 None，表示使用所有元素.

    Returns:
        dict:  完整的 mapping 字典，包含节点和关系 mapping.
    """
    if isinstance(json_data, str):
        try:
            data = json.loads(json_data)
        except json.JSONDecodeError:
            print("Error: Invalid JSON string provided.")
            return node_mapping
    elif isinstance(json_data, list):
        data = json_data
    elif isinstance(json_data, dict):
        data = [json_data] # 统一处理为列表
    else:
        print("Error: Invalid JSON data type. Please provide a JSON string, list or dict.")
        return node_mapping

    if not data:
        print("Error: Empty JSON data.")
        return node_mapping

    elements_to_process = data[:num_elements] if num_elements is not None else data # **限制处理的元素数量**

    for record in elements_to_process: # **遍历限制数量的 JSON 元素**
        if not isinstance(record, dict):
            continue

        # 简单地使用 JSON 对象的 key 作为节点类型名
        node_type_name = record.keys().__iter__().__next__()
        if not node_type_name or not isinstance(record[node_type_name], str):
            continue

        node_label = node_type_name.capitalize()

        if node_label in node_mapping and "relationships" in node_mapping[node_label]:
            node_def = node_mapping[node_label]

            # 推断关系 (这里只是一个非常基础的示例，假设 "relationship" 字段表示关系)
            if "relationship" in record and isinstance(record["relationship"], list):
                for relation in record["relationship"]:
                    if isinstance(relation, dict) and "type" in relation and "target_" in list(relation.keys())[1]:
                        relationship_type = relation["type"].upper()
                        target_key = list(relation.keys())[1]
                        target_node_label = target_key.replace("target_", "").capitalize()
                        target_node_property_key = target_key
                        source_property_key = node_type_name
                        target_json_key = target_key

                        relationship_def = {
                            "relationship_type": relationship_type,
                            "target_node_label": target_node_label,
                            "target_node_property_key": target_node_property_key,
                            "source_property_key": source_property_key,
                            "target_json_key": target_json_key,
                            "json_relationship_type": relation["type"]
                        }
                        if relationship_def not in node_def["relationships"]:
                            node_def["relationships"].append(relationship_def)

    return node_mapping


def infer_mapping_from_json_with_schema(json_data, num_elements=None):
    """
    从JSON数据中推断完整的Neo4j mapping字典，包括节点和关系，并使用 JSON Schema 进行数据类型检查，
    **仅基于前 N 个 JSON 元素进行推断。**

    Args:
        json_data (str or list or dict): JSON数据 (可以是JSON字符串, JSON对象列表, 或JSON对象).
        num_elements (int, optional):  用于推断 mapping 的 JSON 元素数量上限。默认为 None，表示使用所有元素.

    Returns:
        dict: 推断出的完整的 mapping 字典.
    """
    node_mapping = infer_node_mapping_with_schema(json_data, num_elements) # 推断节点 mapping，传递 num_elements
    full_mapping = infer_relationship_mapping_with_schema(json_data, node_mapping, num_elements) # 推断关系 mapping 并合并，传递 num_elements
    return full_mapping