In [39]:
import json
import os
from getpass import getpass

from py2neo import Node, Relationship, Graph

### 1. Connect to neo4j

In [40]:
# Connection settings are read from the environment so that no credentials are
# stored in the notebook. When NEO4J_PASSWORD is unset the password is prompted
# for interactively instead.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

graph = Graph(NEO4J_URI, user=NEO4J_USER, password=NEO4J_PASSWORD)

# WARNING: destructive. Deletes every node (and its relationships) in the
# connected database before anything else in the notebook runs.
graph.run("MATCH (n) DETACH DELETE n")
graph

Graph('neo4j://localhost:7687')

### 2. Import Data

In [41]:

# JSON prediction files to load, listed in the order they are processed.
data_files = [
    './data/pred_test.json',
    './data/pred_train.json',
    './data/pred_val.json',
]


### 3. Add data to graph

In [42]:
# Maps each entry's 'Id' to the py2neo Node built for it, so that relationship
# entries can look up their endpoints by id.
# NOTE(review): this map is shared across all datasets and files; if 'Id'
# values are only unique within one dataset, a later entity silently replaces
# an earlier one under the same id -- confirm against the data.
nodes = {}
for file_name in data_files:
    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding, which differs between operating systems.
    with open(file_name, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for dataset in data:
        # First pass: create/merge every entity so that all endpoints exist
        # before any relationship of this dataset is processed.
        for entry in dataset:
            if 'Entity' in entry:
                entity = entry['Entity']
                attributes = entry['Attributes']
                node_id = entry['Id']

                # The entity type becomes the node label and the attributes
                # become its properties. Merging on (label, 'name') reuses an
                # existing node with the same label and name.
                node = Node(entity, **attributes)
                graph.merge(node, entity, 'name')
                nodes[node_id] = node

                # Debugging: Print the created or merged node
                print(f"Created or merged node: {node}")

        # Second pass: connect the nodes registered above.
        for entry in dataset:
            if 'Relationship' in entry:
                rel_type = entry['Relationship']
                start_node_id = entry['StartNode']
                end_node_id = entry['EndNode']

                # Raises KeyError if an endpoint id was never registered.
                start_node = nodes[start_node_id]
                end_node = nodes[end_node_id]

                # Create relationship
                # NOTE(review): presumably a repeated (start, type, end) triple
                # yields a parallel edge here -- verify, and consider
                # graph.merge if duplicates are not wanted.
                relationship = Relationship(start_node, rel_type, end_node)
                graph.create(relationship)

                # Debugging: Print the created relationship
                print(f"Created relationship: {relationship}")

# Reported once, after every file has been processed.
print("Nodes and relationships created successfully.")

Created or merged node: (_2623:media {name: 'news programs'})
Created or merged node: (_2624:content {name: 'violence'})
Created or merged node: (_0:medium {name: 'game'})
Created or merged node: (_1:group {name: 'players'})
Created relationship: (violence)-[:isRecivedby {}]->(players)
Created relationship: (game)-[:isDisplayedvia {}]->(news programs)
Created relationship: (game)-[:isDistributedto {}]->(players)
Created relationship: (players)-[:isInformedby {}]->(news programs)
Created or merged node: (_2:medium {name: 'article'})
Created or merged node: (_3:group {name: 'authors'})
Created or merged node: (_4:medium {name: 'technology'})
Created or merged node: (_5:group {name: 'small pharmacy chains'})
Created relationship: (article)-[:isDistributedto {}]->(authors)
Created relationship: (article)-[:isDistributedto {}]->(small pharmacy chains)
Created relationship: (technology)-[:isDistributedto {}]->(authors)
Created relationship: (technology)-[:isDistributedto {}]->(small pharmacy

### 4. General Descriptive Stats of the graph

In [43]:
# Per-node out-degree and in-degree.
# DISTINCT is required here: chaining two OPTIONAL MATCH clauses produces one
# row per (outgoing, incoming) pair, so a plain COUNT would report out * in in
# both columns for any node that has relationships in both directions.
query = """
MATCH (n)
OPTIONAL MATCH (n)-[r1]->()
OPTIONAL MATCH ()-[r2]->(n)
RETURN n, COUNT(DISTINCT r1) AS OutDegree, COUNT(DISTINCT r2) AS InDegree
"""

# Run the query
result = graph.run(query).data()

# Cumulative degree = outgoing + incoming relationships of the node
nodes_with_degrees = [
    {
        "node": record["n"],
        "cumulative_degree": record["OutDegree"] + record["InDegree"]
    }
    for record in result
]

# Sort nodes by cumulative degree in descending order
sorted_nodes = sorted(nodes_with_degrees, key=lambda x: x["cumulative_degree"], reverse=True)

# Print the sorted nodes with their cumulative degrees
for item in sorted_nodes:
    node = item["node"]
    cumulative_degree = item["cumulative_degree"]
    print(f"Node: {node}, Cumulative Degree: {cumulative_degree}")

print("Nodes sorted by cumulative degrees successfully.")

Node: (_124:group {name: 'people'}), Cumulative Degree: 880
Node: (_535:group {name: 'friends'}), Cumulative Degree: 150
Node: (_246:group {name: 'author'}), Cumulative Degree: 72
Node: (_28:medium {name: 'book'}), Cumulative Degree: 60
Node: (_3:group {name: 'authors'}), Cumulative Degree: 54
Node: (_121:group {name: 'women'}), Cumulative Degree: 52
Node: (_303:group {name: 'user'}), Cumulative Degree: 52
Node: (_683:group {name: 'team'}), Cumulative Degree: 52
Node: (_184:group {name: 'writer'}), Cumulative Degree: 48
Node: (_23:group {name: 'students'}), Cumulative Degree: 42
Node: (_415:group {name: 'band'}), Cumulative Degree: 42
Node: (_71:group {name: 'religion'}), Cumulative Degree: 36
Node: (_280:group {name: 'student'}), Cumulative Degree: 36
Node: (_174:group {name: 'family'}), Cumulative Degree: 32
Node: (_295:group {name: 'children'}), Cumulative Degree: 32
Node: (_70:group {name: 'science'}), Cumulative Degree: 30
Node: (_216:group {name: 'researchers'}), Cumulative Degre

In [44]:
# Count every node in the graph. The query result is left as the cell's last
# expression so that the notebook displays it.
total_nodes_query = "MATCH (n) RETURN COUNT(n) AS TotalNodes"
total_nodes = graph.run(total_nodes_query)
total_nodes

TotalNodes
2623


In [45]:
# Number of nodes per label set, largest group first.
query = """
MATCH (n)
WITH labels(n) AS lbl, COUNT(n) AS cnt
RETURN lbl AS Label, cnt AS Count
ORDER BY Count DESC
"""

# Materialise the rows as a list of dicts
result = graph.run(query).data()

# One summary line per label set; the labels of a node are joined with commas
print("Total Entities by Entity Type:")
for record in result:
    labels, count = record['Label'], record['Count']
    labels_str = ', '.join(labels)
    print(f"Label(s): {labels_str}, Count: {count}")

Total Entities by Entity Type:
Label(s): group, Count: 1215
Label(s): medium, Count: 611
Label(s): content, Count: 586
Label(s): media, Count: 211


In [46]:
# Total number of nodes
total_nodes_query = "MATCH (n) RETURN COUNT(n) AS TotalNodes"
total_nodes = graph.run(total_nodes_query).evaluate()

# Total number of relationships
total_relationships_query = "MATCH ()-[r]->() RETURN COUNT(r) AS TotalRelationships"
total_relationships = graph.run(total_relationships_query).evaluate()

print(f"Total Nodes: {total_nodes}")
print(f"Total Relationships: {total_relationships}")

Total Nodes: 2623
Total Relationships: 2436


In [47]:
# Mean in-degree: count incoming relationships per node (nodes without any
# still contribute a zero through OPTIONAL MATCH), then average the counts.
average_in_degree_query = """
MATCH (n)
OPTIONAL MATCH ()-[r]->(n)
WITH n, COUNT(r) AS InDegree
RETURN AVG(InDegree) AS AverageInDegree
"""

# Mean out-degree: same computation over outgoing relationships.
average_out_degree_query = """
MATCH (n)
OPTIONAL MATCH (n)-[r]->()
WITH n, COUNT(r) AS OutDegree
RETURN AVG(OutDegree) AS AverageOutDegree
"""

average_in_degree = graph.run(average_in_degree_query).evaluate()
average_out_degree = graph.run(average_out_degree_query).evaluate()

print(average_in_degree)
print(average_out_degree)

0.9287075867327474
0.928707586732749


### 5. The Question: How attractive are different media to different groups? (bu tong mei jie dui bu tong qun ti de xi yin li)

In [58]:
# Rank media by the number of outgoing isDistributedto relationships.
# The pattern is directed (medium -> target) so that only "medium is
# distributed to X" edges are counted, matching the direction used by the
# per-medium example queries in this notebook.
query = """
MATCH (m:medium)-[r:isDistributedto]->()
RETURN m.name AS Medium, COUNT(r) AS RelationshipCount
ORDER BY RelationshipCount DESC;
"""

# Run the query
results = graph.run(query)

results

Medium,RelationshipCount
book,60
article,15
film,12


#### Example 1: What groups are books distributed to?

In [53]:
# Groups that the "book" medium is distributed to.
query = """
MATCH (m:medium {name: "book"})-[:isDistributedto]->(g:group)
RETURN m, g.name
"""

results = graph.run(query)

# One line per (medium, group) pair returned by the query
for result in results:
    medium_name = result['m']['name']
    group_name = result['g.name']
    print(f"Medium: {medium_name}, Group: {group_name}")

Medium: book, Group: professional scribe
Medium: book, Group: young readers
Medium: book, Group: bisexuality
Medium: book, Group: artists ' book simply means a book made by an artist
Medium: book, Group: women
Medium: book, Group: men
Medium: book, Group: household
Medium: book, Group: nuclear family
Medium: book, Group: popular minister
Medium: book, Group: programmers
Medium: book, Group: muhammad
Medium: book, Group: us
Medium: book, Group: yemen
Medium: book, Group: human rights
Medium: book, Group: politics
Medium: book, Group: teaching
Medium: book, Group: learning
Medium: book, Group: students
Medium: book, Group: faculty
Medium: book, Group: religion
Medium: book, Group: dutch emigration
Medium: book, Group: student
Medium: book, Group: rural workers
Medium: book, Group: allies
Medium: book, Group: primary school teacher
Medium: book, Group: class
Medium: book, Group: reader
Medium: book, Group: bacteria
Medium: book, Group: fungi
Medium: book, Group: parasites
Medium: book, Gr

#### Example 2: what groups are articles distributed to?

In [54]:
# Groups that the "article" medium is distributed to.
query = """
MATCH (m:medium {name: "article"})-[:isDistributedto]->(g:group)
RETURN m, g.name
"""

results = graph.run(query)

# One line per (medium, group) pair returned by the query
for result in results:
    medium_name = result['m']['name']
    group_name = result['g.name']
    print(f"Medium: {medium_name}, Group: {group_name}")

Medium: article, Group: turkic peoples
Medium: article, Group: fish
Medium: article, Group: families
Medium: article, Group: women
Medium: article, Group: feminist
Medium: article, Group: patient
Medium: article, Group: insubres
Medium: article, Group: conflicts
Medium: article, Group: switchboard operator
Medium: article, Group: baseball player
Medium: article, Group: early years workers
Medium: article, Group: parents
Medium: article, Group: pre-school children
Medium: article, Group: small pharmacy chains
Medium: article, Group: authors


#### Example 3: what groups are film distributed to?

In [59]:
# Groups that the "film" medium is distributed to.
query = """
MATCH (m:medium {name: "film"})-[:isDistributedto]->(g:group)
RETURN m, g.name
"""

results = graph.run(query)

# One line per (medium, group) pair returned by the query
for result in results:
    medium_name = result['m']['name']
    group_name = result['g.name']
    print(f"Medium: {medium_name}, Group: {group_name}")

Medium: film, Group: critics
Medium: film, Group: directors
Medium: film, Group: contemporary philosophers
Medium: film, Group: racketeering
Medium: film, Group: trade unionism
Medium: film, Group: people
Medium: film, Group: peers
Medium: film, Group: friends
Medium: film, Group: thompson
Medium: film, Group: severine
Medium: film, Group: class
Medium: film, Group: cadaver


In [None]:
# Reference (Neo4j Graph Data Science, betweenness centrality):
# https://neo4j.com/docs/graph-data-science/current/algorithms/betweenness-centrality/