In [1]:
from pymongo import MongoClient

# Connect to the MongoDB server on localhost (port 27017) and select the
# "MedGraph" database; later cells read collections through `database`.
client = MongoClient("mongodb://localhost:27017/")
database = client["MedGraph"]

# Base PubMed URL for a paper: https://pubmed.ncbi.nlm.nih.gov/ + paper_id

In [5]:
import pymongo, networkx

def build_graph_from_meshterms(collection: pymongo.collection.Collection):
    """Build an undirected graph linking papers that share MeSH terms.

    Every document returned by ``collection.find()`` becomes a node keyed by
    its ``_id``. Two nodes are joined by an edge whose ``weight`` attribute is
    the number of distinct ``mesh_terms`` entries the two documents have in
    common; pairs with nothing in common get no edge.

    Args:
        collection: MongoDB collection whose documents carry an ``_id`` and a
            ``mesh_terms`` container (a list or a dict keyed by term). Terms
            are assumed to be hashable (e.g. strings) -- TODO confirm against
            the dataset schema. A missing or empty ``mesh_terms`` is treated
            as "no terms", leaving the paper as an isolated node.

    Returns:
        networkx.Graph: one node per document, weighted edges between
        documents sharing at least one term.
    """
    from itertools import combinations

    # Read the collection exactly once and cache each paper's terms as a set.
    # Re-querying the whole collection for every document would cost N+1 full
    # scans and could see different data between scans.
    terms_by_pmid = {}
    for document in collection.find():
        terms_by_pmid[document["_id"]] = set(document.get("mesh_terms") or ())

    graph = networkx.Graph()
    graph.add_nodes_from(terms_by_pmid)

    # The graph is undirected, so each unordered pair only needs one visit.
    for (pmid_a, terms_a), (pmid_b, terms_b) in combinations(terms_by_pmid.items(), 2):
        shared_count = len(terms_a & terms_b)
        if shared_count > 0:
            graph.add_edge(pmid_a, pmid_b, weight=shared_count)

    return graph

In [17]:
import networkx, json

def build_graph_from_labels(path = "entity_documents.json"):
    """Build an undirected graph linking papers that share entity labels.

    The JSON file maps each paper id to a mapping whose keys are entity
    labels. Only the part of each label before the first ``", "`` is used as
    the entity name. Every paper becomes a node; two papers are connected
    when they share at least one entity name.

    Args:
        path: Location of the JSON file to read. Defaults to
            ``"entity_documents.json"`` in the current working directory.

    Returns:
        networkx.Graph: one node per paper id. Each edge carries
        ``weight`` (number of shared entity names) and ``common_labels``
        (those names, sorted).
    """
    from itertools import combinations

    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Paper id -> set of entity names mentioned in that paper.
    pmid_to_labels = {
        pmid: {label.split(", ")[0] for label in values}
        for pmid, values in data.items()
    }

    graph = networkx.Graph()
    graph.add_nodes_from(pmid_to_labels)

    for pmid1, pmid2 in combinations(pmid_to_labels, 2):
        common_labels = pmid_to_labels[pmid1] & pmid_to_labels[pmid2]
        if common_labels:
            # A single add_edge per pair: the weight is the overlap size, not
            # one of the labels (repeated add_edge calls on the same pair
            # would simply overwrite the attribute with an arbitrary label).
            graph.add_edge(
                pmid1,
                pmid2,
                weight=len(common_labels),
                common_labels=sorted(common_labels),
            )

    return graph

In [6]:
# Usage example: build the MeSH-term graph from a MongoDB collection.
# Needs `database` (connection cell) and `build_graph_from_meshterms`
# to be defined before this cell is run.
collection = database["Dataset2000Entries"]
gx = build_graph_from_meshterms(collection)

In [None]:
import json

# Build the entity co-occurrence graph and export it as JSON.
# Input : graph-webapp/entity_documents.json  (paper id -> {entity id: ...})
# Output: graph-webapp/entities_graph.json    ({"nodes": [...], "links": [...]})
with open("graph-webapp/entity_documents.json", "r", encoding="utf-8") as file:
    data = json.load(file)

paper_list = list(data.keys())

# Inverted index built in a single pass: entity id -> papers mentioning it,
# kept in paper_list order. This replaces scanning every paper per entity.
entity_papers = {}
for paper in paper_list:
    for entity in data[paper].keys():
        entity_papers.setdefault(entity, []).append(paper)

ner_words = set(entity_papers)

# Entity ids look like "<name>, <category>". Iterate in sorted order so the
# exported file is identical across runs (set order varies between kernels).
nodes = []
for entity in sorted(ner_words):
    unpack = entity.split(", ")
    node = {
        "id": entity,
        "name": unpack[0],
        # An id without a ", " separator has no category; use "" rather than
        # failing with IndexError.
        "category": unpack[1] if len(unpack) > 1 else "",
        "papers": entity_papers[entity],
    }
    nodes.append(node)

# Two entities are linked when at least one paper mentions both. Walking each
# source entity's own papers finds exactly its co-occurring entities, without
# intersecting paper sets for every possible pair.
# Each pair is emitted in both directions (A->B and B->A), as before.
# NOTE(review): the consumer of "links" presumably relies on that -- confirm
# before de-duplicating.
edges = []
for node in nodes:
    source_id = node["id"]
    common_by_target = {}
    for paper in node["papers"]:
        for target_id in data[paper].keys():
            if target_id != source_id:
                common_by_target.setdefault(target_id, []).append(paper)

    for target_id in sorted(common_by_target):
        common_papers = common_by_target[target_id]
        edge = {
            "source": source_id,
            "target": target_id,
            "weight": len(common_papers),
            "common_papers": common_papers,
        }
        edges.append(edge)

entities_graph = {}
entities_graph["nodes"] = nodes
entities_graph["links"] = edges

with open("graph-webapp/entities_graph.json", "w") as file:
    json.dump(entities_graph, file, indent = 4)