In [1]:
# Fetching PubMed article metadata
from Bio import Entrez
from Bio import Medline

# Graph creation and visualisation
import networkx as nx
from pyvis.network import Network
from pyvis import network as net

# Data processing
import numpy as np
import pandas as pd

# Miscellaneous
import os
from tqdm import tqdm
import re

## Working with pubmed abstracts

In [2]:
Entrez.email = 'akishirsath@gmail.com'

In [3]:
def removeSpecialChar(string):
    """
    Removes special characters from given string
    """
    special_characters = ['!','#','$','%','@','[',']',' ',']','_', '/', "*"]
    
    for i in special_characters:
        string = string.replace(i,' ').strip()
    
    return string

In [4]:
def fetchMeSH(pmid):
    """Returns MeSH term associated with PMID"""
    
    handle = Entrez.efetch(
      db="pubmed", 
      id=pmid, 
      rettype="medline", 
      retmode="text")

    records = Medline.parse(handle)
    records = list(records)[0]
    
    return ", ".join(records.get('MH'))

In [5]:
pmids = [33424848, 33400058, 32868092, 33236131, 32921216]

In [6]:
mesh_list = list()

for pmid in tqdm(pmids):
    mesh_terms=fetchMeSH(pmid)
    mesh_list.append((mesh_terms))

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:06<00:00,  1.33s/it]


In [7]:
len(mesh_list)

5

In [8]:
unique_words = set()
temp_list = list()
articles = dict()

for idx, entry in enumerate(mesh_list):
    temp_dict = dict()
    for string in entry.split(","):
        string = removeSpecialChar(string).lower()
        strip_string = string.strip().split(" ")
        if len(strip_string)>1:
            temp_dict[strip_string[0]]=strip_string[1:]
            for words in strip_string:
                unique_words.add(words)
        else:
            temp_dict[strip_string[0]]="NA"
            unique_words.add(strip_string[0])
            
    articles[f"article_{idx}"]=temp_dict


In [9]:
articles

{'article_0': {'animals': 'NA',
  'antibody-dependent': ['enhancement'],
  'covid-19': ['', 'prevention', '&', 'control'],
  'humans': 'NA',
  'sars-cov-2': ['physiology'],
  'vaccines': 'NA'},
 'article_1': {'animals': 'NA',
  'antiviral': ['agents', 'adverse', 'effects', '', 'therapeutic', 'use'],
  'covid-19': ['vaccines', 'adverse', 'effects', '', 'therapeutic', 'use'],
  'host-pathogen': ['interactions'],
  'humans': 'NA',
  'immunization': 'NA',
  'passive': ['adverse', 'effects'],
  'predictive': ['value', 'of', 'tests'],
  'sars-cov-2': ['', 'drug', 'effects', 'pathogenicity'],
  'treatment': ['outcome']},
 'article_2': {'angiotensin-converting': ['enzyme',
   'inhibitors',
   'therapeutic',
   'use'],
  'antiviral': ['agents', '', 'therapeutic', 'use'],
  'covid-19': ['vaccines'],
  'combined': ['modality', 'therapy'],
  'humans': 'NA',
  'respiratory': ['therapy', '', 'methods'],
  'sars-cov-2': ['immunology', 'pathogenicity']},
 'article_3': {'antiviral': ['agents', '', 'the

In [10]:
articles.get('article_0').get('antibody-dependent')

['enhancement']

In [11]:
len(unique_words)

59

# Creating graphs

In [12]:
def creatGraph(dictionary, root_name):
    A = nx.Graph()

    for prime_node in dictionary.keys():
        A.add_edge(root_name, prime_node)
        
        value = dictionary.get(prime_node)
        
        try:
            if (len(value)>1) and (value!="NA"):
                for ele in value:
                    if (len(ele)>1):
                        A.add_edge(prime_node, ele)
        except:
            pass
    
    return A

In [13]:
graphs = list()
for article in articles.keys():
    graph = creatGraph(articles[article], article)
    graphs.append(graph)

In [14]:
nwt = net.Network(notebook=True, height='800px', width='800px', directed=True)
nwt.from_nx(graphs[0])
nwt.show('graph.html')

In [15]:
nwt = net.Network(notebook=True, height='800px', width='800px', directed=True)
nwt.from_nx(graphs[3])
nwt.show('graph.html')