# This notebook contains an example of a node centrality analysis algorithm, in this case eigenvector centrality, applied to a Protein Contact Network (PCN) of the SARS-CoV-2 spike protein (PDB entry 6vxx)

In [6]:
# Pick the path separator used when building file-system paths in this notebook.
import os
from sys import platform  # kept: a later cell checks `platform` to decide how to open PyMOL

# os.sep is '/' on Linux and macOS and '\\' on Windows, i.e. the same values the
# explicit platform checks used to assign. Unlike those checks it is defined on
# every platform, so an unlisted sys.platform value (e.g. "cygwin", "freebsd")
# no longer leaves add_slash_to_path undefined.
add_slash_to_path = os.sep

In [9]:
import os
import sys

try:
    # Preferred layout: PCN-Miner installed as a package (pip).
    from pcn.pcn_miner import pcn_miner, pcn_pymol_scripts
except ImportError:
    # Fallback layout: notebook run from a git clone. The "pcn" directory is
    # looked up two levels above the current working directory and appended to
    # sys.path so that `pcn_miner` becomes importable.
    cwd = os.getcwd()
    exd = os.path.abspath(os.path.join(cwd, os.pardir))
    # os.path.join inserts the right separator for the running platform, so this
    # cell does not rely on a separator variable defined in another cell.
    pcnd = os.path.join(os.path.abspath(os.path.join(exd, os.pardir)), "pcn")
    sys.path.append(pcnd)
    try:
        from pcn_miner import pcn_miner, pcn_pymol_scripts
    except ImportError as err:
        # Only genuine import failures are reported as "not installed"; the
        # original cause is chained so the traceback shows what was missing.
        raise ImportError("PCN-Miner is not correctly installed.") from err

import numpy as np
import subprocess
import networkx as nx

In [2]:
# Base location handed to the PCN-Miner calls below as their first argument.
# It is left empty here; the recorded PyMOL output shows a relative path.
output_path = ""
# Sub-directory name with a trailing separator. os.path.join("Adj", "") yields
# "Adj\\" on Windows (the previous hard-coded value) and "Adj/" elsewhere, so the
# value is a real directory prefix on every platform.
# NOTE(review): adj_path is not referenced by any other visible cell.
adj_path = os.path.join("Adj", "")

In [3]:
# Identifier of the structure to analyse; the PDB file name is derived from it
# and resolved relative to the current working directory.
protein = "6vxx"
protein_path = f"{protein}.pdb"

# Parse the PDB file, then reduce the parsed atoms to one coordinate triple per
# residue (see the displayed array: residue label + [x, y, z]).
atoms = pcn_miner.readPDBFile(protein_path)
coordinates = pcn_miner.getResidueCoordinates(atoms)
coordinates

Start Reading PDB

2022-04-19 10:55:32.814468
End Reading PDB

2022-04-19 10:55:32.858383


array([['ALA27 A',
        array([' 172.298', ' 252.181', ' 223.613'], dtype='<U8')],
       ['TYR28 A',
        array([' 174.968', ' 250.129', ' 221.763'], dtype='<U8')],
       ['THR29 A',
        array([' 177.648', ' 250.850', ' 219.220'], dtype='<U8')],
       ...,
       ['LEU1145 C',
        array([' 213.878', ' 212.413', ' 120.414'], dtype='<U8')],
       ['ASP1146 C',
        array([' 216.645', ' 214.563', ' 118.918'], dtype='<U8')],
       ['SER1147 C',
        array([' 218.576', ' 211.497', ' 117.584'], dtype='<U8')]],
      dtype=object)

In [4]:
# Ask PCN-Miner for the residue-name mapping, then unpack its (key, value)
# pairs into a two-column array: column 0 is the key, column 1 the residue label.
dict_residue_name = pcn_miner.associateResidueName(coordinates)
residue_names = np.array([*dict_residue_name.items()])
residue_names

array([['0', 'ALA27 A'],
       ['1', 'TYR28 A'],
       ['2', 'THR29 A'],
       ...,
       ['2913', 'LEU1145 C'],
       ['2914', 'ASP1146 C'],
       ['2915', 'SER1147 C']], dtype='<U9')

In [5]:
# Build the adjacency matrix of the protein contact network from the residue
# coordinates. The recorded run also reports saving a distances matrix, an edge
# list and the adjacency matrix.
# NOTE(review): 4 and 8 are presumably the lower/upper distance cut-offs that
# define a contact — confirm against pcn_miner.adjacent_matrix.
A = pcn_miner.adjacent_matrix(
    output_path,
    coordinates,
    protein,
    4,
    8,
)
A

saved distances matrix████████████████████████████████████████████████████████████████████████████████| Current progress: 100.00%
saved edge list
saved adj matrix


array([[0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]])

In [6]:
# Convert the adjacency matrix into a networkx graph; nodes are the integer
# row/column indices of A.
G = nx.from_numpy_array(A)

In [7]:
# Column 1 of residue_names holds the residue labels; copy it out as a string
# array so the centrality scores can be reported per residue label.
residue_names_1 = residue_names[:, 1].astype(str)
# Prints the ten highest-scoring nodes and returns a {residue label: score} dict.
centrality_measures = pcn_miner.eigenvector_c(G, residue_names_1)
centrality_measures

Top 10 nodes by eigenvector centrality
('LEU1034 C', 0.09506922262250982)
('LEU1034 B', 0.09506922237552018)
('LEU1034 A', 0.09506921406112946)
('GLY885 C', 0.09111584665259138)
('GLY885 B', 0.09111584643992332)
('GLY885 A', 0.09111583804575356)
('GLY880 C', 0.08625413250126258)
('GLY880 B', 0.08625413239662934)
('GLY880 A', 0.08625412327540509)
('GLY889 C', 0.0847093903298337)


{'ALA27 A': 4.35914800403262e-06,
 'TYR28 A': 6.730764503156645e-06,
 'THR29 A': 9.47273520646497e-06,
 'ASN30 A': 9.843320932690619e-06,
 'SER31 A': 2.187032390115034e-05,
 'PHE32 A': 1.3867799149180712e-05,
 'THR33 A': 1.9179674082009236e-05,
 'ARG34 A': 2.499990964734135e-05,
 'GLY35 A': 3.693905653347723e-05,
 'VAL36 A': 3.137534417204451e-05,
 'TYR37 A': 2.785327767404353e-05,
 'TYR38 A': 2.3701811841088146e-05,
 'PRO39 A': 1.7659705798443336e-05,
 'ASP40 A': 7.160267709154822e-06,
 'LYS41 A': 3.711620541083524e-05,
 'VAL42 A': 4.45536113850784e-05,
 'PHE43 A': 4.450937350773079e-05,
 'ARG44 A': 4.058170999794514e-05,
 'SER45 A': 2.598128953510606e-05,
 'SER46 A': 3.3096651904404844e-05,
 'VAL47 A': 2.548116932771877e-05,
 'LEU48 A': 6.903001773753035e-05,
 'HIS49 A': 4.07933850722076e-05,
 'SER50 A': 8.893184678445804e-05,
 'THR51 A': 5.447559529822766e-05,
 'GLN52 A': 4.706199654310946e-05,
 'ASP53 A': 5.069214119922092e-05,
 'LEU54 A': 3.354075722813364e-05,
 'PHE55 A': 4.25353

In [8]:
# Hand the eigenvector-centrality scores of this protein to PCN-Miner for saving;
# the last argument labels the centrality measure.
# NOTE(review): files are presumably written below output_path — confirm in pcn_miner.
pcn_miner.save_centralities(output_path, centrality_measures, protein, "eigenvector_centrality")

In [9]:
# Have PCN-Miner render the centrality scores onto the structure with PyMOL.
# In the recorded run this writes a .pse session file under
# Centralities\eigenvector_centrality\Sessions.
pcn_pymol_scripts.pymol_plot_centralities(output_path, centrality_measures, protein_path, "eigenvector_centrality")

 PyMOL not running, entering library mode (experimental)
 Save: Please wait -- writing session file...
 Save: wrote "Centralities\eigenvector_centrality\Sessions\6vxx_eigenvector_centrality_session.pse".


In [10]:
# Open the PyMOL session file produced for this protein.
# The path is assembled with os.path.join instead of a literal containing
# backslashes: the old literal relied on invalid escape sequences ("\e", "\S",
# "\{") and could only ever match a Windows path. On Windows the resulting
# string is unchanged.
# NOTE(review): assumes PCN-Miner writes the session with the native separator
# on non-Windows platforms, and that output_path is still "" — confirm.
filepath = os.path.join(
    "Centralities",
    "eigenvector_centrality",
    "Sessions",
    "{}_eigenvector_centrality_session.pse".format(protein),
)
if platform == "win32":
    # os.startfile exists only on Windows; it opens the file with its associated program.
    os.startfile(filepath)
else:
    # Elsewhere, launch the `pymol` executable (must be on PATH) with the session file.
    subprocess.run(["pymol", filepath])