# THIS NOTEBOOK CONTAINS AN EXAMPLE OF A COMMUNITY DETECTION ALGORITHM, IN THIS CASE LOUVAIN, APPLIED TO A Protein Contact Network OF THE SARSCOV2 SPIKE PROTEIN

In [6]:
from pcn_miner import pcn_miner, pcn_pymol_scripts
import numpy as np
import networkx as nx
import os

In [25]:
adj_path = "Adj\\"
output_path = ""

In [26]:
protein = "6vxx"
protein_path = "{}.pdb".format(protein)
atoms = pcn_miner.readPDBFile(protein_path) #read 
coordinates = pcn_miner.getResidueCoordinates(atoms)
coordinates

Start Reading PDB

2022-04-17 18:09:22.861942
End Reading PDB

2022-04-17 18:09:22.910747


array([['ALA27 A',
        array([' 172.298', ' 252.181', ' 223.613'], dtype='<U8')],
       ['TYR28 A',
        array([' 174.968', ' 250.129', ' 221.763'], dtype='<U8')],
       ['THR29 A',
        array([' 177.648', ' 250.850', ' 219.220'], dtype='<U8')],
       ...,
       ['LEU1145 C',
        array([' 213.878', ' 212.413', ' 120.414'], dtype='<U8')],
       ['ASP1146 C',
        array([' 216.645', ' 214.563', ' 118.918'], dtype='<U8')],
       ['SER1147 C',
        array([' 218.576', ' 211.497', ' 117.584'], dtype='<U8')]],
      dtype=object)

In [27]:
dict_residue_name = pcn_miner.associateResidueName(coordinates)
residue_names = np.array(list (dict_residue_name.items()))
residue_names

array([['0', 'ALA27 A'],
       ['1', 'TYR28 A'],
       ['2', 'THR29 A'],
       ...,
       ['2913', 'LEU1145 C'],
       ['2914', 'ASP1146 C'],
       ['2915', 'SER1147 C']], dtype='<U9')

In [28]:
A = pcn_miner.adjacent_matrix(output_path, coordinates, protein, 4, 8)
A

saved distances matrix████████████████████████████████████████████████████████████████████████████████| Current progress: 100.00%
saved edge list
saved adj matrix


array([[0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]])

In [17]:
G = nx.from_numpy_array(A)
nx.draw(G)

In [18]:
louvain_labels = pcn_miner.louvain(G)
louvain_labels

Start Louvain

2022-04-17 18:02:28.782146
number of Louvain communities: 20
End Louvain

2022-04-17 18:02:29.348492


array([[10.],
       [10.],
       [10.],
       ...,
       [ 4.],
       [ 4.],
       [ 4.]])

In [19]:
n_coms = int(max(louvain_labels)+1)
n_coms

20

In [20]:
pcn_miner.save_labels(output_path, louvain_labels, residue_names, protein,  method="louvain")

0.0 229
1.0 228
2.0 216
3.0 216
4.0 185
5.0 179
6.0 168
7.0 166
8.0 165
9.0 158
10.0 145
11.0 144
12.0 116
13.0 106
14.0 91
15.0 90
16.0 87
17.0 82
18.0 75
19.0 70
229
Communities 0:  ['LEU699 A', 'GLY700 A', 'ALA701 A', 'GLU702 A', 'ASN703 A', 'SER704 A', 'VAL705 A', 'ALA706 A', 'TYR707 A', 'SER708 A', 'ASN709 A', 'ASN710 A', 'SER711 A', 'ILE712 A', 'ALA713 A', 'ILE714 A', 'PRO715 A', 'THR716 A', 'ASN717 A', 'PHE718 A', 'THR719 A', 'SER1037 A', 'LYS1038 A', 'ARG1039 A', 'VAL1040 A', 'ASP1041 A', 'PHE1042 A', 'CYS1043 A', 'LYS1045 A', 'GLY1046 A', 'TYR1047 A', 'HIS1048 A', 'THR1066 A', 'TYR1067 A', 'VAL1068 A', 'PRO1069 A', 'ALA1070 A', 'GLN1071 A', 'GLU1072 A', 'LYS1073 A', 'ASN1074 A', 'PHE1075 A', 'THR1076 A', 'THR1077 A', 'ALA1078 A', 'PRO1079 A', 'ALA1080 A', 'ILE1081 A', 'CYS1082 A', 'HIS1083 A', 'ASP1084 A', 'GLY1085 A', 'LYS1086 A', 'ALA1087 A', 'HIS1088 A', 'PHE1089 A', 'PRO1090 A', 'ARG1091 A', 'GLU1092 A', 'GLY1093 A', 'VAL1094 A', 'PHE1095 A', 'VAL1096 A', 'SER1097 A', 'ASN

{'ALA27 A': 10,
 'TYR28 A': 10,
 'THR29 A': 10,
 'ASN30 A': 10,
 'SER31 A': 10,
 'PHE32 A': 10,
 'THR33 A': 10,
 'ARG34 A': 10,
 'GLY35 A': 10,
 'VAL36 A': 10,
 'TYR37 A': 10,
 'TYR38 A': 10,
 'PRO39 A': 10,
 'ASP40 A': 18,
 'LYS41 A': 18,
 'VAL42 A': 18,
 'PHE43 A': 18,
 'ARG44 A': 10,
 'SER45 A': 10,
 'SER46 A': 10,
 'VAL47 A': 10,
 'LEU48 A': 10,
 'HIS49 A': 10,
 'SER50 A': 10,
 'THR51 A': 10,
 'GLN52 A': 10,
 'ASP53 A': 10,
 'LEU54 A': 10,
 'PHE55 A': 10,
 'LEU56 A': 10,
 'PRO57 A': 10,
 'PHE58 A': 10,
 'PHE59 A': 10,
 'SER60 A': 10,
 'ASN61 A': 10,
 'VAL62 A': 10,
 'THR63 A': 10,
 'TRP64 A': 10,
 'PHE65 A': 10,
 'HIS66 A': 10,
 'ALA67 A': 10,
 'ILE68 A': 10,
 'HIS69 A': 10,
 'ASP80 A': 10,
 'ASN81 A': 19,
 'PRO82 A': 19,
 'VAL83 A': 19,
 'LEU84 A': 19,
 'PRO85 A': 19,
 'PHE86 A': 19,
 'ASN87 A': 10,
 'ASP88 A': 10,
 'GLY89 A': 10,
 'VAL90 A': 10,
 'TYR91 A': 10,
 'PHE92 A': 10,
 'ALA93 A': 10,
 'SER94 A': 10,
 'THR95 A': 10,
 'GLU96 A': 10,
 'LYS97 A': 10,
 'SER98 A': 10,
 'ASN99 

In [21]:
pcn_pymol_scripts.pymol_plot(protein_path, output_path, "Communities", "louvain", n_coms)

 PyMOL not running, entering library mode (experimental)
ALA27 A hotpink
TYR28 A hotpink
THR29 A hotpink
ASN30 A hotpink
SER31 A hotpink
PHE32 A hotpink
THR33 A hotpink
ARG34 A hotpink
GLY35 A hotpink
VAL36 A hotpink
TYR37 A hotpink
TYR38 A hotpink
PRO39 A hotpink
ASP40 A teal
LYS41 A teal
VAL42 A teal
PHE43 A teal
ARG44 A hotpink
SER45 A hotpink
SER46 A hotpink
VAL47 A hotpink
LEU48 A hotpink
HIS49 A hotpink
SER50 A hotpink
THR51 A hotpink
GLN52 A hotpink
ASP53 A hotpink
LEU54 A hotpink
PHE55 A hotpink
LEU56 A hotpink
PRO57 A hotpink
PHE58 A hotpink
PHE59 A hotpink
SER60 A hotpink
ASN61 A hotpink
VAL62 A hotpink
THR63 A hotpink
TRP64 A hotpink
PHE65 A hotpink
HIS66 A hotpink
ALA67 A hotpink
ILE68 A hotpink
HIS69 A hotpink
ASP80 A hotpink
ASN81 A ruby
PRO82 A ruby
VAL83 A ruby
LEU84 A ruby
PRO85 A ruby
PHE86 A ruby
ASN87 A hotpink
ASP88 A hotpink
GLY89 A hotpink
VAL90 A hotpink
TYR91 A hotpink
PHE92 A hotpink
ALA93 A hotpink
SER94 A hotpink
THR95 A hotpink
GLU96 A hotpink
LYS97 A hotpi

In [22]:
#windows only
os.startfile("louvain\Sessions\{}_Communities_louvain_ncoms{}_session.pse".format(protein, n_coms))

FileNotFoundError: [WinError 2] Impossibile trovare il file specificato: '\\louvain\\Sessions\\6vxx_Communities_louvain_ncoms20_session.pse'