In [1]:
import pandas as pd
import networkx as nx
import numpy as np

from networkx.algorithms import bipartite

In [2]:
from services.pdb import pdbfile, pdbmolecule, pdbligand, pfam, go_terms
from services.uniprot import pdb2uniprot

In [11]:
def projected_graph(Graph):
    nx.set_node_attributes(Graph, bipartite.color(Graph), name='color')

    top = [i for i in Graph.nodes if Graph.nodes[i]['color'] == 1]
    projected = bipartite.overlap_weighted_projected_graph(Graph, top)
    return projected

In [4]:
pf = pd.read_csv('./data/pocket_feature_scores.csv', header=None, names=['pocket_0', 'pocket_1', 'weight'])

In [5]:
all_pockets = pf['pocket_0'].unique().tolist()

In [6]:
structures, ligands = zip( *[i.split('_') for i in all_pockets] )

In [7]:
structures = list(set(structures))
ligands = list(set(ligands))

In [8]:
print(len(structures), len(ligands))

3777 1333


In [28]:
pdb_uniprot_mapping = pdb2uniprot(structures[:100])
PUG = nx.from_pandas_edgelist(pdb_uniprot_mapping, source='From', target='To')

In [29]:
proj = projected_graph(PUG)
list(proj.edges(data=True))

[('3zly', '3zlw', {'weight': 1.0})]

In [19]:
pdb_pfam_mapping = pfam(','.join(structures[:100]))
pdb_pfam_df = pd.DataFrame(pdb_pfam_mapping)
PFG = nx.from_pandas_edgelist(pdb_pfam_df, source='structureId', target='pfamAcc')

In [20]:
proj_2 = projected_graph(PFG)
list(proj_2.edges(data=True))

[('4CY8', '3IHG', {'weight': 1.0}),
 ('3KN5', '3ZLY', {'weight': 1.0}),
 ('3KN5', '4I5H', {'weight': 1.0}),
 ('3KN5', '4DYM', {'weight': 1.0}),
 ('3KN5', '3T8O', {'weight': 0.5}),
 ('3KN5', '3ZLW', {'weight': 1.0}),
 ('3LST', '3REO', {'weight': 0.5}),
 ('4IAW', '3APW', {'weight': 1.0}),
 ('2C31', '4K9P', {'weight': 1.0}),
 ('5JY1', '4HP8', {'weight': 1.0}),
 ('4I5H', '3ZLY', {'weight': 1.0}),
 ('4I5H', '4DYM', {'weight': 1.0}),
 ('4I5H', '3T8O', {'weight': 0.5}),
 ('4I5H', '3ZLW', {'weight': 1.0}),
 ('2HZI', '4E5W', {'weight': 1.0}),
 ('2HZI', '2NRU', {'weight': 1.0}),
 ('2HZI', '3DQW', {'weight': 1.0}),
 ('2HZI', '2EVA', {'weight': 1.0}),
 ('2HZI', '2Y6O', {'weight': 1.0}),
 ('2HZI', '3VO3', {'weight': 1.0}),
 ('2HZI', '5CNO', {'weight': 1.0}),
 ('2HZI', '5E8X', {'weight': 1.0}),
 ('2HZI', '2B7A', {'weight': 1.0}),
 ('2HZI', '3C1X', {'weight': 1.0}),
 ('2B7A', '4E5W', {'weight': 1.0}),
 ('2B7A', '2NRU', {'weight': 1.0}),
 ('2B7A', '3DQW', {'weight': 1.0}),
 ('2B7A', '2EVA', {'weight':

In [27]:
proj_2['3zly'.upper()]

AtlasView({'3KN5': {'weight': 1.0}, '4I5H': {'weight': 1.0}, '3T8O': {'weight': 0.5}, '4DYM': {'weight': 1.0}, '3ZLW': {'weight': 1.0}})

In [44]:
test = go_terms(','.join(['3th5','2fju' ]))

In [45]:
test

[{'id': 'GO:0001764',
  'structureId': '3TH5',
  'chainId': 'A',
  'detail': {'name': 'neuron migration',
   'definition': 'The characteristic movement of an immature neuron from germinal zones to specific positions where they will reside as they mature.',
   'synonyms': 'neuron chemotaxis, neuron guidance, neuronal migration',
   'ontology': 'B'}},
 {'id': 'GO:0001934',
  'structureId': '3TH5',
  'chainId': 'A',
  'detail': {'name': 'positive regulation of protein phosphorylation',
   'definition': 'Any process that activates or increases the frequency, rate or extent of addition of phosphate groups to amino acids within a protein.',
   'synonyms': 'activation of protein amino acid phosphorylation, positive regulation of protein amino acid phosphorylation, stimulation of protein amino acid phosphorylation, up regulation of protein amino acid phosphorylation, up-regulation of protein amino acid phosphorylation, upregulation of protein amino acid phosphorylation',
   'ontology': 'B'}},
