In [1]:
%reload_ext autoreload

"""
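This notebook builds the KEGG, HPRD, BioPlex, PINA2 and innate (curated and
imported) interaction networks, maps them to the most recent UniProt
accessions, builds features for each protein and PPI, and saves the raw and
processed training, testing and interactome networks.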

Usage:
  build_data.py [--interpro] [--pfam] [--mf] [--cc] [--bp]
                [--update_features] [--update_mapping]
                [--induce] [--verbose] [--n_jobs=J] [--backend=B]
  build_data.py -h | --help

Options:
  -h --help     Show this screen.
  --interpro    Use interpro domains in features.
  --pfam        Use Pfam domains in features.
  --mf          Use Molecular Function Gene Ontology in features.
  --cc          Use Cellular Compartment Gene Ontology in features.
  --bp          Use Biological Process Gene Ontology in features.
  --induce      Use ULCA inducer over Gene Ontology.
  --verbose     Print intermediate output for debugging.
  --n_jobs=J            Number of processes to run in parallel [default: 1]
  --backend=B           Python concurrent backend [default: multiprocessing]
  --update_features     Delete old feature cache and create a new one.
  --update_mapping      Delete old accession mapping and create a new one.
"""

import os
import pandas as pd

from pyppi.base import PPI, parse_args
from pyppi.data import bioplex_network_path, pina2_network_path
from pyppi.data import bioplex_v4, pina2, innate_curated, innate_imported
from pyppi.data import innate_i_network_path, innate_c_network_path
from pyppi.data import interactome_network_path, full_training_network_path
from pyppi.data import kegg_network_path, hprd_network_path
from pyppi.data import load_uniprot_accession_map, save_uniprot_accession_map
from pyppi.data import testing_network_path, training_network_path
from pyppi.data import save_network_to_path
from pyppi.data import save_ptm_labels
from pyppi.data import ppi_features_path, accession_features_path
from pyppi.data import annotation_extractor_path
from pyppi.data import pickle_pd_object

from pyppi.data_mining.features import AnnotationExtractor
from pyppi.data_mining.generic import bioplex_func, mitab_func, pina_func
from pyppi.data_mining.generic import generic_to_dataframe
from pyppi.data_mining.hprd import hprd_to_dataframe
from pyppi.data_mining.tools import process_interactions, LABEL
from pyppi.data_mining.tools import remove_intersection, remove_labels
from pyppi.data_mining.tools import map_network_accessions
from pyppi.data_mining.uniprot import UniProt, get_active_instance
from pyppi.data_mining.kegg import download_pathway_ids, pathways_to_dataframe

# -------------------------------------------------------------------------- #
#                     MODIFY THESE TO SUIT YOUR NEEDS
# -------------------------------------------------------------------------- #
# Each option is bound to its own variable first; ``args`` is then assembled
# from those same objects so the dictionary and the variables always agree.

# Number of processes to run in parallel.
n_jobs = 4
# Use the ULCA inducer over Gene Ontology.
induce = True
# Print intermediate output for debugging.
verbose = True
# Annotation sources used as features: the three Gene Ontology namespaces
# plus InterPro and Pfam domains.
selection = [
    UniProt.data_types().GO_MF.value,
    UniProt.data_types().GO_BP.value,
    UniProt.data_types().GO_CC.value,
    UniProt.data_types().INTERPRO.value,
    UniProt.data_types().PFAM.value
]
# Delete the old feature cache and create a new one.
update_features = True
# Delete the old accession mapping and create a new one.
update_mapping = True
# Python concurrent backend.
backend = 'multiprocessing'

args = {
    'n_jobs': n_jobs,
    'induce': induce,
    'verbose': verbose,
    'selection': selection,
    'update_features': update_features,
    'update_mapping': update_mapping,
    'backend': backend
}
# -------------------------------------------------------------------------- #

# Fetch the KEGG pathway identifiers for 'hsa' and obtain the shared UniProt
# instance. Verbosity follows the ``verbose`` setting from the configuration
# section above instead of being hard-coded.
print("Downloading from KEGG and loading UniProt database instance...")
pathways = download_pathway_ids('hsa')
uniprot = get_active_instance(verbose=verbose)
data_types = UniProt.data_types()

load obo file /Users/daniel/.pyppi/go.obo
/Users/daniel/.pyppi/go.obo: fmt(1.2) rel(2017-07-14) 48,971 GO Terms
Downloading from KEGG and loading UniProt database instance...
First time loading on UniProt instance. Make take a few moments


In [2]:
# Construct all the networks
# Construct all the networks
# KEGG: build the interaction frame from the pathway ids fetched earlier,
# mapped to UniProt, keeping self-edges but dropping NaNs and duplicates.
print("Building KEGG interactions...")
kegg = pathways_to_dataframe(
    pathway_ids=pathways, map_to_uniprot=True,
    allow_duplicates=False, allow_self_edges=True, drop_nan=True
)

Building KEGG interactions...


In [3]:
# HPRD: same filtering as the other networks (self-edges kept, NaNs and
# duplicates dropped).
print("Building HPRD interactions...")
hprd = hprd_to_dataframe(
    allow_duplicates=False, allow_self_edges=True, drop_nan=True
)

Building HPRD interactions...


In [4]:
# BioPlex is the first of the interactome networks; it is parsed with the
# generic reader using the BioPlex-specific line parser.
print("Building Interactome interactions...")
bioplex = generic_to_dataframe(
    f_input=bioplex_v4(), parsing_func=bioplex_func,
    allow_duplicates=False, allow_self_edges=True, drop_nan=True
)

Building Interactome interactions...


In [5]:
# The result is stored under the name ``pina2``, which is also the name of the
# loader imported from ``pyppi.data``. Import the loader under an alias here so
# that this cell can be re-run after ``pina2`` has been rebound to a DataFrame
# (calling the DataFrame would otherwise raise a TypeError).
from pyppi.data import pina2 as load_pina2

pina2 = generic_to_dataframe(
    f_input=load_pina2(),
    parsing_func=pina_func,
    drop_nan=True,
    allow_self_edges=True,
    allow_duplicates=False
)

In [6]:
# Curated innate network, read with the MITAB line parser.
innate_c = generic_to_dataframe(
    f_input=innate_curated(), parsing_func=mitab_func,
    allow_duplicates=False, allow_self_edges=True, drop_nan=True
)

In [7]:
# Imported innate network, read with the MITAB line parser.
innate_i = generic_to_dataframe(
    f_input=innate_imported(), parsing_func=mitab_func,
    allow_duplicates=False, allow_self_edges=True, drop_nan=True
)

In [8]:
print("Mapping to most recent uniprot accessions...")
# Collect every unique accession that appears as a source or a target in any
# of the networks.
networks = [kegg, hprd, bioplex, pina2, innate_i, innate_c]
sources = set()
targets = set()
for net in networks:
    sources.update(net.source.values)
    targets.update(net.target.values)
accessions = list(sources.union(targets))

# Reuse the saved mapping unless a refresh was requested; when the saved
# mapping cannot be read, fall back to building a new one.
rebuild_mapping = update_mapping
if not rebuild_mapping:
    try:
        accession_mapping = load_uniprot_accession_map()
    except IOError:
        rebuild_mapping = True

if rebuild_mapping:
    accession_mapping = uniprot.batch_map(accessions)
    save_uniprot_accession_map(accession_mapping)

Mapping to most recent uniprot accessions...
Record for P54726 not found.
Record for Q3TAA7 not found.
Record for P46978 not found.
Record for P50540 not found.
Record for Q8BJS8 not found.
Record for Q6P7W2 not found.
Record for Q9D2C7 not found.
Record for O88621 not found.
Record for Q60972 not found.
Record for P82347 not found.
Record for Q64521 not found.
Record for Q9JII6 not found.
Record for Q8BIA4 not found.
Record for B2RUJ5 not found.
Record for Q69ZQ2 not found.
Record for Q61114 not found.
Record for Q60665 not found.
Record for O55047 not found.
Record for Q62277 not found.
Record for Q8BG17 not found.
Record for Q29460 not found.
Record for Q9QZ05 not found.
Record for P28575 not found.
Record for Q9D8X2 not found.
Record for P58462 not found.
Record for Q08501 not found.
Record for D4IM81 not found.
Record for Q5D1E7 not found.
Record for Q8BG51 not found.
Record for Q91V12 not found.
Record for O88196 not found.
Record for Q9Z2V5 not found.
Record for Q9ERI2 not found

Record for Q91X79 not found.
Record for Q8BPX9 not found.
Record for Q921Q7 not found.
Record for P47810 not found.
Record for P62748 not found.
Record for Q9CQT5 not found.
Record for Q8K3F6 not found.
Record for Q9DAX2 not found.
Record for P43021 not found.
Record for P41105 not found.
Record for Q61361 not found.
Record for Q8BHK9 not found.
Record for Q8VYH6 not found.
Record for O70209 not found.
Record for Q3TRR0 not found.
Record for Q8BJH1 not found.
Record for Q923U0 not found.
Record for P97440 not found.
Record for P34022 not found.
Record for P45700 not found.
Record for Q9D646 not found.
Record for Q63714 not found.
Record for P48024 not found.
Record for P19123 not found.
Record for Q8R001 not found.
Record for Q9CQN4 not found.
Record for P54775 not found.
Record for P04247 not found.
Record for Q9Z172 not found.
Record for Q6Y7W8 not found.
Record for P28867 not found.
Record for Q9R229 not found.
Record for Q9Z0Y2 not found.
Record for Q7SIB2 not found.
Record for Q8C

Record for Q5XJY5 not found.
Record for O55057 not found.
Record for P00517 not found.
Record for Q9D3D9 not found.
Record for Q9CU65 not found.
Record for P13516 not found.
Record for P20029 not found.
Record for Q64691 not found.
Record for P32037 not found.
Record for O35214 not found.
Record for Q8CEF1 not found.
Record for Q62176 not found.
Record for Q64471 not found.
Record for P42866 not found.
Record for Q9JHU4 not found.
Record for O08788 not found.
Record for P62880 not found.
Record for Q8K5B2 not found.
Record for Q00560 not found.
Record for Q99PV5 not found.
Record for Q9D6N5 not found.
Record for Q7TSN2 not found.
Record for P61205 not found.
Record for O35678 not found.
Record for P97315 not found.
Record for P03953 not found.
Record for Q62205 not found.
Record for P50636 not found.
Record for P11087 not found.
Record for Q9CWT6 not found.
Record for P52592 not found.
Record for Q9D0R2 not found.
Record for P29699 not found.
Record for P07758 not found.
Record for Q3T

Record for P48754 not found.
Record for Q4KMM3 not found.
Record for Q9R190 not found.
Record for Q8VDQ1 not found.
Record for P23359 not found.
Record for O89094 not found.
Record for Q6P5E4 not found.
Record for Q8VEM8 not found.
Record for O35451 not found.
Record for P11714 not found.
Record for Q4VBE8 not found.
Record for P61161 not found.
Record for Q3TBT3 not found.
Record for Q7TNS8 not found.
Record for P70121 not found.
Record for Q7TSH3 not found.
Record for Q80ZF8 not found.
Record for Q9DC69 not found.
Record for O55017 not found.
Record for Q8R1M2 not found.
Record for Q5SSF7 not found.
Record for Q9WU39 not found.
Record for O70460 not found.
Record for O09127 not found.
Record for P27870 not found.
Record for Q9D0R8 not found.
Record for Q99K51 not found.
Record for Q80YQ2 not found.
Record for Q8BGG7 not found.
Record for Q9QZR5 not found.
Record for P14602 not found.
Record for Q99MJ9 not found.
Record for Q9JME5 not found.
Record for P80318 not found.
Record for P62

Record for A0ZZW3 not found.
Record for Q9CQY7 not found.
Record for Q5DTT2 not found.
Record for Q8BL97 not found.
Record for Q8BIQ3 not found.
Record for Q8CFI7 not found.
Record for Q9JI74 not found.
Record for Q569Z5 not found.
Record for Q9D0M3 not found.
Record for Q8R1G2 not found.
Record for P53810 not found.
Record for Q8CJ70 not found.
Record for Q9Z0N2 not found.
Record for Q91VH1 not found.
Record for Q61606 not found.
Record for P52850 not found.
Record for P41242 not found.
Record for Q5RJ54 not found.
Record for Q03358 not found.
Record for Q91YQ3 not found.
Record for P48281 not found.
Record for Q8BG81 not found.
Record for Q9ET26 not found.
Record for Q08943 not found.
Record for P00860 not found.
Record for P97288 not found.
Record for Q9CPS7 not found.
Record for O88572 not found.
Record for O35728 not found.
Record for Q9QZ06 not found.
Record for Q8BIJ7 not found.
Record for Q02788 not found.
Record for Q920I9 not found.
Record for Q9EPR5 not found.
Record for P27

Record for P63139 not found.
Record for Q149L7 not found.
Record for Q922J3 not found.
Record for Q8K349 not found.
Record for C0HKE5 not found.
Record for C0HKE6 not found.
Record for C0HKE7 not found.
Record for C0HKE8 not found.
Record for C0HKE9 not found.
Record for C0HKE1 not found.
Record for C0HKE2 not found.
Record for C0HKE3 not found.
Record for C0HKE4 not found.
Record for O70258 not found.
Record for P08030 not found.
Record for Q62087 not found.
Record for P09405 not found.
Record for P05059 not found.
Record for P57680 not found.
Record for Q9DBX6 not found.
Record for Q9QZN4 not found.
Record for Q91WK5 not found.
Record for O70343 not found.
Record for P16546 not found.
Record for Q8CFU8 not found.
Record for Q62356 not found.
Record for P05977 not found.
Record for Q7TQJ8 not found.
Record for Q8BP71 not found.
Record for P30282 not found.
Record for Q8VEE1 not found.
Record for Q28153 not found.
Record for Q8R5H6 not found.
Record for P02721 not found.
Record for Q9W

Record for Q99KE1 not found.
Record for Q8VHR0 not found.
Record for P08152 not found.
Record for Q9CY18 not found.
Record for O35594 not found.
Record for Q9QY30 not found.
Record for P34928 not found.
Record for P70207 not found.
Record for Q9CQ40 not found.
Record for O08746 not found.
Record for Q9D706 not found.
Record for Q8MEU5 not found.
Record for P01835 not found.
Record for Q8K207 not found.
Record for Q9D6J1 not found.
Record for O35136 not found.
Record for Q99N13 not found.
Record for Q9V6L0 not found.
Record for O08583 not found.
Record for Q8K1C9 not found.
Record for Q9WVP6 not found.
Record for Q61539 not found.
Record for O35457 not found.
Record for P70458 not found.
Record for Q96329 not found.
Record for Q99JA4 not found.
Record for Q8R2V2 not found.
Record for P11609 not found.
Record for P11983 not found.
Record for P0C605 not found.
Record for P07978 not found.
Record for Q8BJU0 not found.
Record for Q5I1X5 not found.
Record for P61620 not found.
Record for P97

In [9]:
print("Mapping each network to the most recent uniprot accessions...")
# Rewrite the KEGG network through the accession map; no label-count
# filtering and no merging at this stage.
kegg = map_network_accessions(
    interactions=kegg,
    accession_map=accession_mapping,
    drop_nan=True,
    allow_self_edges=True,
    allow_duplicates=False,
    min_counts=None,
    merge=False
)

Mapping each network to the most recent uniprot accessions...


In [10]:
# Rewrite the HPRD network through the accession map (same options as the
# other networks).
hprd = map_network_accessions(
    interactions=hprd,
    accession_map=accession_mapping,
    drop_nan=True,
    allow_self_edges=True,
    allow_duplicates=False,
    min_counts=None,
    merge=False
)

In [11]:
# Rewrite the PINA2 network through the accession map (same options as the
# other networks).
pina2 = map_network_accessions(
    interactions=pina2,
    accession_map=accession_mapping,
    drop_nan=True,
    allow_self_edges=True,
    allow_duplicates=False,
    min_counts=None,
    merge=False
)

In [12]:
# Rewrite the BioPlex network through the accession map (same options as the
# other networks).
bioplex = map_network_accessions(
    interactions=bioplex,
    accession_map=accession_mapping,
    drop_nan=True,
    allow_self_edges=True,
    allow_duplicates=False,
    min_counts=None,
    merge=False
)

In [13]:
# Rewrite the curated innate network through the accession map (same options
# as the other networks).
innate_c = map_network_accessions(
    interactions=innate_c,
    accession_map=accession_mapping,
    drop_nan=True,
    allow_self_edges=True,
    allow_duplicates=False,
    min_counts=None,
    merge=False
)

In [None]:
# Rewrite the imported innate network through the accession map (same options
# as the other networks).
innate_i = map_network_accessions(
    interactions=innate_i,
    accession_map=accession_mapping,
    drop_nan=True,
    allow_self_edges=True,
    allow_duplicates=False,
    min_counts=None,
    merge=False
)
# Rebuild the list so it refers to the re-mapped frames rather than the
# frames collected before mapping.
networks = [hprd, kegg, bioplex, pina2, innate_i, innate_c]

In [None]:
print("Building features for each protein and PPI...")
# Build the extractor from the values in the configuration section. These
# were previously hard-coded here (induce=True, n_jobs=3, verbose=True,
# backend='threading'), so editing the configuration had no effect on this
# step.
ae = AnnotationExtractor(
    induce=induce,
    selection=selection,
    n_jobs=n_jobs,
    verbose=verbose,
    cache=not update_features,
    backend=backend,
)

# Refit when a refresh was requested or when either cached feature file is
# missing from disk.
cond_1 = os.path.isfile(ppi_features_path)
cond_2 = os.path.isfile(accession_features_path)
if update_features or (not cond_1) or (not cond_2):
    tuple_gens = [zip(n.source, n.target) for n in networks]
    ppis = [(a, b) for tuples in tuple_gens for (a, b) in tuples]
    ae.fit(ppis)

    # Sanity check: the fitted vocabularies must contain exactly the unique
    # accessions and unique PPIs present across all networks. A separate
    # name is used for the per-network pairs so ``ppis`` (the list that was
    # fitted) is not overwritten.
    unique_ppis = set()
    unique_acc = set()
    for df in networks:
        network_ppis = list(zip(df.source, df.target))
        unique_ppis |= set(PPI(a, b) for (a, b) in network_ppis)
        unique_acc |= (
            set(a for a, _ in network_ppis) | set(b for _, b in network_ppis)
        )
    assert ae.accession_vocabulary.shape[0] == len(unique_acc), \
        "Accession vocabulary size does not match the unique accessions."
    assert ae.ppi_vocabulary.shape[0] == len(unique_ppis), \
        "PPI vocabulary size does not match the unique PPIs."
    ae.cache()

Building features for each protein and PPI...
Downloading features for each PPI...
Acquiring data for accession 1/17926..
Acquiring data for accession 2/17926..
Acquiring data for accession 3/17926..
Acquiring data for accession 4/17926..
Acquiring data for accession 5/17926..
Acquiring data for accession 6/17926..
Acquiring data for accession 7/17926..
Acquiring data for accession 8/17926..
Acquiring data for accession 9/17926..
Acquiring data for accession 10/17926..
Acquiring data for accession 11/17926..
Acquiring data for accession 12/17926..
Acquiring data for accession 13/17926..
Acquiring data for accession 14/17926..
Acquiring data for accession 15/17926..
Acquiring data for accession 16/17926..
Acquiring data for accession 17/17926..
Acquiring data for accession 18/17926..
Acquiring data for accession 19/17926..
Acquiring data for accession 20/17926..
Acquiring data for accession 21/17926..
Acquiring data for accession 22/17926..
Acquiring data for accession 23/17926..
Acquir

Acquiring data for accession 470/17926..
Acquiring data for accession 471/17926..
Acquiring data for accession 472/17926..
Acquiring data for accession 473/17926..
Acquiring data for accession 474/17926..
Acquiring data for accession 475/17926..
Acquiring data for accession 476/17926..
Acquiring data for accession 477/17926..
Acquiring data for accession 478/17926..
Acquiring data for accession 479/17926..
Acquiring data for accession 480/17926..
Acquiring data for accession 481/17926..
Acquiring data for accession 482/17926..
Acquiring data for accession 483/17926..
Acquiring data for accession 484/17926..
Acquiring data for accession 485/17926..
Acquiring data for accession 486/17926..
Acquiring data for accession 487/17926..
Acquiring data for accession 488/17926..
Acquiring data for accession 489/17926..
Acquiring data for accession 490/17926..
Acquiring data for accession 491/17926..
Acquiring data for accession 492/17926..
Acquiring data for accession 493/17926..
Acquiring data f

Acquiring data for accession 1478/17926..
Acquiring data for accession 1479/17926..
Acquiring data for accession 1480/17926..
Acquiring data for accession 1481/17926..
Acquiring data for accession 1482/17926..
Acquiring data for accession 1483/17926..
Acquiring data for accession 1484/17926..
Acquiring data for accession 1485/17926..
Acquiring data for accession 1486/17926..
Acquiring data for accession 1487/17926..
Acquiring data for accession 1488/17926..
Acquiring data for accession 1489/17926..
Acquiring data for accession 1490/17926..
Acquiring data for accession 1491/17926..
Acquiring data for accession 1492/17926..
Acquiring data for accession 1493/17926..
Acquiring data for accession 1494/17926..
Acquiring data for accession 1495/17926..
Acquiring data for accession 1496/17926..
Acquiring data for accession 1497/17926..
Acquiring data for accession 1498/17926..
Acquiring data for accession 1499/17926..
Acquiring data for accession 1500/17926..
Acquiring data for accession 1501/

Acquiring data for accession 2465/17926..
Acquiring data for accession 2466/17926..
Acquiring data for accession 2467/17926..
Acquiring data for accession 2468/17926..
Acquiring data for accession 2469/17926..
Acquiring data for accession 2470/17926..
Acquiring data for accession 2471/17926..
Acquiring data for accession 2472/17926..
Acquiring data for accession 2473/17926..
Acquiring data for accession 2474/17926..
Acquiring data for accession 2475/17926..
Acquiring data for accession 2476/17926..
Acquiring data for accession 2477/17926..
Acquiring data for accession 2478/17926..
Acquiring data for accession 2479/17926..
Acquiring data for accession 2480/17926..
Acquiring data for accession 2481/17926..
Acquiring data for accession 2482/17926..
Acquiring data for accession 2483/17926..
Acquiring data for accession 2484/17926..
Acquiring data for accession 2485/17926..
Acquiring data for accession 2486/17926..
Acquiring data for accession 2487/17926..
Acquiring data for accession 2488/

Acquiring data for accession 3470/17926..
Acquiring data for accession 3471/17926..
Acquiring data for accession 3472/17926..
Acquiring data for accession 3473/17926..
Acquiring data for accession 3474/17926..
Acquiring data for accession 3475/17926..
Acquiring data for accession 3476/17926..
Acquiring data for accession 3477/17926..
Acquiring data for accession 3478/17926..
Acquiring data for accession 3479/17926..
Acquiring data for accession 3480/17926..
Acquiring data for accession 3481/17926..
Acquiring data for accession 3482/17926..
Acquiring data for accession 3483/17926..
Acquiring data for accession 3484/17926..
Acquiring data for accession 3485/17926..
Acquiring data for accession 3486/17926..
Acquiring data for accession 3487/17926..
Acquiring data for accession 3488/17926..
Acquiring data for accession 3489/17926..
Acquiring data for accession 3490/17926..
Acquiring data for accession 3491/17926..
Acquiring data for accession 3492/17926..
Acquiring data for accession 3493/

Acquiring data for accession 4468/17926..
Acquiring data for accession 4469/17926..
Acquiring data for accession 4470/17926..
Acquiring data for accession 4471/17926..
Acquiring data for accession 4472/17926..
Acquiring data for accession 4473/17926..
Acquiring data for accession 4474/17926..
Acquiring data for accession 4475/17926..
Acquiring data for accession 4476/17926..
Acquiring data for accession 4477/17926..
Acquiring data for accession 4478/17926..
Acquiring data for accession 4479/17926..
Acquiring data for accession 4480/17926..
Acquiring data for accession 4481/17926..
Acquiring data for accession 4482/17926..
Acquiring data for accession 4483/17926..
Acquiring data for accession 4484/17926..
Acquiring data for accession 4485/17926..
Acquiring data for accession 4486/17926..
Acquiring data for accession 4487/17926..
Acquiring data for accession 4488/17926..
Acquiring data for accession 4489/17926..
Acquiring data for accession 4490/17926..
Acquiring data for accession 4491/

Acquiring data for accession 5465/17926..
Acquiring data for accession 5466/17926..
Acquiring data for accession 5467/17926..
Acquiring data for accession 5468/17926..
Acquiring data for accession 5469/17926..
Acquiring data for accession 5470/17926..
Acquiring data for accession 5471/17926..
Acquiring data for accession 5472/17926..
Acquiring data for accession 5473/17926..
Acquiring data for accession 5474/17926..
Acquiring data for accession 5475/17926..
Acquiring data for accession 5476/17926..
Acquiring data for accession 5477/17926..
Acquiring data for accession 5478/17926..
Acquiring data for accession 5479/17926..
Acquiring data for accession 5480/17926..
Acquiring data for accession 5481/17926..
Acquiring data for accession 5482/17926..
Acquiring data for accession 5483/17926..
Acquiring data for accession 5484/17926..
Acquiring data for accession 5485/17926..
Acquiring data for accession 5486/17926..
Acquiring data for accession 5487/17926..
Acquiring data for accession 5488/

Acquiring data for accession 6463/17926..
Acquiring data for accession 6464/17926..
Acquiring data for accession 6465/17926..
Acquiring data for accession 6466/17926..
Acquiring data for accession 6467/17926..
Acquiring data for accession 6468/17926..
Acquiring data for accession 6469/17926..
Acquiring data for accession 6470/17926..
Acquiring data for accession 6471/17926..
Acquiring data for accession 6472/17926..
Acquiring data for accession 6473/17926..
Acquiring data for accession 6474/17926..
Acquiring data for accession 6475/17926..
Acquiring data for accession 6476/17926..
Acquiring data for accession 6477/17926..
Acquiring data for accession 6478/17926..
Acquiring data for accession 6479/17926..
Acquiring data for accession 6480/17926..
Acquiring data for accession 6481/17926..
Acquiring data for accession 6482/17926..
Acquiring data for accession 6483/17926..
Acquiring data for accession 6484/17926..
Acquiring data for accession 6485/17926..
Acquiring data for accession 6486/

Acquiring data for accession 7459/17926..
Acquiring data for accession 7460/17926..
Acquiring data for accession 7461/17926..
Acquiring data for accession 7462/17926..
Acquiring data for accession 7463/17926..
Acquiring data for accession 7464/17926..
Acquiring data for accession 7465/17926..
Acquiring data for accession 7466/17926..
Acquiring data for accession 7467/17926..
Acquiring data for accession 7468/17926..
Acquiring data for accession 7469/17926..
Acquiring data for accession 7470/17926..
Acquiring data for accession 7471/17926..
Acquiring data for accession 7472/17926..
Acquiring data for accession 7473/17926..
Acquiring data for accession 7474/17926..
Acquiring data for accession 7475/17926..
Acquiring data for accession 7476/17926..
Acquiring data for accession 7477/17926..
Acquiring data for accession 7478/17926..
Acquiring data for accession 7479/17926..
Acquiring data for accession 7480/17926..
Acquiring data for accession 7481/17926..
Acquiring data for accession 7482/

Acquiring data for accession 8458/17926..
Acquiring data for accession 8459/17926..
Acquiring data for accession 8460/17926..
Acquiring data for accession 8461/17926..
Acquiring data for accession 8462/17926..
Acquiring data for accession 8463/17926..
Acquiring data for accession 8464/17926..
Acquiring data for accession 8465/17926..
Acquiring data for accession 8466/17926..
Acquiring data for accession 8467/17926..
Acquiring data for accession 8468/17926..
Acquiring data for accession 8469/17926..
Acquiring data for accession 8470/17926..
Acquiring data for accession 8471/17926..
Acquiring data for accession 8472/17926..
Acquiring data for accession 8473/17926..
Acquiring data for accession 8474/17926..
Acquiring data for accession 8475/17926..
Acquiring data for accession 8476/17926..
Acquiring data for accession 8477/17926..
Acquiring data for accession 8478/17926..
Acquiring data for accession 8479/17926..
Acquiring data for accession 8480/17926..
Acquiring data for accession 8481/

Acquiring data for accession 9461/17926..
Acquiring data for accession 9462/17926..
Acquiring data for accession 9463/17926..
Acquiring data for accession 9464/17926..
Acquiring data for accession 9465/17926..
Acquiring data for accession 9466/17926..
Acquiring data for accession 9467/17926..
Acquiring data for accession 9468/17926..
Acquiring data for accession 9469/17926..
Acquiring data for accession 9470/17926..
Acquiring data for accession 9471/17926..
Acquiring data for accession 9472/17926..
Acquiring data for accession 9473/17926..
Acquiring data for accession 9474/17926..
Acquiring data for accession 9475/17926..
Acquiring data for accession 9476/17926..
Acquiring data for accession 9477/17926..
Acquiring data for accession 9478/17926..
Acquiring data for accession 9479/17926..
Acquiring data for accession 9480/17926..
Acquiring data for accession 9481/17926..
Acquiring data for accession 9482/17926..
Acquiring data for accession 9483/17926..
Acquiring data for accession 9484/

Acquiring data for accession 10467/17926..
Acquiring data for accession 10468/17926..
Acquiring data for accession 10469/17926..
Acquiring data for accession 10470/17926..
Acquiring data for accession 10471/17926..
Acquiring data for accession 10472/17926..
Acquiring data for accession 10473/17926..
Acquiring data for accession 10474/17926..
Acquiring data for accession 10475/17926..
Acquiring data for accession 10476/17926..
Acquiring data for accession 10477/17926..
Acquiring data for accession 10478/17926..
Acquiring data for accession 10479/17926..
Acquiring data for accession 10480/17926..
Acquiring data for accession 10481/17926..
Acquiring data for accession 10482/17926..
Acquiring data for accession 10483/17926..
Acquiring data for accession 10484/17926..
Acquiring data for accession 10485/17926..
Acquiring data for accession 10486/17926..
Acquiring data for accession 10487/17926..
Acquiring data for accession 10488/17926..
Acquiring data for accession 10489/17926..
Acquiring d

Acquiring data for accession 11462/17926..
Acquiring data for accession 11463/17926..
Acquiring data for accession 11464/17926..
Acquiring data for accession 11465/17926..
Acquiring data for accession 11466/17926..
Acquiring data for accession 11467/17926..
Acquiring data for accession 11468/17926..
Acquiring data for accession 11469/17926..
Acquiring data for accession 11470/17926..
Acquiring data for accession 11471/17926..
Acquiring data for accession 11472/17926..
Acquiring data for accession 11473/17926..
Acquiring data for accession 11474/17926..
Acquiring data for accession 11475/17926..
Acquiring data for accession 11476/17926..
Acquiring data for accession 11477/17926..
Acquiring data for accession 11478/17926..
Acquiring data for accession 11479/17926..
Acquiring data for accession 11480/17926..
Acquiring data for accession 11481/17926..
Acquiring data for accession 11482/17926..
Acquiring data for accession 11483/17926..
Acquiring data for accession 11484/17926..
Acquiring d

Acquiring data for accession 12457/17926..
Acquiring data for accession 12458/17926..
Acquiring data for accession 12459/17926..
Acquiring data for accession 12460/17926..
Acquiring data for accession 12461/17926..
Acquiring data for accession 12462/17926..
Acquiring data for accession 12463/17926..
Acquiring data for accession 12464/17926..
Acquiring data for accession 12465/17926..
Acquiring data for accession 12466/17926..
Acquiring data for accession 12467/17926..
Acquiring data for accession 12468/17926..
Acquiring data for accession 12469/17926..
Acquiring data for accession 12470/17926..
Acquiring data for accession 12471/17926..
Acquiring data for accession 12472/17926..
Acquiring data for accession 12473/17926..
Acquiring data for accession 12474/17926..
Acquiring data for accession 12475/17926..
Acquiring data for accession 12476/17926..
Acquiring data for accession 12477/17926..
Acquiring data for accession 12478/17926..
Acquiring data for accession 12479/17926..
Acquiring d

Acquiring data for accession 13458/17926..
Acquiring data for accession 13459/17926..
Acquiring data for accession 13460/17926..
Acquiring data for accession 13461/17926..
Acquiring data for accession 13462/17926..
Acquiring data for accession 13463/17926..
Acquiring data for accession 13464/17926..
Acquiring data for accession 13465/17926..
Acquiring data for accession 13466/17926..
Acquiring data for accession 13467/17926..
Acquiring data for accession 13468/17926..
Acquiring data for accession 13469/17926..
Acquiring data for accession 13470/17926..
Acquiring data for accession 13471/17926..
Acquiring data for accession 13472/17926..
Acquiring data for accession 13473/17926..
Acquiring data for accession 13474/17926..
Acquiring data for accession 13475/17926..
Acquiring data for accession 13476/17926..
Acquiring data for accession 13477/17926..
Acquiring data for accession 13478/17926..
Acquiring data for accession 13479/17926..
Acquiring data for accession 13480/17926..
Acquiring d

Acquiring data for accession 14445/17926..
Acquiring data for accession 14446/17926..
Acquiring data for accession 14447/17926..
Acquiring data for accession 14448/17926..
Acquiring data for accession 14449/17926..
Acquiring data for accession 14450/17926..
Acquiring data for accession 14451/17926..
Acquiring data for accession 14452/17926..
Acquiring data for accession 14453/17926..
Acquiring data for accession 14454/17926..
Acquiring data for accession 14455/17926..
Acquiring data for accession 14456/17926..
Acquiring data for accession 14457/17926..
Acquiring data for accession 14458/17926..
Acquiring data for accession 14459/17926..
Acquiring data for accession 14460/17926..
Acquiring data for accession 14461/17926..
Acquiring data for accession 14462/17926..
Acquiring data for accession 14463/17926..
Acquiring data for accession 14464/17926..
Acquiring data for accession 14465/17926..
Acquiring data for accession 14466/17926..
Acquiring data for accession 14467/17926..
Acquiring d

Acquiring data for accession 15456/17926..
Acquiring data for accession 15457/17926..
Acquiring data for accession 15458/17926..
Acquiring data for accession 15459/17926..
Acquiring data for accession 15460/17926..
Acquiring data for accession 15461/17926..
Acquiring data for accession 15462/17926..
Acquiring data for accession 15463/17926..
Acquiring data for accession 15464/17926..
Acquiring data for accession 15465/17926..
Acquiring data for accession 15466/17926..
Acquiring data for accession 15467/17926..
Acquiring data for accession 15468/17926..
Acquiring data for accession 15469/17926..
Acquiring data for accession 15470/17926..
Acquiring data for accession 15471/17926..
Acquiring data for accession 15472/17926..
Acquiring data for accession 15473/17926..
Acquiring data for accession 15474/17926..
Acquiring data for accession 15475/17926..
Acquiring data for accession 15476/17926..
Acquiring data for accession 15477/17926..
Acquiring data for accession 15478/17926..
Acquiring d

Acquiring data for accession 16456/17926..
Acquiring data for accession 16457/17926..
Acquiring data for accession 16458/17926..
Acquiring data for accession 16459/17926..
Acquiring data for accession 16460/17926..
Acquiring data for accession 16461/17926..
Acquiring data for accession 16462/17926..
Acquiring data for accession 16463/17926..
Acquiring data for accession 16464/17926..
Acquiring data for accession 16465/17926..
Acquiring data for accession 16466/17926..
Acquiring data for accession 16467/17926..
Acquiring data for accession 16468/17926..
Acquiring data for accession 16469/17926..
Acquiring data for accession 16470/17926..
Acquiring data for accession 16471/17926..
Acquiring data for accession 16472/17926..
Acquiring data for accession 16473/17926..
Acquiring data for accession 16474/17926..
Acquiring data for accession 16475/17926..
Acquiring data for accession 16476/17926..
Acquiring data for accession 16477/17926..
Acquiring data for accession 16478/17926..
Acquiring d

[Parallel(n_jobs=1)]: Done 17926 out of 17926 | elapsed:   16.1s finished


In [None]:
print("Saving raw networks...")
# Each network is written to its own path, in the same order as before.
raw_outputs = [
    (kegg, kegg_network_path),
    (hprd, hprd_network_path),
    (pina2, pina2_network_path),
    (bioplex, bioplex_network_path),
    (innate_i, innate_i_network_path),
    (innate_c, innate_c_network_path),
]
for raw_network, raw_path in raw_outputs:
    save_network_to_path(raw_network, raw_path)

In [None]:
print("Building and saving processed networks...")
# The two phosphorylation labels from HPRD are held out for testing; every
# other HPRD label counts as a training label.
hprd_test_labels = ['dephosphorylation', 'phosphorylation']
hprd_train_labels = {
    label for label in hprd[LABEL] if label not in hprd_test_labels
}

# Training = KEGG plus HPRD with the test labels removed.
train_hprd = remove_labels(hprd, hprd_test_labels)
training = pd.concat([kegg, train_hprd], ignore_index=True)

# Testing = HPRD with the training labels removed, passed through
# remove_intersection against KEGG.
test_hprd = remove_labels(hprd, hprd_train_labels)
testing = remove_intersection(test_hprd, kegg)

# The full training set is assembled from the unprocessed frames above.
full_training = pd.concat([training, testing], ignore_index=True)

# The three labelled sets share the same processing options.
labelled_options = dict(
    drop_nan=True,
    allow_duplicates=False,
    allow_self_edges=True,
    exclude_labels=None,
    min_counts=5,
    merge=True,
)
testing = process_interactions(interactions=testing, **labelled_options)
training = process_interactions(interactions=training, **labelled_options)
full_training = process_interactions(
    interactions=full_training, **labelled_options
)

# Label values are comma-joined strings at this point; split them to get the
# set of individual labels, then save it.
labels = list(training[LABEL]) + list(testing[LABEL])
ptm_labels = {name for merged in labels for name in merged.split(',')}
save_ptm_labels(ptm_labels)

# The interactome is the concatenation of the four interactome networks,
# processed without a minimum label count.
interactome_networks = [bioplex, pina2, innate_i, innate_c]
interactome = process_interactions(
    interactions=pd.concat(interactome_networks, ignore_index=True),
    drop_nan=True,
    allow_duplicates=False,
    allow_self_edges=True,
    exclude_labels=None,
    min_counts=None,
    merge=True,
)

processed_outputs = [
    (interactome, interactome_network_path),
    (training, training_network_path),
    (testing, testing_network_path),
    (full_training, full_training_network_path),
]
for processed_network, processed_path in processed_outputs:
    save_network_to_path(processed_network, processed_path)